From 2cb41d8a2b93de6bec0007a776d697c66a76c42b Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Thu, 29 Jun 2023 12:23:39 +0900 Subject: [PATCH] Upgrade bundled mimalloc to v2.1.2 --- third-party/mimalloc/CMakeLists.txt | 120 +- third-party/mimalloc/azure-pipelines.yml | 18 +- .../mimalloc/bin/mimalloc-redirect.dll | Bin 56832 -> 68096 bytes .../mimalloc/bin/mimalloc-redirect.lib | Bin 2874 -> 2874 bytes .../mimalloc/bin/mimalloc-redirect32.dll | Bin 40448 -> 41984 bytes .../mimalloc/bin/mimalloc-redirect32.lib | Bin 2928 -> 2928 bytes .../cmake/mimalloc-config-version.cmake | 4 +- third-party/mimalloc/doc/doxyfile | 2 +- third-party/mimalloc/doc/mimalloc-doc.h | 10 +- .../ide/vs2017/mimalloc-override-test.vcxproj | 4 +- .../ide/vs2017/mimalloc-override.vcxproj | 13 +- .../ide/vs2017/mimalloc-test-stress.vcxproj | 4 +- .../mimalloc/ide/vs2017/mimalloc-test.vcxproj | 6 +- third-party/mimalloc/ide/vs2017/mimalloc.sln | Bin 4422 -> 4492 bytes .../mimalloc/ide/vs2017/mimalloc.vcxproj | 23 +- .../ide/vs2019/mimalloc-override.vcxproj | 11 +- .../mimalloc/ide/vs2019/mimalloc.vcxproj | 25 +- .../ide/vs2022/mimalloc-override.vcxproj | 23 +- .../ide/vs2022/mimalloc-test-api.vcxproj | 7 + .../ide/vs2022/mimalloc-test-stress.vcxproj | 4 +- .../mimalloc/ide/vs2022/mimalloc.vcxproj | 36 +- third-party/mimalloc/include/mimalloc-track.h | 62 - third-party/mimalloc/include/mimalloc.h | 82 +- .../{mimalloc-atomic.h => mimalloc/atomic.h} | 63 +- .../internal.h} | 315 ++-- third-party/mimalloc/include/mimalloc/prim.h | 323 ++++ third-party/mimalloc/include/mimalloc/track.h | 147 ++ .../{mimalloc-types.h => mimalloc/types.h} | 107 +- third-party/mimalloc/readme.md | 233 +-- third-party/mimalloc/src/alloc-aligned.c | 92 +- third-party/mimalloc/src/alloc-override.c | 6 +- third-party/mimalloc/src/alloc-posix.c | 7 +- third-party/mimalloc/src/alloc.c | 219 +-- third-party/mimalloc/src/arena.c | 746 ++++++--- third-party/mimalloc/src/bitmap.c | 102 +- third-party/mimalloc/src/bitmap.h | 6 +- third-party/mimalloc/src/heap.c | 68 +- third-party/mimalloc/src/init.c | 189 ++- third-party/mimalloc/src/options.c | 269 ++-- third-party/mimalloc/src/os.c | 1334 ++++------------- third-party/mimalloc/src/page.c | 61 +- .../osx/alloc-override-zone.c} | 8 +- third-party/mimalloc/src/prim/osx/prim.c | 9 + third-party/mimalloc/src/prim/prim.c | 24 + third-party/mimalloc/src/prim/readme.md | 9 + third-party/mimalloc/src/prim/unix/prim.c | 859 +++++++++++ third-party/mimalloc/src/prim/wasi/prim.c | 275 ++++ .../src/prim/windows/etw-mimalloc.wprp | 61 + third-party/mimalloc/src/prim/windows/etw.h | 905 +++++++++++ third-party/mimalloc/src/prim/windows/etw.man | Bin 0 -> 3926 bytes third-party/mimalloc/src/prim/windows/prim.c | 622 ++++++++ .../mimalloc/src/prim/windows/readme.md | 17 + third-party/mimalloc/src/random.c | 162 +- third-party/mimalloc/src/region.c | 516 ------- third-party/mimalloc/src/segment-cache.c | 409 ----- third-party/mimalloc/src/segment-map.c | 153 ++ third-party/mimalloc/src/segment.c | 336 ++--- third-party/mimalloc/src/static.c | 29 +- third-party/mimalloc/src/stats.c | 261 +--- .../mimalloc/test/main-override-static.c | 9 + third-party/mimalloc/test/main-override.cpp | 69 +- third-party/mimalloc/test/test-api-fill.c | 11 +- third-party/mimalloc/test/test-api.c | 45 +- third-party/mimalloc/test/test-stress.c | 28 +- third-party/mimalloc/test/test-wrong.c | 26 +- 65 files changed, 5786 insertions(+), 3798 deletions(-) delete mode 100644 third-party/mimalloc/include/mimalloc-track.h rename third-party/mimalloc/include/{mimalloc-atomic.h => mimalloc/atomic.h} (87%) rename third-party/mimalloc/include/{mimalloc-internal.h => mimalloc/internal.h} (74%) create mode 100644 third-party/mimalloc/include/mimalloc/prim.h create mode 100644 third-party/mimalloc/include/mimalloc/track.h rename third-party/mimalloc/include/{mimalloc-types.h => mimalloc/types.h} (85%) rename third-party/mimalloc/src/{alloc-override-osx.c => prim/osx/alloc-override-zone.c} (98%) create mode 100644 third-party/mimalloc/src/prim/osx/prim.c create mode 100644 third-party/mimalloc/src/prim/prim.c create mode 100644 third-party/mimalloc/src/prim/readme.md create mode 100644 third-party/mimalloc/src/prim/unix/prim.c create mode 100644 third-party/mimalloc/src/prim/wasi/prim.c create mode 100644 third-party/mimalloc/src/prim/windows/etw-mimalloc.wprp create mode 100644 third-party/mimalloc/src/prim/windows/etw.h create mode 100644 third-party/mimalloc/src/prim/windows/etw.man create mode 100644 third-party/mimalloc/src/prim/windows/prim.c create mode 100644 third-party/mimalloc/src/prim/windows/readme.md delete mode 100644 third-party/mimalloc/src/region.c delete mode 100644 third-party/mimalloc/src/segment-cache.c create mode 100644 third-party/mimalloc/src/segment-map.c diff --git a/third-party/mimalloc/CMakeLists.txt b/third-party/mimalloc/CMakeLists.txt index 74c1f291..2bcd1ef7 100644 --- a/third-party/mimalloc/CMakeLists.txt +++ b/third-party/mimalloc/CMakeLists.txt @@ -6,12 +6,13 @@ set(CMAKE_CXX_STANDARD 17) option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode (expensive)" OFF) -option(MI_PADDING "Enable padding to detect heap block overflow (used only in DEBUG mode or with Valgrind)" ON) +option(MI_PADDING "Enable padding to detect heap block overflow (always on in DEBUG or SECURE mode, or with Valgrind/ASAN)" OFF) option(MI_OVERRIDE "Override the standard malloc interface (e.g. define entry points for malloc() etc)" ON) option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF) option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) -option(MI_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) -option(MI_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) +option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) +option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) +option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) @@ -24,7 +25,8 @@ option(MI_BUILD_OBJECT "Build object library" ON) option(MI_BUILD_TESTS "Build test executables" ON) option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF) option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF) -option(MI_SKIP_COLLECT_ON_EXIT, "Skip collecting memory on program exit" OFF) +option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF) +option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF) # deprecated options option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) @@ -36,21 +38,24 @@ include(GNUInstallDirs) include("cmake/mimalloc-config-version.cmake") set(mi_sources - src/stats.c - src/random.c - src/os.c - src/bitmap.c - src/arena.c - src/segment-cache.c - src/segment.c - src/page.c src/alloc.c src/alloc-aligned.c src/alloc-posix.c + src/arena.c + src/bitmap.c src/heap.c + src/init.c src/options.c - src/init.c) + src/os.c + src/page.c + src/random.c + src/segment.c + src/segment-map.c + src/stats.c + src/prim/prim.c) +set(mi_cflags "") +set(mi_libraries "") # ----------------------------------------------------------------------------- # Convenience: set default build type depending on the build directory @@ -87,7 +92,7 @@ if(MI_OVERRIDE) if(MI_OSX_ZONE) # use zone's on macOS message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") - list(APPEND mi_sources src/alloc-override-osx.c) + list(APPEND mi_sources src/prim/osx/alloc-override-zone.c) list(APPEND mi_defines MI_OSX_ZONE=1) if (NOT MI_OSX_INTERPOSE) message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)") @@ -122,43 +127,60 @@ endif() if(MI_SECURE) message(STATUS "Set full secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=4) - #if (MI_VALGRIND) - # message(WARNING "Secure mode is a bit weakened when compiling with Valgrind support as buffer overflow detection is no longer byte-precise (if running without valgrind)") - #endif() + list(APPEND mi_defines MI_SECURE=4) endif() -if(MI_VALGRIND) +if(MI_TRACK_VALGRIND) CHECK_INCLUDE_FILES("valgrind/valgrind.h;valgrind/memcheck.h" MI_HAS_VALGRINDH) if (NOT MI_HAS_VALGRINDH) - set(MI_VALGRIND OFF) + set(MI_TRACK_VALGRIND OFF) message(WARNING "Cannot find the 'valgrind/valgrind.h' and 'valgrind/memcheck.h' -- install valgrind first") - message(STATUS "Compile **without** Valgrind support (MI_VALGRIND=OFF)") + message(STATUS "Compile **without** Valgrind support (MI_TRACK_VALGRIND=OFF)") else() - message(STATUS "Compile with Valgrind support (MI_VALGRIND=ON)") - list(APPEND mi_defines MI_VALGRIND=1) + message(STATUS "Compile with Valgrind support (MI_TRACK_VALGRIND=ON)") + list(APPEND mi_defines MI_TRACK_VALGRIND=1) endif() endif() -if(MI_ASAN) - if (MI_VALGRIND) - set(MI_ASAN OFF) - message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_ASAN=OFF)") - else() +if(MI_TRACK_ASAN) + if (APPLE AND MI_OVERRIDE) + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support on macOS if MI_OVERRIDE is ON (MI_TRACK_ASAN=OFF)") + endif() + if (MI_TRACK_VALGRIND) + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_TRACK_ASAN=OFF)") + endif() + if(MI_TRACK_ASAN) CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH) if (NOT MI_HAS_ASANH) - set(MI_ASAN OFF) + set(MI_TRACK_ASAN OFF) message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first") - message(STATUS "Compile **without** address sanitizer support (MI_ASAN=OFF)") + message(STATUS "Compile **without** address sanitizer support (MI_TRACK_ASAN=OFF)") else() - message(STATUS "Compile with address sanitizer support (MI_ASAN=ON)") - list(APPEND mi_defines MI_ASAN=1) + message(STATUS "Compile with address sanitizer support (MI_TRACK_ASAN=ON)") + list(APPEND mi_defines MI_TRACK_ASAN=1) list(APPEND mi_cflags -fsanitize=address) - list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=address) + list(APPEND mi_libraries -fsanitize=address) endif() endif() endif() +if(MI_TRACK_ETW) + if(NOT WIN32) + set(MI_TRACK_ETW OFF) + message(WARNING "Can only enable ETW support on Windows (MI_TRACK_ETW=OFF)") + endif() + if (MI_TRACK_VALGRIND OR MI_TRACK_ASAN) + set(MI_TRACK_ETW OFF) + message(WARNING "Cannot enable ETW support with also Valgrind or ASAN support enabled (MI_TRACK_ETW=OFF)") + endif() + if(MI_TRACK_ETW) + message(STATUS "Compile with Windows event tracing support (MI_TRACK_ETW=ON)") + list(APPEND mi_defines MI_TRACK_ETW=1) + endif() +endif() + if(MI_SEE_ASM) message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)") list(APPEND mi_cflags -save-temps) @@ -179,9 +201,14 @@ if(MI_DEBUG_FULL) list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() -if(NOT MI_PADDING) - message(STATUS "Disable padding of heap blocks in debug mode (MI_PADDING=OFF)") +if(MI_NO_PADDING) + message(STATUS "Suppress any padding of heap blocks (MI_NO_PADDING=ON)") list(APPEND mi_defines MI_PADDING=0) +else() + if(MI_PADDING) + message(STATUS "Enable explicit padding of heap blocks (MI_PADDING=ON)") + list(APPEND mi_defines MI_PADDING=1) + endif() endif() if(MI_XMALLOC) @@ -199,7 +226,7 @@ if(MI_DEBUG_TSAN) message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)") list(APPEND mi_defines MI_TSAN=1) list(APPEND mi_cflags -fsanitize=thread -g -O1) - list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread) + list(APPEND mi_libraries -fsanitize=thread) else() message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)") endif() @@ -210,7 +237,7 @@ if(MI_DEBUG_UBSAN) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)") list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined) - list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=undefined) + list(APPEND mi_libraries -fsanitize=undefined) if (NOT MI_USE_CXX) message(STATUS "(switch to use C++ due to MI_DEBUG_UBSAN)") set(MI_USE_CXX "ON") @@ -235,6 +262,11 @@ if(MI_USE_CXX) endif() endif() +if(CMAKE_SYSTEM_NAME MATCHES "Haiku") + SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib) + SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers) + endif() + # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) @@ -314,10 +346,10 @@ set(mi_basename "mimalloc") if(MI_SECURE) set(mi_basename "${mi_basename}-secure") endif() -if(MI_VALGRIND) +if(MI_TRACK_VALGRIND) set(mi_basename "${mi_basename}-valgrind") endif() -if(MI_ASAN) +if(MI_TRACK_ASAN) set(mi_basename "${mi_basename}-asan") endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) @@ -430,12 +462,20 @@ if (MI_BUILD_OBJECT) $ ) + # Copy the generated object file (`static.o`) to the output directory (as `mimalloc.o`) + if(NOT WIN32) + set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}") + set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}") + add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}") + add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) + endif() + # the following seems to lead to cmake warnings/errors on some systems, disable for now :-( # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_objdir}) # the FILES expression can also be: $ # but that fails cmake versions less than 3.10 so we leave it as is for now - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION} + install(FILES ${mimalloc-obj-static} DESTINATION ${mi_install_objdir} RENAME ${mi_basename}${CMAKE_C_OUTPUT_EXTENSION} ) endif() diff --git a/third-party/mimalloc/azure-pipelines.yml b/third-party/mimalloc/azure-pipelines.yml index e3639fef..0247c76f 100644 --- a/third-party/mimalloc/azure-pipelines.yml +++ b/third-party/mimalloc/azure-pipelines.yml @@ -98,6 +98,22 @@ jobs: CXX: clang++ BuildType: debug-clang-cxx cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON + Debug ASAN Clang: + CC: clang + CXX: clang++ + BuildType: debug-asan-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_TRACK_ASAN=ON + Debug UBSAN Clang: + CC: clang + CXX: clang++ + BuildType: debug-ubsan-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_DEBUG_UBSAN=ON + Debug TSAN Clang++: + CC: clang + CXX: clang++ + BuildType: debug-tsan-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON + steps: - task: CMake@1 inputs: @@ -105,7 +121,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 120 + - script: ctest --verbose --timeout 180 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) diff --git a/third-party/mimalloc/bin/mimalloc-redirect.dll b/third-party/mimalloc/bin/mimalloc-redirect.dll index 83b6bd4f379ba4d9c0fcdf6b846c3b60867a31d1..a3a3591ff9b07edada16ad83ca1391963a126b2d 100644 GIT binary patch delta 10075 zcmb_heOQy%**{Mt5S5SkfkqI0kf5=kM(~6BQDbZCQ^hv6w5CQ`?D1V$*KPqtBOtJ4v$0kiowez7Tl88j`|8-%t`Ebm(X}huwY$=bHKKFf}bME^-pLy`SG01Z;!RwI@eACnW?w|h_jM3SePflJ{>YoIE2VB|k*OL(i z|90|M3JwdZculE4R=(E3+8)R@9O%|%PdfFRwS@mg$pmqzCN-O^gJ7PZNUe0RU=pIV zK$tbtT0_W}5c@*{F)?jKAoCJ{odW4(82jrM$nRKl4hgM2d$&GgmJT$4FT%`C4(HG+x{$sSf;Kktt^FX-Wc@i1>*r#(nmBIsU?YEF+R}& zKc(9>;|F6f!o2w>{k~>=0emFPh5&E-v#@Trm2KPtPxcOi0zNCP5{1Agn$dzgF`JN{ z>w{6eF%dOK@FbEl!4!NyEuz21VmJ)g105xClOY7&Zx ztk*r`P+UTM^oQwhP`h@TX5S0+fVN=HjxPz3>QZ*@7RZGJ9g(W%Zw5;$4MCJddOk#? z`jiqFrJYF-_Jyhv3PZg~QoVNJmw_*}i#5gn+zC4mAr-SA?7hW0OEyY%B}P#ibXltV znN*kldzhr17+jOAm+G_;RHXipRI9xJuJ;wR`iasi<-|_v)xD^hu#*}GzaTd4fL&f# zsyi#yE;5>>x+8Zu` z6z83LxGqv$ul;==EgdpWbGnc28Sj-lplj7e0(qTBVGJH0?x*QCBlk2WNGt zs?L7(4RH0^Pd;azr$kX>*`k+CBdK%f!Hmkj{(j~s21UV!m!!I4qeV^(j7$EDAXHLq z>NMdIIx|%iei3*w^(~EN*bSOBY`iA8jovqGujYlVf%C(Ds?n5f3Dk}pE{tgWSQ)5k zQ*2t!yD->z8cP4WGJ6khqSr@lNF26_5P23Y9X)qwbR*c$uJ!lt3(D|J(3XqL#5O^kYLs#-$!Yo?9495TyEpMz(q|@80L3 z&Z0ZVEKPitvHA4knBv4N#_pt98A}sC*Z{VcZqAsKVf&_VS9>2*ze~@-ay?@|pd&ID zq!fTf=X06lSM=G;iNZztt<0RKz9@%auZ&FAYtxub?OtPp;)qtyt3I_lhFAS+bpo$; zs?|bX4XV|78>nXNq|_1LU>V(SjAGv9#dP-AtSrI|A*z9){MV1z6l(W-FwxcX_j>5o zv7?7qFkyZA@$H11Wcxvm(3i*NxI37qzl83BHV5`X@8s%TMyIA~6vFyD(*>Y=dy$bH zVWc-1(kU29eihQR6WZ%;f$R%arb0Z6i6=p<7};3Or5asQjYh+lFj!KV0!RD6^rxQ` z+=2O7ZwVvTdfxZ+RxVU~!)oku1 zP8>9@vtTn9!`oVc*Wo=&Tihw^)qbwe}8{rNUA<>`{||FA8pGv(ojMcjbuAC zXA9_@2~#yM70?Y69?)zrpdAxl&~#0w({kR?{PBHyE61q$`2B!rd_mAWQcr!kOEhWI z=(XIuQSwwmit7*r+d$Q%Y(cXppEj7* zYexNs3X=~OuBa6V%Mns9lHJ;#B$m7Sl)lxNJkzh)x`p>reTmV=;*$E)C)NhSlb;lF zQdhy^#Cxhm5lc!DsnYiBW9?Zo<*V<}g?Tv>f3*?>#sXpvw@*8{VVe z&O2#3XJTy^8EthI2ks^vU&`8Hypdh$V#G@Ne12w9B5Sz*Dy`4|So6}Pz^*CZ66kl5 za!n@b-UMUCc&RP{cSSL~E7lXV$sR&`9Vxse_nzS?k7lVS;2ZtJCPu$on_o1WbcAXExPGV?EMkz1%sHV zK5_U2Ny45rkv9a}|GVW{U-`qjL#ZnVLDRE%V+_Y>5nijB(3Vq)vS zvJzPW6{uSH&A#8l?2+XB&uP{_9)uAM*y1S3ffzrsX1R*cS{!|+!DRx+)POY*; zsdvb{7pILWEQ)(s?J|=pO>Yh}G-KyjSQqBvx`o(r5mi_xndRp$Q^9Pc(bUmyL^zT~ zYTJbYxwEPoW%sj0 z)Jn>llX(A<5?*hr+I3wKwxt>`Zy=%pvZaixx^Ge4$4x5LEr%*BZ;$l%(`efL)NIbl zyH5$L8f$tO%QON-#PXlvUT@CAAl_P}@QNz?0sSv?0kxZk*+ur`V;d2HSiru{%$Siy zZu_Njc%;XceXsm zn|Q^suACA(l@LB6dTHreIY@JOK?1@lH8b$Mv=Uoz?zVl zFi{b*EV-oGkwQ$2c~x8YGKrfFkroq0ds-l4j_P)~)QGEPHU5P`H6nJ@C_dKw^G+Y{ z8cI3OzvlYj*hn$g-Jv0h2)^IIj_Jw6_-f*-T_#esBE}m?c?R`$eHHCndBF%vda4kpJ8C zD$H{*5XLPpJv6iPa|121F9i;%Y>?JmgF^{G{s{=y)Bs@$HR1IY7T z`RPHM8L>2AiK=OcnQ3EbR8}2Zbn5c7V`1OyUBF)miqFavO#8P(RLk1CMu~@wvqcU2 z4_1V2zlc+)^Bz0i<1H^1+@rbiDa|i~$-5h1IpC4-M{BMKw`_2r0wfMyR;K~jeg+1P};sQ z>j$}(itB&})wegiWP{6lhCqplR8BPqhWa07jqBEej9b|pAUvT6!aJlaZw{%pM3|1( z;O*v2fh|)aSnrl24JCq&7SG7Ybg(=q@iW(?Iz28O=Ogf_^liFlMn=jAJ9|x6>6J*R ze}{r|q+rXMBkjr(WVa zSd*|tx*@XpWY+lxhLvA=9GsGpX8jfIw--Y~#g8UKyPkQfp`oe6FU=8%o zo*=kw*jP@oD~UnEuB3uc&r&wc$h0#yF?OjQGs7lNIi2vTX19D&r_dwv6=7n--z(`A z;ZO+Cn}by0ea70bH$c@0g=coPk{(SzM0&8Ei+Nh%|Z*-G<8nDfSNWHGQo%<4ZdNHv3D1w z^WtsG2ig|K+a~a~_p=eg^0(q`@jD^ipqjajR(Z-m#z&f{}Z}kn?pMG7U zyO%l)(!W&hkm#37uWCMOr|%X^!eTmZStiOk%gPFluNFvuZiiC-P$>gS`4gplO({<* zQ1DDHE1-hto zq%bJ(RH;Rim@1IeCPH6YyJqGHfs9y#FPeh|G8mByDFR8E0AwaHWO`=zO)fu2Q`e0b zn&{MZIfmgkFRSME+1*q!C%Kj}A@5>mCfbj`*xv2u&( zEUjGY5OXU;Q!ZV$^(A_3>%nQ8wydk#ScZ-()IR5kYqzW`gGZ&?Dr2KCU*@QlfP33; zVPvJFOx#qysdOX0F^e0^*K8}@wuAaBa5e`nR4fswevlaO1+3Z>&!@!0(%!%jiI_o! zm*0chp5o`4AlDRXoln39OQTRo33FvvTPpe5tv2&FtrV_f!w!w*XstZHw`p3agpVYrrmcmwQ;>U-ZaI* zMJ|TQoaTDD$j>pvF&@h^s%eQ}d|7o(&(Xj!W_jDpn7GQq(RPO^VF|+cm>CyW)o}E2 zjI2^e>*V7cA_7 ztT0z~-C;sj#!aFz)hnuRF`q&Mz3910yu-v2?ItqTGuN0S|)=;6uPE^Py*Q z;AH&U-UQwb41jdtBfvuokP-0Rz{M6!AF~0vuc8o9Aq(+Udl4c8AAJy=;%A=)yaiYZ za)S2)KLgc(*W*X^wTF>j@SRIAgT>H;_W+++hN%JX1CGWwB`cEyZ-Sx-Mzj)gA3pEd z_hSCNU_6gu?x6@Vd>nIw2G$Znrs3e>lFAl$Oj&eN%A?!51u?pNIs|& zyal)(6a?=CdO;!ZKHxb}7<>eH<4Mea1QoIZa|7xEZw9Uab%VD9_k(2cKHz0g6nr-@ z*#>99;~htqgUCME0}p_7;C;X#NDm&50rDr10X%MdGWIF718;VNDFvBO=>&cRGK0tc zP7I(T@OVLx`#~1)cp{KpAPGD!0&)Um1&`yLTm#v_SRC23m#`Nc>~k{9%mo90rG*z0Zj5ie(*SH z$yQJ&cpQ4<1SrVtfxM)qX0+;&beGN7;x-NR_j`o@2U@ za3DMV|Ev4-H3^SZZhUxidF8^*73GgtZY$rsp8T5DUmh=fMvq^fE?l60xI9LfL2q83 zDP#wxUCGdBFAHQhy9g!-0q46165L*74$fISY#rVXFg|?x0)%}LpRUo;BsF=Of`>wf z`VN_!i<+Iy{v(}7tJG?1?Q9FS zh1w!*rgn3CQM=Uc>+rJ_$b6yFnm-46gWVXfPe*YEg;e^hyhxx3MN?4 z*4o{aVSVboUT;cEo0gUaqZsh68&@dB1vjEst0-o7bvL!T#_Rgr`oZ@8=A22;?(Tm# z&-2djH}Ct-%=^yFnKMb8N8+9T73Xrw(;BJ=9*lh9kIK!BpP$~L)Iay%4}AAf|LLDA z_{Hg+3Xbus_@z=`UB0D^wQa~YG=#L-e>(r+Cc-}vGEo{SreuGNV%fg3gE^A6-k9}DYt_tqT`ZXsk#91MtAY4ri|eJQ31j#F8L^#5T5 ziXC|<{uk!{F&QX+osOcBvF2n@2n*iA&%&tyivlO|C7@HOY0w zxpsLD*q%x9fuhi2dU?cMqv{sNoa%LZ&rx&IjT!5ZHXGk)t<$WA-8${J%j$yGdQH(e zx-02MG4mX4NP1>*ZLu2nZgF4^V%U3@wbDHV?Wj|0a*?YYNqFCPf66&g z-1REWADt!kZl-sPJ}6!-^1ULR7_&xT0cpQy0tWR(J(n}sMP{q^{UlHyy{o0)(v#bgvlv?DrGbJ zbZ!Le9$rnCPRPvEGe@Xu;28eqFKh`lpIfoe)kU9J>46DpW2%|5K6O9Zo@UQO_zLZq zkmLBxde9GG>(NxhUl^EDy)V};R^EWL;?QaMRoKoK=dz=V3?xHa#Izo0N1f8FLX$&* ziWI1qFm(acic<-zu_56!U3E%BEJjdLp+lg9AVaC+1*`A&%yYun2R=n~*k&fVRzQ<^ z%LPJDuF9a6tVHd9V>=XH2+yV~vkb{Ep`yk=_ES1D`5qd~N=#nNlp;&2`4dHHkt|}% z7W%lfNQ_LU=cWG?w@s&I*$>m~oNeN!Y4oX_8R9L|=$V{F;tkX2P|jmw)l}-py&z5x z(dBu$qB-QV=RGEf&;N`@CM_4cC)4?pr`=G4lgV0d%{5!=@n{rYF1#?5+J6r`Qt9Ez zHZgHB{bcgTV&O-=bH;2z%+90XDH!bvT{ZPcVcQ0Q;Hi=@yAs*2=}$!dm^Eo7g5L81 zTd!g~X!UDyOV~8XL#cZ=_zct52|2TGhllXK>S31A-b^Yr{fAh4c$&l6@6(3qIg@^{ zLV->!Z2mRhMQiKwQhkyt_!Nl zI$0a;d53nd%BCL`raH`|VjRoFG`x>((H;$CuQ;$?$u?!Lw7{Q&#bv3Ok;?pKi0INg zm=WwtvuaV!Rm_H5D`Qk8s-MKcc2?ag9E82$(P|vQ_!Jr{%rVYW+^aFEqtyY}E(|YX zyInfN-mFSC;s2pCZZ4qLM(Q0hi^|3$x2f4{`mZRS#kn@Q&Z1S$Pu4HaPb*gR`r*$) zW7|ND5zH)aVO6{i%oUr?sL5D!ZQ*5q9~!#;-a;?No_K+29EfOpKwup+RbJFdR5a(~ zu`dXTDiahX%d14L9VZOTIiukj*}~y#|73)I1SnO9HOeCQ#lzpz33FG9PrRip>o4Z!jQF>=_%5X{&Apoj<4020;7{H5OG(96Hu)AzCpHHjy7R>XZX9s= ziI&Yksa(58upoGPa(>#vQfuA8Tqkpc25(0_Hl?@({lEJAp`m_uQBw^{gaT)hxsEcVG~kHXPr#CMB8sGEC|8OIw=e1pcB;*54`I5HlCJnF7zjb@I(L(>x@e@ z#o`o9zF%4<3J!I;_a(AfkKdPA5a>6vWyCa>Mkn7U*JJj7^x!C`N}^Zj*YOoMV)&9d zvc8fj*|tG%C8RQo&9xD`!9Xg{Ul zOET?Qm_74z_$mYN@RV?hF_1JYnl~;C4S5CXPTEABixwx|bt?j>x z5eN&hR`X^)m5V3MP@f`p)UZdW!o(gQ({*KBFD{>0EacQ17Js1670(d|34O0P)9^2x z8ffz3zsx$o#{Q;94ZKvz8F%8|@CBw)vTS8bYa+MF5k3My2&IJ zmO^82>V*ipWDGOgSv`5O1hg3j`q@_n3?Fho~{8r$tE^XbaCttw^+xlX*K+G26wn)t8;9E+An9Z~@8cPYcRmE)fn9ap)-k8lB zvt8u2K5iS(eZn;nuK7IotU9cgqk$tLQ9c2i8pPXq)49dW(K5`_8S}JpO9{uaVV;qg zr;}S;9Noh_gE7w+xh23cILwo*;&JLi+!A4k1s~99^Y7%CbQ*H~g28`qRZHF3dX-#L>*r!adzFPaC(CaRIr@ir#b!9Wms>&{2ZwpuSbv#@ zUNur9M+--34U4P3`iicEYwQe_NNc#p&C$;>8mXQ2oE?g4FQ_9(9HU+?Wqg@yONLoP ze66lzmT^nfFb~^mHJ&S(E^cuT^U%jUyxbBP=HZHZNX{Pfl)0sZV;M&a?=uqh%ye=~Bgg0**_2ff;;v+Sxuut5n4^n%z}>6t z4(;#Mn#9q_G1^~jcVwEm#m2FWV{F~Com|t%(a$l+M+~wN$M8%f0lz1&41D@B(l2Ir~ZzUUYXLpo;hsfYT;ID4L$p9WnBV>aCc5o-~ zYe-}_`WjEjE08dF1o%KYemjAW3;2FIzL@th11jHS5HbjEnLx<1kPx^R_!-241|$;$ zK#bsaV0ShlCE$UHgy`@Kxfxsnu9$)Vz#~%$NuNeY6}Snw6jBOq13nKa0}o8Y4+AOF z(IImJ4nX?ArF=q~AwlpEaNSJ&i@_QIb8aG}5!?xU1rh=e0#_6eQUh)WepQGJ!Aox@ z3qLb}JAp6Z*L$x675y!Q^qA2(xMdC@Ef6<&0O-fnR$qbaado~ELcr}n z?R?A@d`!S25D9uO@Z}=dm_NQg6hRE&HsC>s4DJRd;oDg$xC!WZaseDrk#M1`!&lNW z@Cfite2+DO>+#jO0$QUrSaatyQri9qiLI;>bG zaNOI-A0c*d+|x+f?O2)$tUoShWGTc611^4KAEX8x7ccTM#Kjr|Ly$(+7&zt*bPkT2 z6j=!IfaAtRc0;`2xJ8jRh#wrcCh{R903HEm#+6wAAS$>9ktRqVYXJNM zB*Yp3wKil7j(005fJDIY_9QzXgWxz7$VmvPgsdXu7Kj!c?@Lk!(SzfCOb$Z~;CSPb zix3Gs|LZt*a=gg#D#t;NV-BnSGdNlgvzN=(u|f7%9{=3WY%ZZ8JN5so`_z^3t1EUa z+gV<*VCSClJ1cgV@7zW%)B0;!!bRG3?I!%|`SG>!!bJM*wOK-fZ|2|AMNN}H_6ztj zH&J-mE(i_r4ky+W5$f9w?Y4GphrUDZAjh@GEyo9slTLGIq;s&-)D`ZEbQw>CPJ~ZL z-F@AmZZvRV!vPHLpu5klZ|ZFdHffIrj`kiU&Hm;<^I)^L#orQX@f`CW3m<=EM@x^In=Q?9b4jzQ#oS_Pky~sn#$%>q=3|y)@-eB^ z*lKDuw^~{aZBm=5&C({f={*LI(PQ>lJlb}ByVP!KH#^!%hqhx-$=Pr`qGYY_3@dqS zyFyClgI(lAAF>ZBxkp&`UhEA(uabS(9YFqlO@8DbJnBXMz0IB$Z%d%XeT?PrKNdLF z*vjU>+v;z1wXu2dw0YZVJTA{dx5wG8PKdj`s)J2QV~72?IwP**Wu5Ar)O411m0?aw ZPOv$#pQt)v>t-`k)@|?h-L%Uh{vRbqS3&>) diff --git a/third-party/mimalloc/bin/mimalloc-redirect.lib b/third-party/mimalloc/bin/mimalloc-redirect.lib index 059fb8702a71277ea4eb9995e0826eb53f88f6e5..de128bb9483df71a613fbfd3099fbba2f75f878d 100644 GIT binary patch delta 100 zcmdlbwo7b-1PjaM`aORp%dqTVQDo?=o}9&@Joz_^15;o1W?NQWMwavsuD>Sdu}wh8 lc5?6lWhdLRN=^1)7GiwV{%mH*$0`0RTALBn$um delta 100 zcmdlbwo7b-1PjZx_CHrA%dqTVk!9FfIys9&dGc=-2d15+n{8Qj8Cg2T_q>^$$2I{W l+sVNLl$~tLDmB@cJ(h(_eck`b>)7GiwV{%mH*$0`0RSkuBMJZj diff --git a/third-party/mimalloc/bin/mimalloc-redirect32.dll b/third-party/mimalloc/bin/mimalloc-redirect32.dll index 2892d459fc5302190558bb941046049b87488271..522723e5017b71b458afb4152bd6c7eec55e9e62 100644 GIT binary patch delta 5079 zcmZu#4NzOxm45G8n1?OH!xld_1Pg;>FvQU>=wC|AbJ-Ma1-2Tx_D*n(Q(&kx4_{nz&<*63#Nw>LhJ`QJ_~;eGh> zXE)~L_q`hn@;iSb{a%szmX2dSYJ0J~_~J6ZL-k)D9%C|p%M4e(nawR{ngHA*9Q%Au zg2S?nVN7J6t0gzsJ)2(oIuHTWL_T9{sy$fU_vd05*S_|n{-eMi`v8w_kOTBh;^6kM zm&lXc9`ZA8D|wYGWPcbL=X%+~r)nAI?zeLAm@2^IlPo+o?Ta)hx>?OoEi2p_9K4Pl z19r+V{%to{`0Jc!^4@x$3eHA*6#8f^Yl1T5Vt(mAZ0u((vhEl8 zguKGrO=(f`TyOwBgYK`w1v27IhQ;^>5jb@iJV&KrZ+wF{c~1N)h(i-0=eUuoGC1KW zE>`xYEl7RRMBhj4iD7fD1q&jyPhm5?L`!2VcdRx;euXut) z6{jL?TSX3fksqt}u$<;VWUgQr%Z`%I3-?u29YFkc`egj7ym5Ex?7Mzt*vq#_h5xy_ z8kkb}MoB}Fk*hpFzE`BBufH!UtTH{z2-4JwtdKXJRSEO6QGel@qb*^#vU#Of?r zD9Gkh@n8OYR{rybSK#sN_FdTj+V&^#`;Hmo+kc8`WYdXJuR*nun2osT6_(}wrfb=Qo=A{_j0(|aOOW4hG+aa)8bNVMtP~7DvL{= z3m2xoT3u}pdlM}_KlyyuPq;HXBCl5LW<4wXw3tHSF|nMmm_qG@P*m=e)vfH;2Hsn( z@uoub-cf7Gym9`_A7-KIFF;Ch?Rn#zeVFoDFpXu!-U^FKlvyc>R_4~?26Dn~KJc89 z>EU7zz#{yp;GxJ(6%GPU>5=w-LX)nf*H1jz8wX&N7wD%^V9P+Sf|@=fdl8t^T*$Pio*(23sm%tPsD>La=c3yw@CF4y@*ldsol?tN+2Um<&i+Au)+I4cyG4MhHKdKFxmID)JZG3P-Quwma1K9Y}w@r9(N0`EZb_`2QQ zMI^%)m#Y%jf9fNm+0#_B^F9+=gSj5*5E^IAMY`hP5$zaTewq! zn>o$WU#;th$?|qJ=EOk~*DDH85h@g&fYjh19kL;vrbQ(B(8jW9F^=;vnGU!hraTwi z@n)%6R^!QZRTEKH7wbCQiSzUly+XjYiOCykN8BBvjBOAFG|aS^q<)YUMgR6rce0eU zS34{A^R!fyA#o+pzry(UQvp&k-LR>YYIlmzyH#D8Z99!RW^M-gp4QP*taUb#(-HG6BvsLD&X zsS59lg26?4q+@9yD6aZuDTaEHw6{sP=CPvNsa4e59Lnvlsj6?0I#AvwD>YX0FFufO zB5!;Kv$UF4o=>9UIT`0qu$dLwLAsUJ)kS06D2uXrf_zfJv!!IF_RE1{^e)r45l=;5 zwk~=%7g2SJG)DfyA+{8KO6L!4`Rq~lF1;5-A8zKYUZsruN=`6_$#z*!V0sV&8d@1@ z=3($Q#JC0pwZjB!R0Qs3hujhdW-g!$-hpJx30GKLK`}_p(%&r5py)TY3^x*fQ~{U| z=ePJ2JqT??mMRg(NjgV*a^cYFo9<=!eyt$kZj;C17J2e$QrOyM(JoS;*Jf zqo5(iejfYIEpe626fyOclzk;T#63y5@H5W1*l%d#pMEIqRF-WJ;nYQ%H6`pnlarc* zh(bZ48k@H3yNKc!^YU&eD)eAjb*4p{ml4d6MC^?~e}d`B_YG0av3c^4w#+za!upN0 z6)H77p?XU2DOaKV@_g5*?uE3E82x>;0fP;d<|0w;7S1_O-q#ilbRmKC3n5*clefn; zSNi$@`hrwKPrM<$g-3cUA*-N?W2b$Ws2IaSYQ4{Jgc@F>5;{WOOG`Xb{vQqPzERn) z;E{Gh{3P-#ZyBYQGgL%}SLQGtY4P**EolVyK!1wqG5F@>Eik|p=`Hfcqws?Mpu>xA z@hw9x*=3pXRu%@uKj}^ub-2?vpXCZjpbqx=I>kl^yqwVfr0iEoe9pDX@K*FW1XGcOhBkj_B5YwbNhc zogfc>!+Q?W3TK!#nUbzSlaNgwjLdUoY~;{41din*$D4~3*?O&7r_t#n0q;*(UR`xs zyH8uqkcS>`;P+)X{N{g^C|idkBdsA$<@X&|pX@k!w5#jsW9qJsZ=X7P>N(;%ypyyY zHnB|P`-eBPS-#7R&_{0iO4vBL`0c}FgcY)az%EmaD32YHk*h$)WF(kDqCn8mrO$HIjBPTlfMsV#JL?a`8;I3p4kBm%!i-M5E zA$}H%g$$FwjD-YBfary3$nT!*q>0JrUtt6RR1Bh$I63po(-$?${3-w%kS!zdtebd% z90B<<64$z<56BScQby9YCgHCF83V~jo55mnjak7WwZ`yQa1szz%df=30CIu;^w5WZ zw1IpX$@-yt0R%x;K*RfyjkN>hj{%tiErOzu#MdObaUdy>@^2ZTdq27N>{B!`rIlbc zAiA^An0~v!)1Jfex=WkDv}P2#GYUTNwDDvGKbT-fVL78P0-m-VRp76KnFP^jRdZO} z0VP3n&*87hg5G@94^eWUS`Zbi1*@_F@POJgf{~1cY7oFM=qjk~0HVqW_(=c@AbGNV&h2XmPq@(lnQK^_oQUr%Qn06!=Qx<6~G5dagQ1yB^Ome=x;03f`+=7#R7 z0oMfT0trxyXH->}0E~eaK`wv{>LD`EDcHzFZ({)ltmmZ{=GZ1O|H3x*WAd99cCw2k z>qTwJEc@qv V$B+5H*>TF>bF|BS{N&T8o?{p_X@1ejDYGKM7aMa1T!)`S~iL1m(A~4KepA|o9yQux;TJ~BrmO6{gLjQy12=qryW2QNi z&FnHC88FY7XU&rNkfqh)w=7%kSQu-UwcC2$nzW{@#kL_^(00`(*j09o{fd3WK54IV z)H)g+V~z>Og2Ux(aJD&Topa6=C;jh?=XJxn z%Q}_5SYN8I)Q{+|>aXcX^#ipAjlp2B872&qh6ZD!an874JW|(IC)KUg#p{xF{`%hf zzWSkh#-uRuCZ$O*oi`1eE}1Tyu9%9=rDnCc(p+V}YQAP3HBXo)%^HgVkv3QwEt8h0 zWyUgRS+F!(o2(1gMeCCFjF&TG!=&M_y#8b*EMWzd*3E=^Q3rJ2<Rx`ZyNOX(PWpMFpu)bj?VK`^Kc#fD3U%Z4k4s|K~P(pY7zH9n{@UN?>!$BYxk lNu#OGR@YEBRX0;NTQ^s?Q0J*XRNq>Eq`s{_(j3U$@P7kX_lGCJ$iP$NL%w{^=kfGczXpzL*w@+Nr!ha6!=)Ff1zL$+*v)@~ zJcsMcq_ z9UjrT=eV4oy+jokBK=}zB$_cn1#%@<`{T8Pd@ZvLaNNmK#UZV6B$aH+T$G2dP_yq- zAsFHRnckzXr<8Zkv31D%Yxj`FOuL{hBRZ)opPH0AV#AKO;*8fh3s3GmhfK(I$!1BZ z5*LRG5Oc`+P}$5FPeO6T);Pe^fI}ClR`JBvcoG*K4~LL6k+Zr?)RaSpeL{2(%jcp~ z8e$%)Fu`B&!z>E&8Jof< zHyQ=lhJTQ^n-^vp!?T-r@O+4ToWHL$(uf||=#}&5<=l2AFTCm3C?2Ur&Hrd+B{(H| zL!_y|BuqAvuN4^R?MDUqW%`qxtWGU6gPhwLlW@Nf@#n7w%2u2j_sZr-V|ck>h|i_! z!@s=8eja5R5ie}nf%V6>9KluE`kL@s3HkZfpwL`Gnu`8O2OZcyCL@90+zP>H|ezBPsd|I+q_YOZm;<~@^8WJtBZz}Y2 zax`<=v8=;`;#qF->WOv7`N|(};W)*)sJN@neZX-N=g*pUEHtH!7hKfXvEaIVdFtWH zid*r-TfFV$;~lRE?-z!@SGtR@9)!zAoTuP`2EK|@3Z53Zej&OIT~qdJT$Fi3h9@M^ zrH0?5UDD7!BMnbvJO`(oAdQj5VlY=Xi=yF93DM662Q?X|wjR$=Jen472-&D*sR+{! z4Tge)7rHy#OgAaLoZ=Q+Xt0RvQjL$6+lA#e?Wv@pYq@}v3d5eH;?R^klEaRK;!F%X<1|N{lef_)_)oBboZ%Et z@~*gUNpZx3Y6hFqH{Pe@u(%FGr6aN3#X9?^?BpO)a4^B;TzCq;-g&l7nijS7U9zvN zX_GWHEIf8`B!iI+F4g(SCEDR# zruI0RYWD&u*kdWK_$zy7GWbT8TmA5p)g=_NxJ8E%dYUZm*_ipxe^Q>lS_(*Due3Xc z^|@(BlHM*mlCI0nm|J~=8A-#A<#U6{mYbqheDmbMXX=c*n46e$dj>C@J&GsM;+>~z zeJb2>KK&#f=hP<{_p;H&GFmLn6D{w)el3&y!0=_fHL!)^%BMfN>M4ye68#E`zX5)3^~t|bR*Y7IA=H&sTp-8uV@w z(%UjC`jJ5PZ^;Aw(;fHl;;nN?tk_$ud}%nFl5EEJPdnluIjrxA(?1$doE}Il614K1 zzl2VA>V#s#{*BF%#P+LSL}kxM-;8#U?;7$;aSR_h2~m}f^r^im=LC*7oPFe7LlHkq z{@d_0aT%?K_P5EPHO1(~WheodG#qju{ zVWD96iDM@^e7>HepXoW()z^2dvumI4*q3(`=i%+7`*2No?6Aa36IVI;9)M3`(ba&@ z$T{JlG!H^eld<|V83!R=OIgaRmNE!kn$)k76(Fo>a%7ck0@0c#dsoSJ5dJhdzDk}4 zF`6c0tK=AnsWdseN-ltir$`=|f9Y9C`X=%LP!6wWOcf+onsG6v6H*|}^fG1?(ybIj z+K#kI_wc-&mQ~2oKXS4RpgDd?i-(RHG1VD1Vxi|4J4om=#vzvi54=e+*M)Z$#bam&7-{RyvpbXIMM-Lg>0OUBOs9z& zWF#SI$XpleK(6lFoLmJo19Z_CX*|`P+X=}JoJq4TVmRuPA%uWiX_6!S>F(TFNQ*!m zh%})V8bH!q$JPRLpL4BvXa#%#T|;J$BamJNX40&VvShYf%u1x00C}OWhqU=Le1tw_ zSKPo60cea6W11ngrkO>`p1p*8X9_!8>g)|0C0vygt2*iK}up!E3 zi{IhoLV&Ka2&N}j52+dGq^yIJ^(=pU9Lb|Vh_d*l4(K?fDPSIW)Q;>JB{+RYbN0nF+6mw zIqU2JEWY0mHtwptQTd^1ef77hkC=VtfcbakTFb%O!*xHYyH}UAK4JS?Tet1^woh!F zeY?HNUSmIKKVk2)U$?(*f7AX_J%TiFTp4bo)jzCWYc4YXk9m{DXX&#HSYEW8w_LQ0 zSiWhwVSzy}KDu!WS#DY)7VX&@U5&M7wB|<5L``08VQp3INbS|y@mfutTvt&yRClRv ztWLB_)}7XX^}ID?O^Q2j4*=eb;R9P&jIb<2Lj9YG53Tw1AS6QhyYvfv5tgiObSyu0jTB(j! zoL5&^cOF%9R$8-cJ;SQLWKE!M(dI|p0b2}pCv85|Jz!T^-En)u-pOk3t@qb2vig_n z@lP{?1BoWI>D%>Q{d~F$s=n1g`_O5aMIZLkMl2c*7+pr%i8kX5dU4;_R7pGGs%)){ QuzuXDoUIJ=!|T@kFGx(BZ2$lO diff --git a/third-party/mimalloc/bin/mimalloc-redirect32.lib b/third-party/mimalloc/bin/mimalloc-redirect32.lib index 7dadab3d657e82efecf90d288b13df7d1ffab7a2..87f19b8ec0f7ae1024ff508a738b517c71f11aad 100644 GIT binary patch delta 118 zcmew$_Caie84F7UkK4D&RxG<%6dC%eCu^~*PnKu3W9qBk9M5XV$Qe;K{r^!01_q|d z_AJtqx3Cq#l<5PNOpa%jnq0~r%ejdy_Sr+IDznLg9Q80&(v#P7a6naVzRWR~2>^ga BBijG~ delta 118 zcmew$_Caie84JtnFDi#7Te0k7k!9FfI$4WdeX=~O9n;Rz&GD>;jGSgqmpneoz`(#X z*`7su@)outm@<8!lF9L`Qj<&BV>!E@9lG)ms>*D#AV)n+mGtEG92`)Un=f struct mi_stl_allocator { } /*! \page build Building -Checkout the sources from Github: +Checkout the sources from GitHub: ``` git clone https://github.com/microsoft/mimalloc ``` diff --git a/third-party/mimalloc/ide/vs2017/mimalloc-override-test.vcxproj b/third-party/mimalloc/ide/vs2017/mimalloc-override-test.vcxproj index faaa00e3..04c16a9f 100644 --- a/third-party/mimalloc/ide/vs2017/mimalloc-override-test.vcxproj +++ b/third-party/mimalloc/ide/vs2017/mimalloc-override-test.vcxproj @@ -1,4 +1,4 @@ - + @@ -22,8 +22,8 @@ 15.0 {FEF7868F-750E-4C21-A04D-22707CC66879} mimalloc-override-test - 10.0.17134.0 mimalloc-override-test + 10.0.19041.0 diff --git a/third-party/mimalloc/ide/vs2017/mimalloc-override.vcxproj b/third-party/mimalloc/ide/vs2017/mimalloc-override.vcxproj index 6fdd83d9..3d5c1f75 100644 --- a/third-party/mimalloc/ide/vs2017/mimalloc-override.vcxproj +++ b/third-party/mimalloc/ide/vs2017/mimalloc-override.vcxproj @@ -22,8 +22,8 @@ 15.0 {ABB5EAE7-B3E6-432E-B636-333449892EA7} mimalloc-override - 10.0.17134.0 mimalloc-override + 10.0.19041.0 @@ -209,12 +209,14 @@ - - - + + + + + @@ -236,6 +238,7 @@ + @@ -246,7 +249,7 @@ - + diff --git a/third-party/mimalloc/ide/vs2017/mimalloc-test-stress.vcxproj b/third-party/mimalloc/ide/vs2017/mimalloc-test-stress.vcxproj index b8267d0b..061b8605 100644 --- a/third-party/mimalloc/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/third-party/mimalloc/ide/vs2017/mimalloc-test-stress.vcxproj @@ -1,4 +1,4 @@ - + @@ -22,8 +22,8 @@ 15.0 {FEF7958F-750E-4C21-A04D-22707CC66878} mimalloc-test-stress - 10.0.17134.0 mimalloc-test-stress + 10.0.19041.0 diff --git a/third-party/mimalloc/ide/vs2017/mimalloc-test.vcxproj b/third-party/mimalloc/ide/vs2017/mimalloc-test.vcxproj index 27c7bb6e..04bd6537 100644 --- a/third-party/mimalloc/ide/vs2017/mimalloc-test.vcxproj +++ b/third-party/mimalloc/ide/vs2017/mimalloc-test.vcxproj @@ -1,4 +1,4 @@ - + @@ -22,8 +22,8 @@ 15.0 {FEF7858F-750E-4C21-A04D-22707CC66878} mimalloctest - 10.0.17134.0 mimalloc-test + 10.0.19041.0 @@ -102,7 +102,7 @@ true true ..\..\include - stdcpp17 + stdcpp14 Console diff --git a/third-party/mimalloc/ide/vs2017/mimalloc.sln b/third-party/mimalloc/ide/vs2017/mimalloc.sln index 7dbf53e1619a42863884ae8f44265cc745171eaa..515c03f2e780c4d5159689e0aba12e8fed646e7e 100644 GIT binary patch delta 774 zcmZva&r1S96vx?ib=`5NT`9|~Xv^CMQktTWc`51={etKeX-XKs+#ymJTt zfxvTxcnSI+BKiY@=okbwZ^u~*y)2*a+j(z3J3CKPS25+|zD4Wu{17EEYrAq%D^#tm z(^{$GShFcg>X*a)srXDhkxnEs@!6RKt)JTxg$HS&{+6edEFG2BkDbHv{wWsoP?1); zs@GkWWw`Cz?5gG=D{qJY%_PfJr&y>`XvjIX?W-J#Qle>#Od#0`6fLg@U4d3l!SL*v_n~c#HBU& znTEJF%qSGFVNWIStY8OE;~uGSr(!>^tMH}bj8I_JeWqdF&%2WF62K1#1(GnWVTTm7 z!O_r_lW@iKqoFG&;VK1%D-w5=g6Jv*g=<^4!PJE-Pw2`?gLXk+^TuK;Nd}`u6^j4LJg5<_KGMV~+M-@@USn*Pmv_ N>U9tB0wXc-&AruT;rS<4r9w)1ERal8wDKau5?>`rC>^f(r!z4SE#qK3Grd5KJ$ zV29GQ585&g#W}mx07EL0j>0t#eAa9PZ_rPtwJFk}iwZgz@Cky7Y{wsJp+%Vp``hHWrI9 upSqj*)ZG@X>5fIw8EeqxHv~WKJQH~JOb$x7JrP7kWBUJR>-+@l9my8}x6r2m diff --git a/third-party/mimalloc/ide/vs2017/mimalloc.vcxproj b/third-party/mimalloc/ide/vs2017/mimalloc.vcxproj index 41fb77c1..46eb05d8 100644 --- a/third-party/mimalloc/ide/vs2017/mimalloc.vcxproj +++ b/third-party/mimalloc/ide/vs2017/mimalloc.vcxproj @@ -22,7 +22,7 @@ 15.0 {ABB5EAE7-B3E6-432E-B636-333449892EA6} mimalloc - 10.0.17134.0 + 10.0.19041.0 mimalloc @@ -129,9 +129,9 @@ true ../../include _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsC + CompileAsCpp false - stdcpp17 + stdcpp14 @@ -215,12 +215,6 @@ false false - - true - true - true - true - true true @@ -233,6 +227,7 @@ + true @@ -242,18 +237,20 @@ - + - - - + + + + + diff --git a/third-party/mimalloc/ide/vs2019/mimalloc-override.vcxproj b/third-party/mimalloc/ide/vs2019/mimalloc-override.vcxproj index 4c4235b4..1c5c61b7 100644 --- a/third-party/mimalloc/ide/vs2019/mimalloc-override.vcxproj +++ b/third-party/mimalloc/ide/vs2019/mimalloc-override.vcxproj @@ -209,12 +209,14 @@ - - - + + + + + @@ -236,6 +238,7 @@ + @@ -246,7 +249,7 @@ - + diff --git a/third-party/mimalloc/ide/vs2019/mimalloc.vcxproj b/third-party/mimalloc/ide/vs2019/mimalloc.vcxproj index 9f967d94..0e2eb312 100644 --- a/third-party/mimalloc/ide/vs2019/mimalloc.vcxproj +++ b/third-party/mimalloc/ide/vs2019/mimalloc.vcxproj @@ -114,7 +114,7 @@ Level4 Disabled true - true + Default ../../include MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp @@ -205,12 +205,6 @@ false false - - true - true - true - true - true true @@ -225,6 +219,13 @@ + + + true + true + true + true + true @@ -234,18 +235,20 @@ - + - - - + + + + + diff --git a/third-party/mimalloc/ide/vs2022/mimalloc-override.vcxproj b/third-party/mimalloc/ide/vs2022/mimalloc-override.vcxproj index 54964d96..e2c7f71d 100644 --- a/third-party/mimalloc/ide/vs2022/mimalloc-override.vcxproj +++ b/third-party/mimalloc/ide/vs2022/mimalloc-override.vcxproj @@ -209,13 +209,16 @@ - - + + - - + + + + + @@ -237,6 +240,13 @@ + + + true + true + true + true + @@ -247,10 +257,13 @@ - + + + + diff --git a/third-party/mimalloc/ide/vs2022/mimalloc-test-api.vcxproj b/third-party/mimalloc/ide/vs2022/mimalloc-test-api.vcxproj index 6023c251..d9b9cae4 100644 --- a/third-party/mimalloc/ide/vs2022/mimalloc-test-api.vcxproj +++ b/third-party/mimalloc/ide/vs2022/mimalloc-test-api.vcxproj @@ -141,7 +141,14 @@ + + true + true + true + true + + false diff --git a/third-party/mimalloc/ide/vs2022/mimalloc-test-stress.vcxproj b/third-party/mimalloc/ide/vs2022/mimalloc-test-stress.vcxproj index c7e820df..14bd3e69 100644 --- a/third-party/mimalloc/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/third-party/mimalloc/ide/vs2022/mimalloc-test-stress.vcxproj @@ -149,8 +149,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/third-party/mimalloc/ide/vs2022/mimalloc.vcxproj b/third-party/mimalloc/ide/vs2022/mimalloc.vcxproj index 9811aa55..c298550a 100644 --- a/third-party/mimalloc/ide/vs2022/mimalloc.vcxproj +++ b/third-party/mimalloc/ide/vs2022/mimalloc.vcxproj @@ -95,12 +95,12 @@ Level4 Disabled true - true + Default ../../include MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - Default + stdcpp20 @@ -114,7 +114,7 @@ Level4 Disabled true - true + Default ../../include MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp @@ -141,7 +141,7 @@ Level4 MaxSpeed true - true + Default ../../include %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode @@ -151,7 +151,7 @@ Default CompileAsCpp true - Default + stdcpp20 true @@ -169,7 +169,7 @@ Level4 MaxSpeed true - true + Default ../../include %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode @@ -205,12 +205,6 @@ false false - - true - true - true - true - true true @@ -225,6 +219,13 @@ + + + true + true + true + true + true @@ -234,19 +235,20 @@ - + - - - - + + + + + diff --git a/third-party/mimalloc/include/mimalloc-track.h b/third-party/mimalloc/include/mimalloc-track.h deleted file mode 100644 index f60d7acd..00000000 --- a/third-party/mimalloc/include/mimalloc-track.h +++ /dev/null @@ -1,62 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#pragma once -#ifndef MIMALLOC_TRACK_H -#define MIMALLOC_TRACK_H - -// ------------------------------------------------------ -// Track memory ranges with macros for tools like Valgrind -// address sanitizer, or other memory checkers. -// ------------------------------------------------------ - -#if MI_VALGRIND - -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "valgrind" - -#include -#include - -#define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) -#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) -#define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) -#define mi_track_free_size(p,_size) mi_track_free(p) -#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) -#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) -#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) - -#elif MI_ASAN - -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "asan" - -#include - -#define mi_track_malloc(p,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) -#define mi_track_resize(p,oldsize,newsize) ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize) -#define mi_track_free(p) ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p)) -#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) -#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) -#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) -#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) - -#else - -#define MI_TRACK_ENABLED 0 -#define MI_TRACK_TOOL "none" - -#define mi_track_malloc(p,size,zero) -#define mi_track_resize(p,oldsize,newsize) -#define mi_track_free(p) -#define mi_track_free_size(p,_size) -#define mi_track_mem_defined(p,size) -#define mi_track_mem_undefined(p,size) -#define mi_track_mem_noaccess(p,size) - -#endif - -#endif diff --git a/third-party/mimalloc/include/mimalloc.h b/third-party/mimalloc/include/mimalloc.h index 9b72fbfd..f77c2ea1 100644 --- a/third-party/mimalloc/include/mimalloc.h +++ b/third-party/mimalloc/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 209 // major + 2 digits minor +#define MI_MALLOC_VERSION 212 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -284,7 +284,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -#if MI_MALLOC_VERSION >= 200 +#if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif @@ -318,35 +318,40 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size typedef enum mi_option_e { // stable options - mi_option_show_errors, - mi_option_show_stats, - mi_option_verbose, - // some of the following options are experimental - // (deprecated options are kept for binary backward compatibility with v1.x versions) - mi_option_eager_commit, - mi_option_deprecated_eager_region_commit, - mi_option_deprecated_reset_decommits, - mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit - mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup + mi_option_show_errors, // print error messages + mi_option_show_stats, // print statistics on termination + mi_option_verbose, // print verbose messages + // the following options are experimental (see src/options.h) + mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1) + mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node - mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup mi_option_deprecated_segment_cache, - mi_option_page_reset, - mi_option_abandoned_page_decommit, - mi_option_deprecated_segment_reset, - mi_option_eager_commit_delay, - mi_option_decommit_delay, - mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. - mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) - mi_option_os_tag, - mi_option_max_errors, - mi_option_max_warnings, - mi_option_max_segment_reclaim, - mi_option_allow_decommit, - mi_option_segment_decommit_delay, - mi_option_decommit_extend_delay, - mi_option_destroy_on_exit, - _mi_option_last + mi_option_deprecated_page_reset, + mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination + mi_option_deprecated_segment_reset, + mi_option_eager_commit_delay, + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. + mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_max_segment_reclaim, + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe. + mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) + mi_option_arena_purge_mult, + mi_option_purge_extend_delay, + _mi_option_last, + // legacy option names + mi_option_large_os_pages = mi_option_allow_large_os_pages, + mi_option_eager_region_commit = mi_option_arena_eager_commit, + mi_option_reset_decommits = mi_option_purge_decommits, + mi_option_reset_delay = mi_option_purge_delay, + mi_option_abandoned_page_reset = mi_option_abandoned_page_purge } mi_option_t; @@ -356,8 +361,9 @@ mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); -mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); @@ -477,11 +483,13 @@ template bool operator==(const mi_stl_allocator& , const template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } -#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11 +#define MI_HAS_HEAP_STL_ALLOCATOR 1 + #include // std::shared_ptr // Common base class for STL allocators in a specific heap -template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { +template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { using typename _mi_stl_allocator_common::size_type; using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; @@ -500,7 +508,7 @@ template struct _mi_heap_stl_allocator_common : public _m #endif void collect(bool force) { mi_heap_collect(this->heap.get(), force); } - template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } + template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } protected: std::shared_ptr heap; @@ -508,10 +516,10 @@ protected: _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); - this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ } _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } - template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } diff --git a/third-party/mimalloc/include/mimalloc-atomic.h b/third-party/mimalloc/include/mimalloc/atomic.h similarity index 87% rename from third-party/mimalloc/include/mimalloc-atomic.h rename to third-party/mimalloc/include/mimalloc/atomic.h index c66f8049..fe418fab 100644 --- a/third-party/mimalloc/include/mimalloc-atomic.h +++ b/third-party/mimalloc/include/mimalloc/atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021 Microsoft Research, Daan Leijen +Copyright (c) 2018-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -39,7 +39,11 @@ terms of the MIT license. A copy of the license can be found in the file #include #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name -#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #endif // Various defines for all used memory orders in mimalloc @@ -113,11 +117,13 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { } // Used by timers -#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) -#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) -#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) -#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) +#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) #elif defined(_MSC_VER) @@ -245,6 +251,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + // The pointer macros cast to `uintptr_t`. #define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) @@ -275,6 +296,26 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p, -sub); } +typedef _Atomic(uintptr_t) mi_atomic_once_t; + +// Returns true only on the first invocation +static inline bool mi_atomic_once( mi_atomic_once_t* once ) { + if (mi_atomic_load_relaxed(once) != 0) return false; // quick test + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 +} + +typedef _Atomic(uintptr_t) mi_atomic_guard_t; + +// Allows only one thread to execute at a time +#define mi_atomic_guard(guard) \ + uintptr_t _mi_guard_expected = 0; \ + for(bool _mi_guard_once = true; \ + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ + (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) + + + // Yield #if defined(__cplusplus) #include @@ -294,7 +335,7 @@ static inline void mi_atomic_yield(void) { } #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -307,10 +348,16 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } -#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) +#ifdef __APPLE__ +static inline void mi_atomic_yield(void) { + __asm__ volatile ("or r27,r27,r27" ::: "memory"); +} +#else static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } +#endif #elif defined(__armel__) || defined(__ARMEL__) static inline void mi_atomic_yield(void) { __asm__ volatile ("nop" ::: "memory"); diff --git a/third-party/mimalloc/include/mimalloc-internal.h b/third-party/mimalloc/include/mimalloc/internal.h similarity index 74% rename from third-party/mimalloc/include/mimalloc-internal.h rename to third-party/mimalloc/include/mimalloc/internal.h index a68e6966..00d26260 100644 --- a/third-party/mimalloc/include/mimalloc-internal.h +++ b/third-party/mimalloc/include/mimalloc/internal.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,8 +8,14 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H -#include "mimalloc-types.h" -#include "mimalloc-track.h" + +// -------------------------------------------------------------------------- +// This file contains the interal API's of mimalloc and various utility +// functions and macros. +// -------------------------------------------------------------------------- + +#include "mimalloc/types.h" +#include "mimalloc/track.h" #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) @@ -44,6 +50,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_externc #endif +// pthreads #if !defined(_WIN32) && !defined(__wasi__) #define MI_USE_PTHREADS #include @@ -73,39 +80,52 @@ extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); -bool _mi_preloading(void); // true while the C runtime is not ready +bool _mi_preloading(void); // true while the C runtime is not initialized yet +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +void _mi_thread_done(mi_heap_t* heap); +void _mi_thread_data_collect(void); // os.c -size_t _mi_os_page_size(void); -void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -// bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); +bool _mi_os_has_virtual_reserve(void); -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); + +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); +bool _mi_os_use_large_page(size_t size, size_t alignment); +size_t _mi_os_large_page_size(void); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); mi_arena_id_t _mi_arena_id_none(void); -bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_contains(const void* p); +void _mi_arena_collect(bool force_purge, mi_stats_t* stats); +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); -// "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); -void _mi_segment_cache_free_all(mi_os_tld_t* tld); +// "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); @@ -127,8 +147,6 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); - - // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; @@ -155,12 +173,11 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); -void _mi_heap_destroy_all(void); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); // "stats.c" void _mi_stats_done(mi_stats_t* stats); - mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); @@ -173,6 +190,16 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); + +// option.c, c primitives +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); + #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); @@ -242,6 +269,10 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) +#include +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + // Is `x` a power of two? (0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); @@ -284,7 +315,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { } // Is memory zero initialized? -static inline bool mi_mem_is_zero(void* p, size_t size) { +static inline bool mi_mem_is_zero(const void* p, size_t size) { for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) return false; } @@ -340,93 +371,11 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot } -/* ---------------------------------------------------------------------------------------- -The thread local default heap: `_mi_get_default_heap` returns the thread local heap. -On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a -__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures -that the storage will always be available (allocated on the thread stacks). -On some platforms though we cannot use that when overriding `malloc` since the underlying -TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. -We try to circumvent this in an efficient way: -- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the - loader itself calls `malloc` even before the modules are initialized. -- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). -- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +/*---------------------------------------------------------------------------------------- + Heap functions ------------------------------------------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap -extern bool _mi_process_is_initialized; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap - -#if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) // macOS -#define MI_TLS_SLOT 89 // seems unused? -// #define MI_TLS_RECURSE_GUARD 1 -// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) -// see -#elif defined(__OpenBSD__) -// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) -// see -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) -// #elif defined(__DragonFly__) -// #warning "mimalloc is not working correctly on DragonFly yet." -// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) -#elif defined(__ANDROID__) -// See issue #381 -#define MI_TLS_PTHREAD -#endif -#endif - -#if defined(MI_TLS_SLOT) -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) -static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { - pthread_t self = pthread_self(); - #if defined(__DragonFly__) - if (self==NULL) { - mi_heap_t* pheap_main = _mi_heap_main_get(); - return &pheap_main; - } - #endif - return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); -} -#elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; -#endif - -// Default heap to allocate from (if not using TLS- or pthread slots). -// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`). -// This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined. -// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356). -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from - -static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_SLOT) - mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); - if mi_unlikely(heap == NULL) { - #ifdef __GNUC__ - __asm(""); // prevent conditional load of the address of _mi_heap_empty - #endif - heap = (mi_heap_t*)&_mi_heap_empty; - } - return heap; -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) - mi_heap_t* heap = *mi_tls_pthread_heap_slot(); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#elif defined(MI_TLS_PTHREAD) - mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#else - #if defined(MI_TLS_RECURSE_GUARD) - if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); - #endif - return _mi_heap_default; -#endif -} - -static inline bool mi_heap_is_default(const mi_heap_t* heap) { - return (heap == mi_get_default_heap()); -} static inline bool mi_heap_is_backing(const mi_heap_t* heap) { return (heap->tld->heap_backing == heap); @@ -454,11 +403,6 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si return heap->pages_free_direct[idx]; } -// Get the page belonging to a certain size class -static inline mi_page_t* _mi_get_free_small_page(size_t size) { - return _mi_heap_get_free_small_page(mi_get_default_heap(), size); -} - // Segment that contains the pointer // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), // and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; @@ -790,6 +734,29 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.initially_committed = committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return memid; +} + // ------------------------------------------------------------------- // Fast "random" shuffle @@ -834,107 +801,6 @@ static inline size_t _mi_os_numa_node_count(void) { } -// ------------------------------------------------------------------- -// Getting the thread id should be performant as it is called in the -// fast path of `_mi_free` and we specialize for various platforms. -// We only require _mi_threadid() to return a unique id for each thread. -// ------------------------------------------------------------------- -#if defined(_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - // Windows: works on Intel and ARM in both 32- and 64-bit - return (uintptr_t)NtCurrentTeb(); -} - -// We use assembly for a fast thread id on the main platforms. The TLS layout depends on -// both the OS and libc implementation so we use specific tests for each main platform. -// If you test on another platform and it works please send a PR :-) -// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. -#elif defined(__GNUC__) && ( \ - (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ - || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ - || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ - ) - -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { - void* res; - const size_t ofs = (slot*sizeof(void*)); - #if defined(__i386__) - __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS - #elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS - #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI - #elif defined(__x86_64__) - __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - void** tcb; MI_UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - res = tcb[slot]; - #elif defined(__aarch64__) - void** tcb; MI_UNUSED(ofs); - #if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); - #else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); - #endif - res = tcb[slot]; - #endif - return res; -} - -// setting a tls slot is only used on macOS for now -static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { - const size_t ofs = (slot*sizeof(void*)); - #if defined(__i386__) - __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS - #elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS - #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI - #elif defined(__x86_64__) - __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - void** tcb; MI_UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - tcb[slot] = value; - #elif defined(__aarch64__) - void** tcb; MI_UNUSED(ofs); - #if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); - #else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); - #endif - tcb[slot] = value; - #endif -} - -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - #if defined(__BIONIC__) - // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id - // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 - return (uintptr_t)mi_tls_slot(1); - #else - // in all our other targets, slot 0 is the thread id - // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h - // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 - return (uintptr_t)mi_tls_slot(0); - #endif -} - -#else - -// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - return (uintptr_t)&_mi_heap_default; -} - -#endif - // ----------------------------------------------------------------------- // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) @@ -964,6 +830,7 @@ static inline size_t mi_ctz(uintptr_t x) { #elif defined(_MSC_VER) #include // LONG_MAX +#include // BitScanReverse64 #define MI_HAVE_FAST_BITSCAN static inline size_t mi_clz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; @@ -1050,7 +917,6 @@ static inline size_t mi_bsr(uintptr_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -#include extern bool _mi_cpu_has_fsrm; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if (_mi_cpu_has_fsrm) { @@ -1069,7 +935,6 @@ static inline void _mi_memzero(void* dst, size_t n) { } } #else -#include static inline void _mi_memcpy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); } @@ -1078,7 +943,6 @@ static inline void _mi_memzero(void* dst, size_t n) { } #endif - // ------------------------------------------------------------------------------- // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. @@ -1086,7 +950,6 @@ static inline void _mi_memzero(void* dst, size_t n) { #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // On GCC/CLang we provide a hint that the pointers are word aligned. -#include static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); diff --git a/third-party/mimalloc/include/mimalloc/prim.h b/third-party/mimalloc/include/mimalloc/prim.h new file mode 100644 index 00000000..9e560696 --- /dev/null +++ b/third-party/mimalloc/include/mimalloc/prim.h @@ -0,0 +1,323 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_PRIM_H +#define MIMALLOC_PRIM_H + + +// -------------------------------------------------------------------------- +// This file specifies the primitive portability API. +// Each OS/host needs to implement these primitives, see `src/prim` +// for implementations on Window, macOS, WASI, and Linux/Unix. +// +// note: on all primitive functions, we always have result parameters != NUL, and: +// addr != NULL and page aligned +// size > 0 and page aligned +// return value is an error code an int where 0 is success. +// -------------------------------------------------------------------------- + +// OS memory configuration +typedef struct mi_os_mem_config_s { + size_t page_size; // 4KiB + size_t large_page_size; // 2MiB + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) +} mi_os_mem_config_t; + +// Initialize +void _mi_prim_mem_init( mi_os_mem_config_t* config ); + +// Free OS memory +int _mi_prim_free(void* addr, size_t size ); + +// Allocate OS memory. Return NULL on error. +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: !commit => !allow_large +// try_alignment >= _mi_os_page_size() and a power of 2 +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); + +// Commit memory. Returns error code or 0 on success. +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); + +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); + +// Reset memory. The range keeps being accessible but the content might be reset. +// Returns error code or 0 on success. +int _mi_prim_reset(void* addr, size_t size); + +// Protect memory. Returns error code or 0 on success. +int _mi_prim_protect(void* addr, size_t size, bool protect); + +// Allocate huge (1GiB) pages possibly associated with a NUMA node. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: size > 0 and a multiple of 1GiB. +// numa_node is either negative (don't care), or a numa node number. +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); + +// Return the current NUMA node +size_t _mi_prim_numa_node(void); + +// Return the number of logical NUMA nodes +size_t _mi_prim_numa_node_count(void); + +// Clock ticks +mi_msecs_t _mi_prim_clock_now(void); + +// Return process information (only for statistics) +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); + +// Default stderr output. (only for warnings etc. with verbose enabled) +// msg != NULL && _mi_strlen(msg) > 0 +void _mi_prim_out_stderr( const char* msg ); + +// Get an environment variable. (only for options) +// name != NULL, result != NULL, result_size >= 64 +bool _mi_prim_getenv(const char* name, char* result, size_t result_size); + + +// Fill a buffer with strong randomness; return `false` on error or if +// there is no strong randomization available. +bool _mi_prim_random_buf(void* buf, size_t buf_len); + +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. +void _mi_prim_thread_init_auto_done(void); + +// Called on process exit and may take action to clean up resources associated with the thread auto done. +void _mi_prim_thread_done_auto_done(void); + +// Called when the default heap for a thread changes +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); + + +//------------------------------------------------------------------- +// Thread id: `_mi_prim_thread_id()` +// +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms as +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`. +// We only require _mi_prim_thread_id() to return a unique id +// for each thread (unequal to zero). +//------------------------------------------------------------------- + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on +// both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. +#elif defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + ) + +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #endif + return res; +} + +// setting a tls slot is only used on macOS for now +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + tcb[slot] = value; + #endif +} + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_prim_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_prim_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} + +#endif + + + +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_prim_get_default_heap()` +This is inlined here as it is on the fast path for allocation functions. + +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). + +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +------------------------------------------------------------------------------------------- */ + +static inline mi_heap_t* mi_prim_get_default_heap(void); + +#if defined(MI_MALLOC_OVERRIDE) +#if defined(__APPLE__) // macOS + #define MI_TLS_SLOT 89 // seems unused? + // #define MI_TLS_RECURSE_GUARD 1 + // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) + // see +#elif defined(__OpenBSD__) + // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) + // see + #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) + // #elif defined(__DragonFly__) + // #warning "mimalloc is not working correctly on DragonFly yet." + // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) +#elif defined(__ANDROID__) + // See issue #381 + #define MI_TLS_PTHREAD +#endif +#endif + + +#if defined(MI_TLS_SLOT) + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + if mi_unlikely(heap == NULL) { + #ifdef __GNUC__ + __asm(""); // prevent conditional load of the address of _mi_heap_empty + #endif + heap = (mi_heap_t*)&_mi_heap_empty; + } + return heap; +} + +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + +static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { + pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) return NULL; + #endif + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); +} + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); + if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); + mi_heap_t* heap = *pheap; + if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; + return heap; +} + +#elif defined(MI_TLS_PTHREAD) + +extern pthread_key_t _mi_heap_default_key; +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +} + +#else // default using a thread local variable; used on most platforms. + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); + #endif + return _mi_heap_default; +} + +#endif // mi_prim_get_default_heap() + + + +#endif // MIMALLOC_PRIM_H diff --git a/third-party/mimalloc/include/mimalloc/track.h b/third-party/mimalloc/include/mimalloc/track.h new file mode 100644 index 00000000..9545f750 --- /dev/null +++ b/third-party/mimalloc/include/mimalloc/track.h @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TRACK_H +#define MIMALLOC_TRACK_H + +/* ------------------------------------------------------------------------------------------------------ +Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. +These can be defined for tracking allocation: + + #define mi_track_malloc_size(p,reqsize,size,zero) + #define mi_track_free_size(p,_size) + +The macros are set up such that the size passed to `mi_track_free_size` +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). +The `reqsize` is what the user requested, and `size >= reqsize`. +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, +or otherwise it is the usable block size which may be larger than the original request. +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). +The `zero` parameter is `true` if the allocated block is zero initialized. + +Optional: + + #define mi_track_align(p,alignedp,offset,size) + #define mi_track_resize(p,oldsize,newsize) + #define mi_track_init() + +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). +The `mi_track_resize` is currently unused but could be called on reallocations within a block. +`mi_track_init` is called at program start. + +The following macros are for tools like asan and valgrind to track whether memory is +defined, undefined, or not accessible at all: + + #define mi_track_mem_defined(p,size) + #define mi_track_mem_undefined(p,size) + #define mi_track_mem_noaccess(p,size) + +-------------------------------------------------------------------------------------------------------*/ + +#if MI_TRACK_VALGRIND +// valgrind tool + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy +#define MI_TRACK_TOOL "valgrind" + +#include +#include + +#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) +#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) + +#elif MI_TRACK_ASAN +// address sanitizer + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "asan" + +#include + +#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + +#elif MI_TRACK_ETW +// windows event tracing + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 +#define MI_TRACK_TOOL "ETW" + +#define WIN32_LEAN_AND_MEAN +#include +#include "../src/prim/windows/etw.h" + +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) + +#else +// no tracking + +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc_size(p,reqsize,size,zero) +#define mi_track_free_size(p,_size) + +#endif + +// ------------------- +// Utility definitions + +#ifndef mi_track_resize +#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) +#endif + +#ifndef mi_track_align +#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) +#endif + +#ifndef mi_track_init +#define mi_track_init() +#endif + +#ifndef mi_track_mem_defined +#define mi_track_mem_defined(p,size) +#endif + +#ifndef mi_track_mem_undefined +#define mi_track_mem_undefined(p,size) +#endif + +#ifndef mi_track_mem_noaccess +#define mi_track_mem_noaccess(p,size) +#endif + + +#if MI_PADDING +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)==(reqsize)); \ + mi_track_malloc_size(p,reqsize,reqsize,zero); \ + } +#else +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)>=(reqsize)); \ + mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \ + } +#endif + +#endif diff --git a/third-party/mimalloc/include/mimalloc-types.h b/third-party/mimalloc/include/mimalloc/types.h similarity index 85% rename from third-party/mimalloc/include/mimalloc-types.h rename to third-party/mimalloc/include/mimalloc/types.h index f3af528e..2005238a 100644 --- a/third-party/mimalloc/include/mimalloc-types.h +++ b/third-party/mimalloc/include/mimalloc/types.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,9 +8,20 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H +// -------------------------------------------------------------------------- +// This file contains the main type definitions for mimalloc: +// mi_heap_t : all data for a thread-local heap, contains +// lists of all managed heap pages. +// mi_segment_t : a larger chunk of memory (32GiB) from where pages +// are allocated. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// where objects are allocated. +// -------------------------------------------------------------------------- + + #include // ptrdiff_t #include // uintptr_t, uint16_t, etc -#include "mimalloc-atomic.h" // _Atomic +#include "mimalloc/atomic.h" // _Atomic #ifdef _MSC_VER #pragma warning(disable:4214) // bitfield is not int @@ -29,8 +40,10 @@ terms of the MIT license. A copy of the license can be found in the file // Define NDEBUG in the release version to disable assertions. // #define NDEBUG -// Define MI_VALGRIND to enable valgrind support -// #define MI_VALGRIND 1 +// Define MI_TRACK_ to enable tracking support +// #define MI_TRACK_VALGRIND 1 +// #define MI_TRACK_ASAN 1 +// #define MI_TRACK_ETW 1 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 @@ -58,11 +71,16 @@ terms of the MIT license. A copy of the license can be found in the file #endif // Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// The padding can detect byte-precise buffer overflow on free. -#if !defined(MI_PADDING) && (MI_DEBUG>=1 || MI_VALGRIND) +// The padding can detect buffer overflow on free. +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW)) #define MI_PADDING 1 #endif +// Check padding bytes; allows byte-precise buffer overflow detection +#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_PADDING_CHECK 1 +#endif + // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. @@ -154,7 +172,7 @@ typedef int32_t mi_ssize_t; // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - #ifdef MI_ENCODE_FREELIST - uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) + #if (MI_ENCODE_FREELIST || MI_PADDING) + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads @@ -308,6 +325,10 @@ typedef struct mi_page_s { +// ------------------------------------------------------ +// Mimalloc segments contain mimalloc pages +// ------------------------------------------------------ + typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment @@ -332,7 +353,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -350,20 +371,57 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge os pages + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA // allocated from an arena (the usual case) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // the arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized + mi_memkind_t memkind; +} mi_memid_t; + + // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - size_t memid; // memory id for arena allocation - bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_large; // in large/huge os pages? - bool mem_is_committed; // `true` if the whole segment is eagerly committed - size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) - size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) + // constant fields + mi_memid_t memid; // memory id for arena allocation + bool allow_decommit; + bool allow_purge; + size_t segment_size; - bool allow_decommit; - mi_msecs_t decommit_expire; - mi_commit_mask_t decommit_mask; + // segment fields + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; @@ -522,6 +580,7 @@ typedef struct mi_stats_s { mi_stat_count_t reserved; mi_stat_count_t committed; mi_stat_count_t reset; + mi_stat_count_t purged; mi_stat_count_t page_committed; mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; @@ -534,6 +593,8 @@ typedef struct mi_stats_s { mi_stat_counter_t pages_extended; mi_stat_counter_t mmap_calls; mi_stat_counter_t commit_calls; + mi_stat_counter_t reset_calls; + mi_stat_counter_t purge_calls; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t normal_count; diff --git a/third-party/mimalloc/readme.md b/third-party/mimalloc/readme.md index 364b974b..ecab8131 100644 --- a/third-party/mimalloc/readme.md +++ b/third-party/mimalloc/readme.md @@ -9,18 +9,18 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. -Initially developed by Daan Leijen for the run-time systems of the +Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.9` (2022-12-23). -Latest stable tag: `v1.7.9` (2022-12-23). +Latest release tag: `v2.1.2` (2023-04-24). +Latest stable tag: `v1.8.2` (2023-04-24). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` > LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` -It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: +It also includes a robust way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: - __small and consistent__: the library is about 8k LOC using simple and consistent data structures. This makes it very suitable @@ -43,7 +43,7 @@ It also has an easy way to override the default allocator in [Windows](#override and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. -- __eager page reset__: when a "page" becomes empty (with increased chance +- __eager page purging__: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. @@ -78,13 +78,24 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. -* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with asan and improved [Valgrind] support. Support abitrary large - alignments (in particular for `std::pmr` pools). +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. + +* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. + +* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision + with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS + abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. + +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. + Support abitrary large alignments (in particular for `std::pmr` pools). Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). Various small bug fixes. -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind] for leak testing and heap block overflow detection. Initial +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow + detection. Initial support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation @@ -98,20 +109,6 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page improved wasm support, faster aligned allocation, various small fixes. -* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including - M1), improved performance for v2 for large objects, Python integration improvements, more standard - installation directories, various small fixes. - -* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix - thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. - -* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). - -* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. - -* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, - improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. - * [Older release notes](#older-release-notes) Special thanks to: @@ -273,43 +270,48 @@ completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options -You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables: +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS - that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) - programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs. - As an alternative, the `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page - reset occur less frequently instead of turning it off completely. + +Advanced options: + +- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge + OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which + can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when + a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher + value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. + Setting it to `-1` disables purging completely. +- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. This is by default + only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS) + may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set + (rss) so it is generally ok to enable this. + +Further options for large workloads and services: + - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - -- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. - Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large + OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be reset). + and allocate just a little to take up space in the huge OS page area (which cannot be purged). The huge pages are usually allocated evenly among NUMA nodes. We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all the huge pages at a specific numa node instead. @@ -347,44 +349,6 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to - Double free's, and freeing invalid heap pointers are detected. - Corrupted free-lists and some forms of use-after-free are detected. -## Valgrind - -Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and -also for other address sanitizers). -However, it is possible to build mimalloc with Valgrind support. This has a small performance -overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final -executables. To build with valgrind support, use the `MI_VALGRIND=ON` cmake option: - -``` -> cmake ../.. -DMI_VALGRIND=ON -``` - -This can also be combined with secure mode or debug mode. -You can then run your programs directly under valgrind: - -``` -> valgrind -``` - -If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), -you also need to tell `valgrind` to not intercept those calls itself, and use: - -``` -> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- -``` - -By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed -used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] -is called `--soname-synonyms`, this also -works when overriding with a static library or object file. Unfortunately, it is not possible to -dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`. -See also the `test/test-wrong.c` file to test with `valgrind`. - -Valgrind support is in its initial development -- please report any issues. - -[Valgrind]: https://valgrind.org/ -[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms - # Overriding Standard Malloc @@ -394,7 +358,7 @@ Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or This is the recommended way to override the standard malloc interface. -### Override on Linux, BSD +### Dynamic Override on Linux, BSD On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -413,7 +377,7 @@ or run with the debug version to get detailed statistics: > env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` -### Override on MacOS +### Dynamic Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -426,7 +390,7 @@ Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -### Override on Windows +### Dynamic Override on Windows Overriding on Windows is robust and has the particular advantage to be able to redirect all malloc/free calls that go through @@ -459,13 +423,13 @@ Such patching can be done for example with [CFF Explorer](https://ntcore.com/?pa On Unix-like systems, you can also statically link with _mimalloc_ to override the standard malloc interface. The recommended way is to link the final program with the -_mimalloc_ single object file (`mimalloc-override.o`). We use +_mimalloc_ single object file (`mimalloc.o`). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the _mimalloc_ library, link it as the first object file. For example: ``` -> gcc -o myprogram mimalloc-override.o myfile1.c ... +> gcc -o myprogram mimalloc.o myfile1.c ... ``` Another way to override statically that works on all platforms, is to @@ -475,6 +439,96 @@ This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimallo under your control or otherwise mixing of pointers from different heaps may occur! +## Tools + +Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc +can also be build to support the [address sanitizer][asan] or the excellent [Valgrind] tool. +Moreover, it can be build to support Windows event tracing ([ETW]). +This has a small performance overhead but does allow detecting memory leaks and byte-precise +buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools. + +### Valgrind + +To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option: + +``` +> cmake ../.. -DMI_TRACK_VALGRIND=ON +``` + +This can also be combined with secure mode or debug mode. +You can then run your programs directly under valgrind: + +``` +> valgrind +``` + +If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), +you also need to tell `valgrind` to not intercept those calls itself, and use: + +``` +> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- +``` + +By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed +used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] +is called `--soname-synonyms`, this also +works when overriding with a static library or object file. Unfortunately, it is not possible to +dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`. +See also the `test/test-wrong.c` file to test with `valgrind`. + +Valgrind support is in its initial development -- please report any issues. + +[Valgrind]: https://valgrind.org/ +[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms + +### ASAN + +To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option: + +``` +> cmake ../.. -DMI_TRACK_ASAN=ON +``` + +This can also be combined with secure mode or debug mode. +You can then run your programs as:' + +``` +> ASAN_OPTIONS=verbosity=1 +``` + +When you link a program with an address sanitizer build of mimalloc, you should +generally compile that program too with the address sanitizer enabled. +For example, assuming you build mimalloc in `out/debug`: + +``` +clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address +``` + +Since the address sanitizer redirects the standard allocation functions, on some platforms (macOSX for example) +it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`. +Adress sanitizer support is in its initial development -- please report any issues. + +[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer + +### ETW + +Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations though +mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option. + +You can then capture an allocation trace using the Windows performance recorder (WPR), using the +`src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use: +``` +> wpr -start src\prim\windows\etw-mimalloc.wprp -filemode +> +> wpr -stop .etl +``` +and then open `.etl` in the Windows Performance Analyzer (WPA), or +use a tool like [TraceControl] that is specialized for analyzing mimalloc traces. + +[ETW]: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/event-tracing-for-windows +[TraceControl]: https://github.com/xinglonghe/TraceControl + + # Performance Last update: 2021-01-30 @@ -735,6 +789,16 @@ provided by the bot. You will only need to do this once across all repos using o # Older Release Notes +* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including + M1), improved performance for v2 for large objects, Python integration improvements, more standard + installation directories, various small fixes. +* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix + thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. +* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). +* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. +* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, + improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. + * 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, @@ -756,6 +820,7 @@ provided by the bot. You will only need to do this once across all repos using o more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. + * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. diff --git a/third-party/mimalloc/src/alloc-aligned.c b/third-party/mimalloc/src/alloc-aligned.c index 9fe82890..1cd809f1 100644 --- a/third-party/mimalloc/src/alloc-aligned.c +++ b/third-party/mimalloc/src/alloc-aligned.c @@ -6,9 +6,10 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap -#include // memset +#include // memset // ------------------------------------------------------ // Aligned Allocation @@ -46,7 +47,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block // zero afterwards as only the area from the aligned_p may be committed! - if (p == NULL) return NULL; + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -61,30 +62,30 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(adjust < alignment); void* aligned_p = (void*)((uintptr_t)p + adjust); if (aligned_p != p) { - mi_page_set_has_aligned(_mi_ptr_page(p), true); + mi_page_t* page = _mi_ptr_page(p); + mi_page_set_has_aligned(page, true); + _mi_padding_shrink(page, (mi_block_t*)p, adjust + size); } + // todo: expand padding if overallocated ? mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); - mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); - + mi_assert_internal(mi_usable_size(aligned_p)>=size); + mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); + // now zero the block if needed - if (zero && alignment > MI_ALIGNMENT_MAX) { - const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; - const ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; - if (zsize > 0) { _mi_memzero(aligned_p, zsize); } + if (alignment > MI_ALIGNMENT_MAX) { + // for the tracker, on huge aligned allocations only from the start of the large block is defined + mi_track_mem_undefined(aligned_p, size); + if (zero) { + _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); + } } - #if MI_TRACK_ENABLED if (p != aligned_p) { - mi_track_free_size(p, oversize); - mi_track_malloc(aligned_p, size, zero); - } - else { - mi_track_resize(aligned_p, oversize, size); - } - #endif + mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); + } return aligned_p; } @@ -92,21 +93,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. - mi_assert(alignment > 0); if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - /* - if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); - #endif - return NULL; - } - */ + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); @@ -146,9 +139,9 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments @@ -164,6 +157,11 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, } } +// ensure a definition is emitted +#if defined(__cplusplus) +static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; +#endif + // ------------------------------------------------------ // Aligned Allocation // ------------------------------------------------------ @@ -187,27 +185,27 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); + return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); + return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); + return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); + return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); + return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); + return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment); } @@ -225,19 +223,13 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne return p; // reallocation still fits, is aligned and not more than 50% waste } else { + // note: we don't zero allocate upfront so we only zero initialize the expanded part void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); if (newp != NULL) { if (zero && newsize > size) { - const mi_page_t* page = _mi_ptr_page(newp); - if (page->is_zero) { - // already zero initialized - mi_assert_expensive(mi_mem_is_zero(newp,newsize)); - } - else { - // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); - } + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); } _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); mi_free(p); // only free if successful @@ -282,25 +274,25 @@ mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_ } mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); + return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); + return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); + return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); + return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); + return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset); } mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); + return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment); } diff --git a/third-party/mimalloc/src/alloc-override.c b/third-party/mimalloc/src/alloc-override.c index 84a0d19d..873065dc 100644 --- a/third-party/mimalloc/src/alloc-override.c +++ b/third-party/mimalloc/src/alloc-override.c @@ -57,7 +57,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // functions that are interposed (or the interposing does not work) #define MI_OSX_IS_INTERPOSED - mi_decl_externc static size_t mi_malloc_size_checked(void *p) { + mi_decl_externc size_t mi_malloc_size_checked(void *p) { if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } @@ -245,11 +245,13 @@ extern "C" { int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } // `aligned_alloc` is only available when __USE_ISOC11 is defined. + // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check + // for it if using glibc. // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. - #if __USE_ISOC11 + #if !defined(__GLIBC__) || __USE_ISOC11 void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif #endif diff --git a/third-party/mimalloc/src/alloc-posix.c b/third-party/mimalloc/src/alloc-posix.c index e6505f29..225752fd 100644 --- a/third-party/mimalloc/src/alloc-posix.c +++ b/third-party/mimalloc/src/alloc-posix.c @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file // for convenience and used when overriding these functions. // ------------------------------------------------------------------------ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // ------------------------------------------------------ // Posix & Unix functions definitions @@ -56,7 +56,8 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. (issue#27) // if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment + if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment + // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; @@ -149,7 +150,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { else { *buf = mi_strdup(p); if (*buf==NULL) return ENOMEM; - if (size != NULL) *size = strlen(p); + if (size != NULL) *size = _mi_strlen(p); } return 0; } diff --git a/third-party/mimalloc/src/alloc.c b/third-party/mimalloc/src/alloc.c index 86453f15..ffc1747d 100644 --- a/third-party/mimalloc/src/alloc.c +++ b/third-party/mimalloc/src/alloc.c @@ -9,12 +9,12 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() - -#include // memset, strlen -#include // malloc, exit +#include // memset, strlen (for mi_strdup) +#include // malloc, abort #define MI_IN_ALLOC_C #include "alloc-override.c" @@ -37,21 +37,32 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->used++; page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + #if MI_DEBUG>3 + if (page->free_is_zero) { + mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block))); + } + #endif // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since - // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`) mi_track_mem_undefined(block, mi_page_usable_block_size(page)); // zero the block? note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); - _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); + if (page->free_is_zero) { + block->next = 0; + mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE); + } + else { + _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); + } } -#if (MI_DEBUG>0) && !MI_TRACK_ENABLED - if (!page->is_zero && !zero && !mi_page_is_huge(page)) { +#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + if (!zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) @@ -70,20 +81,22 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } #endif -#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED +#if MI_PADDING // && !MI_TRACK_ENABLED mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); - #if (MI_DEBUG>1) + #if (MI_DEBUG>=2) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess #endif + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); + #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)padding - delta; const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } } + #endif #endif return block; @@ -96,21 +109,23 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local #endif mi_assert(size <= MI_SMALL_SIZE_MAX); -#if (MI_PADDING) - if (size == 0) { - size = sizeof(void*); - } -#endif + #if (MI_PADDING) + if (size == 0) { size = sizeof(void*); } + #endif mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); -#if MI_STAT>1 + void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); + mi_track_malloc(p,size,zero); + #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } -#endif - mi_track_malloc(p,size,zero); + #endif + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } @@ -120,7 +135,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_h } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small(mi_get_default_heap(), size); + return mi_heap_malloc_small(mi_prim_get_default_heap(), size); } // The main allocation function @@ -133,14 +148,18 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); + mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif - mi_track_malloc(p,size,zero); + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } } @@ -154,12 +173,12 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); + return mi_heap_malloc(mi_prim_get_default_heap(), size); } // zero initialized small block mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true); + return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, true); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { @@ -167,7 +186,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { - return mi_heap_zalloc(mi_get_default_heap(),size); + return mi_heap_zalloc(mi_prim_get_default_heap(),size); } @@ -225,7 +244,7 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Check for heap block overflow by setting up padding at the end of the block // --------------------------------------------------------------------------- -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED +#if MI_PADDING // && !MI_TRACK_ENABLED static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); @@ -249,6 +268,40 @@ static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* bl return (ok ? bsize - delta : 0); } +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + padding->delta = (uint32_t)new_delta; + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); +} +#else +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +#if MI_PADDING && MI_PADDING_CHECK + static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { size_t bsize; size_t delta; @@ -281,39 +334,13 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { } } -// When a non-thread-local block is freed, it becomes part of the thread delayed free -// list that is freed later by the owning heap. If the exact usable size is too small to -// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) -// so it will later not trigger an overflow error in `mi_free_block`. -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); - if (!ok || (bsize - delta) >= min_size) return; // usually already enough space - mi_assert_internal(bsize >= min_size); - if (bsize < min_size) return; // should never happen - size_t new_delta = (bsize - min_size); - mi_assert_internal(new_delta < bsize); - mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - padding->delta = (uint32_t)new_delta; -} #else + static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { MI_UNUSED(page); MI_UNUSED(block); } -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(block); - return mi_page_usable_block_size(page); -} - -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - MI_UNUSED(page); - MI_UNUSED(block); - MI_UNUSED(min_size); -} #endif // only maintain stats for smaller objects if requested @@ -377,7 +404,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // The padding check may access the non-thread-owned page for the key values. // that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); - mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + _mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); @@ -395,8 +422,8 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc #endif } - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading + if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content memset(block, MI_DEBUG_FREED, mi_usable_size(block)); } #endif @@ -449,7 +476,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // owning thread can free a block directly if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN if (!mi_page_is_huge(page)) { // huge page content may be already decommitted memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); } @@ -481,8 +508,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); // stat_free may access the padding - mi_track_free(p); + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free_size(block, mi_page_usable_size_of(page,block)); _mi_free_block(page, is_local, block); } @@ -535,7 +562,7 @@ void mi_free(void* p) mi_attr_noexcept { if mi_unlikely(p == NULL) return; mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(is_local) { // thread-local free? @@ -545,10 +572,10 @@ void mi_free(void* p) mi_attr_noexcept if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); mi_stat_free(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif - mi_track_free(p); + mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned mi_block_set_next(page, block, page->local_free); page->local_free = block; if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) @@ -648,7 +675,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* } mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_calloc(mi_get_default_heap(),count,size); + return mi_heap_calloc(mi_prim_get_default_heap(),count,size); } // Uninitialized `calloc` @@ -659,7 +686,7 @@ mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, } mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_mallocn(mi_get_default_heap(),count,size); + return mi_heap_mallocn(mi_prim_get_default_heap(),count,size); } // Expand (or shrink) in place (or fail) @@ -682,9 +709,10 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) - // todo: adjust potential padding to reflect the new size? - mi_track_free_size(p, size); - mi_track_malloc(p,newsize,true); + mi_assert_internal(p!=NULL); + // todo: do not track as the usable size is still the same in the free; adjust potential padding? + // mi_track_resize(p,size,newsize) + // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); @@ -692,14 +720,15 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); + _mi_memzero((uint8_t*)newp + start, newsize - start); + } + else if (newsize == 0) { + ((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725) } if mi_likely(p != NULL) { - if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`.. - const size_t copysize = (newsize > size ? size : newsize); - mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. - _mi_memcpy_aligned(newp, p, copysize); - } + const size_t copysize = (newsize > size ? size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy(newp, p, copysize); mi_free(p); // only free the original pointer if successful } } @@ -736,24 +765,24 @@ mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_realloc(mi_get_default_heap(),p,newsize); + return mi_heap_realloc(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_reallocn(mi_get_default_heap(),p,count,size); + return mi_heap_reallocn(mi_prim_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_reallocf(mi_get_default_heap(),p,newsize); + return mi_heap_reallocf(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); + return mi_heap_rezalloc(mi_prim_get_default_heap(), p, newsize); } mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_recalloc(mi_get_default_heap(), p, count, size); + return mi_heap_recalloc(mi_prim_get_default_heap(), p, count, size); } @@ -774,7 +803,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const c } mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { - return mi_heap_strdup(mi_get_default_heap(), s); + return mi_heap_strdup(mi_prim_get_default_heap(), s); } // `strndup` using mi_malloc @@ -791,7 +820,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const } mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { - return mi_heap_strndup(mi_get_default_heap(),s,n); + return mi_heap_strndup(mi_prim_get_default_heap(),s,n); } #ifndef __wasi__ @@ -860,7 +889,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) #endif mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { - return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); + return mi_heap_realpath(mi_prim_get_default_heap(),fname,resolved_name); } #endif @@ -927,7 +956,7 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { +mi_decl_export mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { p = mi_heap_malloc(heap,size); @@ -936,22 +965,22 @@ static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool } static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { - return mi_heap_try_new(mi_get_default_heap(), size, nothrow); + return mi_heap_try_new(mi_prim_get_default_heap(), size, nothrow); } -mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; } mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { - return mi_heap_alloc_new(mi_get_default_heap(), size); + return mi_heap_alloc_new(mi_prim_get_default_heap(), size); } -mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc @@ -963,7 +992,7 @@ mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_he } mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - return mi_heap_alloc_new_n(mi_get_default_heap(), size, count); + return mi_heap_alloc_new_n(mi_prim_get_default_heap(), size, count); } @@ -1025,7 +1054,7 @@ void* _mi_externs[] = { (void*)&mi_heap_malloc, (void*)&mi_heap_zalloc, (void*)&mi_heap_malloc_small, - (void*)&mi_heap_alloc_new, - (void*)&mi_heap_alloc_new_n + // (void*)&mi_heap_alloc_new, + // (void*)&mi_heap_alloc_new_n }; #endif diff --git a/third-party/mimalloc/src/arena.c b/third-party/mimalloc/src/arena.c index 80dd4786..a04a04c8 100644 --- a/third-party/mimalloc/src/arena.c +++ b/third-party/mimalloc/src/arena.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2022, Microsoft Research, Daan Leijen +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -11,36 +11,22 @@ large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. -Currently arenas are only used to for huge OS page (1GiB) reservations, -or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas -which is sometimes needed for embedded devices or shared memory for example. -(We can also employ this with WASI or `sbrk` systems to reserve large arenas - on demand and be able to reuse them efficiently). +Arenas are used to for huge OS page (1GiB) reservations or for reserving +OS memory upfront which can be improve performance or is sometimes needed +on embedded devices. We can also employ this with WASI or `sbrk` systems +to reserve large arenas upfront and be able to reuse the memory more effectively. The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset -#include // ENOMEM +#include // ENOMEM #include "bitmap.h" // atomic bitmap - -// os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); - -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); - -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); - - /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ @@ -50,22 +36,25 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); typedef uintptr_t mi_block_info_t; #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB -#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) +#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific - bool exclusive; // only allow allocations if specifically for this arena + mi_memid_t memid; // memid of the memory area _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + size_t meta_size; // size of the arena structure itself (including its bitmaps) + mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node - bool is_zero_init; // is the arena zero initialized? - bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL - bool is_large; // large- or huge OS pages (always committed) + bool exclusive; // only allow allocations if specifically for this arena + bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -75,9 +64,10 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 +//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; + /* ----------------------------------------------------------- Arena id's - 0 is used for non-arena's (like OS memory) id = arena_index + 1 ----------------------------------------------------------- */ @@ -87,10 +77,7 @@ static size_t mi_arena_id_index(mi_arena_id_t id) { static mi_arena_id_t mi_arena_id_create(size_t arena_index) { mi_assert_internal(arena_index < MI_MAX_ARENAS); - mi_assert_internal(MI_MAX_ARENAS <= 126); - int id = (int)arena_index + 1; - mi_assert_internal(id >= 1 && id <= 127); - return id; + return (int)arena_index + 1; } mi_arena_id_t _mi_arena_id_none(void) { @@ -102,46 +89,123 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus (arena_id == req_arena_id)); } +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + if (memid.memkind == MI_MEM_ARENA) { + return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); + } + else { + return mi_arena_id_is_suitable(0, false, request_arena_id); + } +} + +bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) { + return (memid.memkind == MI_MEM_OS); +} /* ----------------------------------------------------------- - Arena allocations get a memory id where the lower 8 bits are - the arena id, and the upper bits the block index. + Arena allocations get a (currently) 16-bit memory id where the + lower 8 bits are the arena id, and the upper bits the block index. ----------------------------------------------------------- */ -// Use `0` as a special id for direct OS allocated memory. -#define MI_MEMID_OS 0 - -static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - mi_assert_internal(id >= 0 && id <= 0x7F); - return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 0x80 : 0)); -} - -static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - *bitmap_index = (arena_memid >> 8); - mi_arena_id_t id = (int)(arena_memid & 0x7F); - *arena_index = mi_arena_id_index(id); - return ((arena_memid & 0x80) != 0); -} - -bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_arena_id_t id = (int)(arena_memid & 0x7F); - bool exclusive = ((arena_memid & 0x80) != 0); - return mi_arena_id_is_suitable(id, exclusive, request_arena_id); -} - static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } +static size_t mi_arena_block_size(size_t bcount) { + return (bcount * MI_ARENA_BLOCK_SIZE); +} + +static size_t mi_arena_size(mi_arena_t* arena) { + return mi_arena_block_size(arena->block_count); +} + +static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); + memid.mem.arena.id = id; + memid.mem.arena.block_index = bitmap_index; + memid.mem.arena.is_exclusive = is_exclusive; + return memid; +} + +static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + *arena_index = mi_arena_id_index(memid.mem.arena.id); + *bitmap_index = memid.mem.arena.block_index; + return memid.mem.arena.is_exclusive; +} + + + +/* ----------------------------------------------------------- + Special static area for mimalloc internal structures + to avoid OS calls (for example, for the arena metadata) +----------------------------------------------------------- */ + +#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*MI_KiB) // 8 KiB on 64-bit + +static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; +static _Atomic(size_t) mi_arena_static_top; + +static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { + *memid = _mi_memid_none(); + if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; + if ((mi_atomic_load_relaxed(&mi_arena_static_top) + size) > MI_ARENA_STATIC_MAX) return NULL; + + // try to claim space + if (alignment == 0) { alignment = 1; } + const size_t oversize = size + alignment - 1; + if (oversize > MI_ARENA_STATIC_MAX) return NULL; + const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize); + size_t top = oldtop + oversize; + if (top > MI_ARENA_STATIC_MAX) { + // try to roll back, ok if this fails + mi_atomic_cas_strong_acq_rel(&mi_arena_static_top, &top, oldtop); + return NULL; + } + + // success + *memid = _mi_memid_create(MI_MEM_STATIC); + const size_t start = _mi_align_up(oldtop, alignment); + uint8_t* const p = &mi_arena_static[start]; + _mi_memzero(p, size); + return p; +} + +static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { + *memid = _mi_memid_none(); + + // try static + void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid); + if (p != NULL) return p; + + // or fall back to the OS + return _mi_os_alloc(size, memid, stats); +} + +static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) { + if (mi_memkind_is_os(memid.memkind)) { + _mi_os_free(p, size, memid, stats); + } + else { + mi_assert(memid.memkind == MI_MEM_STATIC); + } +} + +static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { + return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); +} + + /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) + +// claim the `blocks_inuse` bits +static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -152,92 +216,116 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool commit, mi_memid_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; mi_bitmap_index_t bitmap_index; - if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); - *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *large = arena->is_large; - *is_pinned = (arena->is_large || !arena->allow_decommit); + // claimed it! + void* p = mi_arena_block_start(arena, bitmap_index); + *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); + memid->is_pinned = arena->memid.is_pinned; + + // none of the claimed blocks should be scheduled for a decommit + if (arena->blocks_purge != NULL) { + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); + } + + // set the dirty bits (todo: no need for an atomic op here?) + if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + } + + // set commit state if (arena->blocks_committed == NULL) { // always committed - *commit = true; + memid->initially_committed = true; } - else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + else if (commit) { + // commit requested, but the range may not be committed as a whole: ensure it is committed now + memid->initially_committed = true; bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { - bool commit_zero; - _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + bool commit_zero = false; + if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { + memid->initially_committed = false; + } + else { + if (commit_zero) { memid->initially_zero = true; } + } } } else { // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } + return p; } -// allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, - bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) +// allocate in a speficic arena +static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - const size_t bcount = mi_block_count_of_size(size); - if mi_likely(max_arena == 0) return NULL; - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + const size_t bcount = mi_block_count_of_size(size); + const size_t arena_index = mi_arena_id_index(arena_id); + mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); + mi_assert_internal(size <= mi_arena_block_size(bcount)); + + // Check arena suitability + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (!allow_large && arena->is_large) return NULL; + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); + if (match_numa_node) { if (!numa_suitable) return NULL; } + else { if (numa_suitable) return NULL; } + } - size_t arena_index = mi_arena_id_index(req_arena_id); - if (arena_index < MI_MAX_ARENAS) { - // try a specific arena if requested - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); - if ((arena != NULL) && - (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); + // try to allocate + void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld); + mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment)); + return p; +} + + +// allocate from an arena with fallback to the OS +static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +{ + MI_UNUSED(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + if mi_likely(max_arena == 0) return NULL; + + if (req_arena_id != _mi_arena_id_none()) { + // try a specific arena if requested + if (mi_arena_id_index(req_arena_id) < max_arena) { + void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } else { // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; } // try from another numa node instead.. - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); + if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } @@ -245,75 +333,294 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size return NULL; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +// try to reserve a fresh arena space +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) { - mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; - *is_zero = false; - *is_pinned = false; + if (_mi_preloading()) return false; // use OS only while pre loading + if (req_arena_id != _mi_arena_id_none()) return false; + + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); + if (arena_count > (MI_MAX_ARENAS - 4)) return false; + + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + if (arena_reserve == 0) return false; + + if (!_mi_os_has_virtual_reserve()) { + arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example) + } + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); + if (arena_count >= 8 && arena_count <= 128) { + arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially + } + if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size + + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + + return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0); +} + + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = _mi_memid_none(); - bool default_large = false; - if (large == NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - if (p != NULL) return p; + void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + + // otherwise, try to first eagerly reserve a new arena + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } + } } - // finally, fall back to the OS + // if we cannot use OS allocation, return NULL if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } - *is_zero = true; - *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); - if (p != NULL) { *is_pinned = *large; } - return p; + + // finally, fall back to the OS + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); + } } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); } + void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; size_t arena_index = mi_arena_id_index(arena_id); if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE; + if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } return arena->start; } + +/* ----------------------------------------------------------- + Arena purge +----------------------------------------------------------- */ + +static long mi_arena_purge_delay(void) { + // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay + return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); +} + +// reset or decommit in an arena and update the committed/decommit bitmaps +// assumes we own the area (i.e. blocks_in_use is claimed by us) +static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_committed != NULL); + mi_assert_internal(arena->blocks_purge != NULL); + mi_assert_internal(!arena->memid.is_pinned); + const size_t size = mi_arena_block_size(blocks); + void* const p = mi_arena_block_start(arena, bitmap_idx); + bool needs_recommit; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + // all blocks are committed, we can purge freely + needs_recommit = _mi_os_purge(p, size, stats); + } + else { + // some blocks are not committed -- this can happen when a partially committed block is freed + // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // and also undo the decommit stats (as it was already adjusted) + mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); + _mi_stat_increase(&stats->committed, size); + } + + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); + // update committed bitmap + if (needs_recommit) { + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } +} + +// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. +// Note: assumes we (still) own the area as we may purge immediately +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_purge != NULL); + const long delay = mi_arena_purge_delay(); + if (delay < 0) return; // is purging allowed at all? + + if (_mi_preloading() || delay == 0) { + // decommit directly + mi_arena_purge(arena, bitmap_idx, blocks, stats); + } + else { + // schedule decommit + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire != 0) { + mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay + } + else { + mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); + } + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + } +} + +// purge a range of blocks +// return true if the full range was purged. +// assumes we own the area (i.e. blocks_in_use is claimed by us) +static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { + const size_t endidx = startidx + bitlen; + size_t bitidx = startidx; + bool all_purged = false; + while (bitidx < endidx) { + // count consequetive ones in the purge mask + size_t count = 0; + while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { + count++; + } + if (count > 0) { + // found range to be purged + const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx); + mi_arena_purge(arena, range_idx, count, stats); + if (count == bitlen) { + all_purged = true; + } + } + bitidx += (count+1); // +1 to skip the zero bit (or end) + } + return all_purged; +} + +// returns true if anything was purged +static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) +{ + if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire == 0) return false; + if (!force && expire > now) return false; + + // reset expire (if not already set concurrently) + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0); + + // potential purges scheduled, walk through the bitmap + bool any_purged = false; + bool full_purge = true; + for (size_t i = 0; i < arena->field_count; i++) { + size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); + if (purge != 0) { + size_t bitidx = 0; + while (bitidx < MI_BITMAP_FIELD_BITS) { + // find consequetive range of ones in the purge mask + size_t bitlen = 0; + while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { + bitlen++; + } + // try to claim the longest range of corresponding in_use bits + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); + while( bitlen > 0 ) { + if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { + break; + } + bitlen--; + } + // actual claimed bits at `in_use` + if (bitlen > 0) { + // read purge again now that we have the in_use bits + purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); + if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { + full_purge = false; + } + any_purged = true; + // release the claimed `in_use` bits again + _mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); + } + bitidx += (bitlen+1); // +1 to skip the zero (or end) + } // while bitidx + } // purge != 0 + } + // if not fully purged, make sure to purge again in the future + if (!full_purge) { + const long delay = mi_arena_purge_delay(); + mi_msecs_t expected = 0; + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); + } + return any_purged; +} + +static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { + if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled + + const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); + if (max_arena == 0) return; + + // allow only one thread to purge at a time + static mi_atomic_guard_t purge_guard; + mi_atomic_guard(&purge_guard) + { + mi_msecs_t now = _mi_clock_now(); + size_t max_purge_count = (visit_all ? max_arena : 1); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (mi_arena_try_purge(arena, now, force, stats)) { + if (max_purge_count <= 1) break; + max_purge_count--; + } + } + } + } +} + + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); + mi_assert_internal(committed_size <= size); if (p==NULL) return; if (size==0) return; - - if (memid == MI_MEMID_OS) { + const bool all_committed = (committed_size == size); + + if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through - _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); + if (!all_committed && committed_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) + _mi_stat_decrease(&stats->committed, committed_size); + } + _mi_os_free(p, size, memid, stats); } - else { + else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena - mi_assert_internal(align_offset == 0); size_t arena_idx; size_t bitmap_idx; mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const size_t blocks = mi_block_count_of_size(size); + // checks if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -324,24 +631,100 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } + + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p,size); + // potentially decommit - if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + if (arena->memid.is_pinned || arena->blocks_committed == NULL) { + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); - _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails - _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_assert_internal(arena->blocks_purge != NULL); + + if (!all_committed) { + // mark the entire range as no longer committed (so we recommit the full range when re-using) + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); + if (committed_size > 0) { + // if partially committed, adjust the committed stats (is it will be recommitted when re-using) + // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + _mi_stat_decrease(&stats->committed, committed_size); + } + // note: if not all committed, it may be that the purge will reset/decommit the entire range + // that contains already decommitted parts. Since purge consistently uses reset or decommit that + // works (as we should never reset decommitted parts). + } + // (delay) purge the entire range + mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } + // and make it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { - _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); + _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size); return; }; } + else { + // arena was none, external, or static; nothing to do + mi_assert_internal(memid.memkind < MI_MEM_OS); + } + + // purge expired decommits + mi_arenas_try_purge(false, false, stats); } +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +static void mi_arenas_unsafe_destroy(void) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + size_t new_max_arena = 0; + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { + mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); + } + else { + new_max_arena = i; + } + mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main); + } + } + + // try to lower the max arena. + size_t expected = max_arena; + mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); +} + +// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired +void _mi_arena_collect(bool force_purge, mi_stats_t* stats) { + mi_arenas_try_purge(force_purge, true /* visit all */, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { + mi_arenas_unsafe_destroy(); + _mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas +} + +// Is a pointer inside any of our arenas? +bool _mi_arena_contains(const void* p) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + return true; + } + } + return false; +} + + /* ----------------------------------------------------------- Add an arena. ----------------------------------------------------------- */ @@ -350,53 +733,58 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - if (arena_id != NULL) *arena_id = -1; + if (arena_id != NULL) { *arena_id = -1; } size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); arena->id = mi_arena_id_create(i); - if (arena_id != NULL) *arena_id = arena->id; + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena->id; } return true; } -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { - mi_assert_internal(is_committed); - is_committed = true; + mi_assert_internal(memid.initially_committed && memid.is_pinned); } const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (is_committed ? 2 : 3); + const size_t bitmaps = (memid.is_pinned ? 2 : 4); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + mi_memid_t meta_memid; + mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; - + + // already zero'd due to os_alloc + // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); + arena->memid = memid; arena->exclusive = exclusive; + arena->meta_size = asize; + arena->meta_memid = meta_memid; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; - arena->is_zero_init = is_zero; - arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory + arena->purge_expire = 0; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - // the bitmaps are already zero initialized due to os_alloc + arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap + arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? - if (arena->blocks_committed != NULL && is_committed) { + if (arena->blocks_committed != NULL && arena->memid.initially_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } + // and claim leftover blocks if needed (so we never allocate there) ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; mi_assert_internal(post >= 0); @@ -405,32 +793,42 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - return mi_arena_add(arena, arena_id); } +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); +} + // Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept -{ +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - bool large = allow_large; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); - if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) { - _mi_os_free_ex(start, size, commit, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); + mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + if (start == NULL) return ENOMEM; + const bool is_large = memid.is_pinned; // todo: use separate is_large field? + if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : ""); + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); return 0; } + +// Manage a range of regular OS memory bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { - return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL); + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); } +// Reserve a range of regular OS memory int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); } @@ -480,15 +878,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) { - _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); + if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + _mi_os_free(p, hsize, memid, &_mi_stats_main); return ENOMEM; } return 0; @@ -534,3 +933,4 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } + diff --git a/third-party/mimalloc/src/bitmap.c b/third-party/mimalloc/src/bitmap.c index 4ea9f4af..a13dbe15 100644 --- a/third-party/mimalloc/src/bitmap.c +++ b/third-party/mimalloc/src/bitmap.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -11,14 +11,13 @@ represeted as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) The `_across` postfixed functions do allow sequences that can cross over between the fields. (This is used in arena allocation) ---------------------------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #include "bitmap.h" /* ----------------------------------------------------------- @@ -63,12 +62,12 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons // scan linearly for a free range of zero bits while (bitidx <= bitidx_max) { - const size_t mapm = map & m; + const size_t mapm = (map & m); if (mapm == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - const size_t newmap = map | m; + const size_t newmap = (map | m); mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } @@ -81,7 +80,8 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons else { // on to the next bit range #ifdef MI_HAVE_FAST_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); + mi_assert_internal(mapm != 0); + const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; @@ -100,7 +100,7 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap + if (idx >= bitmap_fields) { idx = 0; } // wrap if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -127,14 +127,6 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap return false; } -/* -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { - return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); -} -*/ - // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { @@ -143,7 +135,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -157,7 +149,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } return ((prev & mask) == 0); } @@ -167,11 +159,28 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); + const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } return ((field & mask) == mask); } +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); + do { + if ((expected & mask) != 0) return false; + } + while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); + mi_assert_internal((expected & mask) == 0); + return true; +} + + bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); } @@ -190,6 +199,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. +// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -200,9 +210,9 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - + // scan ahead size_t found = initial; size_t mask = 0; // mask bits for the final field @@ -210,25 +220,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit field++; map = mi_atomic_load_relaxed(field); const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; + if ((map & mask) != 0) return false; // some part is already claimed found += mask_bits; } mi_assert_internal(field < &bitmap[bitmap_fields]); - // found range of zeros up to the final field; mask contains mask in the final field - // now claim it atomically + // we found a range of contiguous zeros up to the final field; mask contains mask in the final field + // now try to claim the range atomically mi_bitmap_field_t* const final_field = field; const size_t final_mask = mask; mi_bitmap_field_t* const initial_field = &bitmap[idx]; - const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; + const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); // initial field size_t newmap; field = initial_field; map = mi_atomic_load_relaxed(field); do { - newmap = map | initial_mask; + newmap = (map | initial_mask); if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); @@ -243,31 +255,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); do { - newmap = map | final_mask; + newmap = (map | final_mask); if ((map & final_mask) != 0) { goto rollback; } } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! - *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); return true; rollback: // roll back intermediate fields + // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; map = MI_BITMAP_FIELD_FULL; mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } - if (field == initial_field) { + if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) map = mi_atomic_load_relaxed(field); do { mi_assert_internal((map & initial_mask) == initial_mask); - newmap = map & ~initial_mask; + newmap = (map & ~initial_mask); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } // retry? (we make a recursive call instead of goto to be able to use const declarations) - if (retries < 4) { + if (retries <= 2) { return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); } else { @@ -280,17 +293,22 @@ rollback: // Starts at idx, and wraps around to search in all `bitmap_fields` fields. bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(count > 0); - if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + if (count <= 2) { + // we don't bother with crossover fields for small counts + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + } + + // visit the fields size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - // try to claim inside the field + if (idx >= bitmap_fields) { idx = 0; } // wrap + // first try to claim inside a field if (count <= MI_BITMAP_FIELD_BITS) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } - // try to claim across fields + // if that fails, then try to claim across fields if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { return true; } @@ -300,7 +318,7 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm // Helper for masks across fields; returns the mid count, post_mask may be 0 static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { - MI_UNUSED_RELEASE(bitmap_fields); + MI_UNUSED(bitmap_fields); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { *pre_mask = mi_bitmap_mask_(count, bitidx); @@ -333,14 +351,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part if ((prev & pre_mask) != pre_mask) all_one = false; while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part if ((prev & mid_mask) != mid_mask) all_one = false; } if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); + prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part if ((prev & post_mask) != post_mask) all_one = false; } return all_one; @@ -370,7 +388,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } - if (pany_zero != NULL) *pany_zero = any_zero; + if (pany_zero != NULL) { *pany_zero = any_zero; } return all_zero; } @@ -399,7 +417,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; } - if (pany_ones != NULL) *pany_ones = any_ones; + if (pany_ones != NULL) { *pany_ones = any_ones; } return all_ones; } diff --git a/third-party/mimalloc/src/bitmap.h b/third-party/mimalloc/src/bitmap.h index 0c501ec1..0a765c71 100644 --- a/third-party/mimalloc/src/bitmap.h +++ b/third-party/mimalloc/src/bitmap.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -80,6 +80,10 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); diff --git a/third-party/mimalloc/src/heap.c b/third-party/mimalloc/src/heap.c index ac2d042b..58520ddf 100644 --- a/third-party/mimalloc/src/heap.c +++ b/third-party/mimalloc/src/heap.c @@ -6,8 +6,9 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap #include // memset, memcpy @@ -30,15 +31,18 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void // visit all pages #if MI_DEBUG>1 size_t total = heap->page_count; - #endif size_t count = 0; + #endif + for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue mi_assert_internal(mi_page_heap(page) == heap); + #if MI_DEBUG>1 count++; + #endif if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue } @@ -150,8 +154,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list) - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list) + // note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); // collect segment local caches @@ -159,13 +163,10 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_segment_thread_collect(&heap->tld->segments); } - // decommit in global segment caches - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); - // collect regions on program-exit (or shared library unload) if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - //_mi_mem_collect(&heap->tld->os); + _mi_thread_data_collect(); // collect thread data cache + _mi_arena_collect(true /* force purge */, &heap->tld->stats); } } @@ -178,7 +179,7 @@ void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { } void mi_collect(bool force) mi_attr_noexcept { - mi_heap_collect(mi_get_default_heap(), force); + mi_heap_collect(mi_prim_get_default_heap(), force); } @@ -188,9 +189,14 @@ void mi_collect(bool force) mi_attr_noexcept { mi_heap_t* mi_heap_get_default(void) { mi_thread_init(); - return mi_get_default_heap(); + return mi_prim_get_default_heap(); } +static bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_prim_get_default_heap()); +} + + mi_heap_t* mi_heap_get_backing(void) { mi_heap_t* heap = mi_heap_get_default(); mi_assert_internal(heap!=NULL); @@ -200,16 +206,16 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? - if (heap==NULL) return NULL; + if (heap == NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe @@ -223,7 +229,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return mi_heap_new_in_arena(_mi_arena_id_none()); } -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { return _mi_arena_memid_is_suitable(memid, heap->arena_id); } @@ -237,9 +243,6 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); // TODO: copy full empty heap instead? memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); -#ifdef MI_MEDIUM_DIRECT - memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium)); -#endif _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); heap->thread_delayed_free = NULL; heap->page_count = 0; @@ -330,6 +333,14 @@ void _mi_heap_destroy_pages(mi_heap_t* heap) { mi_heap_reset_pages(heap); } +#if MI_TRACK_HEAP_DESTROY +static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { + MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); + mi_track_free_size(block,mi_usable_size(block)); + return true; +} +#endif + void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); @@ -341,13 +352,18 @@ void mi_heap_destroy(mi_heap_t* heap) { mi_heap_delete(heap); } else { + // track all blocks as freed + #if MI_TRACK_HEAP_DESTROY + mi_heap_visit_blocks(heap, true, mi_heap_track_block_free, NULL); + #endif // free all pages _mi_heap_destroy_pages(heap); mi_heap_free(heap); } } -void _mi_heap_destroy_all(void) { +// forcefully destroy all heaps in the current thread +void _mi_heap_unsafe_destroy_all(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* curr = bheap->tld->heaps; while (curr != NULL) { @@ -425,7 +441,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_prim_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -475,7 +491,7 @@ bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { } bool mi_check_owned(const void* p) { - return mi_heap_check_owned(mi_get_default_heap(), p); + return mi_heap_check_owned(mi_prim_get_default_heap(), p); } /* ----------------------------------------------------------- @@ -518,9 +534,13 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; memset(free_map, 0, sizeof(free_map)); + #if MI_DEBUG>1 size_t free_count = 0; + #endif for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + #if MI_DEBUG>1 free_count++; + #endif mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; mi_assert_internal(offset % bsize == 0); @@ -533,7 +553,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->capacity == (free_count + page->used)); // walk through all blocks skipping the free ones + #if MI_DEBUG>1 size_t used_count = 0; + #endif for (size_t i = 0; i < page->capacity; i++) { size_t bitidx = (i / sizeof(uintptr_t)); size_t bit = i - (bitidx * sizeof(uintptr_t)); @@ -542,7 +564,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v i += (sizeof(uintptr_t) - 1); // skip a run of free blocks } else if ((m & ((uintptr_t)1 << bit)) == 0) { + #if MI_DEBUG>1 used_count++; + #endif uint8_t* block = pstart + (i * bsize); if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; } diff --git a/third-party/mimalloc/src/init.c b/third-party/mimalloc/src/init.c index c416208c..b1db14c5 100644 --- a/third-party/mimalloc/src/init.c +++ b/third-party/mimalloc/src/init.c @@ -5,14 +5,16 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" #include // memcpy, memset #include // atexit + // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, + 0, false, false, false, 0, // capacity 0, // reserved capacity { 0 }, // flags @@ -22,7 +24,7 @@ const mi_page_t _mi_page_empty = { 0, // used 0, // xblock_size NULL, // local_free - #if MI_ENCODE_FREELIST + #if (MI_PADDING || MI_ENCODE_FREELIST) { 0, 0 }, #endif MI_ATOMIC_VAR_INIT(0), // xthread_free @@ -35,6 +37,7 @@ const mi_page_t _mi_page_empty = { #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) +#if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) @@ -42,7 +45,9 @@ const mi_page_t _mi_page_empty = { #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif - +#else +#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" +#endif // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } @@ -77,8 +82,9 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() @@ -130,6 +136,10 @@ mi_decl_cache_align static const mi_tld_t tld_empty = { { MI_STATS_NULL } // stats }; +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + return _mi_prim_thread_id(); +} + // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; @@ -193,6 +203,7 @@ mi_heap_t* _mi_heap_main_get(void) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_memid_t memid; } mi_thread_data_t; @@ -201,30 +212,44 @@ typedef struct mi_thread_data_s { // destroy many OS threads, this may causes too much overhead // per thread so we maintain a small cache of recently freed metadata. -#define TD_CACHE_SIZE (8) +#define TD_CACHE_SIZE (16) static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; -static mi_thread_data_t* mi_thread_data_alloc(void) { +static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache - mi_thread_data_t* td; + bool is_zero = false; + mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { + // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - return td; + break; } } } - // if that fails, allocate directly from the OS - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + + // if that fails, allocate as meta data if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } } + if (td != NULL) { + td->memid = memid; + is_zero = memid.initially_zero; + } + } + + if (td != NULL && !is_zero) { + _mi_memzero_aligned(td, sizeof(*td)); } return td; } @@ -241,17 +266,17 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { } } // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); + _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); } -static void mi_thread_data_collect(void) { +void _mi_thread_data_collect(void) { // free all thread metadata from the cache for (int i = 0; i < TD_CACHE_SIZE; i++) { mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); + _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); } } } @@ -259,20 +284,19 @@ static void mi_thread_data_collect(void) { // Initialize the thread local default heap, called from `mi_thread_init` static bool _mi_heap_init(void) { - if (mi_heap_is_initialized(mi_get_default_heap())) return true; + if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); _mi_heap_set_default_direct(&_mi_heap_main); - //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = mi_thread_data_alloc(); + mi_thread_data_t* td = mi_thread_data_zalloc(); if (td == NULL) return false; - // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); @@ -334,7 +358,6 @@ static bool _mi_heap_done(mi_heap_t* heap) { mi_thread_data_free((mi_thread_data_t*)heap); } else { - mi_thread_data_collect(); // free cached thread metadata #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. Issue #207 @@ -363,54 +386,12 @@ static bool _mi_heap_done(mi_heap_t* heap) { // to set up the thread local keys. // -------------------------------------------------------- -static void _mi_thread_done(mi_heap_t* default_heap); - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain -#elif defined(_WIN32) && !defined(MI_SHARED_LIB) - // use thread local storage keys to detect thread ending - #include - #include - #if (_WIN32_WINNT < 0x600) // before Windows Vista - WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); - WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); - WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); - WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); - #endif - static DWORD mi_fls_key = (DWORD)(-1); - static void NTAPI mi_fls_done(PVOID value) { - mi_heap_t* heap = (mi_heap_t*)value; - if (heap != NULL) { - _mi_thread_done(heap); - FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 - } - } -#elif defined(MI_USE_PTHREADS) - // use pthread local storage keys to detect thread ending - // (and used with MI_TLS_PTHREADS for the default heap) - pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); - static void mi_pthread_done(void* value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); - } -#elif defined(__wasi__) -// no pthreads in the WebAssembly Standard Interface -#else - #pragma message("define a way to call mi_thread_done when a thread is done") -#endif - // Set up handlers so `mi_thread_done` is called automatically static void mi_process_setup_auto_thread_done(void) { static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_fls_key = FlsAlloc(&mi_fls_done); - #elif defined(MI_USE_PTHREADS) - mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); - pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); - #endif + _mi_prim_thread_init_auto_done(); _mi_heap_set_default_direct(&_mi_heap_main); } @@ -442,13 +423,26 @@ void mi_thread_init(void) mi_attr_noexcept } void mi_thread_done(void) mi_attr_noexcept { - _mi_thread_done(mi_get_default_heap()); + _mi_thread_done(NULL); } -static void _mi_thread_done(mi_heap_t* heap) { +void _mi_thread_done(mi_heap_t* heap) +{ + // calling with NULL implies using the default heap + if (heap == NULL) { + heap = mi_prim_get_default_heap(); + if (heap == NULL) return; + } + + // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699) + if (!mi_heap_is_initialized(heap)) { + return; + } + + // adjust stats mi_atomic_decrement_relaxed(&thread_count); _mi_stat_decrease(&_mi_stats_main.threads, 1); - + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->thread_id != _mi_thread_id()) return; @@ -459,7 +453,7 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); #if defined(MI_TLS_SLOT) - mi_tls_slot_set(MI_TLS_SLOT,heap); + mi_prim_tls_slot_set(MI_TLS_SLOT,heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) *mi_tls_pthread_heap_slot() = heap; #elif defined(MI_TLS_PTHREAD) @@ -470,16 +464,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_assert_internal(mi_fls_key != 0); - FlsSetValue(mi_fls_key, heap); - #elif defined(MI_USE_PTHREADS) - if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD - pthread_setspecific(_mi_heap_default_key, heap); - } - #endif + _mi_prim_thread_associate_default_heap(heap); } @@ -492,7 +477,7 @@ static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. -bool _mi_preloading(void) { +bool mi_decl_noinline _mi_preloading(void) { return os_preloading; } @@ -535,9 +520,9 @@ static void mi_allocator_done(void) { // Called once by the process loader static void mi_process_load(void) { mi_heap_main_init(); - #if defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; - MI_UNUSED(dummy); + if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697) #endif os_preloading = false; mi_assert_internal(_mi_is_main_thread()); @@ -568,7 +553,7 @@ static void mi_detect_cpu_features(void) { // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) int32_t cpu_info[4]; __cpuid(cpu_info, 7); - _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see + _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see } #else static void mi_detect_cpu_features(void) { @@ -579,29 +564,37 @@ static void mi_detect_cpu_features(void) { // Initialize the process; called by thread_init or the process loader void mi_process_init(void) mi_attr_noexcept { // ensure we are called once - if (_mi_process_is_initialized) return; - _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); + static mi_atomic_once_t process_init; + #if _MSC_VER < 1920 + mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + #endif + if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; + _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); mi_process_setup_auto_thread_done(); mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); - #if (MI_DEBUG) + #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif _mi_verbose_message("secure level: %d\n", MI_SECURE); _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); + #if MI_TSAN + _mi_verbose_message("thread santizer enabled\n"); + #endif mi_thread_init(); - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - // When building as a static lib the FLS cleanup happens to early for the main thread. + #if defined(_WIN32) + // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. - FlsSetValue(mi_fls_key, NULL); + _mi_prim_thread_associate_default_heap(NULL); #endif mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); @@ -629,12 +622,11 @@ static void mi_cdecl mi_process_done(void) { if (process_done) return; process_done = true; - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 - #endif - + // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread + _mi_prim_thread_done_auto_done(); + #ifndef MI_SKIP_COLLECT_ON_EXIT - #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) + #if (MI_DEBUG || !defined(MI_SHARED_LIB)) // free all memory if possible on process exit. This is not needed for a stand-alone process // but should be done if mimalloc is statically linked into another shared library which // is repeatedly loaded/unloaded, see issue #281. @@ -646,8 +638,9 @@ static void mi_cdecl mi_process_done(void) { // since after process_done there might still be other code running that calls `free` (like at_exit routines, // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { - _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments + mi_collect(true /* force */); + _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) + _mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { diff --git a/third-party/mimalloc/src/options.c b/third-party/mimalloc/src/options.c index 0a82ca65..345b560e 100644 --- a/third-party/mimalloc/src/options.c +++ b/third-party/mimalloc/src/options.c @@ -5,19 +5,14 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_out_stderr -#include -#include // strtol -#include // strncpy, strncat, strlen, strstr -#include // toupper +#include // FILE +#include // abort #include -#ifdef _MSC_VER -#pragma warning(disable:4996) // strncpy, strncat -#endif - static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) @@ -28,9 +23,6 @@ int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } -#ifdef _WIN32 -#include -#endif // -------------------------------------------------------- // Options @@ -49,7 +41,7 @@ typedef struct mi_option_desc_s { mi_init_t init; // is it initialized yet? (from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix - const char* legacy_name; // potential legacy v1.x option name + const char* legacy_name; // potential legacy option name } mi_option_desc_t; #define MI_OPTION(opt) mi_option_##opt, #opt, NULL @@ -66,36 +58,38 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(show_stats) }, { 0, UNINIT, MI_OPTION(verbose) }, - // Some of the following options are experimental and not all combinations are valid. Use with care. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, - { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) }, - { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages - { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N + // the following options are experimental and not all combinations make sense. + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux) + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) + { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, - { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, - #if defined(__NetBSD__) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - #elif defined(_WIN32) - { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) +#if defined(__NetBSD__) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed +#else + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) +#endif + { 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try. + { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + #if (MI_INTPTR_SIZE>4) + { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, - { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, }; static void mi_option_init(mi_option_desc_t* desc); @@ -133,6 +127,12 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma return (x < min ? min : (x > max ? max : x)); } +mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { + mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve); + long x = mi_option_get(option); + return (x < 0 ? 0 : (size_t)x * MI_KiB); +} + void mi_option_set(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); if (option < 0 || option >= _mi_option_last) return; @@ -171,41 +171,11 @@ void mi_option_disable(mi_option_t option) { mi_option_set_enabled(option,false); } - static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); - if (msg == NULL) return; - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so we use direct console output. - if (!_mi_preloading()) { - // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console - static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; - if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; - hcon = GetStdHandle(STD_ERROR_HANDLE); - hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); - } - const size_t len = strlen(msg); - if (len > 0 && len < UINT32_MAX) { - DWORD written = 0; - if (hconIsConsole) { - WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); - } - else if (hcon != INVALID_HANDLE_VALUE) { - // use direct write if stderr was redirected - WriteFile(hcon, msg, (DWORD)len, &written, NULL); - } - else { - // finally fall back to fputs after all - fputs(msg, stderr); - } - } + if (msg != NULL && msg[0] != 0) { + _mi_prim_out_stderr(msg); } - #else - fputs(msg, stderr); - #endif } // Since an output function can be registered earliest in the `main` @@ -222,7 +192,7 @@ static void mi_cdecl mi_out_buf(const char* msg, void* arg) { MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; - size_t n = strlen(msg); + size_t n = _mi_strlen(msg); if (n==0) return; // claim space size_t start = mi_atomic_add_acq_rel(&out_len, n); @@ -279,7 +249,7 @@ void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { } // add stderr to the delayed output after the module is loaded -static void mi_add_stderr_output() { +static void mi_add_stderr_output(void) { mi_assert_internal(mi_out_default == NULL); mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output @@ -314,7 +284,7 @@ static mi_decl_noinline void mi_recurse_exit_prim(void) { static bool mi_recurse_enter(void) { #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) - if (_mi_preloading()) return true; + if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } @@ -327,7 +297,7 @@ static void mi_recurse_exit(void) { } void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + if (out==NULL || (void*)out==(void*)stdout || (void*)out==(void*)stderr) { // TODO: use mi_out_stderr for stderr? if (!mi_recurse_enter()) return; out = mi_out_get_default(&arg); if (prefix != NULL) out(prefix, arg); @@ -359,7 +329,7 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { } static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { - if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { + if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); mi_vfprintf(out, arg, tprefix, fmt, args); @@ -464,8 +434,20 @@ void _mi_error_message(int err, const char* fmt, ...) { // -------------------------------------------------------- // Initialize options by checking the environment // -------------------------------------------------------- +char _mi_toupper(char c) { + if (c >= 'a' && c <= 'z') return (c - 'a' + 'A'); + else return c; +} -static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { +int _mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (_mi_toupper(*s) != _mi_toupper(*t)) break; + } + return (n == 0 ? 0 : *s - *t); +} + +void _mi_strlcpy(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // copy until end of src, or when dest is (almost) full while (*src != 0 && dest_size > 1) { @@ -476,7 +458,7 @@ static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { *dest = 0; } -static void mi_strlcat(char* dest, const char* src, size_t dest_size) { +void _mi_strlcat(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // find end of string in the dest buffer while (*dest != 0 && dest_size > 1) { @@ -484,7 +466,21 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { dest_size--; } // and catenate - mi_strlcpy(dest, src, dest_size); + _mi_strlcpy(dest, src, dest_size); +} + +size_t _mi_strlen(const char* s) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0) { len++; } + return len; +} + +size_t _mi_strnlen(const char* s, size_t max_len) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0 && len < max_len) { len++; } + return len; } #ifdef MI_NO_GETENV @@ -495,107 +491,40 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { return false; } #else -#if defined _WIN32 -// On Windows use GetEnvironmentVariable instead of getenv to work -// reliably even when this is invoked before the C runtime is initialized. -// i.e. when `_mi_preloading() == true`. -// Note: on windows, environment names are not case sensitive. -#include static bool mi_getenv(const char* name, char* result, size_t result_size) { - result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); - return (len > 0 && len < result_size); -} -#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) -// On Posix systemsr use `environ` to acces environment variables -// even before the C runtime is initialized. -#if defined(__APPLE__) && defined(__has_include) && __has_include() -#include -static char** mi_get_environ(void) { - return (*_NSGetEnviron()); -} -#else -extern char** environ; -static char** mi_get_environ(void) { - return environ; + if (name==NULL || result == NULL || result_size < 64) return false; + return _mi_prim_getenv(name,result,result_size); } #endif -static int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n == 0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n == 0 ? 0 : *s - *t); -} -static bool mi_getenv(const char* name, char* result, size_t result_size) { - if (name==NULL) return false; - const size_t len = strlen(name); - if (len == 0) return false; - char** env = mi_get_environ(); - if (env == NULL) return false; - // compare up to 256 entries - for (int i = 0; i < 256 && env[i] != NULL; i++) { - const char* s = env[i]; - if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive - // found it - mi_strlcpy(result, s + len + 1, result_size); - return true; - } - } - return false; -} -#else -// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime -static bool mi_getenv(const char* name, char* result, size_t result_size) { - // cannot call getenv() when still initializing the C runtime. - if (_mi_preloading()) return false; - const char* s = getenv(name); - if (s == NULL) { - // we check the upper case name too. - char buf[64+1]; - size_t len = strlen(name); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; - for (size_t i = 0; i < len; i++) { - buf[i] = toupper(name[i]); - } - buf[len] = 0; - s = getenv(buf); - } - if (s != NULL && strlen(s) < result_size) { - mi_strlcpy(result, s, result_size); - return true; - } - else { - return false; - } -} -#endif // !MI_USE_ENVIRON -#endif // !MI_NO_GETENV + +// TODO: implement ourselves to reduce dependencies on the C runtime +#include // strtol +#include // strstr + static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment - char s[64+1]; + char s[64 + 1]; char buf[64+1]; - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->name, sizeof(buf)); - bool found = mi_getenv(buf,s,sizeof(s)); + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->name, sizeof(buf)); + bool found = mi_getenv(buf, s, sizeof(s)); if (!found && desc->legacy_name != NULL) { - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->legacy_name, sizeof(buf)); - found = mi_getenv(buf,s,sizeof(s)); + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = mi_getenv(buf, s, sizeof(s)); if (found) { - _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); - } + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); + } } if (found) { - size_t len = strlen(s); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; + size_t len = _mi_strnlen(s, sizeof(buf) - 1); for (size_t i = 0; i < len; i++) { - buf[i] = (char)toupper(s[i]); + buf[i] = _mi_toupper(s[i]); } buf[len] = 0; - if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; } @@ -606,7 +535,7 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory) { + if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } else if (*end == 'M') { value *= MI_KiB; end++; } @@ -626,11 +555,11 @@ static void mi_option_init(mi_option_desc_t* desc) { // if the 'mimalloc_verbose' env var has a bogus value we'd never know // (since the value defaults to 'off') so in that case briefly enable verbose desc->value = 1; - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); desc->value = 0; } else { - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); } } } diff --git a/third-party/mimalloc/src/os.c b/third-party/mimalloc/src/os.c index 0f984741..b4f02ba3 100644 --- a/third-party/mimalloc/src/os.c +++ b/third-party/mimalloc/src/os.c @@ -1,118 +1,54 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // ensure mmap flags are defined -#endif - -#if defined(__sun) -// illumos provides new mman.h api when any of these are defined -// otherwise the old api based on caddr_t which predates the void pointers one. -// stock solaris provides only the former, chose to atomically to discard those -// flags only here rather than project wide tough. -#undef _XOPEN_SOURCE -#undef _POSIX_C_SOURCE -#endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" -#include // strerror - -#ifdef _MSC_VER -#pragma warning(disable:4996) // strerror -#endif - -#if defined(__wasi__) -#define MI_USE_SBRK -#endif - -#if defined(_WIN32) -#include -#elif defined(__wasi__) -#include // sbrk -#else -#include // mmap -#include // sysconf -#if defined(__linux__) -#include -#include -#if defined(__GLIBC__) -#include // linux mmap flags -#else -#include -#endif -#endif -#if defined(__APPLE__) -#include -#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR -#include -#endif -#endif -#if defined(__FreeBSD__) || defined(__DragonFly__) -#include -#if __FreeBSD_version >= 1200000 -#include -#include -#endif -#include -#endif -#endif /* ----------------------------------------------------------- Initialization. On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); -static void* mi_align_up_ptr(void* p, size_t alignment) { - return (void*)_mi_align_up((uintptr_t)p, alignment); -} - -static void* mi_align_down_ptr(void* p, size_t alignment) { - return (void*)_mi_align_down((uintptr_t)p, alignment); -} - - -// page size (initialized properly in `os_init`) -static size_t os_page_size = 4096; - -// minimal allocation granularity -static size_t os_alloc_granularity = 4096; - -// if non-zero, use large page allocation -static size_t large_os_page_size = 0; - -// is memory overcommit allowed? -// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) -static bool os_overcommit = true; +static mi_os_mem_config_t mi_os_mem_config = { + 4096, // page size + 0, // large page size (usually 2MiB) + 4096, // allocation granularity + true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) + false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) +}; bool _mi_os_has_overcommit(void) { - return os_overcommit; + return mi_os_mem_config.has_overcommit; } +bool _mi_os_has_virtual_reserve(void) { + return mi_os_mem_config.has_virtual_reserve; +} + + // OS (small) page size size_t _mi_os_page_size(void) { - return os_page_size; + return mi_os_mem_config.page_size; } // if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) size_t _mi_os_large_page_size(void) { - return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); + return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size()); } -#if !defined(MI_USE_SBRK) && !defined(__wasi__) -static bool use_large_os_page(size_t size, size_t alignment) { +bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements - if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; - return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); + if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false; + return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); } -#endif // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { @@ -126,177 +62,24 @@ size_t _mi_os_good_alloc_size(size_t size) { return _mi_align_up(size, align_size); } -#if defined(_WIN32) -// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. -// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) -// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) -// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. -typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { - MiMemExtendedParameterInvalidType = 0, - MiMemExtendedParameterAddressRequirements, - MiMemExtendedParameterNumaNode, - MiMemExtendedParameterPartitionHandle, - MiMemExtendedParameterUserPhysicalHandle, - MiMemExtendedParameterAttributeFlags, - MiMemExtendedParameterMax -} MI_MEM_EXTENDED_PARAMETER_TYPE; - -typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { - struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; - union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; -} MI_MEM_EXTENDED_PARAMETER; - -typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { - PVOID LowestStartingAddress; - PVOID HighestEndingAddress; - SIZE_T Alignment; -} MI_MEM_ADDRESS_REQUIREMENTS; - -#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 - -#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -static PVirtualAlloc2 pVirtualAlloc2 = NULL; -static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; - -// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 -typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; - -typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); -typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); -typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); -typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); -static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; -static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; -static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; -static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; - -static bool mi_win_enable_large_os_pages(void) -{ - if (large_os_page_size > 0) return true; - - // Try to see if large OS pages are supported - // To use large pages on Windows, we first need access permission - // Set "Lock pages in memory" permission in the group policy editor - // - unsigned long err = 0; - HANDLE token = NULL; - BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (ok) { - TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); - if (ok) { - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); - if (ok) { - err = GetLastError(); - ok = (err == ERROR_SUCCESS); - if (ok) { - large_os_page_size = GetLargePageMinimum(); - } - } - } - CloseHandle(token); - } - if (!ok) { - if (err == 0) err = GetLastError(); - _mi_warning_message("cannot enable large OS page support, error %lu\n", err); - } - return (ok!=0); -} - -void _mi_os_init(void) -{ - os_overcommit = false; - // get the page size - SYSTEM_INFO si; - GetSystemInfo(&si); - if (si.dwPageSize > 0) os_page_size = si.dwPageSize; - if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; - // get the VirtualAlloc2 function - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("kernelbase.dll")); - if (hDll != NULL) { - // use VirtualAlloc2FromApp if possible as it is available to Windows store apps - pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); - FreeLibrary(hDll); - } - // NtAllocateVirtualMemoryEx is used for huge page allocation - hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { - pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); - FreeLibrary(hDll); - } - // Try to use Win7+ numa API - hDll = LoadLibrary(TEXT("kernel32.dll")); - if (hDll != NULL) { - pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); - pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); - pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); - pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); - FreeLibrary(hDll); - } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - mi_win_enable_large_os_pages(); - } -} -#elif defined(__wasi__) void _mi_os_init(void) { - os_overcommit = false; - os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB - os_alloc_granularity = 16; + _mi_prim_mem_init(&mi_os_mem_config); } -#else // generic unix -static void os_detect_overcommit(void) { -#if defined(__linux__) - int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd < 0) return; - char buf[32]; - ssize_t nread = read(fd, &buf, sizeof(buf)); - close(fd); - // - // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) - if (nread >= 1) { - os_overcommit = (buf[0] == '0' || buf[0] == '1'); - } -#elif defined(__FreeBSD__) - int val = 0; - size_t olen = sizeof(val); - if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { - os_overcommit = (val != 0); - } -#else - // default: overcommit is true -#endif +/* ----------------------------------------------------------- + Util +-------------------------------------------------------------- */ +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); + +static void* mi_align_up_ptr(void* p, size_t alignment) { + return (void*)_mi_align_up((uintptr_t)p, alignment); } -void _mi_os_init(void) { - // get the page size - long result = sysconf(_SC_PAGESIZE); - if (result > 0) { - os_page_size = (size_t)result; - os_alloc_granularity = os_page_size; - } - large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this? - os_detect_overcommit(); +static void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); } -#endif - - -#if defined(MADV_NORMAL) -static int mi_madvise(void* addr, size_t length, int advice) { - #if defined(__sun) - return madvise((caddr_t)addr, length, advice); // Solaris needs cast (issue #520) - #else - return madvise(addr, length, advice); - #endif -} -#endif /* ----------------------------------------------------------- @@ -319,7 +102,7 @@ static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; #define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) #define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; size = _mi_align_up(size, MI_SEGMENT_SIZE); @@ -332,7 +115,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize uintptr_t init = MI_HINT_BASE; #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; @@ -343,401 +126,93 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) return (void*)hint; } #else -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } #endif + /* ----------------------------------------------------------- Free memory -------------------------------------------------------------- */ -static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) -{ - if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) - bool err = false; -#if defined(_WIN32) - DWORD errcode = 0; - err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - if (errcode == ERROR_INVALID_ADDRESS) { - // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside - // the memory region returned by VirtualAlloc; in that case we need to free using - // the start of the region. - MEMORY_BASIC_INFORMATION info = { 0 }; - VirtualQuery(addr, &info, sizeof(info)); - if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { - errcode = 0; - err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - } +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); + +static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_assert_internal((size % _mi_os_page_size()) == 0); + if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } -#elif defined(MI_USE_SBRK) || defined(__wasi__) - err = false; // sbrk heap cannot be shrunk -#else - err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); - } -#endif - if (was_committed) { _mi_stat_decrease(&stats->committed, size); } + mi_stats_t* stats = &_mi_stats_main; + if (still_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); - return !err; } - -/* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) --------------------------------------------------------------- */ - -#ifdef _WIN32 - -#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE) - -static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) - // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment,size); - if (hint != NULL) { - void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); - if (p != NULL) return p; - _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); - // fall through on error +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { + if (mi_memkind_is_os(memid.memkind)) { + size_t csize = _mi_os_good_alloc_size(size); + void* base = addr; + // different base? (due to alignment) + if (memid.mem.os.base != NULL) { + mi_assert(memid.mem.os.base <= addr); + mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr); + base = memid.mem.os.base; + csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); } - } -#endif - // on modern Windows try use VirtualAlloc2 for aligned allocation - if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; - reqs.Alignment = try_alignment; - MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; - param.Type.Type = MiMemExtendedParameterAddressRequirements; - param.Arg.Pointer = &reqs; - void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); - if (p != NULL) return p; - _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); - // fall through on error - } - // last resort - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { - mi_assert_internal(!(large_only && !allow_large)); - static _Atomic(size_t) large_page_try_ok; // = 0; - void* p = NULL; - // Try to allocate large OS pages (2MiB) if allowed or required. - if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. - // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + // free it + if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(base, csize, tld_stats); } else { - // large OS pages must always reserve and commit. - *is_large = true; - p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); - if (large_only) return p; - // fall back to non-large page allocation on error (`p == NULL`). - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations - } + mi_os_prim_free(base, csize, still_committed, tld_stats); } } - // Fall back to regular page allocation - if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); - p = mi_win_virtual_allocx(addr, size, try_alignment, flags); - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } - return p; -} - -/* ----------------------------------------------------------- - Raw allocation using `sbrk` or `wasm_memory_grow` --------------------------------------------------------------- */ - -#elif defined(MI_USE_SBRK) || defined(__wasi__) -#if defined(MI_USE_SBRK) - static void* mi_memory_grow( size_t size ) { - void* p = sbrk(size); - if (p == (void*)(-1)) return NULL; - #if !defined(__wasi__) // on wasi this is always zero initialized already (?) - memset(p,0,size); - #endif - return p; - } -#elif defined(__wasi__) - static void* mi_memory_grow( size_t size ) { - size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) - : __builtin_wasm_memory_size(0)); - if (base == SIZE_MAX) return NULL; - return (void*)(base * _mi_os_page_size()); - } -#endif - -#if defined(MI_USE_PTHREADS) -static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; -#endif - -static void* mi_heap_grow(size_t size, size_t try_alignment) { - void* p = NULL; - if (try_alignment <= 1) { - // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - p = mi_memory_grow(size); - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - } else { - void* base = NULL; - size_t alloc_size = 0; - // to allocate aligned use a lock to try to avoid thread interaction - // between getting the current size and actual allocation - // (also, `sbrk` is not thread safe in general) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - { - void* current = mi_memory_grow(0); // get current size - if (current != NULL) { - void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space - alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); - base = mi_memory_grow(alloc_size); - } - } - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - if (base != NULL) { - p = mi_align_up_ptr(base, try_alignment); - if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { - // another thread used wasm_memory_grow/sbrk in-between and we do not have enough - // space after alignment. Give up (and waste the space as we cannot shrink :-( ) - // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) - p = NULL; - } - } + // nothing to do + mi_assert(memid.memkind < MI_MEM_OS); } - if (p == NULL) { - _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); - errno = ENOMEM; - return NULL; - } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); - return p; } -/* ----------------------------------------------------------- - Raw allocation on Unix's (mmap) --------------------------------------------------------------- */ -#else -#define MI_OS_USE_MMAP -static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - MI_UNUSED(try_alignment); - #if defined(MAP_ALIGNED) // BSD - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - size_t n = mi_bsr(try_alignment); - if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #elif defined(MAP_ALIGN) // Solaris - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - #endif - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) - // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment, size); - if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #endif - // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // failed to allocate - return NULL; +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, memid, tld_stats); } -static int mi_unix_mmap_fd(void) { -#if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) os_tag = 100; - return VM_MAKE_TAG(os_tag); -#else - return -1; -#endif -} - -static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - #if !defined(MAP_NORESERVE) - #define MAP_NORESERVE 0 - #endif - const int fd = mi_unix_mmap_fd(); - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - if (_mi_os_has_overcommit()) { - flags |= MAP_NORESERVE; - } - #if defined(PROT_MAX) - protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - // huge page allocation - if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static _Atomic(size_t) large_page_try_ok; // = 0; - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // If the OS is not configured for large OS pages, or the user does not have - // enough permission, the `mmap` will always fail (but it might also fail for other reasons). - // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times - // to avoid too many failing calls to mmap. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux - int lfd = fd; - #ifdef MAP_ALIGNED_SUPER - lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB - lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB - static bool mi_huge_pages_available = true; - if ((size % MI_GiB) == 0 && mi_huge_pages_available) { - lflags |= MAP_HUGE_1GB; - } - else - #endif - { - #ifdef MAP_HUGE_2MB - lflags |= MAP_HUGE_2MB; - #endif - } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB - lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif - if (large_only || lflags != flags) { - // try large OS page allocation - *is_large = true; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB - if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { - mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); - lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - } - #endif - if (large_only) return p; - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations - } - } - } - } - // regular allocation - if (p == NULL) { - *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - if (p != NULL) { - #if defined(MADV_HUGEPAGE) - // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE - // though since properly aligned allocations will already use large pages if available - // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow THP if called with explicit `madvise`, so - // when large OS pages are enabled for mimalloc, we call `madvise` anyways. - if (allow_large && use_large_os_page(size, try_alignment)) { - if (mi_madvise(p, size, MADV_HUGEPAGE) == 0) { - *is_large = true; // possibly - }; - } - #elif defined(__sun) - if (allow_large && use_large_os_page(size, try_alignment)) { - struct memcntl_mha cmd = {0}; - cmd.mha_pagesize = large_os_page_size; - cmd.mha_cmd = MHA_MAPSIZE_VA; - if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { - *is_large = true; - } - } - #endif - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } - return p; -} -#endif - /* ----------------------------------------------------------- Primitive allocation from the OS. -------------------------------------------------------------- */ // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(is_large != NULL); if (size == 0) return NULL; - if (!commit) allow_large = false; - if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning + if (!commit) { allow_large = false; } + if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning - void* p = NULL; - /* - if (commit && allow_large) { - p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); - if (p != NULL) { - *is_large = true; - return p; - } + *is_zero = false; + void* p = NULL; + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); } - */ - - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) { flags |= MEM_COMMIT; } - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(MI_USE_SBRK) || defined(__wasi__) - MI_UNUSED(allow_large); - *is_large = false; - p = mi_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) { _mi_stat_increase(&stats->committed, size); } + if (commit) { + _mi_stat_increase(&stats->committed, size); + // seems needed for asan (or `mimalloc-test-api` fails) + #ifdef MI_TRACK_ASAN + if (*is_zero) { mi_track_mem_defined(p,size); } + else { mi_track_mem_undefined(p,size); } + #endif + } } return p; } @@ -745,99 +220,109 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(base != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); + void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; - // if not aligned, free it, overallocate, and unmap around it - if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, commit, stats); - _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); + // aligned already? + if (((uintptr_t)p % alignment) == 0) { + *base = p; + } + else { + // if not aligned, free it, overallocate, and unmap around it + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); + mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; -#if _WIN32 - // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); - if (p == NULL) return NULL; + if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block + // over-allocate uncommitted (virtual) memory + p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); + if (p == NULL) return NULL; + + // set p to the aligned part in the full region + // note: this is dangerous on Windows as VirtualFree needs the actual base pointer + // this is handled though by having the `base` field in the memid's + *base = p; // remember the base + p = mi_align_up_ptr(p, alignment); - // set p to the aligned part in the full region - // note: this is dangerous on Windows as VirtualFree needs the actual region pointer - // but in mi_os_mem_free we handle this (hopefully exceptional) situation. - p = mi_align_up_ptr(p, alignment); - - // explicitly commit only the aligned part - if (commit) { - _mi_os_commit(p, size, NULL, stats); + // explicitly commit only the aligned part + if (commit) { + _mi_os_commit(p, size, NULL, stats); + } + } + else { // mmap can free inside an allocation + // overallocate... + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); + if (p == NULL) return NULL; + + // and selectively unmap parts around the over-allocated area. (noop on sbrk) + void* aligned_p = mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); + if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } + if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } + // we can return the aligned pointer on `mmap` (and sbrk) systems + p = aligned_p; + *base = aligned_p; // since we freed the pre part, `*base == p`. } -#else - // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); - if (p == NULL) return NULL; - // and selectively unmap parts around the over-allocated area. (noop on sbrk) - void* aligned_p = mi_align_up_ptr(p, alignment); - size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; - size_t mid_size = _mi_align_up(size, _mi_os_page_size()); - size_t post_size = over_size - pre_size - mid_size; - mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); - // we can return the aligned pointer on `mmap` (and sbrk) systems - p = aligned_p; -#endif } - mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); + mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0)); return p; } /* ----------------------------------------------------------- - OS API: alloc, free, alloc_aligned + OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); - bool is_large = false; - return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); + if (p != NULL) { + *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + } + return p; } -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (size == 0 || p == NULL) return; - size = _mi_os_good_alloc_size(size); - mi_os_mem_free(p, size, was_committed, stats); -} - -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { - _mi_os_free_ex(p, size, true, stats); -} - -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { - MI_UNUSED(&mi_os_get_aligned_hint); // suppress unused warnings + MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - bool allow_large = false; - if (large != NULL) { - allow_large = *large; - *large = false; + + bool os_is_large = false; + bool os_is_zero = false; + void* os_base = NULL; + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ ); + if (p != NULL) { + *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.alignment = alignment; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); + return p; } /* ----------------------------------------------------------- @@ -848,22 +333,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar to use the actual start of the memory region. ----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); + *memid = _mi_memid_none(); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats); if (start == NULL) return NULL; - void* p = (uint8_t*)start + extra; + + void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start if (commit && extra > _mi_os_page_size()) { @@ -873,18 +360,10 @@ void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, } } -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { - mi_assert(align_offset <= MI_SEGMENT_SIZE); - const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; - void* start = (uint8_t*)p - extra; - _mi_os_free_ex(start, size + extra, was_committed, tld_stats); -} - /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ - // OS page align within a given area, either conservative (pages inside the area only), // or not (straddling pages outside the area is possible) static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { @@ -909,183 +388,116 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -static void mi_mprotect_hint(int err) { -#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page - if (err == ENOMEM) { - _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" - " On Linux this is controlled by the vm.max_map_count. For example:\n" - " > sudo sysctl -w vm.max_map_count=262144\n"); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + if (is_zero != NULL) { *is_zero = false; } + _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit + _mi_stat_counter_increase(&stats->commit_calls, 1); + + // page align range + size_t csize; + void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize); + if (csize == 0) return true; + + // commit + bool os_is_zero = false; + int err = _mi_prim_commit(start, csize, &os_is_zero); + if (err != 0) { + _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + return false; } -#else - MI_UNUSED(err); -#endif + if (os_is_zero && is_zero != NULL) { + *is_zero = true; + mi_assert_expensive(mi_mem_is_zero(start, csize)); + } + // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) + #ifdef MI_TRACK_ASAN + if (os_is_zero) { mi_track_mem_defined(start,csize); } + else { mi_track_mem_undefined(start,csize); } + #endif + return true; } -// Commit/Decommit memory. -// Usually commit is aligned liberal, while decommit is aligned conservative. -// (but not for the reset version where we want commit to be conservative as well) -static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { - // page align in the range, commit liberally, decommit conservative - if (is_zero != NULL) { *is_zero = false; } - size_t csize; - void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - int err = 0; - if (commit) { - _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&stats->commit_calls, 1); - } - else { - _mi_stat_decrease(&stats->committed, size); - } +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + mi_assert_internal(needs_recommit!=NULL); + _mi_stat_decrease(&stats->committed, size); - #if defined(_WIN32) - if (commit) { - // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd - void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); - err = (p == start ? 0 : GetLastError()); - } - else { - BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); - err = (ok ? 0 : GetLastError()); - } - #elif defined(__wasi__) - // WebAssembly guests can't control memory protection - #elif 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - // Linux, macOSX and others. - if (commit) { - // commit: ensure we can access the area - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) - err = madvise(start, csize, MADV_DONTNEED); - #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - err = mprotect(start, csize, PROT_NONE); - if (err != 0) { err = errno; } - #endif - //#if defined(MADV_FREE_REUSE) - // while ((err = mi_madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } - //#endif - } - #endif + // page align + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; + + // decommit + *needs_recommit = true; + int err = _mi_prim_decommit(start,csize,needs_recommit); if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); - mi_mprotect_hint(err); + _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); } -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); -} - bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); + bool needs_recommit; + return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); } -/* -static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true // conservative - , is_zero, stats); -} -*/ // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. -static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); - if (!reset) return true; // nothing to do on unreset! + _mi_stat_increase(&stats->reset, csize); + _mi_stat_counter_increase(&stats->reset_calls, 1); - #if (MI_DEBUG>1) && !MI_TRACK_ENABLED - if (MI_SECURE==0) { - memset(start, 0, csize); // pretend it is eagerly reset - } + #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN + memset(start, 0, csize); // pretend it is eagerly reset #endif -#if defined(_WIN32) - // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory - void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); - mi_assert_internal(p == start); - #if 1 - if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set - } - #endif - if (p != start) return false; -#else -#if defined(MADV_FREE) - static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); - int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; - while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { - // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); - err = mi_madvise(start, csize, MADV_DONTNEED); - } -#elif defined(__wasi__) - int err = 0; -#else - int err = mi_madvise(start, csize, MADV_DONTNEED); -#endif + int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno); + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } - //mi_assert(err == 0); - if (err != 0) return false; -#endif - return true; + return (err == 0); } -// Signal to the OS that the address range is no longer in use -// but may be used later again. This will release physical memory -// pages and reduce swapping while keeping the memory committed. -// We page align to a conservative area inside the range to reset. -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_resetx(addr, size, true, stats); + +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) +{ + if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? + _mi_stat_counter_increase(&stats->purge_calls, 1); + _mi_stat_increase(&stats->purged, size); + + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + bool needs_recommit = true; + mi_os_decommit_ex(p, size, &needs_recommit, stats); + return needs_recommit; + } + else { + if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed + _mi_os_reset(p, size, stats); + } + return false; // needs no recommit + } } -/* -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { + return _mi_os_purge_ex(p, size, true, stats); } -*/ // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { @@ -1098,20 +510,9 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ - int err = 0; -#ifdef _WIN32 - DWORD oldprotect = 0; - BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); - err = (ok ? 0 : GetLastError()); -#elif defined(__wasi__) - err = 0; -#else - err = mprotect(start, csize, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } -#endif + int err = _mi_prim_protect(start,csize,protect); if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); - mi_mprotect_hint(err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } @@ -1126,115 +527,12 @@ bool _mi_os_unprotect(void* addr, size_t size) { -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { - // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); - if (oldsize < newsize || p == NULL) return false; - if (oldsize == newsize) return true; - - // oldsize and newsize should be page aligned or we cannot shrink precisely - void* addr = (uint8_t*)p + newsize; - size_t size = 0; - void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size); - if (size == 0 || start != addr) return false; - -#ifdef _WIN32 - // we cannot shrink on windows, but we can decommit - return _mi_os_decommit(start, size, stats); -#else - return mi_os_mem_free(start, size, true, stats); -#endif -} - - /* ---------------------------------------------------------------------------- Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. (use `numa_node>=0`) -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE (MI_GiB) -#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) -{ - mi_assert_internal(size%MI_GiB == 0); - mi_assert_internal(addr != NULL); - const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - - mi_win_enable_large_os_pages(); - - MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - static bool mi_huge_pages_available = true; - if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - params[0].Type.Type = MiMemExtendedParameterAttributeFlags; - params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 1; - if (numa_node >= 0) { - param_count++; - params[1].Type.Type = MiMemExtendedParameterNumaNode; - params[1].Arg.ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0 && base != NULL) { - return base; - } - else { - // fall back to regular large pages - mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation - if (pVirtualAlloc2 != NULL && numa_node >= 0) { - params[0].Type.Type = MiMemExtendedParameterNumaNode; - params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); - } - - // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) -#include -#ifndef MPOL_PREFERRED -#define MPOL_PREFERRED 1 -#endif -#if defined(SYS_mbind) -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); -} -#else -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); - return 0; -} -#endif -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%MI_GiB == 0); - bool is_large = true; - void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes - unsigned long numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we - // use `set_mempolicy` before calling mmap instead? - // see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); - if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } - } - return p; -} -#else -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; -} -#endif #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages @@ -1253,10 +551,10 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB -#endif + #endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); @@ -1274,7 +572,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { #endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { + *memid = _mi_memid_none(); if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; @@ -1285,23 +584,32 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. mi_msecs_t start_t = _mi_clock_start(); - size_t page; - for (page = 0; page < pages; page++) { + size_t page = 0; + bool all_zero = true; + while (page < pages) { // allocate a page + bool is_zero = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); + if (!is_zero) { all_zero = false; } + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + break; + } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); } break; } // success, record it + page++; // increase before timeout check (see issue #711) _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); @@ -1315,7 +623,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); + _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page); break; } } @@ -1323,16 +631,25 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) { *pages_reserved = page; } if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } + if (page != 0) { + mi_assert(start != NULL); + *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + memid->memkind = MI_MEM_OS_HUGE; + mi_assert(memid->is_pinned); + #ifdef MI_TRACK_ASAN + if (all_zero) { mi_track_mem_defined(start,size); } + #endif + } return (page == 0 ? NULL : start); } // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); size -= MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE; } @@ -1341,113 +658,6 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 -static size_t mi_os_numa_nodex(void) { - USHORT numa_node = 0; - if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { - // Extended API is supported - MI_PROCESSOR_NUMBER pnum; - (*pGetCurrentProcessorNumberEx)(&pnum); - USHORT nnode = 0; - BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) { numa_node = nnode; } - } - else if (pGetNumaProcessorNode != NULL) { - // Vista or earlier, use older API that is limited to 64 processors. Issue #277 - DWORD pnum = GetCurrentProcessorNumber(); - UCHAR nnode = 0; - BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) { numa_node = nnode; } - } - return numa_node; -} - -static size_t mi_os_numa_node_countx(void) { - ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); - // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { - if (pGetNumaNodeProcessorMaskEx != NULL) { - // Extended API is supported - GROUP_AFFINITY affinity; - if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { - if (affinity.Mask != 0) break; // found the maximum non-empty node - } - } - else { - // Vista or earlier, use older API that is limited to 64 processors. - ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node - }; - } - // max node was invalid or had no processor assigned, try again - numa_max--; - } - return ((size_t)numa_max + 1); -} -#elif defined(__linux__) -#include // getcpu -#include // access - -static size_t mi_os_numa_nodex(void) { -#ifdef SYS_getcpu - unsigned long node = 0; - unsigned long ncpu = 0; - long err = syscall(SYS_getcpu, &ncpu, &node, NULL); - if (err != 0) return 0; - return node; -#else - return 0; -#endif -} -static size_t mi_os_numa_node_countx(void) { - char buf[128]; - unsigned node = 0; - for(node = 0; node < 256; node++) { - // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; - } - return (node+1); -} -#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 -static size_t mi_os_numa_nodex(void) { - domainset_t dom; - size_t node; - int policy; - if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; - for (node = 0; node < MAXMEMDOM; node++) { - if (DOMAINSET_ISSET(node, &dom)) return node; - } - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ndomains = 0; - size_t len = sizeof(ndomains); - if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; - return ndomains; -} -#elif defined(__DragonFly__) -static size_t mi_os_numa_nodex(void) { - // TODO: DragonFly does not seem to provide any userland means to get this information. - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ncpus = 0, nvirtcoresperphys = 0; - size_t len = sizeof(size_t); - if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; - if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; - return nvirtcoresperphys * ncpus; -} -#else -static size_t mi_os_numa_nodex(void) { - return 0; -} -static size_t mi_os_numa_node_countx(void) { - return 1; -} -#endif _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count @@ -1459,7 +669,7 @@ size_t _mi_os_numa_node_count_get(void) { count = (size_t)ncount; } else { - count = mi_os_numa_node_countx(); // or detect dynamically + count = _mi_prim_numa_node_count(); // or detect dynamically if (count == 0) count = 1; } mi_atomic_store_release(&_mi_numa_node_count, count); // save it @@ -1473,7 +683,7 @@ int _mi_os_numa_node_get(mi_os_tld_t* tld) { size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = mi_os_numa_nodex(); + size_t numa_node = _mi_prim_numa_node(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } return (int)numa_node; } diff --git a/third-party/mimalloc/src/page.c b/third-party/mimalloc/src/page.c index 4250ff35..8ac0a715 100644 --- a/third-party/mimalloc/src/page.c +++ b/third-party/mimalloc/src/page.c @@ -12,8 +12,8 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" /* ----------------------------------------------------------- Definition of page queues for each block size @@ -66,6 +66,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { if (p < start || p >= end) return false; p = mi_block_next(page, p); } +#if MI_DEBUG>3 // generally too expensive to check this + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } +#endif return true; } @@ -84,7 +92,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); #if MI_DEBUG>3 // generally too expensive to check this - if (page->is_zero) { + if (page->free_is_zero) { const size_t ubsize = mi_page_usable_block_size(page); for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); @@ -92,10 +100,12 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif + #if !MI_TRACK_ENABLED && !MI_TSAN mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); //size_t tfree_count = mi_page_list_count(page, tfree); //mi_assert_internal(tfree_count <= page->thread_freed + 1); + #endif size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == page->capacity); @@ -103,6 +113,8 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } +extern bool _mi_process_is_initialized; // has mi_process_init been called? + bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE @@ -217,7 +229,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // usual case page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } else if (force) { // append -- only on shutdown (force) as this is a linear operation @@ -229,7 +241,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_block_set_next(page, tail, page->free); page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } } @@ -251,7 +263,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); #endif - mi_assert_internal(!page->is_reset); + // TODO: push on full queue immediately if it is full? mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); @@ -379,7 +391,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_page_set_heap(page, NULL); -#if MI_DEBUG>1 +#if (MI_DEBUG>1) && !MI_TRACK_ENABLED // check there are no references left.. for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { mi_assert_internal(_mi_ptr_page(block) != page); @@ -417,7 +429,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // Retire parameters #define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) -#define MI_RETIRE_CYCLES (8) +#define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks // Important to not retire too quickly though as new @@ -637,11 +649,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // enable the new free list page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); - - // extension into zero initialized memory preserves the zero'd free list - if (!page->is_zero_init) { - page->is_zero = false; - } mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -663,18 +670,19 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); mi_assert_internal(page->reserved > 0); - #ifdef MI_ENCODE_FREELIST + #if (MI_PADDING || MI_ENCODE_FREELIST) page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - #if MI_DEBUG > 0 - page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501 - #else - page->is_zero = page->is_zero_init; + page->free_is_zero = page->is_zero_init; + #if MI_DEBUG>2 + if (page->is_zero_init) { + mi_track_mem_defined(page_start, page_size); + mi_assert_expensive(mi_mem_is_zero(page_start, page_size)); + } #endif - + mi_assert_internal(page->is_committed); - mi_assert_internal(!page->is_reset); mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); @@ -683,7 +691,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); - #if (MI_ENCODE_FREELIST) + #if (MI_PADDING || MI_ENCODE_FREELIST) mi_assert_internal(page->keys[0] != 0); mi_assert_internal(page->keys[1] != 0); #endif @@ -703,12 +711,16 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { // search through the pages in "next fit" order + #if MI_STAT size_t count = 0; + #endif mi_page_t* page = pq->first; while (page != NULL) { mi_page_t* next = page->next; // remember next + #if MI_STAT count++; + #endif // 0. collect freed blocks by us and other threads _mi_page_free_collect(page, false); @@ -869,7 +881,9 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme } else { // otherwise find a page with free blocks in our size segregated queues - mi_assert_internal(size >= MI_PADDING_SIZE); + #if MI_PADDING + mi_assert_internal(size >= MI_PADDING_SIZE); + #endif return mi_find_free_page(heap, size); } } @@ -884,8 +898,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // initialize if necessary if mi_unlikely(!mi_heap_is_initialized(heap)) { - mi_thread_init(); // calls `_mi_heap_init` in turn - heap = mi_get_default_heap(); + heap = mi_heap_get_default(); // calls mi_thread_init if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/third-party/mimalloc/src/alloc-override-osx.c b/third-party/mimalloc/src/prim/osx/alloc-override-zone.c similarity index 98% rename from third-party/mimalloc/src/alloc-override-osx.c rename to third-party/mimalloc/src/prim/osx/alloc-override-zone.c index a2819a8b..0e0a99d9 100644 --- a/third-party/mimalloc/src/alloc-override-osx.c +++ b/third-party/mimalloc/src/prim/osx/alloc-override-zone.c @@ -6,7 +6,7 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #if defined(MI_MALLOC_OVERRIDE) @@ -195,7 +195,7 @@ static malloc_introspection_t mi_introspect = { .log = &intro_log, .force_lock = &intro_force_lock, .force_unlock = &intro_force_unlock, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) .statistics = &intro_statistics, .zone_locked = &intro_zone_locked, #endif @@ -216,7 +216,7 @@ static malloc_zone_t mi_malloc_zone = { .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, .introspect = &mi_introspect, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, #else @@ -420,7 +420,7 @@ __attribute__((constructor(0))) #else __attribute__((constructor)) // seems not supported by g++-11 on the M1 #endif -static void _mi_macos_override_malloc() { +static void _mi_macos_override_malloc(void) { malloc_zone_t* purgeable_zone = NULL; #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) diff --git a/third-party/mimalloc/src/prim/osx/prim.c b/third-party/mimalloc/src/prim/osx/prim.c new file mode 100644 index 00000000..8a2f4e8a --- /dev/null +++ b/third-party/mimalloc/src/prim/osx/prim.c @@ -0,0 +1,9 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// We use the unix/prim.c with the mmap API on macOSX +#include "../unix/prim.c" diff --git a/third-party/mimalloc/src/prim/prim.c b/third-party/mimalloc/src/prim/prim.c new file mode 100644 index 00000000..9a597d8e --- /dev/null +++ b/third-party/mimalloc/src/prim/prim.c @@ -0,0 +1,24 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Select the implementation of the primitives +// depending on the OS. + +#if defined(_WIN32) +#include "windows/prim.c" // VirtualAlloc (Windows) + +#elif defined(__APPLE__) +#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) + +#elif defined(__wasi__) +#define MI_USE_SBRK +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) + +#else +#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) + +#endif diff --git a/third-party/mimalloc/src/prim/readme.md b/third-party/mimalloc/src/prim/readme.md new file mode 100644 index 00000000..380dd3a7 --- /dev/null +++ b/third-party/mimalloc/src/prim/readme.md @@ -0,0 +1,9 @@ +## Portability Primitives + +This is the portability layer where all primitives needed from the OS are defined. + +- `include/mimalloc/prim.h`: primitive portability API definition. +- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform + (and on macOS, `osx/prim.c` defers to `unix/prim.c`). + +Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's. \ No newline at end of file diff --git a/third-party/mimalloc/src/prim/unix/prim.c b/third-party/mimalloc/src/prim/unix/prim.c new file mode 100644 index 00000000..314281fe --- /dev/null +++ b/third-party/mimalloc/src/prim/unix/prim.c @@ -0,0 +1,859 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. +#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +#include // mmap +#include // sysconf + +#if defined(__linux__) + #include + #include + #if defined(__GLIBC__) + #include // linux mmap flags + #else + #include + #endif +#elif defined(__APPLE__) + #include + #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR + #include + #endif +#elif defined(__FreeBSD__) || defined(__DragonFly__) + #include + #if __FreeBSD_version >= 1200000 + #include + #include + #endif + #include +#endif + +#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) + #define MI_HAS_SYSCALL_H + #include +#endif + +//------------------------------------------------------------------------------------ +// Use syscalls for some primitives to allow for libraries that override open/read/close etc. +// and do allocation themselves; using syscalls prevents recursion when mimalloc is +// still initializing (issue #713) +//------------------------------------------------------------------------------------ + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) + +static int mi_prim_open(const char* fpath, int open_flags) { + return syscall(SYS_open,fpath,open_flags,0); +} +static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return syscall(SYS_read,fd,buf,bufsize); +} +static int mi_prim_close(int fd) { + return syscall(SYS_close,fd); +} +static int mi_prim_access(const char *fpath, int mode) { + return syscall(SYS_access,fpath,mode); +} + +#elif !defined(__APPLE__) // avoid unused warnings + +static int mi_prim_open(const char* fpath, int open_flags) { + return open(fpath,open_flags); +} +static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return read(fd,buf,bufsize); +} +static int mi_prim_close(int fd) { + return close(fd); +} +static int mi_prim_access(const char *fpath, int mode) { + return access(fpath,mode); +} + +#endif + + + +//--------------------------------------------- +// init +//--------------------------------------------- + +static bool unix_detect_overcommit(void) { + bool os_overcommit = true; +#if defined(__linux__) + int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); + mi_prim_close(fd); + // + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } + } +#elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } +#else + // default: overcommit is true +#endif + return os_overcommit; +} + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + long psize = sysconf(_SC_PAGESIZE); + if (psize > 0) { + config->page_size = (size_t)psize; + config->alloc_granularity = (size_t)psize; + } + config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->has_overcommit = unix_detect_overcommit(); + config->must_free_whole = false; // mmap can free in parts + config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) +} + + +//--------------------------------------------- +// free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + bool err = (munmap(addr, size) == -1); + return (err ? errno : 0); +} + + +//--------------------------------------------- +// mmap +//--------------------------------------------- + +static int unix_madvise(void* addr, size_t size, int advice) { + #if defined(__sun) + return madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) + #else + return madvise(addr, size, advice); + #endif +} + +static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + void* p = NULL; + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? + int err = 0; + #else + int err = errno; + #endif + _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static int unix_mmap_fd(void) { + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + return VM_MAKE_TAG(os_tag); + #else + return -1; + #endif +} + +static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + void* p = NULL; + const int fd = unix_mmap_fd(); + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + // huge page allocation + if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + if (p != NULL) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). + // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #elif defined(__sun) + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = large_os_page_size; + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + } + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + + *is_zero = true; + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +static void unix_mprotect_hint(int err) { + #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("The next warning may be caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" + " For example: sudo sysctl -w vm.max_map_count=262144\n"); + } + #else + MI_UNUSED(err); + #endif +} + +int _mi_prim_commit(void* start, size_t size, bool* is_zero) { + // commit: ensure we can access the area + // note: we may think that *is_zero can be true since the memory + // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but + // we sometimes call commit on a range with still partially committed + // memory and `mprotect` does not zero the range. + *is_zero = false; + int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { + err = errno; + unix_mprotect_hint(err); + } + return err; +} + +int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { + int err = 0; + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #if !MI_DEBUG && !MI_SECURE + *needs_recommit = false; + #else + *needs_recommit = true; + mprotect(start, size, PROT_NONE); + #endif + /* + // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) + *needs_recommit = true; + const int fd = unix_mmap_fd(); + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + */ + return err; +} + +int _mi_prim_reset(void* start, size_t size) { + // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // will not reduce the `rss` stats in tools like `top` even though the memory is available + // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by + // default `MADV_DONTNEED` is used though. + #if defined(MADV_FREE) + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); + } + #else + int err = unix_madvise(start, size, MADV_DONTNEED); + #endif + return err; +} + +int _mi_prim_protect(void* start, size_t size, bool protect) { + int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + unix_mprotect_hint(err); + return err; +} + + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__) + +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind) +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + bool is_large = true; + *is_zero = true; + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? + // see: + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); + } + } + return (*addr != NULL ? 0 : errno); +} + +#else + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = false; + *addr = NULL; + return ENOMEM; +} + +#endif + +//--------------------------------------------- +// NUMA nodes +//--------------------------------------------- + +#if defined(__linux__) + +#include // snprintf + +size_t _mi_prim_numa_node(void) { + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu) + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; + #else + return 0; + #endif +} + +size_t _mi_prim_numa_node_count(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (mi_prim_access(buf,R_OK) != 0) break; + } + return (node+1); +} + +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 + +size_t _mi_prim_numa_node(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} + +#elif defined(__DragonFly__) + +size_t _mi_prim_numa_node(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} + +#else + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + +#endif + +// ---------------------------------------------------------------- +// Clock +// ---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif + +#if defined(__HAIKU__) +#include +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + pinfo->page_faults = rusage.ru_majflt; +#endif +#if defined(__HAIKU__) + // Haiku does not have (yet?) a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + pinfo->peak_rss += mem.ram_size; + } + pinfo->page_faults = 0; +#elif defined(__APPLE__) + pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + #ifdef MACH_TASK_BASIC_INFO + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #else + struct task_basic_info info; + mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #endif +#else + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB +#endif + // use defaults for commit +} + +#else + +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + +#endif + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) +// On Posix systemsr use `environ` to access environment variables +// even before the C runtime is initialized. +#if defined(__APPLE__) && defined(__has_include) && __has_include() +#include +static char** mi_get_environ(void) { + return (*_NSGetEnviron()); +} +#else +extern char** environ; +static char** mi_get_environ(void) { + return environ; +} +#endif +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL) return false; + const size_t len = _mi_strlen(name); + if (len == 0) return false; + char** env = mi_get_environ(); + if (env == NULL) return false; + // compare up to 10000 entries + for (int i = 0; i < 10000 && env[i] != NULL; i++) { + const char* s = env[i]; + if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive + // found it + _mi_strlcpy(result, s + len + 1, result_size); + return true; + } + } + return false; +} +#else +// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} +#endif // !MI_USE_ENVIRON + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(__APPLE__) + +#include +#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#include +#include +#endif +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); + #else + // fall back on older macOS + arc4random_buf(buf, buf_len); + return true; + #endif +} + +#elif defined(__ANDROID__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__sun) + +#include +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} + +#elif defined(__linux__) || defined(__HAIKU__) + +#include +#include +#include +#include + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom) + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (errno != ENOSYS) return false; + mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom + } + #endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = mi_prim_open("/dev/urandom", flags); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + mi_prim_close(fd); + return (count==buf_len); +} + +#else + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + +#endif + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default heap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_heap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing to do +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/third-party/mimalloc/src/prim/wasi/prim.c b/third-party/mimalloc/src/prim/wasi/prim.c new file mode 100644 index 00000000..50511f0b --- /dev/null +++ b/third-party/mimalloc/src/prim/wasi/prim.c @@ -0,0 +1,275 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + config->alloc_granularity = 16; + config->has_overcommit = false; + config->must_free_whole = true; + config->has_virtual_reserve = false; +} + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(addr); MI_UNUSED(size); + // wasi heap cannot be shrunk + return 0; +} + + +//--------------------------------------------- +// Allocation: sbrk or memory_grow +//--------------------------------------------- + +#if defined(MI_USE_SBRK) + static void* mi_memory_grow( size_t size ) { + void* p = sbrk(size); + if (p == (void*)(-1)) return NULL; + #if !defined(__wasi__) // on wasi this is always zero initialized already (?) + memset(p,0,size); + #endif + return p; + } +#elif defined(__wasi__) + static void* mi_memory_grow( size_t size ) { + size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) + : __builtin_wasm_memory_size(0)); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); + } +#endif + +#if defined(MI_USE_PTHREADS) +static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { + void* p = NULL; + if (try_alignment <= 1) { + // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + p = mi_memory_grow(size); + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + } + else { + void* base = NULL; + size_t alloc_size = 0; + // to allocate aligned use a lock to try to avoid thread interaction + // between getting the current size and actual allocation + // (also, `sbrk` is not thread safe in general) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + { + void* current = mi_memory_grow(0); // get current size + if (current != NULL) { + void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space + alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); + base = mi_memory_grow(alloc_size); + } + } + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + if (base != NULL) { + p = mi_align_up_ptr(base, try_alignment); + if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { + // another thread used wasm_memory_grow/sbrk in-between and we do not have enough + // space after alignment. Give up (and waste the space as we cannot shrink :-( ) + // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) + p = NULL; + } + } + } + /* + if (p == NULL) { + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + errno = ENOMEM; + return NULL; + } + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + MI_UNUSED(allow_large); MI_UNUSED(commit); + *is_large = false; + *is_zero = false; + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 0 : ENOMEM); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- + +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + MI_UNUSED(addr); MI_UNUSED(size); + *is_zero = false; + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + MI_UNUSED(addr); MI_UNUSED(size); + *needs_recommit = false; + return 0; +} + +int _mi_prim_reset(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); + return 0; +} + + +//--------------------------------------------- +// Huge pages and NUMA nodes +//--------------------------------------------- + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = true; + *addr = NULL; + return ENOSYS; +} + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} diff --git a/third-party/mimalloc/src/prim/windows/etw-mimalloc.wprp b/third-party/mimalloc/src/prim/windows/etw-mimalloc.wprp new file mode 100644 index 00000000..b00cd7ad --- /dev/null +++ b/third-party/mimalloc/src/prim/windows/etw-mimalloc.wprp @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/third-party/mimalloc/src/prim/windows/etw.h b/third-party/mimalloc/src/prim/windows/etw.h new file mode 100644 index 00000000..4e0a092a --- /dev/null +++ b/third-party/mimalloc/src/prim/windows/etw.h @@ -0,0 +1,905 @@ +//**********************************************************************` +//* This is an include file generated by Message Compiler. *` +//* *` +//* Copyright (c) Microsoft Corporation. All Rights Reserved. *` +//**********************************************************************` +#pragma once + +//***************************************************************************** +// +// Notes on the ETW event code generated by MC: +// +// - Structures and arrays of structures are treated as an opaque binary blob. +// The caller is responsible for packing the data for the structure into a +// single region of memory, with no padding between values. The macro will +// have an extra parameter for the length of the blob. +// - Arrays of nul-terminated strings must be packed by the caller into a +// single binary blob containing the correct number of strings, with a nul +// after each string. The size of the blob is specified in characters, and +// includes the final nul. +// - Arrays of SID are treated as a single binary blob. The caller is +// responsible for packing the SID values into a single region of memory with +// no padding. +// - The length attribute on the data element in the manifest is significant +// for values with intype win:UnicodeString, win:AnsiString, or win:Binary. +// The length attribute must be specified for win:Binary, and is optional for +// win:UnicodeString and win:AnsiString (if no length is given, the strings +// are assumed to be nul-terminated). For win:UnicodeString, the length is +// measured in characters, not bytes. +// - For an array of win:UnicodeString, win:AnsiString, or win:Binary, the +// length attribute applies to every value in the array, so every value in +// the array must have the same length. The values in the array are provided +// to the macro via a single pointer -- the caller is responsible for packing +// all of the values into a single region of memory with no padding between +// values. +// - Values of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary can be generated and collected on Vista or later. +// However, they may not decode properly without the Windows 10 2018 Fall +// Update. +// - Arrays of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary must be packed by the caller into a single region of +// memory. The format for each item is a UINT16 byte-count followed by that +// many bytes of data. When providing the array to the generated macro, you +// must provide the total size of the packed array data, including the UINT16 +// sizes for each item. In the case of win:CountedUnicodeString, the data +// size is specified in WCHAR (16-bit) units. In the case of +// win:CountedAnsiString and win:CountedBinary, the data size is specified in +// bytes. +// +//***************************************************************************** + +#include +#include +#include + +#ifndef ETW_INLINE + #ifdef _ETW_KM_ + // In kernel mode, save stack space by never inlining templates. + #define ETW_INLINE DECLSPEC_NOINLINE __inline + #else + // In user mode, save code size by inlining templates as appropriate. + #define ETW_INLINE __inline + #endif +#endif // ETW_INLINE + +#if defined(__cplusplus) +extern "C" { +#endif + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_USE_KERNEL_MODE_APIS macro: +// Controls whether the generated code uses kernel-mode or user-mode APIs. +// - Set to 0 to use Windows user-mode APIs such as EventRegister. +// - Set to 1 to use Windows kernel-mode APIs such as EtwRegister. +// Default is based on whether the _ETW_KM_ macro is defined (i.e. by wdm.h). +// Note that the APIs can also be overridden directly, e.g. by setting the +// MCGEN_EVENTWRITETRANSFER or MCGEN_EVENTREGISTER macros. +// +#ifndef MCGEN_USE_KERNEL_MODE_APIS + #ifdef _ETW_KM_ + #define MCGEN_USE_KERNEL_MODE_APIS 1 + #else + #define MCGEN_USE_KERNEL_MODE_APIS 0 + #endif +#endif // MCGEN_USE_KERNEL_MODE_APIS + +// +// MCGEN_HAVE_EVENTSETINFORMATION macro: +// Controls how McGenEventSetInformation uses the EventSetInformation API. +// - Set to 0 to disable the use of EventSetInformation +// (McGenEventSetInformation will always return an error). +// - Set to 1 to directly invoke MCGEN_EVENTSETINFORMATION. +// - Set to 2 to to locate EventSetInformation at runtime via GetProcAddress +// (user-mode) or MmGetSystemRoutineAddress (kernel-mode). +// Default is determined as follows: +// - If MCGEN_EVENTSETINFORMATION has been customized, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else if the target OS version has EventSetInformation, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else set to 2 (i.e. try to dynamically locate EventSetInformation). +// Note that an McGenEventSetInformation function will only be generated if one +// or more provider in a manifest has provider traits. +// +#ifndef MCGEN_HAVE_EVENTSETINFORMATION + #ifdef MCGEN_EVENTSETINFORMATION // if MCGEN_EVENTSETINFORMATION has been customized, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #elif MCGEN_USE_KERNEL_MODE_APIS // else if using kernel-mode APIs, + #if NTDDI_VERSION >= 0x06040000 // if target OS is Windows 10 or later, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EtwSetInformation" via MmGetSystemRoutineAddress. + #endif // else (using user-mode APIs) + #else // if target OS and SDK is Windows 8 or later, + #if WINVER >= 0x0602 && defined(EVENT_FILTER_TYPE_SCHEMATIZED) + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EventSetInformation" via GetModuleHandleExW/GetProcAddress. + #endif + #endif +#endif // MCGEN_HAVE_EVENTSETINFORMATION + +// +// MCGEN Override Macros +// +// The following override macros may be defined before including this header +// to control the APIs used by this header: +// +// - MCGEN_EVENTREGISTER +// - MCGEN_EVENTUNREGISTER +// - MCGEN_EVENTSETINFORMATION +// - MCGEN_EVENTWRITETRANSFER +// +// If the the macro is undefined, the MC implementation will default to the +// corresponding ETW APIs. For example, if the MCGEN_EVENTREGISTER macro is +// undefined, the EventRegister[MyProviderName] macro will use EventRegister +// in user mode and will use EtwRegister in kernel mode. +// +// To prevent issues from conflicting definitions of these macros, the value +// of the override macro will be used as a suffix in certain internal function +// names. Because of this, the override macros must follow certain rules: +// +// - The macro must be defined before any MC-generated header is included and +// must not be undefined or redefined after any MC-generated header is +// included. Different translation units (i.e. different .c or .cpp files) +// may set the macros to different values, but within a translation unit +// (within a single .c or .cpp file), the macro must be set once and not +// changed. +// - The override must be an object-like macro, not a function-like macro +// (i.e. the override macro must not have a parameter list). +// - The override macro's value must be a simple identifier, i.e. must be +// something that starts with a letter or '_' and contains only letters, +// numbers, and '_' characters. +// - If the override macro's value is the name of a second object-like macro, +// the second object-like macro must follow the same rules. (The override +// macro's value can also be the name of a function-like macro, in which +// case the function-like macro does not need to follow the same rules.) +// +// For example, the following will cause compile errors: +// +// #define MCGEN_EVENTWRITETRANSFER MyNamespace::MyClass::MyFunction // Value has non-identifier characters (colon). +// #define MCGEN_EVENTWRITETRANSFER GetEventWriteFunctionPointer(7) // Value has non-identifier characters (parentheses). +// #define MCGEN_EVENTWRITETRANSFER(h,e,a,r,c,d) EventWrite(h,e,c,d) // Override is defined as a function-like macro. +// #define MY_OBJECT_LIKE_MACRO MyNamespace::MyClass::MyEventWriteFunction +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // Evaluates to something with non-identifier characters (colon). +// +// The following would be ok: +// +// #define MCGEN_EVENTWRITETRANSFER MyEventWriteFunction1 // OK, suffix will be "MyEventWriteFunction1". +// #define MY_OBJECT_LIKE_MACRO MyEventWriteFunction2 +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // OK, suffix will be "MyEventWriteFunction2". +// #define MY_FUNCTION_LIKE_MACRO(h,e,a,r,c,d) MyNamespace::MyClass::MyEventWriteFunction3(h,e,c,d) +// #define MCGEN_EVENTWRITETRANSFER MY_FUNCTION_LIKE_MACRO // OK, suffix will be "MY_FUNCTION_LIKE_MACRO". +// +#ifndef MCGEN_EVENTREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTREGISTER EtwRegister + #else + #define MCGEN_EVENTREGISTER EventRegister + #endif +#endif // MCGEN_EVENTREGISTER +#ifndef MCGEN_EVENTUNREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTUNREGISTER EtwUnregister + #else + #define MCGEN_EVENTUNREGISTER EventUnregister + #endif +#endif // MCGEN_EVENTUNREGISTER +#ifndef MCGEN_EVENTSETINFORMATION + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTSETINFORMATION EtwSetInformation + #else + #define MCGEN_EVENTSETINFORMATION EventSetInformation + #endif +#endif // MCGEN_EVENTSETINFORMATION +#ifndef MCGEN_EVENTWRITETRANSFER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTWRITETRANSFER EtwWriteTransfer + #else + #define MCGEN_EVENTWRITETRANSFER EventWriteTransfer + #endif +#endif // MCGEN_EVENTWRITETRANSFER + +// +// MCGEN_EVENT_ENABLED macro: +// Override to control how the EventWrite[EventName] macros determine whether +// an event is enabled. The default behavior is for EventWrite[EventName] to +// use the EventEnabled[EventName] macros. +// +#ifndef MCGEN_EVENT_ENABLED +#define MCGEN_EVENT_ENABLED(EventName) EventEnabled##EventName() +#endif + +// +// MCGEN_EVENT_ENABLED_FORCONTEXT macro: +// Override to control how the EventWrite[EventName]_ForContext macros +// determine whether an event is enabled. The default behavior is for +// EventWrite[EventName]_ForContext to use the +// EventEnabled[EventName]_ForContext macros. +// +#ifndef MCGEN_EVENT_ENABLED_FORCONTEXT +#define MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, EventName) EventEnabled##EventName##_ForContext(pContext) +#endif + +// +// MCGEN_ENABLE_CHECK macro: +// Determines whether the specified event would be considered as enabled +// based on the state of the specified context. Slightly faster than calling +// McGenEventEnabled directly. +// +#ifndef MCGEN_ENABLE_CHECK +#define MCGEN_ENABLE_CHECK(Context, Descriptor) (Context.IsEnabled && McGenEventEnabled(&Context, &Descriptor)) +#endif + +#if !defined(MCGEN_TRACE_CONTEXT_DEF) +#define MCGEN_TRACE_CONTEXT_DEF +// This structure is for use by MC-generated code and should not be used directly. +typedef struct _MCGEN_TRACE_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; // Used as pointer to provider traits. + ULONGLONG MatchAnyKeyword; + ULONGLONG MatchAllKeyword; + ULONG Flags; + ULONG IsEnabled; + UCHAR Level; + UCHAR Reserve; + USHORT EnableBitsCount; + PULONG EnableBitMask; + const ULONGLONG* EnableKeyWords; + const UCHAR* EnableLevel; +} MCGEN_TRACE_CONTEXT, *PMCGEN_TRACE_CONTEXT; +#endif // MCGEN_TRACE_CONTEXT_DEF + +#if !defined(MCGEN_LEVEL_KEYWORD_ENABLED_DEF) +#define MCGEN_LEVEL_KEYWORD_ENABLED_DEF +// +// Determines whether an event with a given Level and Keyword would be +// considered as enabled based on the state of the specified context. +// Note that you may want to use MCGEN_ENABLE_CHECK instead of calling this +// function directly. +// +FORCEINLINE +BOOLEAN +McGenLevelKeywordEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ UCHAR Level, + _In_ ULONGLONG Keyword + ) +{ + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & EnableInfo->MatchAnyKeyword) && + ((Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // MCGEN_LEVEL_KEYWORD_ENABLED_DEF + +#if !defined(MCGEN_EVENT_ENABLED_DEF) +#define MCGEN_EVENT_ENABLED_DEF +// +// Determines whether the specified event would be considered as enabled based +// on the state of the specified context. Note that you may want to use +// MCGEN_ENABLE_CHECK instead of calling this function directly. +// +FORCEINLINE +BOOLEAN +McGenEventEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + return McGenLevelKeywordEnabled(EnableInfo, EventDescriptor->Level, EventDescriptor->Keyword); +} +#endif // MCGEN_EVENT_ENABLED_DEF + +#if !defined(MCGEN_CONTROL_CALLBACK) +#define MCGEN_CONTROL_CALLBACK + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +VOID +__stdcall +McGenControlCallbackV2( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext + ) +/*++ + +Routine Description: + + This is the notification callback for Windows Vista and later. + +Arguments: + + SourceId - The GUID that identifies the session that enabled the provider. + + ControlCode - The parameter indicates whether the provider + is being enabled or disabled. + + Level - The level at which the event is enabled. + + MatchAnyKeyword - The bitmask of keywords that the provider uses to + determine the category of events that it writes. + + MatchAllKeyword - This bitmask additionally restricts the category + of events that the provider writes. + + FilterData - The provider-defined data. + + CallbackContext - The context of the callback that is defined when the provider + called EtwRegister to register itself. + +Remarks: + + ETW calls this function to notify provider of enable/disable + +--*/ +{ + PMCGEN_TRACE_CONTEXT Ctx = (PMCGEN_TRACE_CONTEXT)CallbackContext; + ULONG Ix; +#ifndef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + UNREFERENCED_PARAMETER(SourceId); + UNREFERENCED_PARAMETER(FilterData); +#endif + + if (Ctx == NULL) { + return; + } + + switch (ControlCode) { + + case EVENT_CONTROL_CODE_ENABLE_PROVIDER: + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->IsEnabled = EVENT_CONTROL_CODE_ENABLE_PROVIDER; + + for (Ix = 0; Ix < Ctx->EnableBitsCount; Ix += 1) { + if (McGenLevelKeywordEnabled(Ctx, Ctx->EnableLevel[Ix], Ctx->EnableKeyWords[Ix]) != FALSE) { + Ctx->EnableBitMask[Ix >> 5] |= (1 << (Ix % 32)); + } else { + Ctx->EnableBitMask[Ix >> 5] &= ~(1 << (Ix % 32)); + } + } + break; + + case EVENT_CONTROL_CODE_DISABLE_PROVIDER: + Ctx->IsEnabled = EVENT_CONTROL_CODE_DISABLE_PROVIDER; + Ctx->Level = 0; + Ctx->MatchAnyKeyword = 0; + Ctx->MatchAllKeyword = 0; + if (Ctx->EnableBitsCount > 0) { +#pragma warning(suppress: 26451) // Arithmetic overflow cannot occur, no matter the value of EnableBitCount + RtlZeroMemory(Ctx->EnableBitMask, (((Ctx->EnableBitsCount - 1) / 32) + 1) * sizeof(ULONG)); + } + break; + + default: + break; + } + +#ifdef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + // + // Call user defined callback + // + MCGEN_PRIVATE_ENABLE_CALLBACK_V2( + SourceId, + ControlCode, + Level, + MatchAnyKeyword, + MatchAllKeyword, + FilterData, + CallbackContext + ); +#endif // MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + + return; +} + +#endif // MCGEN_CONTROL_CALLBACK + +#ifndef _mcgen_PENABLECALLBACK + #if MCGEN_USE_KERNEL_MODE_APIS + #define _mcgen_PENABLECALLBACK PETWENABLECALLBACK + #else + #define _mcgen_PENABLECALLBACK PENABLECALLBACK + #endif +#endif // _mcgen_PENABLECALLBACK + +#if !defined(_mcgen_PASTE2) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE2(a, b) _mcgen_PASTE2_imp(a, b) +#define _mcgen_PASTE2_imp(a, b) a##b +#endif // _mcgen_PASTE2 + +#if !defined(_mcgen_PASTE3) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE3(a, b, c) _mcgen_PASTE3_imp(a, b, c) +#define _mcgen_PASTE3_imp(a, b, c) a##b##_##c +#endif // _mcgen_PASTE3 + +// +// Macro validation +// + +// Validate MCGEN_EVENTREGISTER: + +// Trigger an error if MCGEN_EVENTREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER); + +// Trigger an error if MCGEN_EVENTREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER) + MCGEN_EVENTREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTREGISTER); + +// Validate MCGEN_EVENTUNREGISTER: + +// Trigger an error if MCGEN_EVENTUNREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER); + +// Trigger an error if MCGEN_EVENTUNREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER) + MCGEN_EVENTUNREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTUNREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTUNREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTUNREGISTER); + +// Validate MCGEN_EVENTSETINFORMATION: + +// Trigger an error if MCGEN_EVENTSETINFORMATION is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION); + +// Trigger an error if MCGEN_EVENTSETINFORMATION is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION) + MCGEN_EVENTSETINFORMATION_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTSETINFORMATION is defined as a function-like macro: +typedef void MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_MCGEN_EVENTSETINFORMATION; +typedef int _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_, MCGEN_EVENTSETINFORMATION); + +// Validate MCGEN_EVENTWRITETRANSFER: + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER); + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER) + MCGEN_EVENTWRITETRANSFER_must_not_be_redefined_between_headers;; + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is defined as a function-like macro: +typedef void MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_MCGEN_EVENTWRITETRANSFER; +typedef int _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_, MCGEN_EVENTWRITETRANSFER); + +#ifndef McGenEventWrite_def +#define McGenEventWrite_def + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventWrite _mcgen_PASTE2(McGenEventWrite_, MCGEN_EVENTWRITETRANSFER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventWrite( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_opt_ LPCGUID ActivityId, + _In_range_(1, 128) ULONG EventDataCount, + _Pre_cap_(EventDataCount) EVENT_DATA_DESCRIPTOR* EventData + ) +{ + const USHORT UNALIGNED* Traits; + + // Some customized MCGEN_EVENTWRITETRANSFER macros might ignore ActivityId. + UNREFERENCED_PARAMETER(ActivityId); + + Traits = (const USHORT UNALIGNED*)(UINT_PTR)Context->Logger; + + if (Traits == NULL) { + EventData[0].Ptr = 0; + EventData[0].Size = 0; + EventData[0].Reserved = 0; + } else { + EventData[0].Ptr = (ULONG_PTR)Traits; + EventData[0].Size = *Traits; + EventData[0].Reserved = 2; // EVENT_DATA_DESCRIPTOR_TYPE_PROVIDER_METADATA + } + + return MCGEN_EVENTWRITETRANSFER( + Context->RegistrationHandle, + Descriptor, + ActivityId, + NULL, + EventDataCount, + EventData); +} +#endif // McGenEventWrite_def + +#if !defined(McGenEventRegisterUnregister) +#define McGenEventRegisterUnregister + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventRegister _mcgen_PASTE2(McGenEventRegister_, MCGEN_EVENTREGISTER) + +#pragma warning(push) +#pragma warning(disable:6103) +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventRegister( + _In_ LPCGUID ProviderId, + _In_opt_ _mcgen_PENABLECALLBACK EnableCallback, + _In_opt_ PVOID CallbackContext, + _Inout_ PREGHANDLE RegHandle + ) +/*++ + +Routine Description: + + This function registers the provider with ETW. + +Arguments: + + ProviderId - Provider ID to register with ETW. + + EnableCallback - Callback to be used. + + CallbackContext - Context for the callback. + + RegHandle - Pointer to registration handle. + +Remarks: + + Should not be called if the provider is already registered (i.e. should not + be called if *RegHandle != 0). Repeatedly registering a provider is a bug + and may indicate a race condition. However, for compatibility with previous + behavior, this function will return SUCCESS in this case. + +--*/ +{ + ULONG Error; + + if (*RegHandle != 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTREGISTER(ProviderId, EnableCallback, CallbackContext, RegHandle); + } + + return Error; +} +#pragma warning(pop) + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventUnregister _mcgen_PASTE2(McGenEventUnregister_, MCGEN_EVENTUNREGISTER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventUnregister(_Inout_ PREGHANDLE RegHandle) +/*++ + +Routine Description: + + Unregister from ETW and set *RegHandle = 0. + +Arguments: + + RegHandle - the pointer to the provider registration handle + +Remarks: + + If provider has not been registered (i.e. if *RegHandle == 0), + return SUCCESS. It is safe to call McGenEventUnregister even if the + call to McGenEventRegister returned an error. + +--*/ +{ + ULONG Error; + + if(*RegHandle == 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTUNREGISTER(*RegHandle); + *RegHandle = (REGHANDLE)0; + } + + return Error; +} + +#endif // McGenEventRegisterUnregister + +#ifndef _mcgen_EVENT_BIT_SET + #if defined(_M_IX86) || defined(_M_X64) + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((((const unsigned char*)EnableBits)[BitPosition >> 3] & (1u << (BitPosition & 7))) != 0) + #else // CPU type + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((EnableBits[BitPosition >> 5] & (1u << (BitPosition & 31))) != 0) + #endif // CPU type +#endif // _mcgen_EVENT_BIT_SET + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// Provider "microsoft-windows-mimalloc" event count 2 +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +// Provider GUID = 138f4dbb-ee04-4899-aa0a-572ad4475779 +EXTERN_C __declspec(selectany) const GUID ETW_MI_Provider = {0x138f4dbb, 0xee04, 0x4899, {0xaa, 0x0a, 0x57, 0x2a, 0xd4, 0x47, 0x57, 0x79}}; + +#ifndef ETW_MI_Provider_Traits +#define ETW_MI_Provider_Traits NULL +#endif // ETW_MI_Provider_Traits + +// +// Event Descriptors +// +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_ALLOC = {0x64, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_ALLOC_value 0x64 +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_FREE = {0x65, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_FREE_value 0x65 + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Event Enablement Bits +// These variables are for use by MC-generated code and should not be used directly. +// +EXTERN_C __declspec(selectany) DECLSPEC_CACHEALIGN ULONG microsoft_windows_mimallocEnableBits[1]; +EXTERN_C __declspec(selectany) const ULONGLONG microsoft_windows_mimallocKeywords[1] = {0x0}; +EXTERN_C __declspec(selectany) const unsigned char microsoft_windows_mimallocLevels[1] = {4}; + +// +// Provider context +// +EXTERN_C __declspec(selectany) MCGEN_TRACE_CONTEXT ETW_MI_Provider_Context = {0, (ULONG_PTR)ETW_MI_Provider_Traits, 0, 0, 0, 0, 0, 0, 1, microsoft_windows_mimallocEnableBits, microsoft_windows_mimallocKeywords, microsoft_windows_mimallocLevels}; + +// +// Provider REGHANDLE +// +#define microsoft_windows_mimallocHandle (ETW_MI_Provider_Context.RegistrationHandle) + +// +// This macro is set to 0, indicating that the EventWrite[Name] macros do not +// have an Activity parameter. This is controlled by the -km and -um options. +// +#define ETW_MI_Provider_EventWriteActivity 0 + +// +// Register with ETW using the control GUID specified in the manifest. +// Invoke this macro during module initialization (i.e. program startup, +// DLL process attach, or driver load) to initialize the provider. +// Note that if this function returns an error, the error means that +// will not work, but no action needs to be taken -- even if EventRegister +// returns an error, it is generally safe to use EventWrite and +// EventUnregister macros (they will be no-ops if EventRegister failed). +// +#ifndef EventRegistermicrosoft_windows_mimalloc +#define EventRegistermicrosoft_windows_mimalloc() McGenEventRegister(&ETW_MI_Provider, McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Register with ETW using a specific control GUID (i.e. a GUID other than what +// is specified in the manifest). Advanced scenarios only. +// +#ifndef EventRegisterByGuidmicrosoft_windows_mimalloc +#define EventRegisterByGuidmicrosoft_windows_mimalloc(Guid) McGenEventRegister(&(Guid), McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Unregister with ETW and close the provider. +// Invoke this macro during module shutdown (i.e. program exit, DLL process +// detach, or driver unload) to unregister the provider. +// Note that you MUST call EventUnregister before DLL or driver unload +// (not optional): failure to unregister a provider before DLL or driver unload +// will result in crashes. +// +#ifndef EventUnregistermicrosoft_windows_mimalloc +#define EventUnregistermicrosoft_windows_mimalloc() McGenEventUnregister(µsoft_windows_mimallocHandle) +#endif + +// +// MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION macro: +// Define this macro to enable support for caller-allocated provider context. +// +#ifdef MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Advanced scenarios: Caller-allocated provider context. +// Use when multiple differently-configured provider handles are needed, +// e.g. for container-aware drivers, one context per container. +// +// Usage: +// +// - Caller enables the feature before including this header, e.g. +// #define MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION 1 +// - Caller allocates memory, e.g. pContext = malloc(sizeof(McGenContext_microsoft_windows_mimalloc)); +// - Caller registers the provider, e.g. EventRegistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller writes events, e.g. EventWriteMyEvent_ForContext(pContext, ...); +// - Caller unregisters, e.g. EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller frees memory, e.g. free(pContext); +// + +typedef struct tagMcGenContext_microsoft_windows_mimalloc { + // The fields of this structure are subject to change and should + // not be accessed directly. To access the provider's REGHANDLE, + // use microsoft_windows_mimallocHandle_ForContext(pContext). + MCGEN_TRACE_CONTEXT Context; + ULONG EnableBits[1]; +} McGenContext_microsoft_windows_mimalloc; + +#define EventRegistermicrosoft_windows_mimalloc_ForContext(pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&ETW_MI_Provider, pContext) +#define EventRegisterByGuidmicrosoft_windows_mimalloc_ForContext(Guid, pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&(Guid), pContext) +#define EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext) McGenEventUnregister(&(pContext)->Context.RegistrationHandle) + +// +// Provider REGHANDLE for caller-allocated context. +// +#define microsoft_windows_mimallocHandle_ForContext(pContext) ((pContext)->Context.RegistrationHandle) + +// This function is for use by MC-generated code and should not be used directly. +// Initialize and register the caller-allocated context. +__inline +ULONG __stdcall +_mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)( + _In_ LPCGUID pProviderId, + _Out_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + RtlZeroMemory(pContext, sizeof(*pContext)); + pContext->Context.Logger = (ULONG_PTR)ETW_MI_Provider_Traits; + pContext->Context.EnableBitsCount = 1; + pContext->Context.EnableBitMask = pContext->EnableBits; + pContext->Context.EnableKeyWords = microsoft_windows_mimallocKeywords; + pContext->Context.EnableLevel = microsoft_windows_mimallocLevels; + return McGenEventRegister( + pProviderId, + McGenControlCallbackV2, + &pContext->Context, + &pContext->Context.RegistrationHandle); +} + +// This function is for use by MC-generated code and should not be used directly. +// Trigger a compile error if called with the wrong parameter type. +FORCEINLINE +_Ret_ McGenContext_microsoft_windows_mimalloc* +_mcgen_CheckContextType_microsoft_windows_mimalloc(_In_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + return pContext; +} + +#endif // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Enablement check macro for event "ETW_MI_ALLOC" +// +#define EventEnabledETW_MI_ALLOC() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_ALLOC_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_ALLOC" +// +#define EventWriteETW_MI_ALLOC(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) +#define EventWriteETW_MI_ALLOC_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&(pContext)->Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_ALLOC, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +// +// Enablement check macro for event "ETW_MI_FREE" +// +#define EventEnabledETW_MI_FREE() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_FREE_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_FREE" +// +#define EventWriteETW_MI_FREE(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) +#define EventWriteETW_MI_FREE_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&(pContext)->Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_FREE, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_FREE _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Template Functions +// + +// +// Function for template "ETW_CUSTOM_HEAP_ALLOC_DATA" (and possibly others). +// This function is for use by MC-generated code and should not be used directly. +// +#ifndef McTemplateU0xx_def +#define McTemplateU0xx_def +ETW_INLINE +ULONG +_mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER)( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_ const unsigned __int64 _Arg0, + _In_ const unsigned __int64 _Arg1 + ) +{ +#define McTemplateU0xx_ARGCOUNT 2 + + EVENT_DATA_DESCRIPTOR EventData[McTemplateU0xx_ARGCOUNT + 1]; + + EventDataDescCreate(&EventData[1],&_Arg0, sizeof(const unsigned __int64) ); + + EventDataDescCreate(&EventData[2],&_Arg1, sizeof(const unsigned __int64) ); + + return McGenEventWrite(Context, Descriptor, NULL, McTemplateU0xx_ARGCOUNT + 1, EventData); +} +#endif // McTemplateU0xx_def + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +#if defined(__cplusplus) +} +#endif diff --git a/third-party/mimalloc/src/prim/windows/etw.man b/third-party/mimalloc/src/prim/windows/etw.man new file mode 100644 index 0000000000000000000000000000000000000000..cfd1f8a9eaacd50af63f1e28f9540aa88c20f90c GIT binary patch literal 3926 zcmeH~T~8B16o${WiT`2c+NGc<*i;EYh$d8xl<0*CS-Mb~vPP(R>hsQY*nU!q z$b})3?##}d?{nTW+uy%xwr*doYaNU1!Vc}sa%7F%g+hVAmL$hwL?4dodnmuAKhUXm0)X9|WHj>XRahh@~SWDIkbduX;B#u6^Q>@U= zDO8U+KXa0*th&%f*z^Udh4ol@uE=Q&`emUsh_CA`FOXgI{i-`XZ9C#bR1yBm=PJ*p z9kVN$J6O;h;8HY>p)RnhY8A#Hb?z)_!y(Iaen(I)@-9CrSSp(;_Jn9I*$S&ATjP1? zVxB>pV@LVsy;^jZr7r$HNAm06TcUiI`l@~F$Mt$E%Shfd3O+h1vFhR9a8yQZ@wpne zr3bI-p=VEdo{)#uWxSVJeYQF|-5tnq>~f+CP~A0&{v=(up=ngEDl>5!$EDwPRaNjW zXj|wbG$OwmwaW-hMvBK%pbm3wpic71O^dzbxT%*13+SP9h- zI~rA5hapTVnk|qmiIVYy{__+x9f7OV4j3`g4;{{8_SWnLBSu2PUc%~`t%Ae^>J`SS zdtZg-r<0xAH*_ALtK;Nv(d9nbKK1jK=Ld)I(jQrKhBjgToR#Wm8{0a}@6ZuEO(lvG=y=Jh{M!4!-$(E)!vYUrf47 znf4W$e&QFOovV_$>J%X*KN>oXI@jt%BJms>IU}I$<7Hr{PChchs%+mx{% zt(fa_PM4r63?1h#YOk~;byc5`>%q>sLHA1gohNq{qOREhxu<y~`}YVhGe0?R_ceQ8vt^BpSNp6kErj_0hUNFyWQ1IOZ|GEgWBPwYFLgFu Oo!*uqEBu%Ae18C_t1cS= literal 0 HcmV?d00001 diff --git a/third-party/mimalloc/src/prim/windows/prim.c b/third-party/mimalloc/src/prim/windows/prim.c new file mode 100644 index 00000000..e6b61079 --- /dev/null +++ b/third-party/mimalloc/src/prim/windows/prim.c @@ -0,0 +1,622 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" +#include // fputs, stderr + + +//--------------------------------------------- +// Dynamically bind Windows API points for portability +//--------------------------------------------- + +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. +typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + +#include +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; + +//--------------------------------------------- +// Enable large page support dynamically (if possible) +//--------------------------------------------- + +static bool win_enable_large_os_pages(size_t* large_page_size) +{ + static bool large_initialized = false; + if (large_initialized) return (_mi_os_large_page_size() > 0); + large_initialized = true; + + // Try to see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok && large_page_size != NULL) { + *large_page_size = GetLargePageMinimum(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + config->has_overcommit = false; + config->must_free_whole = true; + config->has_virtual_reserve = true; + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } + if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; } + // get the VirtualAlloc2 function + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + // NtAllocateVirtualMemoryEx is used for huge page allocation + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + if (hDll != NULL) { + pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + FreeLibrary(hDll); + } + if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + win_enable_large_os_pages(&config->large_page_size); + } +} + + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(size); + DWORD errcode = 0; + bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. + MEMORY_BASIC_INFORMATION info = { 0 }; + VirtualQuery(addr, &info, sizeof(info)); + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { + errcode = 0; + err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + } + } + return (int)errcode; +} + + +//--------------------------------------------- +// VirtualAlloc +//--------------------------------------------- + +static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { + #if (MI_INTPTR_SIZE >= 8) + // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment,size); + if (hint != NULL) { + void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); + if (p != NULL) return p; + _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); + // fall through on error + } + } + #endif + // on modern Windows try use VirtualAlloc2 for aligned allocation + if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { + MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; + reqs.Alignment = try_alignment; + MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; + param.Type.Type = MiMemExtendedParameterAddressRequirements; + param.Arg.Pointer = &reqs; + void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + if (p != NULL) return p; + _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); + // fall through on error + } + // last resort + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static _Atomic(size_t) large_page_try_ok; // = 0; + void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. + if ((large_only || _mi_os_use_large_page(size, try_alignment)) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations + } + } + } + // Fall back to regular page allocation + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = win_virtual_alloc_prim(addr, size, try_alignment, flags); + } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } + return p; +} + +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + *is_zero = true; + int flags = MEM_RESERVE; + if (commit) { flags |= MEM_COMMIT; } + *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- +#ifdef _MSC_VER +#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) +#endif + +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + *is_zero = false; + /* + // zero'ing only happens on an initial commit... but checking upfront seems expensive.. + _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo); + if (VirtualQuery(addr, &meminfo, size) > 0) { + if ((meminfo.State & MEM_COMMIT) == 0) { + *is_zero = true; + } + } + */ + // commit + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + if (p == NULL) return (int)GetLastError(); + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + *needs_recommit = true; // for safety, assume always decommitted even in the case of an error. + return (ok ? 0 : (int)GetLastError()); +} + +int _mi_prim_reset(void* addr, size_t size) { + void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == addr); + #if 0 + if (p != NULL) { + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set + } + #endif + return (p != NULL ? 0 : (int)GetLastError()); +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + DWORD oldprotect = 0; + BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); + return (ok ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) +{ + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + + win_enable_large_os_pages(NULL); + + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + ULONG param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; + } + SIZE_T psize = size; + void* base = hint_addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } + } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); + } + + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + *is_zero = true; + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Numa nodes +//--------------------------------------------- + +size_t _mi_prim_numa_node(void) { + USHORT numa_node = 0; + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + MI_PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) { numa_node = nnode; } + } + else if (pGetNumaProcessorNode != NULL) { + // Vista or earlier, use older API that is limited to 64 processors. Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) { numa_node = nnode; } + } + return numa_node; +} + +size_t _mi_prim_numa_node_count(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + // find the highest node number that has actual processors assigned to it. Issue #282 + while(numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. + ULONGLONG mask; + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; + } + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); +} + +mi_msecs_t _mi_prim_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_msecs(t); +} + + +//---------------------------------------------------------------- +// Process Info +//---------------------------------------------------------------- + +#include +#include + +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; +} + +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + FILETIME ct; + FILETIME ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + } + + // get process info + PROCESS_MEMORY_COUNTERS info; + memset(&info, 0, sizeof(info)); + if (pGetProcessMemoryInfo != NULL) { + pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + } + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; +} + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) +{ + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + if (!_mi_preloading()) { + // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + static HANDLE hcon = INVALID_HANDLE_VALUE; + static bool hconIsConsole; + if (hcon == INVALID_HANDLE_VALUE) { + CONSOLE_SCREEN_BUFFER_INFO sbi; + hcon = GetStdHandle(STD_ERROR_HANDLE); + hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + } + const size_t len = _mi_strlen(msg); + if (len > 0 && len < UINT32_MAX) { + DWORD written = 0; + if (hconIsConsole) { + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + else if (hcon != INVALID_HANDLE_VALUE) { + // use direct write if stderr was redirected + WriteFile(hcon, msg, (DWORD)len, &written, NULL); + } + else { + // finally fall back to fputs after all + fputs(msg, stderr); + } + } + } +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. +// Note: on windows, environment names are not case sensitive. +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + return (len > 0 && len < result_size); +} + + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. +#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} + +#else + +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + if (pBCryptGenRandom == NULL) return false; + } + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} + +#endif // MI_USE_RTLGENRANDOM + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if !defined(MI_SHARED_LIB) + +// use thread local storage keys to detect thread ending +#include +#if (_WIN32_WINNT < 0x600) // before Windows Vista +WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); +WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); +WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); +WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); +#endif + +static DWORD mi_fls_key = (DWORD)(-1); + +static void NTAPI mi_fls_done(PVOID value) { + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_fls_key = FlsAlloc(&mi_fls_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // call thread-done on all threads (except the main thread) to prevent + // dangling callback pointer if statically linked with a DLL; Issue #208 + FlsFree(mi_fls_key); +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + mi_assert_internal(mi_fls_key != (DWORD)(-1)); + FlsSetValue(mi_fls_key, heap); +} + +#else + +// Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event. + +void _mi_prim_thread_init_auto_done(void) { +} + +void _mi_prim_thread_done_auto_done(void) { +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/third-party/mimalloc/src/prim/windows/readme.md b/third-party/mimalloc/src/prim/windows/readme.md new file mode 100644 index 00000000..217c3d17 --- /dev/null +++ b/third-party/mimalloc/src/prim/windows/readme.md @@ -0,0 +1,17 @@ +## Primitives: + +- `prim.c` contains Windows primitives for OS allocation. + +## Event Tracing for Windows (ETW) + +- `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. + (100 is an allocation, 101 is for a free) + +- `etw-mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). + In an admin prompt, you can use: + ``` + > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode + > + > wpr -stop test.etl + ``` + and then open `test.etl` in the Windows Performance Analyzer (WPA). \ No newline at end of file diff --git a/third-party/mimalloc/src/random.c b/third-party/mimalloc/src/random.c index 06d4ba4a..4fc8b2f8 100644 --- a/third-party/mimalloc/src/random.c +++ b/third-party/mimalloc/src/random.c @@ -4,14 +4,10 @@ This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // for syscall() on Linux -#endif - #include "mimalloc.h" -#include "mimalloc-internal.h" - -#include // memset +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_prim_random_buf +#include // memset /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation @@ -158,159 +154,13 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- -To initialize a fresh random context we rely on the OS: -- Windows : BCryptGenRandom (or RtlGenRandom) -- macOS : CCRandomGenerateBytes, arc4random_buf -- bsd,wasi : arc4random_buf -- Linux : getrandom,/dev/urandom +To initialize a fresh random context. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ -#if defined(_WIN32) - -#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) -// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using -// dynamic overriding, we observed it can raise an exception when compiled with C++, and -// sometimes deadlocks when also running under the VS debugger. -// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. -// To be continued.. -#pragma comment (lib,"advapi32.lib") -#define RtlGenRandom SystemFunction036 -#ifdef __cplusplus -extern "C" { -#endif -BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); -#ifdef __cplusplus -} -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - return (RtlGenRandom(buf, (ULONG)buf_len) != 0); -} -#else - -#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG -#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 -#endif - -typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); -static PBCryptGenRandom pBCryptGenRandom = NULL; - -static bool os_random_buf(void* buf, size_t buf_len) { - if (pBCryptGenRandom == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); - if (hDll != NULL) { - pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); - } - } - if (pBCryptGenRandom == NULL) { - return false; - } - else { - return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); - } -} -#endif - -#elif defined(__APPLE__) -#include -#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 -#include -#include -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf - // may fail silently on macOS. See PR #390, and - return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); - #else - // fall back on older macOS - arc4random_buf(buf, buf_len); - return true; - #endif -} - -#elif defined(__ANDROID__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__sun) // todo: what to use with __wasi__? -#include -static bool os_random_buf(void* buf, size_t buf_len) { - arc4random_buf(buf, buf_len); - return true; -} -#elif defined(__linux__) || defined(__HAIKU__) -#if defined(__linux__) -#include -#endif -#include -#include -#include -#include -#include -static bool os_random_buf(void* buf, size_t buf_len) { - // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` - // and for the latter the actual `getrandom` call is not always defined. - // (see ) - // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. -#ifdef SYS_getrandom - #ifndef GRND_NONBLOCK - #define GRND_NONBLOCK (1) - #endif - static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_load_acquire(&no_getrandom)==0) { - ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); - if (ret >= 0) return (buf_len == (size_t)ret); - if (errno != ENOSYS) return false; - mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom - } -#endif - int flags = O_RDONLY; - #if defined(O_CLOEXEC) - flags |= O_CLOEXEC; - #endif - int fd = open("/dev/urandom", flags, 0); - if (fd < 0) return false; - size_t count = 0; - while(count < buf_len) { - ssize_t ret = read(fd, (char*)buf + count, buf_len - count); - if (ret<=0) { - if (errno!=EAGAIN && errno!=EINTR) break; - } - else { - count += ret; - } - } - close(fd); - return (count==buf_len); -} -#else -static bool os_random_buf(void* buf, size_t buf_len) { - return false; -} -#endif - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random - - #if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); - #elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); - #else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; - #endif + x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; for (uintptr_t i = 0; i < max; i++) { @@ -322,7 +172,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (use_weak || !os_random_buf(key, sizeof(key))) { + if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) diff --git a/third-party/mimalloc/src/region.c b/third-party/mimalloc/src/region.c deleted file mode 100644 index 3571abb6..00000000 --- a/third-party/mimalloc/src/region.c +++ /dev/null @@ -1,516 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -#include "bitmap.h" - -// Internal raw OS interface -size_t _mi_os_large_page_size(void); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); - -// arena.c -mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); - - - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - size_t value; - struct { - bool valid; // initialized? - bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - _Atomic(size_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(size_t) arena_memid; // if allocated from a (huge page) arena - _Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508) -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(size_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not out of regions yet? - if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const size_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (size_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (size_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! unclaim and return - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed - #endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(align_offset == 0); - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. - if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. - size_t rcount = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_load_relaxed(®ion->info) != 0) { - // if no segments used, try to claim the whole region - size_t m = mi_atomic_load_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); - size_t commit = mi_atomic_load_relaxed(®ions[i].commit); - memset((void*)®ions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning - // and release the whole region - mi_atomic_store_release(®ion->info, (size_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(p, size, tld->stats); - } - else { - return _mi_os_reset(p, size, tld->stats); - } -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit(p, size, is_zero, tld->stats); - } - else { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/third-party/mimalloc/src/segment-cache.c b/third-party/mimalloc/src/segment-cache.c deleted file mode 100644 index d93fd644..00000000 --- a/third-party/mimalloc/src/segment-cache.c +++ /dev/null @@ -1,409 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls and to reuse - the commit_mask to optimize the commit/decommit calls. - The full memory map of all segments is also implemented here. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include "bitmap.h" // atomic bitmap - -//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache - -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit - -#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes - -typedef struct mi_cache_slot_s { - void* p; - size_t memid; - bool is_pinned; - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - _Atomic(mi_msecs_t) expire; -} mi_cache_slot_t; - -static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; -static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free - -static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { - mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); -} - -mi_decl_noinline static void* mi_segment_cache_pop_ex( - bool all_suitable, - size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return NULL; -#else - - // only segment blocks - if (size != MI_SEGMENT_SIZE) return NULL; - - // numa node determines start field - const int numa_node = _mi_os_numa_node(tld); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - mi_arena_id_t req_arena_id = _req_arena_id; - mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - - if (*large) { // large allowed? - claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_pinned = slot->is_pinned; - *is_zero = false; - *commit_mask = slot->commit_mask; - *decommit_mask = slot->decommit_mask; - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - - // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -#endif -} - - -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) -{ - if (mi_commit_mask_is_empty(cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(cmask)) { - _mi_os_decommit(p, total, stats); - } - else { - // todo: one call to decommit the whole at once? - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - size_t idx; - size_t count; - mi_commit_mask_foreach(cmask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - mi_commit_mask_create_empty(cmask); -} - -#define MI_MAX_PURGE_PER_PUSH (4) - -static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) -{ - MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_decommit)) return; - mi_msecs_t now = _mi_clock_now(); - size_t purged = 0; - const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); - for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // safe read - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - // TODO: instead of decommit, we could also free to the OS? - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); - } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push - } - } -} - -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - if (force) { - // called on `mi_collect(true)` but not on thread termination - _mi_segment_cache_free_all(tld); - } - else { - mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); - } -} - -void _mi_segment_cache_free_all(mi_os_tld_t* tld) { - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - bool is_pinned; - bool is_zero; - size_t memid; - const size_t size = MI_SEGMENT_SIZE; - // iterate twice: first large pages, then regular memory - for (int i = 0; i < 2; i++) { - void* p; - do { - // keep popping and freeing the memory - bool large = (i == 0); - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, - &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); - } -} - -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return false; -#else - - // only for normal segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); -#if MI_DEBUG>1 - if (is_pinned || is_large) { - mi_assert_internal(mi_commit_mask_is_full(commit_mask)); - } -#endif - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->is_pinned = is_pinned; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = *commit_mask; - slot->decommit_mask = *decommit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { - long delay = mi_option_get(mi_option_segment_decommit_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; -#endif -} - - -/* ----------------------------------------------------------- - The following functions are to reliably find the segment or - block that encompasses any pointer p (or NULL if it is not - in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) - set to 1 if it contains the segment meta data. ------------------------------------------------------------ */ - - -#if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB -#else -#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb -#endif - -#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) -#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) -#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) - -static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments - -static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? - if ((uintptr_t)segment >= MI_MAX_ADDRESS) { - *bitidx = 0; - return MI_SEGMENT_MAP_WSIZE; - } - else { - const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; - *bitidx = segindex % MI_INTPTR_BITS; - const size_t mapindex = segindex / MI_INTPTR_BITS; - mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); - return mapindex; - } -} - -void _mi_segment_map_allocated_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); - if (index==MI_SEGMENT_MAP_WSIZE) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask | ((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -void _mi_segment_map_freed_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); - if (index == MI_SEGMENT_MAP_WSIZE) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask & ~((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. -static mi_segment_t* _mi_segment_of(const void* p) { - if (p == NULL) return NULL; - mi_segment_t* segment = _mi_ptr_segment(p); - mi_assert_internal(segment != NULL); - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { - return segment; // yes, allocated by us - } - if (index==MI_SEGMENT_MAP_WSIZE) return NULL; - - // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? - - // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough - // valid huge objects - // note: we could maintain a lowest index to speed up the path for invalid pointers? - size_t lobitidx; - size_t loindex; - uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); - if (lobits != 0) { - loindex = index; - lobitidx = mi_bsr(lobits); // lobits != 0 - } - else if (index == 0) { - return NULL; - } - else { - mi_assert_internal(index > 0); - uintptr_t lomask = mask; - loindex = index; - do { - loindex--; - lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); - } while (lomask != 0 && loindex > 0); - if (lomask == 0) return NULL; - lobitidx = mi_bsr(lomask); // lomask != 0 - } - mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); - // take difference as the addresses could be larger than the MAX_ADDRESS space. - size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; - segment = (mi_segment_t*)((uint8_t*)segment - diff); - - if (segment == NULL) return NULL; - mi_assert_internal((void*)segment < p); - bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(cookie_ok); - if mi_unlikely(!cookie_ok) return NULL; - if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); - return segment; -} - -// Is this a valid pointer in our heap? -static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); -} - -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} - -/* -// Return the full segment range belonging to a pointer -static void* mi_segment_range_of(const void* p, size_t* size) { - mi_segment_t* segment = _mi_segment_of(p); - if (segment == NULL) { - if (size != NULL) *size = 0; - return NULL; - } - else { - if (size != NULL) *size = segment->segment_size; - return segment; - } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); - mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); - return page; -} -*/ diff --git a/third-party/mimalloc/src/segment-map.c b/third-party/mimalloc/src/segment-map.c new file mode 100644 index 00000000..4c2104bd --- /dev/null +++ b/third-party/mimalloc/src/segment-map.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return MI_SEGMENT_MAP_WSIZE; + } + else { + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; + } +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index==MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index == MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; + mi_segment_t* segment = _mi_ptr_segment(p); + mi_assert_internal(segment != NULL); + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { + return segment; // yes, allocated by us + } + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? + + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else if (index == 0) { + return NULL; + } + else { + mi_assert_internal(index > 0); + uintptr_t lomask = mask; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); + // take difference as the addresses could be larger than the MAX_ADDRESS space. + size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if mi_unlikely(!cookie_ok) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? +static bool mi_is_valid_pointer(const void* p) { + return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p))); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/third-party/mimalloc/src/segment.c b/third-party/mimalloc/src/segment.c index dc98e3e7..28685f21 100644 --- a/third-party/mimalloc/src/segment.c +++ b/third-party/mimalloc/src/segment.c @@ -5,15 +5,15 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset #include -#define MI_PAGE_HUGE_ALIGN (256*1024) +#define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -257,7 +257,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -316,7 +316,13 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); + // note: the offset must always be an xblock_size multiple since we assume small allocations + // are aligned (see `mi_heap_malloc_aligned`). + size_t start_offset = 0; + if (xblock_size >= MI_INTPTR_SIZE) { + if (xblock_size <= 64) { start_offset = 3*xblock_size; } + else if (xblock_size <= 512) { start_offset = xblock_size; } + } if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } @@ -383,19 +389,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(end, os_pagesize); } - // purge delayed decommits now? (no, leave it to the cache) - // mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? (no, leave it to the arena) + // mi_segment_try_purge(segment,true,tld->stats); - // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache - !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) - { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats); } // called by threads that are terminating @@ -459,60 +460,81 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } +static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); -static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - - // commit liberal, but decommit conservative + // commit liberal uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; - mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + mi_segment_commit_mask(segment, false /* conservative? */, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true; - if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask, &mask); + if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false; + mi_commit_mask_set(&segment->commit_mask, &mask); + } + + // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon. + if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { - mi_assert_internal((void*)start != (void*)segment); - //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { - _mi_os_decommit(start, full_size, stats); // ok if this fails - } - mi_commit_mask_clear(&segment->commit_mask, &mask); - } - // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - } - // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + // always clear any delayed purges in our range (as they are either committed now) + mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow - if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed - return mi_segment_commitx(segment,true,p,size,stats); + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + return mi_segment_commit(segment, p, size, stats); } -static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_decommit_delay) == 0) { - mi_segment_commitx(segment, false, p, size, stats); +static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); + if (!segment->allow_purge) return true; + + // purge conservative + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /* conservative? */, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + + if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging + mi_assert_internal((void*)start != (void*)segment); + mi_assert_internal(segment->allow_decommit); + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + + // always clear any scheduled purges in our range + mi_commit_mask_clear(&segment->purge_mask, &mask); + return true; +} + +static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_purge) return; + + if (mi_option_get(mi_option_purge_delay) == 0) { + mi_segment_purge(segment, p, size, stats); } else { - // register for future decommit in the decommit mask + // register for future purge in the purge mask uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; @@ -520,39 +542,39 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask)); mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more - mi_commit_mask_set(&segment->decommit_mask, &cmask); + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only purge what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); - if (segment->decommit_expire == 0) { - // no previous decommits, initialize now - segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); + if (segment->purge_expire == 0) { + // no previous purgess, initialize now + segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } - else if (segment->decommit_expire <= now) { - // previous decommit mask already expired - if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { - mi_segment_delayed_decommit(segment, true, stats); + else if (segment->purge_expire <= now) { + // previous purge mask already expired + if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { + mi_segment_try_purge(segment, true, stats); } else { - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { - // previous decommit mask is not yet expired, increase the expiration by a bit. - segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); + // previous purge mask is not yet expired, increase the expiration by a bit. + segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); - if (!force && now < segment->decommit_expire) return; + if (!force && now < segment->purge_expire) return; - mi_commit_mask_t mask = segment->decommit_mask; - segment->decommit_expire = 0; - mi_commit_mask_create_empty(&segment->decommit_mask); + mi_commit_mask_t mask = segment->purge_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); size_t idx; size_t count; @@ -561,11 +583,11 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } } mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); } @@ -578,7 +600,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? NULL : mi_span_queue_for(slice_count,tld)); @@ -598,8 +620,8 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - if (allow_decommit) { - mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + if (allow_purge) { + mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); } // and push it on the free page queue (if it was not a huge page) @@ -632,7 +654,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` + // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case) + mi_assert_internal((segment->used==0 && slice->xblock_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear` slice->xblock_size = 0; // mark as free anyways // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to // avoid a possible cache miss (and the segment is about to be freed) @@ -716,7 +739,6 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i } // and initialize the page - page->is_reset = false; page->is_committed = true; segment->used++; return page; @@ -730,7 +752,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld); slice->slice_count = (uint32_t)slice_count; } @@ -773,16 +795,13 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren Segment allocation ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, +static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id, size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, - mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, - bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) + bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - // Allocate the segment from the OS - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; - size_t memid = 0; + mi_memid_t memid; + bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy size_t align_offset = 0; size_t alignment = MI_SEGMENT_ALIGN; @@ -795,48 +814,41 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t extra = align_offset - info_size; // recalculate due to potential guard pages *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); - //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } - const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; - mi_segment_t* segment = NULL; - // get from cache? - if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, os_tld); + if (segment == NULL) { + return NULL; // failed to allocate + } + + // ensure metadata part of the segment is committed + mi_commit_mask_t commit_mask; + if (memid.initially_committed) { + mi_commit_mask_create_full(&commit_mask); + } + else { + // at least commit the info slices + const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(commit_needed>0); + mi_commit_mask_create(0, commit_needed, &commit_mask); + mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE); + if (!_mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, NULL, tld->stats)) { + _mi_arena_free(segment,segment_size,0,memid,tld->stats); + return NULL; + } } - - // get from OS - if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, pcommit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate - if (*pcommit) { - mi_commit_mask_create_full(pcommit_mask); - } - else { - mi_commit_mask_create_empty(pcommit_mask); - } - } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_assert_internal(commit_needed>0); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - if (!mi_commit_mask_all_set(pcommit_mask, &commit_needed_mask)) { - // at least commit the info slices - mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE); - bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, is_zero, tld->stats); - if (!ok) return NULL; // failed to commit - mi_commit_mask_set(pcommit_mask, &commit_needed_mask); - } - mi_track_mem_undefined(segment,commit_needed); segment->memid = memid; - segment->mem_is_pinned = is_pinned; - segment->mem_is_large = mem_large; - segment->mem_is_committed = mi_commit_mask_is_full(pcommit_mask); - segment->mem_alignment = alignment; - segment->mem_align_offset = align_offset; + segment->allow_decommit = !memid.is_pinned; + segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0); + segment->segment_size = segment_size; + segment->commit_mask = commit_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan + mi_segments_track_size((long)(segment_size), tld); _mi_segment_map_allocated_at(segment); return segment; @@ -859,42 +871,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); - bool is_zero = false; - - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); - + // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, - &is_zero, &commit, tld, os_tld); + &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; - // zero the segment info? -- not always needed as it may be zero initialized from the OS - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!is_zero) { - ptrdiff_t ofs = offsetof(mi_segment_t, next); + // zero the segment info? -- not always needed as it may be zero initialized from the OS + if (!segment->memid.initially_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more + _mi_memzero((uint8_t*)segment + ofs, zsize); } - segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - #if MI_DEBUG>2 - const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); - #endif - } - - // initialize segment info + // initialize the rest of the segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; @@ -903,7 +894,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->slice_entries = slice_entries; segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); + // _mi_memzero(segment->slices, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // set up guard pages @@ -930,11 +921,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld); } else { mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance @@ -954,7 +945,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // Remove the free pages mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); + #if MI_DEBUG>1 size_t page_count = 0; + #endif while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); @@ -962,7 +955,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) { mi_segment_span_remove_from_queue(slice, tld); } + #if MI_DEBUG>1 page_count++; + #endif slice = slice + slice->slice_count; } mi_assert_internal(page_count == 2); // first page is allocated by the segment itself @@ -993,17 +988,16 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!segment->mem_is_pinned && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (segment->allow_decommit && mi_option_is_enabled(mi_option_deprecated_page_reset)) { size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; + uint8_t* start = _mi_page_start(segment, page, &psize); _mi_os_reset(start, psize, tld->stats); } // zero the page data, but not the segment fields page->is_zero_init = false; ptrdiff_t ofs = offsetof(mi_page_t, capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); page->xblock_size = 1; // and free it @@ -1048,7 +1042,7 @@ We maintain a global list of abandoned segments that are reclaimed on demand. Since this is shared among threads the implementation needs to avoid the A-B-A problem on popping abandoned segments: -We use tagged pointers to avoid accidentially identifying +We use tagged pointers to avoid accidentally identifying reused segments, much like stamped references in Java. Secondly, we maintain a reader counter to avoid resetting or decommitting segments that have a pending read operation. @@ -1234,8 +1228,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); + // perform delayed decommits (forcing is much slower on mstress) + mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1343,7 +1337,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, if (mi_slice_is_used(slice)) { // in use: reclaim the page in our heap mi_page_t* page = mi_slice_to_page(slice); - mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); @@ -1424,7 +1417,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again + mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1448,9 +1441,9 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, decommit if needed and push on the visited list - // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. - mi_segment_delayed_decommit(segment, force, tld->stats); + // otherwise, purge if needed and push on the visited list + // note: forced purge can be expensive if many threads are destroyed/created as in mstress. + mi_segment_try_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1508,7 +1501,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); + mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats); return page; } @@ -1542,7 +1535,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(psize - (aligned_p - start) >= size); uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } return page; @@ -1585,9 +1578,12 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); if (segment->allow_decommit) { - const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); - uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + size_t csize = mi_usable_size(block); + if (csize > sizeof(mi_block_t)) { + csize = csize - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } } } #endif diff --git a/third-party/mimalloc/src/static.c b/third-party/mimalloc/src/static.c index 5b34ddbb..bc05dd72 100644 --- a/third-party/mimalloc/src/static.c +++ b/third-party/mimalloc/src/static.c @@ -14,26 +14,27 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // For a static override we create a single object file // containing the whole library. If it is linked first // it will override all the standard library allocation // functions (on Unix's). -#include "stats.c" -#include "random.c" -#include "os.c" -#include "bitmap.c" -#include "arena.c" -#include "segment-cache.c" -#include "segment.c" -#include "page.c" -#include "heap.c" -#include "alloc.c" +#include "alloc.c" // includes alloc-override.c #include "alloc-aligned.c" #include "alloc-posix.c" -#if MI_OSX_ZONE -#include "alloc-override-osx.c" -#endif +#include "arena.c" +#include "bitmap.c" +#include "heap.c" #include "init.c" #include "options.c" +#include "os.c" +#include "page.c" // includes page-queue.c +#include "random.c" +#include "segment.c" +#include "segment-map.c" +#include "stats.c" +#include "prim/prim.c" +#if MI_OSX_ZONE +#include "prim/osx/alloc-override-zone.c" +#endif diff --git a/third-party/mimalloc/src/stats.c b/third-party/mimalloc/src/stats.c index 2a8b9404..300956ce 100644 --- a/third-party/mimalloc/src/stats.c +++ b/third-party/mimalloc/src/stats.c @@ -5,10 +5,11 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" -#include // fputs, stderr +#include // snprintf #include // memset #if defined(_MSC_VER) && (_MSC_VER < 1920) @@ -95,6 +96,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->reserved, &src->reserved, 1); mi_stat_add(&stats->committed, &src->committed, 1); mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->purged, &src->purged, 1); mi_stat_add(&stats->page_committed, &src->page_committed, 1); mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); @@ -110,6 +112,8 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + mi_stat_counter_add(&stats->reset_calls, &src->reset_calls, 1); + mi_stat_counter_add(&stats->purge_calls, &src->purge_calls, 1); mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); @@ -142,7 +146,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* const int64_t pos = (n < 0 ? -n : n); if (pos < base) { if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column - snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); + snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); } } else { @@ -157,7 +161,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); } - _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf); } @@ -166,7 +170,7 @@ static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* a } static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { - if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + if (unit==1) _mi_fprintf(out, arg, "%12s"," "); else mi_print_amount(n,0,out,arg); } @@ -181,7 +185,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) { _mi_fprintf(out, arg, " "); - _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok)); + _mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok)); _mi_fprintf(out, arg, "\n"); } else { @@ -194,7 +198,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 mi_print_amount(stat->freed, -1, out, arg); mi_print_amount(stat->current, -1, out, arg); if (unit==-1) { - _mi_fprintf(out, arg, "%22s", ""); + _mi_fprintf(out, arg, "%24s", ""); } else { mi_print_amount(-unit, 1, out, arg); @@ -218,12 +222,19 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t mi_stat_print_ex(stat, msg, unit, out, arg, NULL); } +static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->peak, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); _mi_fprintf(out, arg, "\n"); } + static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); @@ -233,7 +244,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); } #if MI_STAT>1 @@ -291,8 +302,6 @@ static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { // Print statistics //------------------------------------------------------------ -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); - static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered char buf[256]; @@ -322,7 +331,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); - mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_peak_print(&stats->reset, "reset", 1, out, arg ); + mi_stat_peak_print(&stats->purged, "purged", 1, out, arg ); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); @@ -333,20 +343,22 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); + mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); - mi_msecs_t elapsed; - mi_msecs_t user_time; - mi_msecs_t sys_time; + size_t elapsed; + size_t user_time; + size_t sys_time; size_t current_rss; size_t peak_rss; size_t current_commit; size_t peak_commit; size_t page_faults; - mi_stat_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); + _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); @@ -404,47 +416,13 @@ void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { // ---------------------------------------------------------------- // Basic timer for convenience; use milli-seconds to avoid doubles // ---------------------------------------------------------------- -#ifdef _WIN32 -#include -static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { - static LARGE_INTEGER mfreq; // = 0 - if (mfreq.QuadPart == 0LL) { - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - mfreq.QuadPart = f.QuadPart/1000LL; - if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; - } - return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); -} - -mi_msecs_t _mi_clock_now(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return mi_to_msecs(t); -} -#else -#include -#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) -mi_msecs_t _mi_clock_now(void) { - struct timespec t; - #ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &t); - #else - clock_gettime(CLOCK_REALTIME, &t); - #endif - return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -mi_msecs_t _mi_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); -} -#endif -#endif - static mi_msecs_t mi_clock_diff; +mi_msecs_t _mi_clock_now(void) { + return _mi_prim_clock_now(); +} + mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { mi_msecs_t t0 = _mi_clock_now(); @@ -463,156 +441,27 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { // Basic process statistics // -------------------------------------------------------- -#if defined(_WIN32) -#include - -static mi_msecs_t filetime_msecs(const FILETIME* ftime) { - ULARGE_INTEGER i; - i.LowPart = ftime->dwLowDateTime; - i.HighPart = ftime->dwHighDateTime; - mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds - return msecs; -} - -typedef struct _PROCESS_MEMORY_COUNTERS { - DWORD cb; - DWORD PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; -} PROCESS_MEMORY_COUNTERS; -typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; -typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); -static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - FILETIME ct; - FILETIME ut; - FILETIME st; - FILETIME et; - GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); - - // load psapi on demand - if (pGetProcessMemoryInfo == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); - if (hDll != NULL) { - pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); - } - } - - // get process info - PROCESS_MEMORY_COUNTERS info; - memset(&info, 0, sizeof(info)); - if (pGetProcessMemoryInfo != NULL) { - pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; -} - -#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) -#include -#include -#include - -#if defined(__APPLE__) -#include -#endif - -#if defined(__HAIKU__) -#include -#endif - -static mi_msecs_t timeval_secs(const struct timeval* tv) { - return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); -} - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); -#if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate -#if defined(__HAIKU__) - // Haiku does not have (yet?) a way to - // get these stats per process - thread_info tid; - area_info mem; - ssize_t c; - get_thread_info(find_thread(0), &tid); - while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; - } - *page_faults = 0; -#elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes - struct mach_task_basic_info info; - mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; - if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; - } -#else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB -#endif -} - -#else -#ifndef __wasi__ -// WebAssembly instances are not processes -#pragma message("define a way to get process info") -#endif - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; -} -#endif - - mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = 0; - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; + mi_process_info_t pinfo; + _mi_memzero_var(pinfo); + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; } diff --git a/third-party/mimalloc/test/main-override-static.c b/third-party/mimalloc/test/main-override-static.c index 534c8849..e71be29e 100644 --- a/third-party/mimalloc/test/main-override-static.c +++ b/third-party/mimalloc/test/main-override-static.c @@ -20,6 +20,7 @@ static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); static void test_heap_arena(void); +static void test_align(void); int main() { mi_version(); @@ -37,6 +38,7 @@ int main() { // alloc_huge(); // test_heap_walk(); // test_heap_arena(); + // test_align(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -68,6 +70,13 @@ int main() { return 0; } +static void test_align() { + void* p = mi_malloc_aligned(256, 256); + if (((uintptr_t)p % 256) != 0) { + fprintf(stderr, "%p is not 256 alignend!\n", p); + } +} + static void invalid_free() { free((void*)0xBADBEEF); realloc((void*)0xBADBEEF,10); diff --git a/third-party/mimalloc/test/main-override.cpp b/third-party/mimalloc/test/main-override.cpp index 7242eb29..c1607b66 100644 --- a/third-party/mimalloc/test/main-override.cpp +++ b/third-party/mimalloc/test/main-override.cpp @@ -37,15 +37,20 @@ static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 static void bench_alloc_large(void); // issue #xxx +//static void test_large_migrate(void); // issue #691 static void heap_thread_free_huge(); +static void test_std_string(); // issue #697 static void test_stl_allocators(); int main() { - mi_stats_reset(); // ignore earlier allocations - heap_thread_free_huge(); + // mi_stats_reset(); // ignore earlier allocations + + // test_std_string(); + // heap_thread_free_huge(); /* + heap_thread_free_huge(); heap_thread_free_large(); heap_no_delete(); heap_late_free(); @@ -55,12 +60,13 @@ int main() { tsan_numa_test(); strdup_test(); */ - test_stl_allocators(); - test_mt_shutdown(); + // test_stl_allocators(); + // test_mt_shutdown(); + // test_large_migrate(); //fail_aslr(); - bench_alloc_large(); - mi_stats_print(NULL); + // bench_alloc_large(); + // mi_stats_print(NULL); return 0; } @@ -99,6 +105,10 @@ static void various_tests() { delete t; t = new (std::nothrow) Test(42); delete t; + auto tbuf = new unsigned char[sizeof(Test)]; + t = new (tbuf) Test(42); + t->~Test(); + delete tbuf; } class Static { @@ -134,6 +144,7 @@ static bool test_stl_allocator2() { return vec.size() == 0; } +#if MI_HAS_HEAP_STL_ALLOCATOR static bool test_stl_allocator3() { std::vector > vec; vec.push_back(1); @@ -161,16 +172,55 @@ static bool test_stl_allocator6() { vec.pop_back(); return vec.size() == 0; } +#endif static void test_stl_allocators() { test_stl_allocator1(); test_stl_allocator2(); +#if MI_HAS_HEAP_STL_ALLOCATOR test_stl_allocator3(); test_stl_allocator4(); test_stl_allocator5(); test_stl_allocator6(); +#endif } +#if 0 +// issue #691 +static char* cptr; + +static void* thread1_allocate() +{ + cptr = mi_calloc_tp(char,22085632); + return NULL; +} + +static void* thread2_free() +{ + assert(cptr); + mi_free(cptr); + cptr = NULL; + return NULL; +} + +static void test_large_migrate(void) { + auto t1 = std::thread(thread1_allocate); + t1.join(); + auto t2 = std::thread(thread2_free); + t2.join(); + /* + pthread_t thread1, thread2; + + pthread_create(&thread1, NULL, &thread1_allocate, NULL); + pthread_join(thread1, NULL); + + pthread_create(&thread2, NULL, &thread2_free, NULL); + pthread_join(thread2, NULL); + */ + return; +} +#endif + // issue 445 static void strdup_test() { #ifdef _MSC_VER @@ -196,6 +246,13 @@ static void heap_no_delete() { } +// Issue #697 +static void test_std_string() { + std::string path = "/Users/xxxx/Library/Developer/Xcode/DerivedData/xxxxxxxxxx/Build/Intermediates.noindex/xxxxxxxxxxx/arm64/XX_lto.o/0.arm64.lto.o"; + std::string path1 = "/Users/xxxx/Library/Developer/Xcode/DerivedData/xxxxxxxxxx/Build/Intermediates.noindex/xxxxxxxxxxx/arm64/XX_lto.o/1.arm64.lto.o"; + std::cout << path + "\n>>> " + path1 + "\n>>> " << std::endl; +} + // Issue #204 static volatile void* global_p; diff --git a/third-party/mimalloc/test/test-api-fill.c b/third-party/mimalloc/test/test-api-fill.c index 85d8524f..3fca3b9d 100644 --- a/third-party/mimalloc/test/test-api-fill.c +++ b/third-party/mimalloc/test/test-api-fill.c @@ -5,7 +5,7 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-types.h" +#include "mimalloc/types.h" #include "testhelper.h" @@ -164,7 +164,7 @@ int main(void) { mi_free(p); }; -#if MI_DEBUG >= 2 +#if (MI_DEBUG >= 2) && !MI_TSAN // --------------------------------------------------- // Debug filling // --------------------------------------------------- @@ -271,7 +271,7 @@ int main(void) { mi_free(p); }; - + #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN) CHECK_BODY("fill-freed-small") { size_t malloc_size = MI_SMALL_SIZE_MAX / 2; uint8_t* p = (uint8_t*)mi_malloc(malloc_size); @@ -286,6 +286,7 @@ int main(void) { // First sizeof(void*) bytes will contain housekeeping data, skip these result = check_debug_fill_freed(p + sizeof(void*), malloc_size - sizeof(void*)); }; + #endif #endif // --------------------------------------------------- @@ -309,7 +310,7 @@ bool check_zero_init(uint8_t* p, size_t size) { #if MI_DEBUG >= 2 bool check_debug_fill_uninit(uint8_t* p, size_t size) { -#if MI_VALGRIND +#if MI_TRACK_VALGRIND || MI_TRACK_ASAN (void)p; (void)size; return true; // when compiled with valgrind we don't init on purpose #else @@ -325,7 +326,7 @@ bool check_debug_fill_uninit(uint8_t* p, size_t size) { } bool check_debug_fill_freed(uint8_t* p, size_t size) { -#if MI_VALGRIND +#if MI_TRACK_VALGRIND (void)p; (void)size; return true; // when compiled with valgrind we don't fill on purpose #else diff --git a/third-party/mimalloc/test/test-api.c b/third-party/mimalloc/test/test-api.c index 884a433b..8dd24e1b 100644 --- a/third-party/mimalloc/test/test-api.c +++ b/third-party/mimalloc/test/test-api.c @@ -33,8 +33,8 @@ we therefore test the API over various inputs. Please add more tests :-) #endif #include "mimalloc.h" -// #include "mimalloc-internal.h" -#include "mimalloc-types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX +// #include "mimalloc/internal.h" +#include "mimalloc/types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX #include "testhelper.h" @@ -46,12 +46,20 @@ bool test_heap2(void); bool test_stl_allocator1(void); bool test_stl_allocator2(void); +bool mem_is_zero(uint8_t* p, size_t size) { + if (p==NULL) return false; + for (size_t i = 0; i < size; ++i) { + if (p[i] != 0) return false; + } + return true; +} + // --------------------------------------------------------------------------- // Main testing // --------------------------------------------------------------------------- int main(void) { mi_option_disable(mi_option_verbose); - + // --------------------------------------------------- // Malloc // --------------------------------------------------- @@ -149,7 +157,8 @@ int main(void) { for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align*5 /*size*/, align); + ps[i] = mi_malloc_aligned(align*13 // size + , align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } @@ -211,6 +220,11 @@ int main(void) { result = mi_heap_contains_block(heap, p); mi_heap_destroy(heap); } + CHECK_BODY("mimalloc-aligned12") { + void* p = mi_malloc_aligned(0x100, 0x100); + result = (((uintptr_t)p % 0x100) == 0); // #602 + mi_free(p); + } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; @@ -226,6 +240,21 @@ int main(void) { } result = ok; }; + CHECK_BODY("zalloc-aligned-small1") { + size_t zalloc_size = MI_SMALL_SIZE_MAX / 2; + uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = mem_is_zero(p, zalloc_size); + mi_free(p); + }; + CHECK_BODY("rezalloc_aligned-small1") { + size_t zalloc_size = MI_SMALL_SIZE_MAX / 2; + uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = mem_is_zero(p, zalloc_size); + zalloc_size *= 3; + p = (uint8_t*)mi_rezalloc_aligned(p, zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = result && mem_is_zero(p, zalloc_size); + mi_free(p); + }; // --------------------------------------------------- // Reallocation @@ -285,7 +314,7 @@ int main(void) { // Larger test functions // --------------------------------------------------- -bool test_heap1() { +bool test_heap1(void) { mi_heap_t* heap = mi_heap_new(); int* p1 = mi_heap_malloc_tp(heap,int); int* p2 = mi_heap_malloc_tp(heap,int); @@ -294,7 +323,7 @@ bool test_heap1() { return true; } -bool test_heap2() { +bool test_heap2(void) { mi_heap_t* heap = mi_heap_new(); int* p1 = mi_heap_malloc_tp(heap,int); int* p2 = mi_heap_malloc_tp(heap,int); @@ -305,7 +334,7 @@ bool test_heap2() { return true; } -bool test_stl_allocator1() { +bool test_stl_allocator1(void) { #ifdef __cplusplus std::vector > vec; vec.push_back(1); @@ -318,7 +347,7 @@ bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -bool test_stl_allocator2() { +bool test_stl_allocator2(void) { #ifdef __cplusplus std::vector > vec; vec.push_back(some_struct()); diff --git a/third-party/mimalloc/test/test-stress.c b/third-party/mimalloc/test/test-stress.c index 133ba50c..7b74b465 100644 --- a/third-party/mimalloc/test/test-stress.c +++ b/third-party/mimalloc/test/test-stress.c @@ -7,7 +7,7 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. It tries to reflect real-world workloads: - allocation size is distributed linearly in powers of two - - with some fraction extra large (and some extra extra large) + - with some fraction extra large (and some very large) - the allocations are initialized and read again at free - pointers transfer between threads - threads are terminated and recreated with some objects surviving in between @@ -20,31 +20,37 @@ terms of the MIT license. #include #include #include +#include // > mimalloc-test-stress [THREADS] [SCALE] [ITER] // // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 25; // scaling factor + +#if defined(MI_TSAN) +static int ITER = 10; // N full iterations destructing and re-creating all threads (on tsan reduce for azure pipeline limits) +#else static int ITER = 50; // N full iterations destructing and re-creating all threads +#endif // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor #define STRESS // undefine for leak test -static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100) +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? // #define USE_STD_MALLOC #ifdef USE_STD_MALLOC -#define custom_calloc(n,s) malloc(n*s) +#define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) #else #include -#define custom_calloc(n,s) mi_malloc(n*s) +#define custom_calloc(n,s) mi_calloc(n,s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) #endif @@ -91,7 +97,7 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } @@ -101,6 +107,7 @@ static void* alloc_items(size_t items, random_t r) { uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); if (p != NULL) { for (uintptr_t i = 0; i < items; i++) { + assert(p[i] == 0); p[i] = (items - i) ^ cookie; } } @@ -222,6 +229,10 @@ static void test_leak(void) { #endif int main(int argc, char** argv) { + #ifndef USE_STD_MALLOC + mi_stats_reset(); + #endif + // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { char* end; @@ -238,7 +249,10 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + if (SCALE > 100) { + allow_large_objects = true; + } + printf("Using %d threads with a %d%% load-per-thread and %d iterations %s\n", THREADS, SCALE, ITER, (allow_large_objects ? "(allow large objects)" : "")); //mi_reserve_os_memory(1024*1024*1024ULL, false, true); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); diff --git a/third-party/mimalloc/test/test-wrong.c b/third-party/mimalloc/test/test-wrong.c index 17d253b6..56a2339a 100644 --- a/third-party/mimalloc/test/test-wrong.c +++ b/third-party/mimalloc/test/test-wrong.c @@ -5,11 +5,14 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -/* test file for valgrind support. +/* test file for valgrind/asan support. + + VALGRIND: + ---------- Compile in an "out/debug" folder: > cd out/debug - > cmake ../.. -DMI_VALGRIND=1 + > cmake ../.. -DMI_TRACK_VALGRIND=1 > make -j8 and then compile this file as: @@ -19,6 +22,25 @@ terms of the MIT license. A copy of the license can be found in the file and test as: > valgrind ./test-wrong + + + ASAN + ---------- + Compile in an "out/debug" folder: + + > cd out/debug + > cmake ../.. -DMI_TRACK_ASAN=1 + > make -j8 + + and then compile this file as: + + > clang -g -o test-wrong -I../../include ../../test/test-wrong.c libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address + + and test as: + + > ASAN_OPTIONS=verbosity=1:halt_on_error=0 ./test-wrong + + */ #include #include