mirror of https://github.com/rui314/mold.git synced 2024-11-13 09:39:13 +03:00

Update third-party/mimalloc to v2.0.3

Rui Ueyama 2021-12-20 13:01:38 +09:00
parent 3e397d6e78
commit fb89ce22fd
46 changed files with 3367 additions and 1738 deletions

View File

@ -12,8 +12,8 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" OFF) # enables interpose as well
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
option(MI_BUILD_STATIC "Build static library" ON)
@ -23,7 +23,9 @@ option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems)" OFF)
include(GNUInstallDirs)
include("cmake/mimalloc-config-version.cmake")
set(mi_sources
@ -32,7 +34,7 @@ set(mi_sources
src/os.c
src/bitmap.c
src/arena.c
src/region.c
src/segment-cache.c
src/segment.c
src/page.c
src/alloc.c
@ -42,10 +44,12 @@ set(mi_sources
src/options.c
src/init.c)
# -----------------------------------------------------------------------------
# Converience: set default build type depending on the build directory
# Convenience: set default build type depending on the build directory
# -----------------------------------------------------------------------------
message(STATUS "")
if (NOT CMAKE_BUILD_TYPE)
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL)
message(STATUS "No build type selected, default to: Debug")
@ -61,6 +65,7 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$")
set(MI_SECURE "ON")
endif()
# -----------------------------------------------------------------------------
# Process options
# -----------------------------------------------------------------------------
@ -76,16 +81,21 @@ if(MI_OVERRIDE)
# use zone's on macOS
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
list(APPEND mi_sources src/alloc-override-osx.c)
list(APPEND mi_defines MI_OSX_ZONE=1)
if(NOT MI_INTERPOSE)
message(STATUS " (enabling INTERPOSE as well since zone's require this)")
set(MI_INTERPOSE "ON")
list(APPEND mi_defines MI_OSX_ZONE=1)
if (NOT MI_OSX_INTERPOSE)
message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)")
endif()
endif()
if(MI_INTERPOSE)
if(MI_OSX_INTERPOSE)
# use interpose on macOS
message(STATUS " Use interpose to override malloc (MI_INTERPOSE=ON)")
list(APPEND mi_defines MI_INTERPOSE)
message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)")
list(APPEND mi_defines MI_OSX_INTERPOSE=1)
if (NOT MI_OSX_ZONE)
message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_INTERPOSE=ON)")
endif()
endif()
if((NOT MI_USE_CXX) AND MI_OVERRIDE)
message(STATUS " WARNING: if overriding C++ new/delete, it is best to build mimalloc with a C++ compiler (use -DMI_USE_CXX=ON)")
endif()
endif()
endif()
@ -168,10 +178,16 @@ endif()
# Compiler flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden $<$<CONFIG:RELEASE>:-O3>)
if(NOT MI_USE_CXX)
list(APPEND mi_cflags -Wstrict-prototypes)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
list(APPEND mi_cflags -Wno-invalid-memory-model)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline)
endif()
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
@ -184,6 +200,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
else()
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
if(MI_OVERRIDE)
list(APPEND mi_cflags -fno-builtin-malloc)
endif()
endif()
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
@ -203,14 +222,22 @@ else()
endif()
endif()
if (MI_USE_LIBATOMIC)
list(APPEND mi_libraries atomic)
endif()
# -----------------------------------------------------------------------------
# Install and output names
# -----------------------------------------------------------------------------
if (MI_INSTALL_TOPLEVEL)
set(mi_install_dir "${CMAKE_INSTALL_PREFIX}")
set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}")
set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}")
set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc")
else()
set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}")
set(mi_install_libdir "lib/mimalloc-${mi_version}")
set(mi_install_incdir "include/mimalloc-${mi_version}")
set(mi_install_cmakedir "share/mimalloc-${mi_version}/cmake")
endif()
if(MI_SECURE)
@ -220,7 +247,7 @@ else()
endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC)
if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$"))
if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$"))
set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version
endif()
if(MI_BUILD_SHARED)
@ -238,13 +265,14 @@ endif()
message(STATUS "")
message(STATUS "Library base name: ${mi_basename}")
message(STATUS "Version : ${mi_version}")
message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}")
if(MI_USE_CXX)
message(STATUS "Compiler : ${CMAKE_CXX_COMPILER}")
message(STATUS "C++ Compiler : ${CMAKE_CXX_COMPILER}")
else()
message(STATUS "Compiler : ${CMAKE_C_COMPILER}")
message(STATUS "C Compiler : ${CMAKE_C_COMPILER}")
endif()
message(STATUS "Install directory: ${mi_install_dir}")
message(STATUS "Compiler flags : ${mi_cflags}")
message(STATUS "Build targets : ${mi_build_targets}")
message(STATUS "")
@ -261,25 +289,24 @@ if(MI_BUILD_SHARED)
target_link_libraries(mimalloc PUBLIC ${mi_libraries})
target_include_directories(mimalloc PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_dir}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}>
)
if(WIN32)
# On windows copy the mimalloc redirection dll too.
target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.lib)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(MIMALLOC_REDIRECT_SUFFIX "32")
else()
set(MIMALLOC_REDIRECT_SUFFIX "")
endif()
target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.lib)
add_custom_command(TARGET mimalloc POST_BUILD
COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.dll" $<TARGET_FILE_DIR:mimalloc>
COMMENT "Copy mimalloc-redirect.dll to output directory")
COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" $<TARGET_FILE_DIR:mimalloc>
COMMENT "Copy mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll to output directory")
endif()
if (MI_INSTALL_TOPLEVEL MATCHES "ON")
install(TARGETS mimalloc EXPORT mimalloc LIBRARY
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
else()
install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_dir} LIBRARY)
endif()
install(EXPORT mimalloc DESTINATION ${mi_install_dir}/cmake)
install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY)
install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir})
endif()
# static library
@ -291,7 +318,7 @@ if (MI_BUILD_STATIC)
target_link_libraries(mimalloc-static PUBLIC ${mi_libraries})
target_include_directories(mimalloc-static PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_dir}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}>
)
if(WIN32)
# When building both static and shared libraries on Windows, a static library should use a
@ -302,22 +329,25 @@ if (MI_BUILD_STATIC)
set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_basename})
endif()
install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_dir})
install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY)
endif()
# install include files
install(FILES include/mimalloc.h DESTINATION ${mi_install_dir}/include)
install(FILES include/mimalloc-override.h DESTINATION ${mi_install_dir}/include)
install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_dir}/include)
install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_dir}/cmake)
install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_dir}/cmake)
install(FILES include/mimalloc.h DESTINATION ${mi_install_incdir})
install(FILES include/mimalloc-override.h DESTINATION ${mi_install_incdir})
install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_incdir})
install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_cmakedir})
install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_cmakedir})
if(NOT WIN32 AND MI_BUILD_SHARED AND NOT MI_INSTALL_TOPLEVEL)
# install a symlink in the /usr/local/lib to the versioned library
# note: use delayed prefix expansion as \${CMAKE_INSTALL_PREFIX}
set(mi_symlink "${CMAKE_SHARED_MODULE_PREFIX}${mi_basename}${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(mi_soname "mimalloc-${mi_version}/${mi_symlink}.${mi_version}")
install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${mi_soname} ${mi_symlink} WORKING_DIRECTORY ${mi_install_dir}/..)")
install(CODE "MESSAGE(\"-- Symbolic link: ${CMAKE_INSTALL_PREFIX}/lib/${mi_symlink} -> ${mi_soname}\")")
install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${mi_soname} ${mi_symlink} WORKING_DIRECTORY \${CMAKE_INSTALL_PREFIX}/lib)")
install(CODE "MESSAGE(\"-- Symbolic link: \${CMAKE_INSTALL_PREFIX}/lib/${mi_symlink} -> ${mi_soname}\")")
install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${mi_soname} ${mi_symlink}.${mi_version} WORKING_DIRECTORY \${CMAKE_INSTALL_PREFIX}/lib)")
install(CODE "MESSAGE(\"-- Symbolic link: \${CMAKE_INSTALL_PREFIX}/lib/${mi_symlink}.${mi_version} -> ${mi_soname}\")")
endif()
# single object file for more predictable static overriding
@ -328,16 +358,16 @@ if (MI_BUILD_OBJECT)
target_compile_options(mimalloc-obj PRIVATE ${mi_cflags})
target_include_directories(mimalloc-obj PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_dir}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}>
)
# the following seems to lead to cmake warnings/errors on some systems, disable for now :-(
# install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_dir})
# install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_libdir})
# the FILES expression can also be: $<TARGET_OBJECTS:mimalloc-obj>
# but that fails on cmake versions less than 3.10, so we leave it as is for now
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}
DESTINATION ${mi_install_dir}
DESTINATION ${mi_install_libdir}
RENAME ${mi_basename}${CMAKE_C_OUTPUT_EXTENSION} )
endif()
@ -350,7 +380,7 @@ if (MI_BUILD_TESTS)
target_compile_definitions(mimalloc-test-api PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-api PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-api PRIVATE include)
target_link_libraries(mimalloc-test-api PRIVATE mimalloc-static ${mi_libraries})
target_link_libraries(mimalloc-test-api PRIVATE mimalloc ${mi_libraries})
add_executable(mimalloc-test-stress test/test-stress.c)
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019 Microsoft Corporation, Daan Leijen
Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -134,9 +134,16 @@ jobs:
cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
displayName: Make
# - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api
# workingDirectory: $(BuildType)
# displayName: TestAPI
# - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress
# workingDirectory: $(BuildType)
# displayName: TestStress
- script: ctest --verbose --timeout 120
workingDirectory: $(BuildType)
displayName: CTest
# - upload: $(Build.SourcesDirectory)/$(BuildType)
# artifact: mimalloc-macos-$(BuildType)

View File

@ -1,5 +1,5 @@
set(mi_version_major 1)
set(mi_version_minor 7)
set(mi_version_major 2)
set(mi_version_minor 0)
set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version})

View File

@ -1,2 +1,11 @@
include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake)
get_filename_component(MIMALLOC_TARGET_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH)
get_filename_component(MIMALLOC_SHARE_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH) # one up from the cmake dir, e.g. /usr/local/share/mimalloc-2.0
if (MIMALLOC_SHARE_DIR MATCHES "/share/")
string(REPLACE "/share/" "/lib/" MIMALLOC_LIBRARY_DIR ${MIMALLOC_SHARE_DIR})
string(REPLACE "/share/" "/include/" MIMALLOC_INCLUDE_DIR ${MIMALLOC_SHARE_DIR})
else()
# installed with -DMI_INSTALL_TOPLEVEL=ON
string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_SHARE_DIR}")
string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_SHARE_DIR}")
endif()
set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -782,6 +782,7 @@ typedef enum mi_option_e {
mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible
mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program.
mi_option_reserve_huge_os_pages_at, ///< Reserve huge OS pages at node N.
mi_option_segment_cache, ///< The number of segments per thread to keep cached.
mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free.
mi_option_segment_reset, ///< Experimental
@ -1053,6 +1054,8 @@ or via environment variables.
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents short-lived threads that
allocate just a little from taking up space in the huge OS page area (which cannot be reset).
- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where `N` is the numa node; this reserves the huge pages at a specific numa node.
(`N` is -1 by default, which reserves the huge pages evenly among the given number of numa nodes, or among the available ones as detected.)
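For illustration (not part of this diff), the same reservation can be requested programmatically through mimalloc's public API; a minimal sketch, assuming the v2.0 `mi_reserve_huge_os_pages_at` entry point that this option corresponds to:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Try to reserve 4 huge (1GiB) OS pages on numa node 0, waiting at most
  // 2000 msecs for the OS; returns 0 on success.
  if (mi_reserve_huge_os_pages_at(4, 0, 2000) != 0) {
    fprintf(stderr, "unable to reserve huge OS pages\n");
  }
  return 0;
}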
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the

View File

@ -236,7 +236,6 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\region.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\page-queue.c">
@ -247,6 +246,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

View File

@ -64,9 +64,6 @@
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\region.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -82,5 +79,8 @@
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-cache.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -233,7 +233,6 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\region.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\page-queue.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -243,6 +242,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

View File

@ -47,10 +47,10 @@
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\region.c">
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
@ -62,6 +62,9 @@
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-cache.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -236,7 +236,6 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\region.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\page-queue.c">
@ -247,6 +246,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

View File

@ -19,9 +19,6 @@
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\region.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -49,6 +46,9 @@
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-cache.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
@ -70,7 +70,7 @@
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\src\bitmap.h">
<Filter>Source Files</Filter>
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>

View File

@ -92,7 +92,7 @@
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
@ -138,7 +138,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<ConformanceMode>true</ConformanceMode>
@ -166,7 +166,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<ConformanceMode>true</ConformanceMode>
@ -225,7 +225,6 @@
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\region.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\page-queue.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -235,6 +234,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

View File

@ -22,9 +22,6 @@
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\region.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\options.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -52,6 +49,9 @@
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-cache.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018,2020 Microsoft Research, Daan Leijen
Copyright (c) 2018-2021 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -25,7 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_memory_order(name) std::memory_order_##name
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
#define _Atomic(tp) tp
#define _Atomic(tp) tp
#define ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
@ -173,7 +173,7 @@ static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintpt
}
static inline void mi_atomic_thread_fence(mi_memory_order mo) {
(void)(mo);
_Atomic(uintptr_t)x = 0;
_Atomic(uintptr_t) x = 0;
mi_atomic_exchange_explicit(&x, 1, mo);
}
static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
@ -295,7 +295,7 @@ static inline void mi_atomic_yield(void) {
}
#elif defined(__aarch64__)
static inline void mi_atomic_yield(void) {
asm volatile("wfe");
__asm__ volatile("wfe");
}
#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
static inline void mi_atomic_yield(void) {
@ -307,7 +307,7 @@ static inline void mi_atomic_yield(void) {
}
#elif defined(__armel__) || defined(__ARMEL__)
static inline void mi_atomic_yield(void) {
asm volatile ("nop" ::: "memory");
__asm__ volatile ("nop" ::: "memory");
}
#endif
#elif defined(__sun)

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -19,10 +19,11 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_CACHE_LINE 64
#if defined(_MSC_VER)
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
#pragma warning(disable:26812) // unscoped enum warning
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
@ -32,6 +33,16 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_cache_align
#endif
#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
#define __wasi__
#endif
#if defined(__cplusplus)
#define mi_decl_externc extern "C"
#else
#define mi_decl_externc
#endif
// "options.c"
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
@ -46,42 +57,51 @@ void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
uintptr_t _os_random_weak(uintptr_t extra_seed);
uintptr_t _mi_os_random_weak(uintptr_t extra_seed);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c
extern mi_decl_cache_align mi_stats_t _mi_stats_main;
extern mi_decl_cache_align const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
bool _mi_preloading(); // true while the C runtime is not ready
size_t _mi_current_thread_count(void);
bool _mi_preloading(void); // true while the C runtime is not ready
// os.c
size_t _mi_os_page_size(void);
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
size_t _mi_os_good_alloc_size(size_t size);
bool _mi_os_has_overcommit(void);
// memory.c
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld);
void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld);
// arena.c
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld);
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
bool _mi_mem_protect(void* addr, size_t size);
bool _mi_mem_unprotect(void* addr, size_t size);
void _mi_mem_collect(mi_os_tld_t* tld);
// "segment-cache.c"
void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
// "segment.c"
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
@ -90,7 +110,7 @@ void _mi_abandoned_await_readers(void);
// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
void _mi_page_retire(mi_page_t* page); // free the page if there are no other pages with many free blocks
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
void _mi_page_unfull(mi_page_t* page);
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
@ -176,11 +196,11 @@ bool _mi_page_is_valid(mi_page_t* page);
/* -----------------------------------------------------------
Inlined definitions
----------------------------------------------------------- */
#define UNUSED(x) (void)(x)
#define MI_UNUSED(x) (void)(x)
#if (MI_DEBUG>0)
#define UNUSED_RELEASE(x)
#define MI_UNUSED_RELEASE(x)
#else
#define UNUSED_RELEASE(x) UNUSED(x)
#define MI_UNUSED_RELEASE(x) MI_UNUSED(x)
#endif
#define MI_INIT4(x) x(),x(),x(),x()
@ -209,6 +229,18 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
}
}
// Align downwards
static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
mi_assert_internal(alignment != 0);
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { // power of two?
return (sz & ~mask);
}
else {
return ((sz / alignment) * alignment);
}
}
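// Illustrative aside (not part of this commit): worked examples of the two
// branches above.
//   _mi_align_down(100, 32) == 96   // power-of-two path: 100 & ~31 == 96
//   _mi_align_down(70, 24)  == 48   // general path: (70/24)*24 == 48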
// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
mi_assert_internal(divider != 0);
@ -223,6 +255,7 @@ static inline bool mi_mem_is_zero(void* p, size_t size) {
return true;
}
// Align a byte size to a size in _machine words_,
// i.e. byte size == `wsize*sizeof(void*)`.
static inline size_t _mi_wsize_from_size(size_t size) {
@ -236,18 +269,18 @@ static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size)
}
// Overflow detecting multiply
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
#include <limits.h> // UINT_MAX, ULONG_MAX
#if defined(_CLOCK_T) // for Illumos
#undef _CLOCK_T
#endif
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if (SIZE_MAX == UINT_MAX)
return __builtin_umul_overflow(count, size, total);
#elif (SIZE_MAX == ULONG_MAX)
return __builtin_umull_overflow(count, size, total);
#if (SIZE_MAX == ULONG_MAX)
return __builtin_umull_overflow(count, size, (unsigned long *)total);
#elif (SIZE_MAX == UINT_MAX)
return __builtin_umul_overflow(count, size, (unsigned int *)total);
#else
return __builtin_umulll_overflow(count, size, total);
return __builtin_umulll_overflow(count, size, (unsigned long long *)total);
#endif
}
#else /* __builtin_umul_overflow is unavailable */
@ -293,8 +326,9 @@ extern bool _mi_process_is_initialized;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__MACH__) // OSX
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
@ -332,10 +366,12 @@ extern pthread_key_t _mi_heap_default_key;
// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
#if defined(MI_TLS_SLOT)
mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
if (mi_unlikely(heap == NULL)) { heap = (mi_heap_t*)&_mi_heap_empty; } //_mi_heap_empty_get(); }
return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
mi_heap_t* heap = *mi_tls_pthread_heap_slot();
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
@ -343,7 +379,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
#if defined(MI_TLS_RECURSE_GUARD)
#if defined(MI_TLS_RECURSE_GUARD)
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
#endif
return _mi_heap_default;
@ -391,35 +427,47 @@ static inline mi_segment_t* _mi_ptr_segment(const void* p) {
return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
}
static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
return (mi_page_t*)(s);
}
static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
return (mi_slice_t*)(p);
}
// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]);
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
return segment;
}
// used internally
static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) {
// if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE);
uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
mi_assert_internal(idx < segment->capacity);
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
return idx;
static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset);
mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
mi_assert_internal(start->slice_offset == 0);
mi_assert_internal(start + start->slice_count > slice);
return start;
}
// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
uintptr_t idx = _mi_segment_page_idx_of(segment, p);
return &((mi_segment_t*)segment)->pages[idx];
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
mi_assert_internal(idx < segment->slice_entries);
mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
mi_assert_internal(slice->slice_offset == 0);
mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
return mi_slice_to_page(slice);
}
// Quick page start for initialized pages
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
const size_t bsize = page->xblock_size;
mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0);
return _mi_segment_page_start(segment, page, bsize, page_size, NULL);
return _mi_segment_page_start(segment, page, page_size);
}
// Get the page containing the pointer
@ -427,7 +475,7 @@ static inline mi_page_t* _mi_ptr_page(void* p) {
return _mi_segment_page_of(_mi_ptr_segment(p), p);
}
// Get the block size of a page (special cased for huge objects)
// Get the block size of a page (special case for huge objects)
static inline size_t mi_page_block_size(const mi_page_t* page) {
const size_t bsize = page->xblock_size;
mi_assert_internal(bsize > 0);
@ -436,7 +484,7 @@ static inline size_t mi_page_block_size(const mi_page_t* page) {
}
else {
size_t psize;
_mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL);
_mi_segment_page_start(_mi_page_segment(page), page, &psize);
return psize;
}
}
@ -447,6 +495,14 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
return mi_page_block_size(page) - MI_PADDING_SIZE;
}
// size of a segment
static inline size_t mi_segment_size(mi_segment_t* segment) {
return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
}
static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
return (uint8_t*)segment + mi_segment_size(segment);
}
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
@ -566,12 +622,13 @@ static inline bool mi_is_in_same_segment(const void* p, const void* q) {
}
static inline bool mi_is_in_same_page(const void* p, const void* q) {
mi_segment_t* segmentp = _mi_ptr_segment(p);
mi_segment_t* segmentq = _mi_ptr_segment(q);
if (segmentp != segmentq) return false;
uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p);
uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q);
return (idxp == idxq);
mi_segment_t* segment = _mi_ptr_segment(p);
if (_mi_ptr_segment(q) != segment) return false;
// assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
mi_page_t* page = _mi_segment_page_of(segment, p);
size_t psize;
uint8_t* start = _mi_segment_page_start(segment, page, &psize);
return (start <= (uint8_t*)q && (uint8_t*)q < start + psize);
}
static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
@ -597,7 +654,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
#ifdef MI_ENCODE_FREELIST
return (mi_block_t*)mi_ptr_decode(null, block->next, keys);
#else
UNUSED(keys); UNUSED(null);
MI_UNUSED(keys); MI_UNUSED(null);
return (mi_block_t*)block->next;
#endif
}
@ -606,7 +663,7 @@ static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const
#ifdef MI_ENCODE_FREELIST
block->next = mi_ptr_encode(null, next, keys);
#else
UNUSED(keys); UNUSED(null);
MI_UNUSED(keys); MI_UNUSED(null);
block->next = (mi_encoded_t)next;
#endif
}
@ -622,7 +679,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t*
}
return next;
#else
UNUSED(page);
MI_UNUSED(page);
return mi_block_nextx(page,block,NULL);
#endif
}
@ -631,11 +688,57 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
#ifdef MI_ENCODE_FREELIST
mi_block_set_nextx(page,block,next, page->keys);
#else
UNUSED(page);
MI_UNUSED(page);
mi_block_set_nextx(page,block,next,NULL);
#endif
}
// -------------------------------------------------------------------
// commit mask
// -------------------------------------------------------------------
static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) {
for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
cm->mask[i] = 0;
}
}
static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) {
for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
cm->mask[i] = ~((size_t)0);
}
}
static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) {
for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
if (cm->mask[i] != 0) return false;
}
return true;
}
static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) {
for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
if (cm->mask[i] != ~((size_t)0)) return false;
}
return true;
}
// defined in `segment.c`:
size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total);
size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);
#define mi_commit_mask_foreach(cm,idx,count) \
idx = 0; \
while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {
#define mi_commit_mask_foreach_end() \
idx += count; \
}
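// Usage illustration (not part of this commit): a hypothetical helper that
// decommits every committed run recorded in `mask` within `segment`; each
// mask bit covers MI_COMMIT_SIZE bytes of the segment.
static inline void mi_commit_mask_decommit_all(mi_commit_mask_t* mask, void* segment, mi_stats_t* stats) {
  size_t idx;
  size_t count;
  mi_commit_mask_foreach(mask, idx, count)
    // [idx, idx+count) is a run of set bits, i.e. committed MI_COMMIT_SIZE blocks
    _mi_os_decommit((uint8_t*)segment + (idx * MI_COMMIT_SIZE), count * MI_COMMIT_SIZE, stats);
  mi_commit_mask_foreach_end()
}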
// -------------------------------------------------------------------
// Fast "random" shuffle
// -------------------------------------------------------------------
@ -667,13 +770,14 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
int _mi_os_numa_node_get(mi_os_tld_t* tld);
size_t _mi_os_numa_node_count_get(void);
extern size_t _mi_numa_node_count;
extern _Atomic(size_t) _mi_numa_node_count;
static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
if (mi_likely(_mi_numa_node_count == 1)) return 0;
if (mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1)) return 0;
else return _mi_os_numa_node_get(tld);
}
static inline size_t _mi_os_numa_node_count(void) {
if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
if (mi_likely(count>0)) return count;
else return _mi_os_numa_node_count_get();
}
@ -685,7 +789,7 @@ static inline size_t _mi_os_numa_node_count(void) {
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
@ -699,23 +803,24 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // 32-bit always uses GS
#elif defined(__MACH__) && defined(__x86_64__)
#elif defined(__APPLE__) && defined(__x86_64__)
__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
__asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
#elif defined(__x86_64__)
__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
void** tcb; UNUSED(ofs);
void** tcb; MI_UNUSED(ofs);
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
res = tcb[slot];
#elif defined(__aarch64__)
void** tcb; UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
void** tcb; MI_UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
__asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
#else
tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits
#else
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
#endif
res = tcb[slot];
#endif
return res;
@ -726,31 +831,32 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
__asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
#elif defined(__MACH__) && defined(__x86_64__)
#elif defined(__APPLE__) && defined(__x86_64__)
__asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOSX uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
__asm__("movl %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
#elif defined(__x86_64__)
__asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
void** tcb; UNUSED(ofs);
void** tcb; MI_UNUSED(ofs);
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
tcb[slot] = value;
#elif defined(__aarch64__)
void** tcb; UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
void** tcb; MI_UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
__asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
#else
tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits
#else
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
#endif
tcb[slot] = value;
#endif
}
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
#if defined(__aarch64__) && defined(__APPLE__) // M1
// on macOS on the M1, slot 0 does not seem to work, so we fall back to portable C for now. See issue #354
return (uintptr_t)&_mi_heap_default;
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__))
// on Android, slot 1 is the thread ID (pointer to pthread internal struct)
return (uintptr_t)mi_tls_slot(1);
#else
// in all our other targets, slot 0 is the pointer to the thread control block
return (uintptr_t)mi_tls_slot(0);
@ -758,7 +864,7 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
}
#else
// otherwise use standard C
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return (uintptr_t)&_mi_heap_default;
}
#endif
@ -900,7 +1006,7 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
// This is used for example in `mi_realloc`.
// -------------------------------------------------------------------------------
#if (__GNUC__ >= 4) || defined(__clang__)
#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
// On GCC/CLang we provide a hint that the pointers are word aligned.
#include <string.h>
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018,2019 Microsoft Research, Daan Leijen
Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018,2019 Microsoft Research, Daan Leijen
Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -10,14 +10,14 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include <mimalloc-atomic.h> // _Atomic
#include "mimalloc-atomic.h" // _Atomic
#ifdef _MSC_VER
#pragma warning(disable:4214) // bitfield is not int
#endif
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
// due to SSE registers for example. This must be at least `sizeof(void*)`
#ifndef MI_MAX_ALIGN_SIZE
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
#endif
@ -67,6 +67,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_ENCODE_FREELIST 1
#endif
// ------------------------------------------------------
// Platform specific values
// ------------------------------------------------------
@ -83,20 +84,43 @@ terms of the MIT license. A copy of the license can be found in the file
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX == 9223372036854775807LL
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == 2147483647LL
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform must be 32 or 64 bits
#error platform pointers must be 32, 64, or 128 bits
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits
#endif
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
# define MI_ZI(x) x##LL
#else
# define MI_ZU(x) x##UL
# define MI_ZI(x) x##L
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define KiB ((size_t)1024)
#define MiB (KiB*KiB)
#define GiB (MiB*KiB)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
// ------------------------------------------------------
@ -104,45 +128,65 @@ terms of the MIT license. A copy of the license can be found in the file
// ------------------------------------------------------
// Main tuning parameters for segment and page sizes
// Sizes for 64-bit, divide by two for 32-bit
#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb
#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4mb
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb
// Sizes for 64-bit (usually divide by two for 32-bit)
#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit)
#if MI_INTPTR_SIZE > 4
#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB
#else
#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
#endif
#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
// Derived constants
#define MI_SEGMENT_SIZE (1UL<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_MASK ((uintptr_t)MI_SEGMENT_SIZE - 1)
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1)
#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024
#define MI_SMALL_PAGE_SIZE (1UL<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (1UL<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (1UL<<MI_LARGE_PAGE_SHIFT)
#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
#define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
// The max object sizes are checked to not waste more than 12.5% internally over the page sizes.
// (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb
#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit
#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (73U)
#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
#error "define more bins"
#endif
// Maximum slice offset (7)
#define MI_MAX_SLICE_OFFSET ((MI_MEDIUM_PAGE_SIZE / MI_SEGMENT_SLICE_SIZE) - 1)
// Used as a special value to encode block sizes in 32 bits.
#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
// blocks up to this size are always allocated aligned
#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
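// Illustrative aside (not part of this commit): on a typical 64-bit target
// MI_INTPTR_SHIFT == 3, so MI_SEGMENT_SLICE_SHIFT == 16 (64 KiB slices) and
// MI_SEGMENT_SHIFT == 26 (64 MiB segments); checked here with C11 asserts.
#if MI_INTPTR_SIZE == 8
_Static_assert(MI_SEGMENT_SLICE_SIZE == 64 * MI_KiB, "64 KiB slices");
_Static_assert(MI_SEGMENT_SIZE == 64 * MI_MiB, "64 MiB segments");
_Static_assert(MI_SLICES_PER_SEGMENT == 1024, "1024 slices per segment");
#endif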
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
// ------------------------------------------------------
// The free lists use encoded next fields
// (Only actually encodes when MI_ENCODED_FREELIST is defined.)
typedef uintptr_t mi_encoded_t;
typedef uintptr_t mi_encoded_t;
// thread id's
typedef size_t mi_threadid_t;
// free lists contain blocks
typedef struct mi_block_s {
@ -218,18 +262,18 @@ typedef uintptr_t mi_thread_free_t;
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
// "owned" by the segment
uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
uint8_t segment_in_use:1; // `true` if the segment allocated this page
uint8_t is_reset:1; // `true` if the page memory was reset
uint8_t is_committed:1; // `true` if the page virtual memory is committed
uint8_t is_zero_init:1; // `true` if the page was zero initialized
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_reset : 1; // `true` if the page memory was reset
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire : 7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#ifdef MI_ENCODE_FREELIST
@ -238,51 +282,90 @@ typedef struct mi_page_s {
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
uint32_t xblock_size; // size available in each block (always `>0`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
#if MI_INTPTR_SIZE==8
uintptr_t padding[1];
#endif
} mi_page_t;
typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64kb pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into 512kb pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
MI_PAGE_HUGE // huge blocks (>512kb) are put into a single page in a segment of the exact size (but still 2mb aligned)
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a page of just one block
MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment.
} mi_page_kind_t;
// Segments are large allocated memory blocks (2mb on 64 bit) from
typedef enum mi_segment_kind_e {
MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside.
MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside.
} mi_segment_kind_t;
// ------------------------------------------------------
// A segment holds a commit mask where a bit is set if
// the corresponding MI_COMMIT_SIZE area is committed.
// The MI_COMMIT_SIZE must be a multiple of the slice
// size. If it is equal we have the most fine grained
// decommit (but in practice 2x seems to perform better).
// ------------------------------------------------------
#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE)
#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS)
#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS))
#error "the segment size must be exactly divisible by the (commit size * size_t bits)"
#endif
typedef struct mi_commit_mask_s {
size_t mask[MI_COMMIT_MASK_FIELD_COUNT];
} mi_commit_mask_t;
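As a rough sketch (not mimalloc code; it only assumes the definitions above), committing the bits that cover a byte range of a segment looks like:
```
// Sketch: set the commit-mask bits covering [offset, offset+size) of a segment.
// Assumes MI_COMMIT_SIZE and the mask layout defined above.
static void sketch_commit_mask_set(mi_commit_mask_t* cm, size_t offset, size_t size) {
  size_t start = offset / MI_COMMIT_SIZE;
  size_t end = (offset + size + MI_COMMIT_SIZE - 1) / MI_COMMIT_SIZE; // round up
  for (size_t bit = start; bit < end; bit++) {
    cm->mask[bit / MI_COMMIT_MASK_FIELD_BITS] |= ((size_t)1 << (bit % MI_COMMIT_MASK_FIELD_BITS));
  }
}
```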
typedef mi_page_t mi_slice_t;
typedef int64_t mi_msecs_t;
// Segments are large allocated memory blocks (8mb on 64 bit) from
// the OS. Inside segments we allocate fixed size _pages_ that
// contain blocks.
typedef struct mi_segment_s {
// memory fields
size_t memid; // id for the os-level memory manager
bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
bool mem_is_committed; // `true` if the whole segment is eagerly committed
size_t memid; // memory id for arena allocation
bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
bool mem_is_large; // in large/huge os pages?
bool mem_is_committed; // `true` if the whole segment is eagerly committed
bool allow_decommit;
mi_msecs_t decommit_expire;
mi_commit_mask_t decommit_mask;
mi_commit_mask_t commit_mask;
// segment fields
_Atomic(struct mi_segment_s*) abandoned_next;
struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init`
struct mi_segment_s* prev;
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
size_t used; // count of pages in use (`used <= capacity`)
size_t capacity; // count of available pages (`#free + used`)
size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE`
size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages.
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
size_t segment_info_slices; // initial slices we are using for segment info and possible guard pages.
// layout like this to optimize access in `mi_free`
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
_Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment
mi_page_kind_t page_kind; // kind of pages: small, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
mi_segment_kind_t kind;
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
mi_slice_t slices[MI_SLICES_PER_SEGMENT];
} mi_segment_t;
@ -319,7 +402,7 @@ typedef struct mi_random_cxt_s {
} mi_random_ctx_t;
// In debug mode there is a padding stucture at the end of the blocks to check for buffer overflows
// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
#if (MI_PADDING)
typedef struct mi_padding_s {
uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
@ -341,7 +424,7 @@ struct mi_heap_s {
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
_Atomic(mi_block_t*) thread_delayed_free;
uintptr_t thread_id; // thread this heap belongs to
mi_threadid_t thread_id; // thread this heap belongs to
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
mi_random_ctx_t random; // random number context used for secure allocation
@ -418,7 +501,7 @@ typedef struct mi_stats_s {
mi_stat_count_t threads;
mi_stat_count_t normal;
mi_stat_count_t huge;
mi_stat_count_t giant;
mi_stat_count_t large;
mi_stat_count_t malloc;
mi_stat_count_t segments_cache;
mi_stat_counter_t pages_extended;
@ -428,7 +511,7 @@ typedef struct mi_stats_s {
mi_stat_counter_t searches;
mi_stat_counter_t normal_count;
mi_stat_counter_t huge_count;
mi_stat_counter_t giant_count;
mi_stat_counter_t large_count;
#if MI_STAT>1
mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
#endif
@ -457,13 +540,15 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
// Thread Local data
// ------------------------------------------------------
typedef int64_t mi_msecs_t;
// A "span" is is an available range of slices. The span queues keep
// track of slice spans of at most the given `slice_count` (but more than the previous size class).
typedef struct mi_span_queue_s {
mi_slice_t* first;
mi_slice_t* last;
size_t slice_count;
} mi_span_queue_t;
// Queue of segments
typedef struct mi_segment_queue_s {
mi_segment_t* first;
mi_segment_t* last;
} mi_segment_queue_t;
#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)
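As a minimal sketch (not the actual `segment.c` code), pushing a freed span onto the queue for its size class reuses the `next`/`prev` fields of the slice (a `mi_page_t`) shown earlier:
```
// Sketch: link a free span at the front of its span queue.
static void sketch_span_queue_push(mi_span_queue_t* sq, mi_slice_t* slice) {
  slice->prev = NULL;
  slice->next = sq->first;
  sq->first = slice;
  if (slice->next != NULL) slice->next->prev = slice;
  else sq->last = slice;
}
```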
// OS thread local data
typedef struct mi_os_tld_s {
@ -471,11 +556,10 @@ typedef struct mi_os_tld_s {
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;
// Segments thread local data
typedef struct mi_segments_tld_s {
mi_segment_queue_t small_free; // queue of segments with free small pages
mi_segment_queue_t medium_free; // queue of segments with free medium pages
mi_page_queue_t pages_reset; // queue of freed pages that can be reset
mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments
size_t count; // current number of segments;
size_t peak_count; // peak number of segments
size_t current_size; // current size of all segments


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 171 // major + 2 digits minor
#define MI_MALLOC_VERSION 203 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -26,7 +26,7 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__cplusplus) && (__cplusplus >= 201703)
#define mi_decl_nodiscard [[nodiscard]]
#elif (__GNUC__ >= 4) || defined(__clang__) // includes clang, icc, and clang-cl
#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
#define mi_decl_nodiscard __attribute__((warn_unused_result))
#elif (_MSC_VER >= 1700)
#define mi_decl_nodiscard _Check_return_
@ -58,8 +58,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_attr_alloc_size2(s1,s2)
#define mi_attr_alloc_align(p)
#elif defined(__GNUC__) // includes clang and icc
#if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT)
#define mi_decl_export __attribute__((visibility("default")))
#else
#define mi_decl_export
#endif
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
#define mi_decl_export __attribute__((visibility("default")))
#define mi_decl_restrict
#define mi_attr_malloc __attribute__((malloc))
#if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5)
@ -267,6 +271,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
// deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
@ -306,13 +311,16 @@ typedef enum mi_option_e {
mi_option_reset_decommits,
mi_option_large_os_pages, // implies eager commit
mi_option_reserve_huge_os_pages,
mi_option_reserve_huge_os_pages_at,
mi_option_reserve_os_memory,
mi_option_segment_cache,
mi_option_page_reset,
mi_option_abandoned_page_reset,
mi_option_segment_reset,
mi_option_eager_commit_delay,
mi_option_allow_decommit,
mi_option_reset_delay,
mi_option_segment_decommit_delay,
mi_option_use_numa_nodes,
mi_option_limit_os_alloc,
mi_option_os_tag,
@ -342,6 +350,7 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value);
mi_decl_export void mi_cfree(void* p) mi_attr_noexcept;
mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept;
mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept;
@ -383,6 +392,7 @@ mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount,
// ---------------------------------------------------------------------------------------------
#ifdef __cplusplus
#include <cstddef> // std::size_t
#include <cstdint> // PTRDIFF_MAX
#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
#include <type_traits> // std::true_type


@ -12,13 +12,13 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the run-time systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
Latest release tag: `v2.0.1` (beta, 2021-04-06).
Latest stable tag: `v1.7.1` (2021-04-06).
Latest release tag: `v2.0.3` (beta, 2021-11-14).
Latest stable tag: `v1.7.3` (2021-11-14).
mimalloc is a drop-in replacement for `malloc` and can be used in other programs
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
```
> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram
> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
```
It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include:
@ -73,12 +73,21 @@ Enjoy!
### Releases
* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, improved M1 support (still experimental).
* 2021-01-31, `v2.0.0`: beta release 2.0: new algorithm for managing internal mimalloc pages that tends to reduce memory usage
and fragmentation compared to mimalloc v1 (especially for large workloads). Should otherwise have similar performance
Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression.
* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
M1), improved performance for v2 for large objects, Python integration improvements, more standard
installation directories, various small fixes.
* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
@ -137,7 +146,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
## Windows
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`).
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override` project builds a DLL for overriding malloc
in the entire program.
@ -186,6 +195,11 @@ Notes:
2. Install CCMake: `sudo apt-get install cmake-curses-gui`
## Single source
You can also directly build the single `src/static.c` file as part of your project without
needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path.
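For instance, a sketch of such a project (the compile command in the comment assumes the mimalloc tree is checked out next to it):
```
// myprog.c -- a sketch; build together with mimalloc's single source file, e.g.
//   cc -O2 -I mimalloc/include mimalloc/src/static.c myprog.c -o myprog
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc(100);                        // allocate through mimalloc
  printf("usable size: %zu\n", mi_usable_size(p));
  mi_free(p);
  return 0;
}
```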
# Using the library
@ -297,6 +311,9 @@ or via environment variables:
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be reset).
The huge pages are usually allocated evenly among NUMA nodes.
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
the huge pages at a specific numa node instead.
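The same reservation can also be requested programmatically (a sketch; the third argument is the timeout in milliseconds, and the exact parameter naming is an assumption):
```
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Reserve 4 huge (1GiB) OS pages on NUMA node 0, waiting at most 2 seconds;
  // comparable in spirit to setting MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=0.
  if (mi_reserve_huge_os_pages_at(4, 0, 2000) != 0) {
    fprintf(stderr, "failed to reserve huge OS pages\n");
  }
  return 0;
}
```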
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
@ -332,9 +349,9 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to
- Corrupted free-lists and some forms of use-after-free are detected.
# Overriding Malloc
# Overriding Standard Malloc
Overriding the standard `malloc` can be done either _dynamically_ or _statically_.
Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_.
## Dynamic override
@ -365,13 +382,12 @@ On macOS we can also preload the mimalloc shared
library so all calls to the standard `malloc` interface are
resolved to the _mimalloc_ library.
```
> env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
```
Note that certain security restrictions may apply when doing this from
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
(Note: macOS support for dynamic overriding is recent, please report any issues.)
### Override on Windows
@ -381,7 +397,7 @@ the (dynamic) C runtime allocator, including those from other DLL's or libraries
The overriding on Windows requires that you link your program explicitly with
the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
mimalloc (in `mimalloc-override.dll`).


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -41,7 +41,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
}
// use regular allocation if it is guaranteed to fit the alignment constraints
if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) {
if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) {
void* p = _mi_heap_malloc_zero(heap, size, zero);
mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
return p;


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -17,17 +17,20 @@ terms of the MIT license. A copy of the license can be found in the file
/* ------------------------------------------------------
Override system malloc on macOS
This is done through the malloc zone interface.
It seems we also need to interpose (see `alloc-override.c`)
or otherwise we get zone errors as there are usually
already allocations done by the time we take over the
zone. Unfortunately, that means we need to replace
the `free` with a checked free (`cfree`) impacting
performance.
It seems to be most robust in combination with interposing
though, as otherwise we may get zone errors since there could
be allocations done by the time we take over the
zone.
------------------------------------------------------ */
#include <AvailabilityMacros.h>
#include <malloc/malloc.h>
#include <string.h> // memset
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
@ -40,45 +43,43 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
------------------------------------------------------ */
static size_t zone_size(malloc_zone_t* zone, const void* p) {
UNUSED(zone);
if (!mi_is_in_heap_region(p))
return 0; // not our pointer, bail out
MI_UNUSED(zone);
//if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out
return mi_usable_size(p);
}
static void* zone_malloc(malloc_zone_t* zone, size_t size) {
UNUSED(zone);
MI_UNUSED(zone);
return mi_malloc(size);
}
static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) {
UNUSED(zone);
MI_UNUSED(zone);
return mi_calloc(count, size);
}
static void* zone_valloc(malloc_zone_t* zone, size_t size) {
UNUSED(zone);
MI_UNUSED(zone);
return mi_malloc_aligned(size, _mi_os_page_size());
}
static void zone_free(malloc_zone_t* zone, void* p) {
UNUSED(zone);
return mi_free(p);
MI_UNUSED(zone);
mi_free(p);
}
static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) {
UNUSED(zone);
MI_UNUSED(zone);
return mi_realloc(p, newsize);
}
static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) {
UNUSED(zone);
MI_UNUSED(zone);
return mi_malloc_aligned(size,alignment);
}
static void zone_destroy(malloc_zone_t* zone) {
UNUSED(zone);
MI_UNUSED(zone);
// todo: ignore for now?
}
@ -99,16 +100,21 @@ static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) {
}
static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) {
UNUSED(zone); UNUSED(size);
MI_UNUSED(zone); MI_UNUSED(size);
mi_collect(false);
return 0;
}
static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
UNUSED(size);
MI_UNUSED(size);
zone_free(zone,p);
}
static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) {
MI_UNUSED(zone);
return mi_is_in_heap_region(p);
}
/* ------------------------------------------------------
Introspection members
@ -120,43 +126,43 @@ static kern_return_t intro_enumerator(task_t task, void* p,
vm_range_recorder_t recorder)
{
// todo: enumerate all memory
UNUSED(task); UNUSED(p); UNUSED(type_mask); UNUSED(zone_address);
UNUSED(reader); UNUSED(recorder);
MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address);
MI_UNUSED(reader); MI_UNUSED(recorder);
return KERN_SUCCESS;
}
static size_t intro_good_size(malloc_zone_t* zone, size_t size) {
UNUSED(zone);
MI_UNUSED(zone);
return mi_good_size(size);
}
static boolean_t intro_check(malloc_zone_t* zone) {
UNUSED(zone);
MI_UNUSED(zone);
return true;
}
static void intro_print(malloc_zone_t* zone, boolean_t verbose) {
UNUSED(zone); UNUSED(verbose);
MI_UNUSED(zone); MI_UNUSED(verbose);
mi_stats_print(NULL);
}
static void intro_log(malloc_zone_t* zone, void* p) {
UNUSED(zone); UNUSED(p);
MI_UNUSED(zone); MI_UNUSED(p);
// todo?
}
static void intro_force_lock(malloc_zone_t* zone) {
UNUSED(zone);
MI_UNUSED(zone);
// todo?
}
static void intro_force_unlock(malloc_zone_t* zone) {
UNUSED(zone);
MI_UNUSED(zone);
// todo?
}
static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
UNUSED(zone);
MI_UNUSED(zone);
// todo...
stats->blocks_in_use = 0;
stats->size_in_use = 0;
@ -165,7 +171,7 @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
}
static boolean_t intro_zone_locked(malloc_zone_t* zone) {
UNUSED(zone);
MI_UNUSED(zone);
return false;
}
@ -174,7 +180,220 @@ static boolean_t intro_zone_locked(malloc_zone_t* zone) {
At process start, override the default allocator
------------------------------------------------------ */
static malloc_zone_t* mi_get_default_zone()
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif
static malloc_introspection_t mi_introspect = {
.enumerator = &intro_enumerator,
.good_size = &intro_good_size,
.check = &intro_check,
.print = &intro_print,
.log = &intro_log,
.force_lock = &intro_force_lock,
.force_unlock = &intro_force_unlock,
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
.statistics = &intro_statistics,
.zone_locked = &intro_zone_locked,
#endif
};
static malloc_zone_t mi_malloc_zone = {
.size = &zone_size,
.malloc = &zone_malloc,
.calloc = &zone_calloc,
.valloc = &zone_valloc,
.free = &zone_free,
.realloc = &zone_realloc,
.destroy = &zone_destroy,
.zone_name = "mimalloc",
.batch_malloc = &zone_batch_malloc,
.batch_free = &zone_batch_free,
.introspect = &mi_introspect,
#if defined(MAC_OS_X_VERSION_10_6) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9+ on OSX 10.6 to support memalign.
.memalign = &zone_memalign,
.free_definite_size = &zone_free_definite_size,
.pressure_relief = &zone_pressure_relief,
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
.claimed_address = &zone_claimed_address,
.version = 10
#else
.version = 9
#endif
#else
.version = 4
#endif
};
#ifdef __cplusplus
}
#endif
#if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT)
// ------------------------------------------------------
// Override malloc_xxx and malloc_zone_xxx APIs to use only
// our mimalloc zone. Since even the loader uses malloc
// on macOS, this ensures that all allocations go through
// mimalloc (as all calls are interposed).
// The main `malloc`, `free`, etc. calls are interposed in `alloc-override.c`;
// here, we also override macOS-specific APIs like
// `malloc_zone_calloc` etc.; see <https://github.com/aosm/libmalloc/blob/master/man/malloc_zone_malloc.3>
// ------------------------------------------------------
static inline malloc_zone_t* mi_get_default_zone(void)
{
static bool init;
if (mi_unlikely(!init)) {
init = true;
malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>)
}
return &mi_malloc_zone;
}
mi_decl_externc int malloc_jumpstart(uintptr_t cookie);
mi_decl_externc void _malloc_fork_prepare(void);
mi_decl_externc void _malloc_fork_parent(void);
mi_decl_externc void _malloc_fork_child(void);
static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) {
MI_UNUSED(size); MI_UNUSED(flags);
return mi_get_default_zone();
}
static malloc_zone_t* mi_malloc_default_zone (void) {
return mi_get_default_zone();
}
static malloc_zone_t* mi_malloc_default_purgeable_zone(void) {
return mi_get_default_zone();
}
static void mi_malloc_destroy_zone(malloc_zone_t* zone) {
MI_UNUSED(zone);
// nothing.
}
static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) {
MI_UNUSED(task); MI_UNUSED(mr);
if (addresses != NULL) *addresses = NULL;
if (count != NULL) *count = 0;
return KERN_SUCCESS;
}
static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name);
}
static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {
MI_UNUSED(zone); MI_UNUSED(name);
}
static int mi_malloc_jumpstart(uintptr_t cookie) {
MI_UNUSED(cookie);
return 1; // or 0 for no error?
}
static void mi__malloc_fork_prepare(void) {
// nothing
}
static void mi__malloc_fork_parent(void) {
// nothing
}
static void mi__malloc_fork_child(void) {
// nothing
}
static void mi_malloc_printf(const char* fmt, ...) {
MI_UNUSED(fmt);
}
static bool zone_check(malloc_zone_t* zone) {
MI_UNUSED(zone);
return true;
}
static malloc_zone_t* zone_from_ptr(const void* p) {
MI_UNUSED(p);
return mi_get_default_zone();
}
static void zone_log(malloc_zone_t* zone, void* p) {
MI_UNUSED(zone); MI_UNUSED(p);
}
static void zone_print(malloc_zone_t* zone, bool b) {
MI_UNUSED(zone); MI_UNUSED(b);
}
static void zone_print_ptr_info(void* p) {
MI_UNUSED(p);
}
static void zone_register(malloc_zone_t* zone) {
MI_UNUSED(zone);
}
static void zone_unregister(malloc_zone_t* zone) {
MI_UNUSED(zone);
}
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
const void* replacement;
const void* target;
};
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun)
__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc_create_zone),
MI_INTERPOSE_MI(malloc_default_purgeable_zone),
MI_INTERPOSE_MI(malloc_default_zone),
MI_INTERPOSE_MI(malloc_destroy_zone),
MI_INTERPOSE_MI(malloc_get_all_zones),
MI_INTERPOSE_MI(malloc_get_zone_name),
MI_INTERPOSE_MI(malloc_jumpstart),
MI_INTERPOSE_MI(malloc_printf),
MI_INTERPOSE_MI(malloc_set_zone_name),
MI_INTERPOSE_MI(_malloc_fork_child),
MI_INTERPOSE_MI(_malloc_fork_parent),
MI_INTERPOSE_MI(_malloc_fork_prepare),
MI_INTERPOSE_ZONE(zone_batch_free),
MI_INTERPOSE_ZONE(zone_batch_malloc),
MI_INTERPOSE_ZONE(zone_calloc),
MI_INTERPOSE_ZONE(zone_check),
MI_INTERPOSE_ZONE(zone_free),
MI_INTERPOSE_ZONE(zone_from_ptr),
MI_INTERPOSE_ZONE(zone_log),
MI_INTERPOSE_ZONE(zone_malloc),
MI_INTERPOSE_ZONE(zone_memalign),
MI_INTERPOSE_ZONE(zone_print),
MI_INTERPOSE_ZONE(zone_print_ptr_info),
MI_INTERPOSE_ZONE(zone_realloc),
MI_INTERPOSE_ZONE(zone_register),
MI_INTERPOSE_ZONE(zone_unregister),
MI_INTERPOSE_ZONE(zone_valloc)
};
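In isolation, the `__DATA,__interpose` mechanism used above looks like this (a sketch with a hypothetical `my_malloc`, not part of mimalloc; build it as a dylib and load it with `DYLD_INSERT_LIBRARIES`):
```
#include <stdio.h>
#include <stdlib.h>

// Hypothetical replacement; calls from this image to malloc are not rewritten,
// so forwarding to the system allocator is safe.
static void* my_malloc(size_t size) {
  fprintf(stderr, "malloc(%zu)\n", size);
  return malloc(size);
}

// dyld reads this section and redirects other images' calls to `malloc`.
__attribute__((used)) static struct { const void* replacement; const void* target; }
  my_interposes[] __attribute__((section("__DATA,__interpose"))) = {
  { (const void*)&my_malloc, (const void*)&malloc },
};
```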
#else
// ------------------------------------------------------
// hook into the zone api's without interposing
// This is the official way of adding an allocator but
// it seems less robust than using interpose.
// ------------------------------------------------------
static inline malloc_zone_t* mi_get_default_zone(void)
{
// The first returned zone is the real default
malloc_zone_t** zones = NULL;
@ -189,70 +408,21 @@ static malloc_zone_t* mi_get_default_zone()
}
}
static malloc_introspection_t mi_introspect = {
.enumerator = &intro_enumerator,
.good_size = &intro_good_size,
.check = &intro_check,
.print = &intro_print,
.log = &intro_log,
.force_lock = &intro_force_lock,
.force_unlock = &intro_force_unlock,
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
.zone_locked = &intro_zone_locked,
.statistics = &intro_statistics,
#endif
};
static malloc_zone_t mi_malloc_zone = {
.size = &zone_size,
.zone_name = "mimalloc",
.introspect = &mi_introspect,
.malloc = &zone_malloc,
.calloc = &zone_calloc,
.valloc = &zone_valloc,
.free = &zone_free,
.realloc = &zone_realloc,
.destroy = &zone_destroy,
.batch_malloc = &zone_batch_malloc,
.batch_free = &zone_batch_free,
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9 on OSX 10.6 to support memalign.
.version = 9,
.memalign = &zone_memalign,
.free_definite_size = &zone_free_definite_size,
.pressure_relief = &zone_pressure_relief,
#if defined(__clang__)
__attribute__((constructor(0)))
#else
.version = 4,
__attribute__((constructor)) // seems not supported by g++-11 on the M1
#endif
};
#if defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE)
static malloc_zone_t *mi_malloc_default_zone(void) {
return &mi_malloc_zone;
}
// TODO: should use the macros in alloc-override but they aren't available here.
__attribute__((used)) static struct {
const void *replacement;
const void *target;
} replace_malloc_default_zone[] __attribute__((section("__DATA, __interpose"))) = {
{ (const void*)mi_malloc_default_zone, (const void*)malloc_default_zone },
};
#endif
static void __attribute__((constructor(0))) _mi_macos_override_malloc() {
static void _mi_macos_override_malloc() {
malloc_zone_t* purgeable_zone = NULL;
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// force the purgeable zone to exist to avoid strange bugs
if (malloc_default_purgeable_zone) {
purgeable_zone = malloc_default_purgeable_zone();
}
#endif
// Register our zone.
// thomcc: I think this is still needed to put us in the zone list.
@ -277,5 +447,6 @@ static void __attribute__((constructor(0))) _mi_macos_override_malloc() {
}
}
#endif // MI_OSX_INTERPOSE
#endif // MI_MALLOC_OVERRIDE


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -13,18 +13,28 @@ terms of the MIT license. A copy of the license can be found in the file
#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__MACH__) && !defined(MI_INTERPOSE)))
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
#if defined(__APPLE__)
mi_decl_externc void vfree(void* p);
mi_decl_externc size_t malloc_size(const void* p);
mi_decl_externc size_t malloc_good_size(size_t size);
#endif
// helper definition for C override of C++ new
typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
// ------------------------------------------------------
// Override system malloc
// ------------------------------------------------------
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
// use aliasing to alias the exported function to one of our `mi_` functions
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !defined(MI_VALGRIND)
// gcc, clang: use aliasing to alias the exported function to one of our `mi_` functions
#if (defined(__GNUC__) && __GNUC__ >= 9)
#define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun)))
#pragma GCC diagnostic ignored "-Wattributes" // or we get warnings that nodiscard is ignored on a forward
#define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun)));
#else
#define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default")))
#define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default")));
#endif
#define MI_FORWARD1(fun,x) MI_FORWARD(fun)
#define MI_FORWARD2(fun,x,y) MI_FORWARD(fun)
@ -32,7 +42,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_FORWARD0(fun,x) MI_FORWARD(fun)
#define MI_FORWARD02(fun,x,y) MI_FORWARD(fun)
#else
// use forwarding by calling our `mi_` function
// otherwise use forwarding by calling our `mi_` function
#define MI_FORWARD1(fun,x) { return fun(x); }
#define MI_FORWARD2(fun,x,y) { return fun(x,y); }
#define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); }
@ -40,7 +50,11 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_FORWARD02(fun,x,y) { fun(x,y); }
#endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE)
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
// define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for
// functions that are interposed (or the interposing does not work)
#define MI_OSX_IS_INTERPOSED
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
@ -49,6 +63,7 @@ terms of the MIT license. A copy of the license can be found in the file
};
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc),
@ -60,28 +75,56 @@ terms of the MIT license. A copy of the license can be found in the file
MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf),
MI_INTERPOSE_MI(valloc),
#ifndef MI_OSX_ZONE
// some code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
#else
// We interpose malloc_default_zone in alloc-override-osx.c
MI_INTERPOSE_MI(malloc_size),
MI_INTERPOSE_MI(malloc_good_size),
MI_INTERPOSE_MI(aligned_alloc),
#ifdef MI_OSX_ZONE
// we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
MI_INTERPOSE_MI(free),
#endif
// some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(vfree,mi_free),
#else
// sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
MI_INTERPOSE_FUN(vfree,mi_cfree),
#endif
};
#ifdef __cplusplus
extern "C" {
void _ZdlPv(void* p); // delete
void _ZdaPv(void* p); // delete[]
void _ZdlPvm(void* p, size_t n); // delete
void _ZdaPvm(void* p, size_t n); // delete[]
void* _Znwm(size_t n); // new
void* _Znam(size_t n); // new[]
void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow
void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow
}
__attribute__((used)) static struct mi_interpose_s _mi_cxx_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_FUN(_ZdlPv,mi_free),
MI_INTERPOSE_FUN(_ZdaPv,mi_free),
MI_INTERPOSE_FUN(_ZdlPvm,mi_free_size),
MI_INTERPOSE_FUN(_ZdaPvm,mi_free_size),
MI_INTERPOSE_FUN(_Znwm,mi_new),
MI_INTERPOSE_FUN(_Znam,mi_new),
MI_INTERPOSE_FUN(_ZnwmRKSt9nothrow_t,mi_new_nothrow),
MI_INTERPOSE_FUN(_ZnamRKSt9nothrow_t,mi_new_nothrow),
};
#endif // __cplusplus
#elif defined(_MSC_VER)
// cannot override malloc unless using a dll.
// we just override new/delete which does work in a static library.
#else
// On all other systems forward to our API
void* malloc(size_t size) MI_FORWARD1(mi_malloc, size);
void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n);
void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize);
void free(void* p) MI_FORWARD0(mi_free, p);
void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
void free(void* p) MI_FORWARD0(mi_free, p)
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
#pragma GCC visibility push(default)
#endif
@ -96,18 +139,21 @@ terms of the MIT license. A copy of the license can be found in the file
// see <https://en.cppreference.com/w/cpp/memory/new/operator_new>
// ------------------------------------------------------
#include <new>
void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p);
void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p);
void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n);
void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n);
#ifndef MI_OSX_IS_INTERPOSED
void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p)
void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p)
void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); }
void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); }
void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n);
void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n);
void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
#endif
#endif
#if (__cplusplus > 201402L && defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5))
@ -122,91 +168,98 @@ terms of the MIT license. A copy of the license can be found in the file
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
#endif
#elif (defined(__GNUC__) || defined(__clang__))
// ------------------------------------------------------
// Override by defining the mangled C++ names of the operators (as
// used by GCC and CLang).
// See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
// ------------------------------------------------------
void _ZdlPv(void* p) MI_FORWARD0(mi_free,p); // delete
void _ZdaPv(void* p) MI_FORWARD0(mi_free,p); // delete[]
void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n);
void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n);
void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete
void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
typedef struct mi_nothrow_s { } mi_nothrow_t;
#if (MI_INTPTR_SIZE==8)
void* _Znwm(size_t n) MI_FORWARD1(mi_new,n); // new 64-bit
void* _Znam(size_t n) MI_FORWARD1(mi_new,n); // new[] 64-bit
void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al);
void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al);
void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
#elif (MI_INTPTR_SIZE==4)
void* _Znwj(size_t n) MI_FORWARD1(mi_new,n); // new 32-bit
void* _Znaj(size_t n) MI_FORWARD1(mi_new,n); // new[] 32-bit
void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al);
void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al);
void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 32-bit
void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 32-bit
void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
#else
#error "define overloads for new/delete for this platform (just for performance, can be skipped)"
#error "define overloads for new/delete for this platform (just for performance, can be skipped)"
#endif
#endif // __cplusplus
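These mangled names can even be exercised directly from C (a sketch; assumes a 64-bit Itanium-ABI platform and linking against the C++ runtime, e.g. `-lstdc++`):
```
#include <stdio.h>

void* _Znwm(unsigned long n);  // ::operator new(std::size_t) on 64-bit
void  _ZdlPv(void* p);         // ::operator delete(void*)

int main(void) {
  void* p = _Znwm(64);  // the same symbol the overrides above capture
  printf("operator new(64) -> %p\n", p);
  _ZdlPv(p);
  return 0;
}
```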
// ------------------------------------------------------
// Further Posix & Unix functions definitions
// ------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
// ------------------------------------------------------
// Posix & Unix functions definitions
// ------------------------------------------------------
#ifndef MI_OSX_IS_INTERPOSED
// Forward Posix/Unix calls as well
void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize)
size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p)
#if !defined(__ANDROID__) && !defined(__FreeBSD__)
size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p)
#else
size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p)
#endif
void cfree(void* p) MI_FORWARD0(mi_free, p);
void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize);
size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p);
#if !defined(__ANDROID__)
size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p);
#else
size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p);
// No forwarding here due to aliasing/name mangling issues
void* valloc(size_t size) { return mi_valloc(size); }
void vfree(void* p) { mi_free(p); }
size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); }
int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
// `aligned_alloc` is only available when __USE_ISOC11 is defined.
// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
// override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
// Fortunately, in the case where `aligned_alloc` is declared as `static inline` it
// uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves.
#if __USE_ISOC11
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
#endif
#endif
// no forwarding here due to aliasing/name mangling issues
void* valloc(size_t size) { return mi_valloc(size); }
void* pvalloc(size_t size) { return mi_pvalloc(size); }
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
// on some glibc `aligned_alloc` is declared `static inline` so we cannot override it (e.g. Conda). This happens
// when _GLIBCXX_HAVE_ALIGNED_ALLOC is not defined. However, in those cases it will use `memalign`, `posix_memalign`,
// or `_aligned_malloc` and we can avoid overriding it ourselves.
// We should always override if using C compilation. (issue #276)
#if _GLIBCXX_HAVE_ALIGNED_ALLOC || !defined(__cplusplus)
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
#endif
void cfree(void* p) { mi_free(p); }
void* pvalloc(size_t size) { return mi_pvalloc(size); }
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
#if defined(__GLIBC__) && defined(__linux__)
// forward __libc interface (needed for glibc-based Linux distributions)
void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size);
void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size);
void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size);
void __libc_free(void* p) MI_FORWARD0(mi_free,p);
void __libc_cfree(void* p) MI_FORWARD0(mi_free,p);
void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size)
void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size)
void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size)
void __libc_free(void* p) MI_FORWARD0(mi_free,p)
void __libc_cfree(void* p) MI_FORWARD0(mi_free,p)
void* __libc_valloc(size_t size) { return mi_valloc(size); }
void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); }
void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); }
int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); }
#endif
@ -214,7 +267,7 @@ void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(a
}
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
#pragma GCC visibility pop
#endif


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018,2019, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -33,13 +33,19 @@ terms of the MIT license. A copy of the license can be found in the file
size_t mi_malloc_size(const void* p) mi_attr_noexcept {
//if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
//if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
size_t mi_malloc_good_size(size_t size) mi_attr_noexcept {
return mi_good_size(size);
}
void mi_cfree(void* p) mi_attr_noexcept {
if (mi_is_in_heap_region(p)) {
mi_free(p);


@ -1,9 +1,13 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE // for realpath() on Linux
#endif
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
@ -119,7 +123,7 @@ extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
// note: we need to initialize the whole usable block size to zero, not just the requested size,
// or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
UNUSED(size);
MI_UNUSED(size);
mi_assert_internal(p != NULL);
mi_assert_internal(mi_usable_size(p) >= size); // size can be zero
mi_assert_internal(_mi_ptr_page(p)==page);
@ -201,8 +205,8 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
}
#else
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
UNUSED(page);
UNUSED(block);
MI_UNUSED(page);
MI_UNUSED(block);
return false;
}
#endif
@ -274,19 +278,19 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co
}
#else
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
UNUSED(page);
UNUSED(block);
MI_UNUSED(page);
MI_UNUSED(block);
}
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
UNUSED(block);
MI_UNUSED(block);
return mi_page_usable_block_size(page);
}
static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
UNUSED(page);
UNUSED(block);
UNUSED(min_size);
MI_UNUSED(page);
MI_UNUSED(block);
MI_UNUSED(min_size);
}
#endif
@ -294,7 +298,7 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
UNUSED(block);
MI_UNUSED(block);
#endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
@ -311,7 +315,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
UNUSED(page); UNUSED(block);
MI_UNUSED(page); MI_UNUSED(block);
}
#endif
@ -320,16 +324,16 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
static void mi_stat_huge_free(const mi_page_t* page) {
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc`
if (bsize <= MI_HUGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, huge, bsize);
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, large, bsize);
}
else {
mi_heap_stat_decrease(heap, giant, bsize);
mi_heap_stat_decrease(heap, huge, bsize);
}
}
#else
static void mi_stat_huge_free(const mi_page_t* page) {
UNUSED(page);
MI_UNUSED(page);
}
#endif
@ -349,8 +353,8 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
#endif
// huge page segments are always abandoned and can be freed immediately
mi_segment_t* const segment = _mi_page_segment(page);
if (segment->page_kind==MI_PAGE_HUGE) {
mi_segment_t* segment = _mi_page_segment(page);
if (segment->kind==MI_SEGMENT_HUGE) {
mi_stat_huge_free(page);
_mi_segment_huge_page_free(segment, page, block);
return;
@ -431,7 +435,7 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
}
static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) mi_attr_noexcept {
mi_page_t* const page = _mi_segment_page_of(segment, p);
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
mi_stat_free(page, block);
@ -443,7 +447,7 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l
// (and secure mode) if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
UNUSED(msg);
MI_UNUSED(msg);
#if (MI_DEBUG>0)
if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
@ -465,7 +469,7 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", p);
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
}
#endif
return segment;
@ -478,12 +482,12 @@ void mi_free(void* p) mi_attr_noexcept
const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if (mi_unlikely(segment == NULL)) return;
const uintptr_t tid = _mi_thread_id();
const mi_threadid_t tid = _mi_thread_id();
mi_page_t* const page = _mi_segment_page_of(segment, p);
mi_block_t* const block = (mi_block_t*)p;
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)(p);
if (mi_unlikely(mi_check_is_double_free(page,block))) return;
mi_check_padding(page, block);
mi_stat_free(page, block);
@ -557,6 +561,7 @@ void* _mi_externs[] = {
(void*)&_mi_page_malloc,
(void*)&mi_malloc,
(void*)&mi_malloc_small,
(void*)&mi_zalloc_small,
(void*)&mi_heap_malloc,
(void*)&mi_heap_zalloc,
(void*)&mi_heap_malloc_small
@ -569,19 +574,19 @@ void* _mi_externs[] = {
// ------------------------------------------------------
void mi_free_size(void* p, size_t size) mi_attr_noexcept {
UNUSED_RELEASE(size);
MI_UNUSED_RELEASE(size);
mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
mi_free(p);
}
void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
UNUSED_RELEASE(alignment);
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free_size(p,size);
}
void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
UNUSED_RELEASE(alignment);
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free(p);
}
@ -746,7 +751,7 @@ mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char
}
#else
#include <unistd.h> // pathconf
static size_t mi_path_max() {
static size_t mi_path_max(void) {
static size_t path_max = 0;
if (path_max <= 0) {
long m = pathconf("/",_PC_PATH_MAX);
@ -797,7 +802,10 @@ static bool mi_try_new_handler(bool nothrow) {
std::set_new_handler(h);
#endif
if (h==NULL) {
if (!nothrow) throw std::bad_alloc();
_mi_error_message(ENOMEM, "out of memory in 'new'");
if (!nothrow) {
throw std::bad_alloc();
}
return false;
}
else {
@ -806,13 +814,13 @@ static bool mi_try_new_handler(bool nothrow) {
}
}
#else
typedef void (*std_new_handler_t)();
typedef void (*std_new_handler_t)(void);
#if (defined(__GNUC__) || defined(__clang__))
std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv() {
std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv(void) {
return NULL;
}
static std_new_handler_t mi_get_new_handler() {
static std_new_handler_t mi_get_new_handler(void) {
return _ZSt15get_new_handlerv();
}
#else
@ -825,7 +833,10 @@ static std_new_handler_t mi_get_new_handler() {
static bool mi_try_new_handler(bool nothrow) {
std_new_handler_t h = mi_get_new_handler();
if (h==NULL) {
if (!nothrow) exit(ENOMEM); // cannot throw in plain C, use exit as we are out of memory anyway.
_mi_error_message(ENOMEM, "out of memory in 'new'");
if (!nothrow) {
abort(); // cannot throw in plain C, use abort
}
return false;
}
else {


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -7,23 +7,18 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
"Arenas" are fixed area's of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.
Currently arenas are only used for huge OS page (1GiB) reservations,
otherwise it delegates to direct allocation from the OS.
or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas
which is sometimes needed for embedded devices or shared memory for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).
The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
only do this within a field (`uintptr_t`) so we can allocate at most
blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
can lead to fragmentation but fortunately most objects will be regions
of 256MiB in practice.
The arena allocation needs to be thread safe and we use an atomic bitmap to allocate.
-----------------------------------------------------------------------------*/
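For intuition, here is a minimal self-contained sketch of the claim idea described above; it is not mimalloc's actual bitmap.c code (which also handles spans crossing field boundaries), and the names are invented for illustration:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    // Try to claim `count` contiguous bits in one atomic word.
    // Assumes 1 <= count <= bits.
    static bool try_claim_bits(_Atomic(size_t)* field, size_t count, size_t* bitidx_out) {
      const size_t bits = 8 * sizeof(size_t);
      const size_t mask = (count >= bits ? (size_t)-1 : (((size_t)1 << count) - 1));
      size_t map = atomic_load_explicit(field, memory_order_relaxed);
      for (size_t bitidx = 0; bitidx + count <= bits; bitidx++) {
        const size_t m = mask << bitidx;
        if ((map & m) == 0) {                                   // bits free at this position?
          if (atomic_compare_exchange_weak(field, &map, map | m)) {
            *bitidx_out = bitidx;                               // claimed atomically
            return true;
          }
          bitidx--;  // CAS failed and reloaded `map`; retry the same position
        }
      }
      return false;  // no free run of `count` bits in this word
    }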
#include "mimalloc.h"
#include "mimalloc-internal.h"
@ -38,7 +33,6 @@ of 256MiB in practice.
// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
@ -46,13 +40,17 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
/* -----------------------------------------------------------
Arena allocation
----------------------------------------------------------- */
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 32MiB
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB
// Block info: bit 0 contains the `in_use` bit, the upper bits the
// size in count of arena blocks.
typedef uintptr_t mi_block_info_t;
#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN)
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB
#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid)
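An arena block is now a full segment (8MiB). Purely for illustration, the round-up implied by mi_block_count_of_size (its body lies outside this hunk) would behave like this hypothetical helper:

    #define ARENA_BLOCK_SIZE ((size_t)(8*1024*1024))
    static size_t block_count_of_size(size_t size) {
      return (size + ARENA_BLOCK_SIZE - 1) / ARENA_BLOCK_SIZE;  // divide, rounding up
    }
    // e.g. 20MiB -> 3 blocks; 4MiB (MI_ARENA_MIN_OBJ_SIZE) -> 1 block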
// A memory arena descriptor
@ -62,18 +60,18 @@ typedef struct mi_arena_s {
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
int numa_node; // associated NUMA node
bool is_zero_init; // is the arena zero initialized?
bool is_committed; // is the memory fully committed? (if so, block_committed == NULL)
bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL
bool is_large; // large- or huge OS pages (always committed)
_Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
// The available arenas
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
/* -----------------------------------------------------------
@ -105,9 +103,9 @@ static size_t mi_block_count_of_size(size_t size) {
----------------------------------------------------------- */
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
return true;
};
return false;
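Note the asymmetry introduced here: the load of `search_idx` is commented out, so every search now scans from field 0, while the store still records the field of the last successful claim via mi_bitmap_index_field. Both sides use relaxed ordering since, as the comment says, the exact starting point only affects performance, never correctness.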
@ -118,8 +116,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
Arena Allocation
----------------------------------------------------------- */
static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_bitmap_index_t bitmap_index;
if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
@ -129,8 +127,8 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
*memid = mi_arena_id_create(arena_index, bitmap_index);
*is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*large = arena->is_large;
*is_pinned = (arena->is_large || arena->is_committed);
if (arena->is_committed) {
*is_pinned = (arena->is_large || !arena->allow_decommit);
if (arena->blocks_committed == NULL) {
// always committed
*commit = true;
}
@ -151,6 +149,48 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
return p;
}
static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
MI_UNUSED_RELEASE(alignment);
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
const size_t bcount = mi_block_count_of_size(size);
if (mi_likely(max_arena == 0)) return NULL;
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
// try numa affine allocation
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena==NULL) break; // end reached
if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) {
return p;
}
}
}
// try from another numa node instead..
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena==NULL) break; // end reached
if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) {
return p;
}
}
}
return NULL;
}
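The extracted mi_arena_allocate keeps the original two-pass policy: the first loop considers only arenas that are NUMA-local (or node-agnostic, numa_node < 0) and whose large-page setting is acceptable to the caller; the second loop retries with arenas on other NUMA nodes before the caller falls back to plain OS allocation.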
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
size_t* memid, mi_os_tld_t* tld)
{
@ -160,40 +200,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool*
*is_zero = false;
*is_pinned = false;
// try to allocate in an arena if the alignment is small enough
// and the object is not too large or too small.
if (alignment <= MI_SEGMENT_ALIGN &&
size >= MI_ARENA_MIN_OBJ_SIZE &&
mi_atomic_load_relaxed(&mi_arena_count) > 0)
{
const size_t bcount = mi_block_count_of_size(size);
const int numa_node = _mi_os_numa_node(tld); // current numa node
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
const int numa_node = _mi_os_numa_node(tld); // current numa node
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
// try numa affine allocation
for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena==NULL) break; // end reached
if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
}
// try from another numa node instead..
for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena==NULL) break; // end reached
if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
}
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) {
void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, memid, tld);
if (p != NULL) return p;
}
// finally, fall back to the OS
@ -217,13 +231,14 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b
Arena free
----------------------------------------------------------- */
void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats) {
mi_assert_internal(size > 0 && stats != NULL);
void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld->stats != NULL);
if (p==NULL) return;
if (size==0) return;
if (memid == MI_MEMID_OS) {
// was a direct OS allocation, pass through
_mi_os_free_ex(p, size, all_committed, stats);
_mi_os_free_ex(p, size, all_committed, tld->stats);
}
else {
// allocated in an arena
@ -245,12 +260,12 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s
return;
}
// potentially decommit
if (arena->is_committed) {
mi_assert_internal(all_committed);
if (!arena->allow_decommit || arena->blocks_committed == NULL) {
mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c)
}
else {
mi_assert_internal(arena->blocks_committed != NULL);
_mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails
_mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, tld->stats); // ok if this fails
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
}
// and make it available to others again
@ -271,7 +286,7 @@ static bool mi_arena_add(mi_arena_t* arena) {
mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
mi_assert_internal(arena->block_count > 0);
uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
size_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
if (i >= MI_MAX_ARENAS) {
mi_atomic_decrement_acq_rel(&mi_arena_count);
return false;
@ -282,12 +297,14 @@ static bool mi_arena_add(mi_arena_t* arena) {
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept
{
if (size < MI_ARENA_BLOCK_SIZE) return false;
if (is_large) {
mi_assert_internal(is_committed);
is_committed = true;
}
const size_t bcount = mi_block_count_of_size(size);
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
const size_t bitmaps = (is_committed ? 2 : 3);
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
@ -300,12 +317,16 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = is_large;
arena->is_zero_init = is_zero;
arena->is_committed = is_committed;
arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = (is_committed ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
// initialize committed bitmap?
if (arena->blocks_committed != NULL && is_committed) {
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
}
// and claim leftover blocks if needed (so we never allocate there)
ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
mi_assert_internal(post >= 0);
if (post > 0) {
@ -321,7 +342,7 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la
// Reserve a range of regular OS memory
int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept
{
size = _mi_os_good_alloc_size(size);
size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
bool large = allow_large;
void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
if (start==NULL) return ENOMEM;
@ -330,10 +351,37 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe
_mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
return ENOMEM;
}
_mi_verbose_message("reserved %zu kb memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : "");
_mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : "");
return 0;
}
static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) {
size_t inuse_count = 0;
for (size_t i = 0; i < field_count; i++) {
char buf[MI_BITMAP_FIELD_BITS + 1];
uintptr_t field = mi_atomic_load_relaxed(&fields[i]);
for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) {
bool inuse = ((((uintptr_t)1 << bit) & field) != 0);
if (inuse) inuse_count++;
buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.');
}
buf[MI_BITMAP_FIELD_BITS] = 0;
_mi_verbose_message("%s%s\n", prefix, buf);
}
return inuse_count;
}
void mi_debug_show_arenas(void) mi_attr_noexcept {
size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count);
for (size_t i = 0; i < max_arenas; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena == NULL) break;
size_t inuse_count = 0;
_mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count);
inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count);
_mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count);
}
}
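A hypothetical usage sketch for the new debug helper, assuming mi_debug_show_arenas is exported via mimalloc.h in this release; its output goes through _mi_verbose_message, so verbose mode must be enabled first:

    #include <mimalloc.h>

    int main(void) {
      mi_option_enable(mi_option_verbose);
      mi_reserve_os_memory(64 * 1024 * 1024, true /*commit*/, false /*allow_large*/);
      void* p = mi_malloc(1024 * 1024);
      mi_debug_show_arenas();   // prints one row of 'x'/'.' per bitmap field
      mi_free(p);
      return 0;
    }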
/* -----------------------------------------------------------
Reserve a huge page arena.
@ -347,10 +395,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
size_t pages_reserved = 0;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
if (p==NULL || pages_reserved==0) {
_mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
_mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
return ENOMEM;
}
_mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages);
_mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) {
_mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
@ -389,7 +437,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
}
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
UNUSED(max_secs);
MI_UNUSED(max_secs);
_mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
if (pages_reserved != NULL) *pages_reserved = 0;
int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019,2020 Microsoft Research, Daan Leijen
Copyright (c) 2019-2021 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`uintptr_t`)
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
@ -26,47 +26,47 @@ between the fields. (This is used in arena allocation)
----------------------------------------------------------- */
// The bit mask for a given number of blocks at a specified bit index.
static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
mi_assert_internal(count > 0);
if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
if (count == 0) return 0;
return ((((uintptr_t)1 << count) - 1) << bitidx);
return ((((size_t)1 << count) - 1) << bitidx);
}
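A quick worked example of the mask construction on a 64-bit field:

    // mi_bitmap_mask_(4, 8)  == 0x0000000000000F00   (four set bits starting at bit 8)
    // mi_bitmap_mask_(64, 0) == MI_BITMAP_FIELD_FULL (the shortcut avoids an undefined shift by 64)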
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
_Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_load_relaxed(field);
mi_assert_internal(count > 0);
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
// search for 0-bit sequence of length count
const uintptr_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
const size_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#ifdef MI_HAVE_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
const uintptr_t mapm = map & m;
const size_t mapm = map & m;
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const uintptr_t newmap = map | m;
const size_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going (with updated `map`)
@ -94,9 +94,9 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
return false;
}
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
// `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
@ -118,13 +118,13 @@ bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, c
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask);
uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
@ -134,10 +134,10 @@ bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, m
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
return ((prev & mask) == 0);
}
@ -146,9 +146,9 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) *any_ones = ((field & mask) != 0);
return ((field & mask) == mask);
}
@ -176,8 +176,8 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
mi_assert_internal(bitmap_idx != NULL);
// check initial trailing zeros
_Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_load_relaxed(field);
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
@ -186,11 +186,11 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
// scan ahead
size_t found = initial;
uintptr_t mask = 0; // mask bits for the final field
size_t mask = 0; // mask bits for the final field
while(found < count) {
field++;
map = mi_atomic_load_relaxed(field);
const uintptr_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
mask = mi_bitmap_mask_(mask_bits, 0);
if ((map & mask) != 0) return false;
found += mask_bits;
@ -199,13 +199,13 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
// found range of zeros up to the final field; mask contains mask in the final field
// now claim it atomically
_Atomic(uintptr_t)* const final_field = field;
const uintptr_t final_mask = mask;
_Atomic(uintptr_t)* const initial_field = &bitmap[idx];
const uintptr_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial);
mi_bitmap_field_t* const final_field = field;
const size_t final_mask = mask;
mi_bitmap_field_t* const initial_field = &bitmap[idx];
const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial);
// initial field
uintptr_t newmap;
size_t newmap;
field = initial_field;
map = mi_atomic_load_relaxed(field);
do {
@ -215,7 +215,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
// intermediate fields
while (++field < final_field) {
newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
newmap = MI_BITMAP_FIELD_FULL;
map = 0;
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
}
@ -236,7 +236,7 @@ rollback:
// roll back intermediate fields
while (--field > initial_field) {
newmap = 0;
map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
map = MI_BITMAP_FIELD_FULL;
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
mi_atomic_store_release(field, newmap);
}
@ -280,8 +280,8 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm
}
// Helper for masks across fields; returns the mid count, post_mask may be 0
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, uintptr_t* pre_mask, uintptr_t* mid_mask, uintptr_t* post_mask) {
UNUSED_RELEASE(bitmap_fields);
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
MI_UNUSED_RELEASE(bitmap_fields);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) {
*pre_mask = mi_bitmap_mask_(count, bitidx);
@ -308,13 +308,13 @@ static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
uintptr_t pre_mask;
uintptr_t mid_mask;
uintptr_t post_mask;
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true;
_Atomic(uintptr_t)*field = &bitmap[idx];
uintptr_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
if ((prev & pre_mask) != pre_mask) all_one = false;
while(mid_count-- > 0) {
prev = mi_atomic_and_acq_rel(field++, ~mid_mask);
@ -331,14 +331,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
uintptr_t pre_mask;
uintptr_t mid_mask;
uintptr_t post_mask;
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_zero = true;
bool any_zero = false;
_Atomic(uintptr_t)*field = &bitmap[idx];
uintptr_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
_Atomic(size_t)*field = &bitmap[idx];
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
if ((prev & pre_mask) != 0) all_zero = false;
if ((prev & pre_mask) != pre_mask) any_zero = true;
while (mid_count-- > 0) {
@ -360,14 +360,14 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
uintptr_t pre_mask;
uintptr_t mid_mask;
uintptr_t post_mask;
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_ones = true;
bool any_ones = false;
_Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t prev = mi_atomic_load_relaxed(field++);
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_load_relaxed(field++);
if ((prev & pre_mask) != pre_mask) all_ones = false;
if ((prev & pre_mask) != 0) any_ones = true;
while (mid_count-- > 0) {


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019,2020 Microsoft Research, Daan Leijen
Copyright (c) 2019-2020 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`uintptr_t`)
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
@ -24,11 +24,11 @@ between the fields. (This is used in arena allocation)
Bitmap definition
----------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE)
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
// An atomic bitmap of `uintptr_t` fields
typedef _Atomic(uintptr_t) mi_bitmap_field_t;
// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
// A bitmap index is the index of the bit in a bitmap.
@ -40,6 +40,11 @@ static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) {
return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS);
}
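For example, on a 64-bit build (MI_BITMAP_FIELD_BITS == 64) the new helper round-trips with the existing accessors:

    // mi_bitmap_index_create_from_bit(130) == mi_bitmap_index_create(2, 2)
    // mi_bitmap_index_field(idx) == 2, mi_bitmap_index_bit_in_field(idx) == 2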
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
@ -69,7 +74,7 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.


@ -1,5 +1,5 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -50,9 +50,9 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
#if MI_DEBUG>=2
static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1);
UNUSED(arg2);
UNUSED(pq);
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(pq);
mi_assert_internal(mi_page_heap(page) == heap);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(segment->thread_id == heap->thread_id);
@ -86,8 +86,8 @@ typedef enum mi_collect_e {
static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
UNUSED(arg2);
UNUSED(heap);
MI_UNUSED(arg2);
MI_UNUSED(heap);
mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
mi_collect_t collect = *((mi_collect_t*)arg_collect);
_mi_page_free_collect(page, collect >= MI_FORCE);
@ -104,10 +104,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
}
static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1);
UNUSED(arg2);
UNUSED(heap);
UNUSED(pq);
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break
}
@ -154,7 +154,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// collect regions on program-exit (or shared library unload)
if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
_mi_mem_collect(&heap->tld->os);
//_mi_mem_collect(&heap->tld->os);
}
}
@ -262,19 +262,19 @@ static void mi_heap_free(mi_heap_t* heap) {
----------------------------------------------------------- */
static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1);
UNUSED(arg2);
UNUSED(heap);
UNUSED(pq);
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats
const size_t bsize = mi_page_block_size(page);
if (bsize > MI_LARGE_OBJ_SIZE_MAX) {
if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, giant, bsize);
if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) {
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, large, bsize);
}
else {
mi_heap_stat_decrease(heap, huge, bsize);
@ -333,7 +333,7 @@ void mi_heap_destroy(mi_heap_t* heap) {
Safe Heap delete
----------------------------------------------------------- */
// Tranfer the pages from one heap to the other
// Transfer the pages from one heap to the other
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
mi_assert_internal(heap!=NULL);
if (from==NULL || from->page_count == 0) return;
@ -422,8 +422,8 @@ bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) {
UNUSED(heap);
UNUSED(pq);
MI_UNUSED(heap);
MI_UNUSED(pq);
bool* found = (bool*)vfound;
mi_segment_t* segment = _mi_page_segment(page);
void* start = _mi_page_start(segment, page, NULL);
@ -521,8 +521,8 @@ typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
UNUSED(heap);
UNUSED(pq);
MI_UNUSED(heap);
MI_UNUSED(pq);
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
mi_heap_area_ex_t xarea;
const size_t bsize = mi_page_block_size(page);


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -28,6 +28,9 @@ const mi_page_t _mi_page_empty = {
ATOMIC_VAR_INIT(0), // xthread_free
ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
#if MI_INTPTR_SIZE==8
, { 0 } // padding
#endif
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@ -54,8 +57,8 @@ const mi_page_t _mi_page_empty = {
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
QNULL(MI_LARGE_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \
QNULL(MI_LARGE_OBJ_WSIZE_MAX + 2) /* Full queue */ }
QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \
QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0,0}
@ -78,6 +81,18 @@ const mi_page_t _mi_page_empty = {
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
MI_STAT_COUNT_END_NULL()
// Empty slice span queues for every bin
#define SQNULL(sz) { NULL, NULL, sz }
#define MI_SEGMENT_SPAN_QUEUES_EMPTY \
{ SQNULL(1), \
SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \
SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \
SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \
SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \
SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ }
// --------------------------------------------------------
// Statically allocate an empty heap as the initial
// thread local value for the default heap,
@ -102,6 +117,18 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
false
};
#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os)))
mi_decl_cache_align static const mi_tld_t tld_empty = {
0,
false,
NULL, NULL,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_empty_stats, tld_empty_os }, // segments
{ 0, tld_empty_stats }, // os
{ MI_STATS_NULL } // stats
};
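The tld_empty_stats and tld_empty_os macros derive pointers to tld_empty's own stats and os members from its base address with offsetof; presumably this yields non-const address constants usable inside the const initializer itself, where a plain &tld_empty.stats would carry a const-qualified type.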
// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
@ -109,11 +136,8 @@ extern mi_heap_t _mi_heap_main;
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main, &_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, 0, NULL,
&tld_main.stats, &tld_main.os
}, // segments
&_mi_heap_main, & _mi_heap_main,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, &tld_main.stats, &tld_main.os }, // segments
{ 0, &tld_main.stats }, // os
{ MI_STATS_NULL } // stats
};
@ -141,7 +165,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
static void mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = _os_random_weak((uintptr_t)&mi_heap_main_init);
_mi_heap_main.cookie = _mi_os_random_weak((uintptr_t)&mi_heap_main_init);
_mi_random_init(&_mi_heap_main.random);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
@ -189,6 +213,7 @@ static bool _mi_heap_init(void) {
// OS allocated so already zero initialized
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id();
_mi_random_init(&heap->random);
@ -240,7 +265,10 @@ static bool _mi_heap_done(mi_heap_t* heap) {
// free if not the main thread
if (heap != &_mi_heap_main) {
mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
// the following assertion does not always hold for huge segments as those are always treated
// as abandoned: one may allocate it in one thread, but deallocate in another in which case
// the count can be too large or negative. todo: perhaps not count huge segments? see issue #363
// mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
_mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
}
#if 0
@ -331,6 +359,12 @@ bool _mi_is_main_thread(void) {
return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
}
static _Atomic(size_t) thread_count = ATOMIC_VAR_INIT(1);
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
// This is called from the `mi_malloc_generic`
void mi_thread_init(void) mi_attr_noexcept
{
@ -343,6 +377,7 @@ void mi_thread_init(void) mi_attr_noexcept
if (_mi_heap_init()) return; // returns true if already initialized
_mi_stat_increase(&_mi_stats_main.threads, 1);
mi_atomic_increment_relaxed(&thread_count);
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
@ -351,6 +386,7 @@ void mi_thread_done(void) mi_attr_noexcept {
}
static void _mi_thread_done(mi_heap_t* heap) {
mi_atomic_decrement_relaxed(&thread_count);
_mi_stat_decrease(&_mi_stats_main.threads, 1);
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
@ -441,7 +477,7 @@ static void mi_process_load(void) {
mi_heap_main_init();
#if defined(MI_TLS_RECURSE_GUARD)
volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
UNUSED(dummy);
MI_UNUSED(dummy);
#endif
os_preloading = false;
atexit(&mi_process_done);
@ -478,10 +514,11 @@ static void mi_detect_cpu_features(void) {
void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
if (_mi_process_is_initialized) return;
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
_mi_process_is_initialized = true;
mi_process_setup_auto_thread_done();
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
mi_detect_cpu_features();
_mi_os_init();
mi_heap_main_init();
@ -494,11 +531,18 @@ void mi_process_init(void) mi_attr_noexcept {
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at);
if (reserve_at != -1) {
mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500);
} else {
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
}
if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
long ksize = mi_option_get(mi_option_reserve_os_memory);
if (ksize > 0) mi_reserve_os_memory((size_t)ksize*KiB, true, true);
if (ksize > 0) {
mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */);
}
}
}
@ -536,8 +580,8 @@ static void mi_process_done(void) {
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// Windows DLL: easy to hook into process_init and thread_done
__declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
UNUSED(reserved);
UNUSED(inst);
MI_UNUSED(reserved);
MI_UNUSED(inst);
if (reason==DLL_PROCESS_ATTACH) {
mi_process_load();
}
@ -569,7 +613,7 @@ static void mi_process_done(void) {
return 0;
}
typedef int(*_crt_cb)(void);
#ifdef _M_X64
#if defined(_M_X64) || defined(_M_ARM64)
__pragma(comment(linker, "/include:" "_mi_msvc_initu"))
#pragma section(".CRT$XIU", long, read)
#else


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -19,10 +19,10 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
static uintptr_t mi_max_error_count = 16; // stop outputting errors after this
static uintptr_t mi_max_warning_count = 16; // stop outputting warnings after this
static size_t mi_max_error_count = 16; // stop outputting errors after this
static size_t mi_max_warning_count = 16; // stop outputting warnings after this
static void mi_add_stderr_output();
static void mi_add_stderr_output(void);
int mi_version(void) mi_attr_noexcept {
return MI_MALLOC_VERSION;
@ -66,33 +66,37 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense.
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`)
#if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
{ 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else
{ 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED
#endif
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{ -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
#if defined(__NetBSD__)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
#elif defined(_WIN32)
{ 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#else
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#endif
{ 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds)
{ 25, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit)
{ 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output
};
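These options can be set programmatically before mimalloc is first used, or through MIMALLOC_-prefixed environment variables read by mi_option_init further below. A sketch of the programmatic route, with values chosen purely for illustration:

    #include <mimalloc.h>

    int main(void) {
      mi_option_set(mi_option_reserve_huge_os_pages, 1);  // reserve one 1GiB huge page
      mi_option_enable(mi_option_verbose);                // log each option on init
      void* p = mi_malloc(42);
      mi_free(p);
      return 0;
    }

For options consumed during process initialization (such as reserve_huge_os_pages), the environment route, e.g. MIMALLOC_RESERVE_HUGE_OS_PAGES=1, is the more reliable choice when mimalloc is preloaded.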
static void mi_option_init(mi_option_desc_t* desc);
@ -103,7 +107,7 @@ void _mi_options_init(void) {
mi_add_stderr_output(); // now it safe to use stderr for output
for(int i = 0; i < _mi_option_last; i++ ) {
mi_option_t option = (mi_option_t)i;
long l = mi_option_get(option); UNUSED(l); // initialize
long l = mi_option_get(option); MI_UNUSED(l); // initialize
if (option != mi_option_verbose) {
mi_option_desc_t* desc = &options[option];
_mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
@ -113,7 +117,7 @@ void _mi_options_init(void) {
mi_max_warning_count = mi_option_get(mi_option_max_warnings);
}
long mi_option_get(mi_option_t option) {
mi_decl_nodiscard long mi_option_get(mi_option_t option) {
mi_assert(option >= 0 && option < _mi_option_last);
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
@ -139,7 +143,7 @@ void mi_option_set_default(mi_option_t option, long value) {
}
}
bool mi_option_is_enabled(mi_option_t option) {
mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) {
return (mi_option_get(option) != 0);
}
@ -161,7 +165,7 @@ void mi_option_disable(mi_option_t option) {
static void mi_out_stderr(const char* msg, void* arg) {
UNUSED(arg);
MI_UNUSED(arg);
#ifdef _WIN32
// on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output.
@ -176,19 +180,19 @@ static void mi_out_stderr(const char* msg, void* arg) {
// an output function is registered it is called immediately with
// the output up to that point.
#ifndef MI_MAX_DELAY_OUTPUT
#define MI_MAX_DELAY_OUTPUT ((uintptr_t)(32*1024))
#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024))
#endif
static char out_buf[MI_MAX_DELAY_OUTPUT+1];
static _Atomic(uintptr_t) out_len;
static _Atomic(size_t) out_len;
static void mi_out_buf(const char* msg, void* arg) {
UNUSED(arg);
MI_UNUSED(arg);
if (msg==NULL) return;
if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
size_t n = strlen(msg);
if (n==0) return;
// claim space
uintptr_t start = mi_atomic_add_acq_rel(&out_len, n);
size_t start = mi_atomic_add_acq_rel(&out_len, n);
if (start >= MI_MAX_DELAY_OUTPUT) return;
// check bound
if (start+n >= MI_MAX_DELAY_OUTPUT) {
@ -251,15 +255,15 @@ static void mi_add_stderr_output() {
// --------------------------------------------------------
// Messages, all end up calling `_mi_fputs`.
// --------------------------------------------------------
static _Atomic(uintptr_t) error_count; // = 0; // when >= max_error_count stop emitting errors
static _Atomic(uintptr_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings
static _Atomic(size_t) error_count; // = 0; // when >= max_error_count stop emitting errors
static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings
// When overriding malloc, we may recurse into mi_vfprintf if an allocation
// inside the C runtime causes another message.
static mi_decl_thread bool recurse = false;
static bool mi_recurse_enter(void) {
#if defined(__MACH__) || defined(MI_TLS_RECURSE_GUARD)
#if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD)
if (_mi_preloading()) return true;
#endif
if (recurse) return false;
@ -268,7 +272,7 @@ static bool mi_recurse_enter(void) {
}
static void mi_recurse_exit(void) {
#if defined(__MACH__) || defined(MI_TLS_RECURSE_GUARD)
#if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD)
if (_mi_preloading()) return;
#endif
recurse = false;
@ -353,7 +357,7 @@ static mi_error_fun* volatile mi_error_handler; // = NULL
static _Atomic(void*) mi_error_arg; // = NULL
static void mi_error_default(int err) {
UNUSED(err);
MI_UNUSED(err);
#if (MI_DEBUG>0)
if (err==EFAULT) {
#ifdef _MSC_VER
@ -409,6 +413,14 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
dest[dest_size - 1] = 0;
}
#ifdef MI_NO_GETENV
static bool mi_getenv(const char* name, char* result, size_t result_size) {
MI_UNUSED(name);
MI_UNUSED(result);
MI_UNUSED(result_size);
return false;
}
#else
static inline int mi_strnicmp(const char* s, const char* t, size_t n) {
if (n==0) return 0;
for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
@ -416,7 +428,6 @@ static inline int mi_strnicmp(const char* s, const char* t, size_t n) {
}
return (n==0 ? 0 : *s - *t);
}
#if defined _WIN32
// On Windows use GetEnvironmentVariable instead of getenv to work
// reliably even when this is invoked before the C runtime is initialized.
@ -484,7 +495,8 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
return false;
}
}
#endif
#endif // !MI_USE_ENVIRON
#endif // !MI_NO_GETENV
static void mi_option_init(mi_option_desc_t* desc) {
// Read option value from the environment
@ -513,9 +525,9 @@ static void mi_option_init(mi_option_desc_t* desc) {
if (desc->option == mi_option_reserve_os_memory) {
// this option is interpreted in KiB to prevent overflow of `long`
if (*end == 'K') { end++; }
else if (*end == 'M') { value *= KiB; end++; }
else if (*end == 'G') { value *= MiB; end++; }
else { value = (value + KiB - 1) / KiB; }
else if (*end == 'M') { value *= MI_KiB; end++; }
else if (*end == 'G') { value *= MI_MiB; end++; }
else { value = (value + MI_KiB - 1) / MI_KiB; }
if (*end == 'B') { end++; }
}
if (*end == 0) {
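For illustration, here is a hypothetical standalone version of the suffix handling above (not part of mimalloc); keeping the value in KiB is what lets even multi-GiB settings fit in a `long`:

#include <stdlib.h>

// returns the setting in KiB, or -1 on trailing garbage (illustrative)
static long parse_kib(const char* s) {
  char* end;
  long value = strtol(s, &end, 10);
  if (*end == 'K') { end++; }
  else if (*end == 'M') { value *= 1024; end++; }          // MiB -> KiB
  else if (*end == 'G') { value *= 1024 * 1024; end++; }   // GiB -> KiB
  else { value = (value + 1023) / 1024; }                  // bytes -> KiB, rounded up
  if (*end == 'B') { end++; }
  return (*end == 0 ? value : -1);
}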

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -26,16 +26,20 @@ terms of the MIT license. A copy of the license can be found in the file
#pragma warning(disable:4996) // strerror
#endif
#if defined(__wasi__)
#define MI_USE_SBRK
#endif
#if defined(_WIN32)
#include <windows.h>
#elif defined(__wasi__)
// stdlib.h is all we need, and has already been included in mimalloc.h
#include <unistd.h> // sbrk
#else
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#if defined(__linux__)
#include <features.h>
#include <fcntl.h>
#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
#else
@ -48,9 +52,13 @@ terms of the MIT license. A copy of the license can be found in the file
#include <mach/vm_statistics.h>
#endif
#endif
#if defined(__HAIKU__)
#define madvise posix_madvise
#define MADV_DONTNEED POSIX_MADV_DONTNEED
#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/param.h>
#if __FreeBSD_version >= 1200000
#include <sys/cpuset.h>
#include <sys/domainset.h>
#endif
#include <sys/sysctl.h>
#endif
#endif
@ -65,10 +73,6 @@ static void* mi_align_up_ptr(void* p, size_t alignment) {
return (void*)_mi_align_up((uintptr_t)p, alignment);
}
static uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
return (sz / alignment) * alignment;
}
static void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}
@ -82,31 +86,41 @@ static size_t os_alloc_granularity = 4096;
// if non-zero, use large page allocation
static size_t large_os_page_size = 0;
// is memory overcommit allowed?
// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE)
static bool os_overcommit = true;
bool _mi_os_has_overcommit(void) {
return os_overcommit;
}
// OS (small) page size
size_t _mi_os_page_size() {
return os_page_size;
}
// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
size_t _mi_os_large_page_size() {
size_t _mi_os_large_page_size(void) {
return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size());
}
#if !defined(MI_USE_SBRK) && !defined(__wasi__)
static bool use_large_os_page(size_t size, size_t alignment) {
// if we have access, check the size and alignment requirements
if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false;
return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0);
}
#endif
// round to a good OS allocation size (bounded by max 12.5% waste)
size_t _mi_os_good_alloc_size(size_t size) {
size_t align_size;
if (size < 512*KiB) align_size = _mi_os_page_size();
else if (size < 2*MiB) align_size = 64*KiB;
else if (size < 8*MiB) align_size = 256*KiB;
else if (size < 32*MiB) align_size = 1*MiB;
else align_size = 4*MiB;
if (size >= (SIZE_MAX - align_size)) return size; // possible overflow?
if (size < 512*MI_KiB) align_size = _mi_os_page_size();
else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
else align_size = 4*MI_MiB;
if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow?
return _mi_align_up(size, align_size);
}
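A quick worked example of that bound (illustrative numbers):

// a request of 5*MI_MiB + 1 bytes falls in the "< 8 MiB" bucket, so it
// is rounded up to the next multiple of 256 KiB:
//   _mi_os_good_alloc_size(5*MI_MiB + 1) == 5*MI_MiB + 256*MI_KiB
// the worst case in that bucket is a request just over 2 MiB, which can
// waste almost 256 KiB, i.e. the stated 12.5% bound.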
@ -168,7 +182,9 @@ static bool mi_win_enable_large_os_pages()
return (ok!=0);
}
void _mi_os_init(void) {
void _mi_os_init(void)
{
os_overcommit = false;
// get the page size
SYSTEM_INFO si;
GetSystemInfo(&si);
@ -203,10 +219,36 @@ void _mi_os_init(void) {
}
#elif defined(__wasi__)
void _mi_os_init() {
os_page_size = 0x10000; // WebAssembly has a fixed page size: 64KB
os_overcommit = false;
os_page_size = 0x10000; // WebAssembly has a fixed page size: 64KiB
os_alloc_granularity = 16;
}
#else // generic unix
static void os_detect_overcommit(void) {
#if defined(__linux__)
int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
if (fd < 0) return;
char buf[128];
ssize_t nread = read(fd, &buf, sizeof(buf));
close(fd);
// <https://www.kernel.org/doc/Documentation/vm/overcommit-accounting>
// 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE)
if (nread >= 1) {
os_overcommit = (buf[0] == '0' || buf[0] == '1');
}
#elif defined(__FreeBSD__)
int val = 0;
size_t olen = sizeof(val);
if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) {
os_overcommit = (val != 0);
}
#else
// default: overcommit is true
#endif
}
void _mi_os_init() {
// get the page size
long result = sysconf(_SC_PAGESIZE);
@ -214,7 +256,8 @@ void _mi_os_init() {
os_page_size = (size_t)result;
os_alloc_granularity = os_page_size;
}
large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
os_detect_overcommit();
}
#endif
@ -229,8 +272,8 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
bool err = false;
#if defined(_WIN32)
err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
#elif defined(__wasi__)
err = 0; // WebAssembly's heap cannot be shrunk
#elif defined(MI_USE_SBRK)
err = 0; // sbrk heap cannot be shrunk
#else
err = (munmap(addr, size) == -1);
#endif
@ -245,22 +288,32 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
}
}
#if !defined(MI_USE_SBRK) && !defined(__wasi__)
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
#endif
#ifdef _WIN32
#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE)
static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
#if (MI_INTPTR_SIZE >= 8)
// on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations
void* hint;
if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) {
void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
if (p != NULL) return p;
DWORD err = GetLastError();
if (err != ERROR_INVALID_ADDRESS && // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
return NULL;
// on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
if (addr == NULL) {
void* hint = mi_os_get_aligned_hint(try_alignment,size);
if (hint != NULL) {
void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
if (p != NULL) return p;
// for robustness always fall through in case of an error
/*
DWORD err = GetLastError();
if (err != ERROR_INVALID_ADDRESS && // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
return NULL;
}
*/
_mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %d, flags: %x)\n", size, GetLastError(), hint, try_alignment, flags);
}
// fall through
}
#endif
#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
@ -271,7 +324,10 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
param.Type = MemExtendedParameterAddressRequirements;
param.Pointer = &reqs;
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
if (p != NULL) return p;
_mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %d, flags: %x)\n", size, GetLastError(), addr, try_alignment, flags);
// fall through on error
}
#endif
// last resort
@ -280,11 +336,11 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
mi_assert_internal(!(large_only && !allow_large));
static _Atomic(uintptr_t) large_page_try_ok; // = 0;
static _Atomic(size_t) large_page_try_ok; // = 0;
void* p = NULL;
if ((large_only || use_large_os_page(size, try_alignment))
&& allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
if (!large_only && try_ok > 0) {
// if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
// therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
@ -311,7 +367,32 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
return p;
}
#elif defined(__wasi__)
#elif defined(MI_USE_SBRK)
#define MI_SBRK_FAIL ((void*)(-1))
static void* mi_sbrk_heap_grow(size_t size, size_t try_alignment) {
void* pbase0 = sbrk(0);
if (pbase0 == MI_SBRK_FAIL) {
_mi_warning_message("unable to allocate sbrk() OS memory (%zu bytes)\n", size);
errno = ENOMEM;
return NULL;
}
uintptr_t base = (uintptr_t)pbase0;
uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment);
size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size());
mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0);
if (alloc_size < size) return NULL;
void* pbase1 = sbrk(alloc_size);
if (pbase1 == MI_SBRK_FAIL) {
_mi_warning_message("unable to allocate sbrk() OS memory (%zu bytes, %zu requested)\n", size, alloc_size);
errno = ENOMEM;
return NULL;
}
mi_assert(pbase0 == pbase1);
return (void*)aligned_base;
}
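To make the pointer arithmetic above concrete, assume (illustratively) a 4 KiB page and a 4 MiB alignment request:

//   sbrk(0)       -> base = 0x12345000
//   aligned_base  = _mi_align_up(base, 4 MiB) = 0x12400000
//   alloc_size    = _mi_align_up(aligned_base - base + size, 4096)
// a single sbrk(alloc_size) then both aligns the break and reserves
// `size` usable bytes starting at aligned_base.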
#elif defined(__wasi__)
// currently unused as we use sbrk() on wasm
static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size();
uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment);
@ -319,31 +400,61 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0);
if (alloc_size < size) return NULL;
if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) {
_mi_warning_message("unable to allocate wasm_memory_grow() OS memory (%zu bytes, %zu requested)\n", size, alloc_size);
errno = ENOMEM;
return NULL;
}
return (void*)aligned_base;
}
#else
#define MI_OS_USE_MMAP
static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
void* p = NULL;
#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
// on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
void* hint;
if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) {
p = mmap(hint,size,protect_flags,flags,fd,0);
if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
MI_UNUSED(try_alignment);
#if defined(MAP_ALIGNED) // BSD
if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
size_t n = mi_bsr(try_alignment);
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
flags |= MAP_ALIGNED(n);
void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
if (p!=MAP_FAILED) return p;
// fall back to regular mmap
}
}
#elif defined(MAP_ALIGN) // Solaris
if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
if (p!=MAP_FAILED) return p;
// fall back to regular mmap
}
#else
UNUSED(try_alignment);
UNUSED(mi_os_get_aligned_hint);
#endif
if (p==NULL) {
p = mmap(addr,size,protect_flags,flags,fd,0);
if (p==MAP_FAILED) p = NULL;
#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
// on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations
if (addr == NULL) {
void* hint = mi_os_get_aligned_hint(try_alignment, size);
if (hint != NULL) {
void* p = mmap(hint, size, protect_flags, flags, fd, 0);
if (p!=MAP_FAILED) return p;
// fall back to regular mmap
}
}
return p;
#endif
// regular mmap
void* p = mmap(addr, size, protect_flags, flags, fd, 0);
if (p!=MAP_FAILED) return p;
// failed to allocate
return NULL;
}
static int mi_unix_mmap_fd(void) {
#if defined(VM_MAKE_TAG)
// macOS: tracking anonymous pages with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
int os_tag = (int)mi_option_get(mi_option_os_tag);
if (os_tag < 100 || os_tag > 255) os_tag = 100;
return VM_MAKE_TAG(os_tag);
#else
return -1;
#endif
}
static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
@ -354,28 +465,18 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#if !defined(MAP_NORESERVE)
#define MAP_NORESERVE 0
#endif
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
int fd = -1;
#if defined(MAP_ALIGNED) // BSD
if (try_alignment > 0) {
size_t n = mi_bsr(try_alignment);
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
flags |= MAP_ALIGNED(n);
}
}
#endif
const int fd = mi_unix_mmap_fd();
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
if (_mi_os_has_overcommit()) {
flags |= MAP_NORESERVE;
}
#if defined(PROT_MAX)
protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
#endif
#if defined(VM_MAKE_TAG)
// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
int os_tag = (int)mi_option_get(mi_option_os_tag);
if (os_tag < 100 || os_tag > 255) os_tag = 100;
fd = VM_MAKE_TAG(os_tag);
#endif
#endif
// huge page allocation
if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
static _Atomic(uintptr_t) large_page_try_ok; // = 0;
uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
static _Atomic(size_t) large_page_try_ok; // = 0;
size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
if (!large_only && try_ok > 0) {
// If the OS is not configured for large OS pages, or the user does not have
// enough permission, the `mmap` will always fail (but it might also fail for other reasons).
@ -394,7 +495,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#endif
#ifdef MAP_HUGE_1GB
static bool mi_huge_pages_available = true;
if ((size % GiB) == 0 && mi_huge_pages_available) {
if ((size % MI_GiB) == 0 && mi_huge_pages_available) {
lflags |= MAP_HUGE_1GB;
}
else
@ -421,37 +522,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#endif
if (large_only) return p;
if (p == NULL) {
mi_atomic_store_release(&large_page_try_ok, (uintptr_t)10); // on error, don't try again for the next N allocations
mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations
}
}
}
}
// regular allocation
if (p == NULL) {
*is_large = false;
p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
#if defined(MADV_HUGEPAGE)
// Many Linux systems don't allow MAP_HUGETLB but they support instead
// transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
// though since properly aligned allocations will already use large pages if available
// in that case -- in particular for our large regions (in `memory.c`).
// However, some systems only allow THP if called with explicit `madvise`, so
// when large OS pages are enabled for mimalloc, we call `madvise` anyways.
if (allow_large && use_large_os_page(size, try_alignment)) {
if (madvise(p, size, MADV_HUGEPAGE) == 0) {
*is_large = true; // possibly
};
}
#endif
#if defined(__sun)
if (allow_large && use_large_os_page(size, try_alignment)) {
struct memcntl_mha cmd = {0};
cmd.mha_pagesize = large_os_page_size;
cmd.mha_cmd = MHA_MAPSIZE_VA;
if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
*is_large = true;
if (p != NULL) {
#if defined(MADV_HUGEPAGE)
// Many Linux systems don't allow MAP_HUGETLB but they support instead
// transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE
// though since properly aligned allocations will already use large pages if available
// in that case -- in particular for our large regions (in `memory.c`).
// However, some systems only allow THP if called with explicit `madvise`, so
// when large OS pages are enabled for mimalloc, we call `madvise` anyways.
if (allow_large && use_large_os_page(size, try_alignment)) {
if (madvise(p, size, MADV_HUGEPAGE) == 0) {
*is_large = true; // possibly
};
}
#elif defined(__sun)
if (allow_large && use_large_os_page(size, try_alignment)) {
struct memcntl_mha cmd = {0};
cmd.mha_pagesize = large_os_page_size;
cmd.mha_cmd = MHA_MAPSIZE_VA;
if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
*is_large = true;
}
}
#endif
}
#endif
}
if (p == NULL) {
_mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large);
@ -461,8 +564,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#endif
// On 64-bit systems, we can do efficient aligned allocation by using
// the 4TiB to 30TiB area to allocate them.
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
// the 2TiB to 30TiB area to allocate them.
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || defined(MI_OS_USE_MMAP))
static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
// Return a 4MiB aligned address that is probably available.
@ -480,7 +583,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL;
if ((size%MI_SEGMENT_SIZE) != 0) return NULL;
if (size > 1*GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096.
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096.
#if (MI_SECURE>0)
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
#endif
@ -499,9 +602,11 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
if (hint%try_alignment != 0) return NULL;
return (void*)hint;
}
#elif defined(__wasi__) || defined(MI_USE_SBRK)
// no need for mi_os_get_aligned_hint
#else
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
UNUSED(try_alignment); UNUSED(size);
MI_UNUSED(try_alignment); MI_UNUSED(size);
return NULL;
}
#endif
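Stripped of the security padding and overflow handling, the hint generator is an atomic bump pointer over a large address window; a simplified sketch with illustrative constants and names:

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define HINT_BASE ((uintptr_t)2 << 40)   // start probing at 2TiB (illustrative)
#define HINT_AREA ((uintptr_t)4 << 40)   // probe within a 4TiB window

static _Atomic(uintptr_t) hint_offset;   // advances by each allocation size

static void* next_aligned_hint(size_t size) {     // size: multiple of 4 MiB
  uintptr_t ofs = atomic_fetch_add(&hint_offset, (uintptr_t)size);
  return (void*)(HINT_BASE + (ofs % HINT_AREA));  // stays 4 MiB aligned
}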
@ -529,7 +634,12 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
int flags = MEM_RESERVE;
if (commit) flags |= MEM_COMMIT;
p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
#elif defined(MI_USE_SBRK)
MI_UNUSED(allow_large);
*is_large = false;
p = mi_sbrk_heap_grow(size, try_alignment);
#elif defined(__wasi__)
MI_UNUSED(allow_large);
*is_large = false;
p = mi_wasm_heap_grow(size, try_alignment);
#else
@ -586,6 +696,10 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
mi_os_mem_free(p, over_size, commit, stats);
void* aligned_p = mi_align_up_ptr(p, alignment);
p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large);
if (p != NULL) {
_mi_stat_increase(&stats->reserved, size);
if (commit) { _mi_stat_increase(&stats->committed, size); }
}
if (p == aligned_p) break; // success!
if (p != NULL) { // should not happen?
mi_os_mem_free(p, size, commit, stats);
@ -619,7 +733,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
----------------------------------------------------------- */
void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
@ -628,7 +742,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
}
void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) {
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
if (size == 0 || p == NULL) return;
size = _mi_os_good_alloc_size(size);
@ -641,7 +755,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) {
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
{
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
alignment = _mi_align_up(alignment, _mi_os_page_size());
@ -692,7 +806,7 @@ static void mi_mprotect_hint(int err) {
" > sudo sysctl -w vm.max_map_count=262144\n");
}
#else
UNUSED(err);
MI_UNUSED(err);
#endif
}
@ -716,8 +830,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
#if defined(_WIN32)
if (commit) {
// if the memory was already committed, the call succeeds but it is not zero'd
// *is_zero = true;
// *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd
void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE);
err = (p == start ? 0 : GetLastError());
}
@ -727,26 +840,40 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
}
#elif defined(__wasi__)
// WebAssembly guests can't control memory protection
#elif defined(MAP_FIXED)
if (!commit) {
// use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge)
void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0);
if (p != start) { err = errno; }
}
else {
// for commit, just change the protection
#elif 0 && defined(MAP_FIXED) && !defined(__APPLE__)
// Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?)
if (commit) {
// commit: just change the protection
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
#if defined(MADV_FREE_REUSE)
while ((err = madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; }
#endif
}
else {
// decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss)
const int fd = mi_unix_mmap_fd();
void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0);
if (p != start) { err = errno; }
}
#else
err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE));
if (err != 0) { err = errno; }
// Linux, macOSX and others.
if (commit) {
// commit: ensure we can access the area
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
}
else {
#if defined(MADV_DONTNEED)
// decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
// (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( )
err = madvise(start, csize, MADV_DONTNEED);
#else
// decommit: just disable access
err = mprotect(start, csize, PROT_NONE);
if (err != 0) { err = errno; }
#endif
}
#endif
if (err != 0) {
_mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
_mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
mi_mprotect_hint(err);
}
mi_assert_internal(err == 0);
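On the generic POSIX path the two directions therefore pair up roughly as follows (illustrative helpers, assuming the MADV_DONTNEED branch above):

#include <stddef.h>
#include <sys/mman.h>

// decommit: drop the pages so rss shrinks immediately; the address range
// stays reserved and later accesses fault in fresh zero-filled pages
static int os_decommit(void* p, size_t n) {
  return madvise(p, n, MADV_DONTNEED);
}

// commit: make the range read/write accessible again (only strictly needed
// when the decommit path changed protection, e.g. via PROT_NONE)
static int os_commit(void* p, size_t n) {
  return mprotect(p, n, PROT_READ | PROT_WRITE);
}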
@ -754,13 +881,13 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
}
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats);
}
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
bool is_zero;
return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats);
@ -801,18 +928,13 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
if (p != start) return false;
#else
#if defined(MADV_FREE)
#if defined(MADV_FREE_REUSABLE)
#define KK_MADV_FREE_INITIAL MADV_FREE_REUSABLE
#else
#define KK_MADV_FREE_INITIAL MADV_FREE
#endif
static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(KK_MADV_FREE_INITIAL);
static _Atomic(size_t) advice = ATOMIC_VAR_INIT(MADV_FREE);
int oadvice = (int)mi_atomic_load_relaxed(&advice);
int err;
while ((err = madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; };
if (err != 0 && errno == EINVAL && oadvice == KK_MADV_FREE_INITIAL) {
// if MADV_FREE/MADV_FREE_REUSABLE is not supported, fall back to MADV_DONTNEED from now on
mi_atomic_store_release(&advice, (uintptr_t)MADV_DONTNEED);
if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
// if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED);
err = madvise(start, csize, MADV_DONTNEED);
}
#elif defined(__wasi__)
@ -821,7 +943,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
int err = madvise(start, csize, MADV_DONTNEED);
#endif
if (err != 0) {
_mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno);
_mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno);
}
//mi_assert(err == 0);
if (err != 0) return false;
@ -834,7 +956,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
if (mi_option_is_enabled(mi_option_reset_decommits)) {
return _mi_os_decommit(addr, size, stats);
@ -845,7 +967,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
}
bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
if (mi_option_is_enabled(mi_option_reset_decommits)) {
return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!)
@ -880,7 +1002,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
if (err != 0) { err = errno; }
#endif
if (err != 0) {
_mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err);
_mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err);
mi_mprotect_hint(err);
}
return (err == 0);
@ -921,12 +1043,12 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {
Support for allocating huge OS pages (1GiB) that are reserved up-front
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE (GiB)
#define MI_HUGE_OS_PAGE_SIZE (MI_GiB)
#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8)
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
{
mi_assert_internal(size%GiB == 0);
mi_assert_internal(size%MI_GiB == 0);
mi_assert_internal(addr != NULL);
const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
@ -957,7 +1079,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
else {
// fall back to regular large pages
mi_huge_pages_available = false; // don't try further huge pages
_mi_warning_message("unable to allocate using huge (1gb) pages, trying large (2mb) pages instead (status 0x%lx)\n", err);
_mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
}
}
// on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
@ -967,7 +1089,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
}
#else
UNUSED(numa_node);
MI_UNUSED(numa_node);
#endif
// otherwise use regular virtual alloc on older windows
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
@ -984,30 +1106,30 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons
}
#else
static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags);
MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags);
return 0;
}
#endif
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
mi_assert_internal(size%GiB == 0);
mi_assert_internal(size%MI_GiB == 0);
bool is_large = true;
void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
if (p == NULL) return NULL;
if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
uintptr_t numa_mask = (1UL << numa_node);
unsigned long numa_mask = (1UL << numa_node);
// TODO: does `mbind` work correctly for huge OS pages? should we
// use `set_mempolicy` before calling mmap instead?
// see: <https://lkml.org/lkml/2017/2/9/875>
long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
if (err != 0) {
_mi_warning_message("failed to bind huge (1gb) pages to numa node %d: %s\n", numa_node, strerror(errno));
_mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno));
}
}
return p;
}
#else
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
UNUSED(addr); UNUSED(size); UNUSED(numa_node);
MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node);
return NULL;
}
#endif
@ -1043,7 +1165,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
UNUSED(pages);
MI_UNUSED(pages);
if (total_size != NULL) *total_size = 0;
return NULL;
}
@ -1186,6 +1308,35 @@ static size_t mi_os_numa_node_countx(void) {
}
return (node+1);
}
#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000
static size_t mi_os_numa_nodex(void) {
domainset_t dom;
size_t node;
int policy;
if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul;
for (node = 0; node < MAXMEMDOM; node++) {
if (DOMAINSET_ISSET(node, &dom)) return node;
}
return 0ul;
}
static size_t mi_os_numa_node_countx(void) {
size_t ndomains = 0;
size_t len = sizeof(ndomains);
if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul;
return ndomains;
}
#elif defined(__DragonFly__)
static size_t mi_os_numa_nodex(void) {
// TODO: DragonFly does not seem to provide any userland means to get this information.
return 0ul;
}
static size_t mi_os_numa_node_countx(void) {
size_t ncpus = 0, nvirtcoresperphys = 0;
size_t len = sizeof(size_t);
if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul;
if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul;
return nvirtcoresperphys * ncpus;
}
#else
static size_t mi_os_numa_nodex(void) {
return 0;
@ -1195,21 +1346,27 @@ static size_t mi_os_numa_node_countx(void) {
}
#endif
size_t _mi_numa_node_count = 0; // cache the node count
_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count_get(void) {
if (mi_unlikely(_mi_numa_node_count <= 0)) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
if (count <= 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically
_mi_numa_node_count = (size_t)(ncount <= 0 ? 1 : ncount);
_mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count);
if (ncount > 0) {
count = (size_t)ncount;
}
else {
count = mi_os_numa_node_countx(); // or detect dynamically
if (count == 0) count = 1;
}
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
}
mi_assert_internal(_mi_numa_node_count >= 1);
return _mi_numa_node_count;
return count;
}
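This is the usual lock-free lazy-initialization idiom: racing threads may compute the count twice, but they store the same value, so an acquire load plus a release store suffices and no lock is needed. A generic sketch (hypothetical names):

#include <stdatomic.h>
#include <stddef.h>

static _Atomic(size_t) cached_count;   // 0 means "not computed yet"

static size_t get_count(size_t (*compute)(void)) {
  size_t v = atomic_load_explicit(&cached_count, memory_order_acquire);
  if (v == 0) {
    v = compute();
    if (v == 0) v = 1;                 // never store 0: it marks "unset"
    atomic_store_explicit(&cached_count, v, memory_order_release);
  }
  return v;
}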
int _mi_os_numa_node_get(mi_os_tld_t* tld) {
UNUSED(tld);
MI_UNUSED(tld);
size_t numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0

View File

@ -1,5 +1,5 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -34,15 +34,15 @@ terms of the MIT license. A copy of the license can be found in the file
static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+sizeof(uintptr_t)));
return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+sizeof(uintptr_t)));
}
static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+(2*sizeof(uintptr_t))));
return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+(2*sizeof(uintptr_t))));
}
static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
return (pq->block_size > MI_LARGE_OBJ_SIZE_MAX);
return (pq->block_size > MI_MEDIUM_OBJ_SIZE_MAX);
}
/* -----------------------------------------------------------
@ -72,11 +72,11 @@ extern inline uint8_t _mi_bin(size_t size) {
bin = (uint8_t)wsize;
}
#endif
else if (wsize > MI_LARGE_OBJ_WSIZE_MAX) {
else if (wsize > MI_MEDIUM_OBJ_WSIZE_MAX) {
bin = MI_BIN_HUGE;
}
else {
#if defined(MI_ALIGN4W)
#if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
@ -104,7 +104,7 @@ size_t _mi_bin_size(uint8_t bin) {
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_OBJ_SIZE_MAX) {
if (size <= MI_MEDIUM_OBJ_SIZE_MAX) {
return _mi_bin_size(_mi_bin(size));
}
else {
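A small usage sketch of `mi_good_size` (the exact bin value is illustrative and depends on the build):

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  size_t g = mi_good_size(100);   // e.g. rounds 100 up to its bin size
  void* p = mi_malloc(g);         // this allocation wastes no bin space
  printf("good size for 100 bytes: %zu\n", g);
  mi_free(p);
  return 0;
}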
@ -202,8 +202,9 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == queue->last) queue->last = page->prev;
@ -224,9 +225,10 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page));
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_assert_internal(page->xblock_size == queue->block_size ||
(page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
(page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
@ -252,6 +254,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(from, page));
mi_assert_expensive(!mi_page_queue_contains(to, page));
mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) ||
(page->xblock_size == to->block_size && mi_page_queue_is_full(from)) ||
(page->xblock_size == from->block_size && mi_page_queue_is_full(to)) ||

View File

@ -1,5 +1,5 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* -----------------------------------------------------------
The core of the allocator. Every segment contains
pages of a {certain block size. The main function
pages of a certain block size. The main function
exported is `mi_malloc_generic`.
----------------------------------------------------------- */
@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Index a block in a page
static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) {
UNUSED(page);
MI_UNUSED(page);
mi_assert_internal(page != NULL);
mi_assert_internal(i <= page->reserved);
return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
@ -74,19 +74,20 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->used <= page->capacity);
mi_assert_internal(page->capacity <= page->reserved);
const size_t bsize = mi_page_block_size(page);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL));
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
//const size_t bsize = mi_page_block_size(page);
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
mi_assert_internal(mi_page_list_is_valid(page,page->local_free));
#if MI_DEBUG>3 // generally too expensive to check this
if (page->flags.is_zero) {
for(mi_block_t* block = page->free; block != NULL; mi_block_next(page,block)) {
mi_assert_expensive(mi_mem_is_zero(block + 1, page->block_size - sizeof(mi_block_t)));
if (page->is_zero) {
const size_t ubsize = mi_page_usable_block_size(page);
for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t)));
}
}
#endif
@ -109,11 +110,12 @@ bool _mi_page_is_valid(mi_page_t* page) {
#endif
if (mi_page_heap(page)!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
if (segment->page_kind != MI_PAGE_HUGE) {
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id);
if (segment->kind != MI_SEGMENT_HUGE) {
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
}
}
@ -229,9 +231,10 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
// called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_assert_internal(!page->is_reset);
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
@ -242,14 +245,12 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
// allocate a fresh page from a segment
static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
mi_assert_internal(pq==NULL||block_size == pq->block_size);
mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
return NULL;
}
// a fresh page was found, initialize it
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_page_init(heap, page, block_size, heap->tld);
_mi_stat_increase(&heap->tld->stats.pages, 1);
if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
@ -366,9 +367,22 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
// no more aligned blocks in here
mi_page_set_has_aligned(page, false);
mi_heap_t* heap = mi_page_heap(page);
const size_t bsize = mi_page_block_size(page);
if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) {
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
_mi_stat_decrease(&heap->tld->stats.large, bsize);
}
else {
// not strictly necessary as we never get here for a huge page
mi_assert_internal(false);
_mi_stat_decrease(&heap->tld->stats.huge, bsize);
}
}
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
mi_segments_tld_t* segments_tld = &heap->tld->segments;
mi_page_queue_remove(pq, page);
// and free it
@ -376,7 +390,8 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
_mi_segment_page_free(page, force, segments_tld);
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX
// Retire parameters
#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX
#define MI_RETIRE_CYCLES (8)
// Retire a page with no more used blocks
@ -385,11 +400,11 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
// Note: called from `mi_free` and benchmarks often
// trigger this due to freeing everything and then
// allocating again so be careful when changing this.
void _mi_page_retire(mi_page_t* page) {
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_all_free(page));
mi_page_set_has_aligned(page, false);
// don't retire too often..
@ -402,7 +417,7 @@ void _mi_page_retire(mi_page_t* page) {
if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) {
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;
@ -413,7 +428,6 @@ void _mi_page_retire(mi_page_t* page) {
return; // don't free after all
}
}
_mi_page_free(page, pq, false);
}
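The retire mechanism can be pictured as a per-page countdown (the sketch below uses hypothetical fields and names, not mimalloc's):

typedef struct page_s { int used; int retire_expire; } page_t;

// instead of freeing a page the moment it becomes empty, stamp it with
// an expiration; only free it if it is still empty N collect rounds later
static void page_retire(page_t* pg) { pg->retire_expire = 8; }

static void heap_collect(page_t* pg) {
  if (pg->retire_expire > 0 && pg->used == 0) {
    if (--pg->retire_expire == 0) {
      /* page stayed empty for the whole grace period: really free it */
    }
  }
}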
@ -458,7 +472,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
#define MI_MIN_SLICES (2)
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
UNUSED(stats);
MI_UNUSED(stats);
#if (MI_SECURE<=2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
@ -516,7 +530,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
{
UNUSED(stats);
MI_UNUSED(stats);
#if (MI_SECURE <= 2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
@ -557,6 +571,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
// allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
MI_UNUSED(tld);
mi_assert_expensive(mi_page_is_valid_init(page));
#if (MI_SECURE<=2)
mi_assert(page->free == NULL);
@ -566,7 +581,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
if (page->capacity >= page->reserved) return;
size_t page_size;
//uint8_t* page_start =
_mi_page_start(_mi_page_segment(page), page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);
@ -580,7 +594,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// ensure we don't touch memory beyond the page to reduce page commit.
// the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
extend = (max_extend==0 ? 1 : max_extend);
}
}
mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
mi_assert_internal(extend < (1UL<<16));
@ -611,9 +625,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
size_t page_size;
_mi_segment_page_start(segment, page, block_size, &page_size, NULL);
page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE);
_mi_segment_page_start(segment, page, &page_size);
mi_assert_internal(mi_page_block_size(page) <= page_size);
mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
#ifdef MI_ENCODE_FREELIST
@ -622,6 +638,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
#endif
page->is_zero = page->is_zero_init;
mi_assert_internal(page->is_committed);
mi_assert_internal(!page->is_reset);
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0);
@ -683,7 +701,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
mi_stat_counter_increase(heap->tld->stats.searches, count);
if (page == NULL) {
_mi_heap_collect_retired(heap, false); // perhaps make a page available
_mi_heap_collect_retired(heap, false); // perhaps make a page available?
page = mi_page_fresh(heap, pq);
if (page == NULL && first_try) {
// out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
@ -754,26 +772,35 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
General allocation
----------------------------------------------------------- */
// A huge page is allocated directly without being in a queue.
// Large and huge page allocation.
// Huge pages are allocated directly without being in a queue.
// Because huge pages contain just one block, and the segment contains
// just that page, we always treat them as abandoned and any thread
// that frees the block can free the whole page and segment directly.
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
size_t block_size = _mi_os_good_alloc_size(size);
mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size);
bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX);
mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size);
if (page != NULL) {
const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already
mi_assert_internal(bsize >= size);
const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_page_set_heap(page, NULL);
mi_assert_internal(bsize >= size);
if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
_mi_stat_increase(&heap->tld->stats.giant, bsize);
_mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
if (pq == NULL) {
// huge pages are directly abandoned
mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_page_set_heap(page, NULL);
}
else {
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
}
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
_mi_stat_increase(&heap->tld->stats.large, bsize);
_mi_stat_counter_increase(&heap->tld->stats.large_count, 1);
}
else {
_mi_stat_increase(&heap->tld->stats.huge, bsize);
@ -789,13 +816,13 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
if (mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) {
if (mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) {
if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
else {
return mi_huge_page_alloc(heap,size);
return mi_large_huge_page_alloc(heap,size);
}
}
else {

View File

@ -1,9 +1,13 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE // for syscall() on Linux
#endif
#include "mimalloc.h"
#include "mimalloc-internal.h"
@ -188,14 +192,16 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__sun) || defined(__wasi__)
defined(__sun) // todo: what to use with __wasi__?
#include <stdlib.h>
static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__linux__)
#elif defined(__linux__) || defined(__HAIKU__)
#if defined(__linux__)
#include <sys/syscall.h>
#endif
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
@ -251,8 +257,8 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#include <time.h>
#endif
uintptr_t _os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
@ -280,8 +286,10 @@ void _mi_random_init(mi_random_ctx_t* ctx) {
if (!os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
#if !defined(__wasi__)
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = _os_random_weak(0);
#endif
uintptr_t x = _mi_os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
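The fallback idea in miniature (illustrative only, not mimalloc's `_mi_random_shuffle`): mix an ASLR-randomized address with the clock and run the result through a cheap integer mixer:

#include <stdint.h>
#include <time.h>

static uintptr_t weak_seed(void) {
  uintptr_t x = (uintptr_t)&weak_seed;   // code address, randomized by ASLR
  x ^= (uintptr_t)time(NULL);            // stir in the current time
  x ^= x >> 13; x *= (uintptr_t)0x9E3779B9u; x ^= x >> 17;  // cheap mixing
  return x;
}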

View File

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
Copyright (c) 2019-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -40,7 +40,7 @@ Possible issues:
#include "bitmap.h"
// Internal raw OS interface
size_t _mi_os_large_page_size();
size_t _mi_os_large_page_size(void);
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
@ -57,9 +57,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map
#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
@ -74,7 +74,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
// Region info
typedef union mi_region_info_u {
uintptr_t value;
size_t value;
struct {
bool valid; // initialized?
bool is_large:1; // allocated in fixed large/huge OS pages
@ -87,21 +87,21 @@ typedef union mi_region_info_u {
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bitmap with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
_Atomic(uintptr_t) info; // mi_region_info_t.value
_Atomic(size_t) info; // mi_region_info_t.value
_Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block
mi_bitmap_field_t reset; // track if reset per block
_Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
uintptr_t padding; // round to 8 fields
_Atomic(size_t) arena_memid; // if allocated from a (huge page) arena
size_t padding; // round to 8 fields
} mem_region_t;
// The region map
static mem_region_t regions[MI_REGION_MAX];
// Allocated regions
static _Atomic(uintptr_t) regions_count; // = 0;
static _Atomic(size_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
@ -186,21 +186,21 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
mi_assert_internal(!region_large || region_commit);
// claim a fresh slot
const uintptr_t idx = mi_atomic_increment_acq_rel(&regions_count);
const size_t idx = mi_atomic_increment_acq_rel(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement_acq_rel(&regions_count);
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
return false;
}
// allocated, initialize and claim the initial blocks
mem_region_t* r = &regions[idx];
r->arena_memid = arena_memid;
mi_atomic_store_release(&r->in_use, (uintptr_t)0);
mi_atomic_store_release(&r->in_use, (size_t)0);
mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
mi_atomic_store_release(&r->reset, (uintptr_t)0);
mi_atomic_store_release(&r->reset, (size_t)0);
*bit_idx = 0;
_mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_store_ptr_release(void,&r->start, start);
@@ -441,7 +441,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
// and unclaim
bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed);
mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed);
}
}
@@ -451,21 +451,21 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
uintptr_t rcount = mi_atomic_load_relaxed(&regions_count);
size_t rcount = mi_atomic_load_relaxed(&regions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = &regions[i];
if (mi_atomic_load_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region
uintptr_t m = mi_atomic_load_relaxed(&region->in_use);
size_t m = mi_atomic_load_relaxed(&region->in_use);
while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
if (m == 0) {
// on success, free the whole region
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start);
size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid);
uintptr_t commit = mi_atomic_load_relaxed(&regions[i].commit);
memset(&regions[i], 0, sizeof(mem_region_t));
size_t commit = mi_atomic_load_relaxed(&regions[i].commit);
memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
// and release the whole region
mi_atomic_store_release(&region->info, (uintptr_t)0);
mi_atomic_store_release(&region->info, (size_t)0);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_abandoned_await_readers(); // ensure no pending reads
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
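Note: the `while (m == 0 && !cas...)` loop above is a try-claim-if-empty idiom: swapping in_use from zero to all-ones in one CAS guarantees a region is only torn down when no block was concurrently claimed. Distilled into a stand-alone sketch (simplified to one field):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define FIELD_FULL (~(uintptr_t)0)

// Returns true if the caller now owns the whole region (it read as fully unused).
static bool try_claim_whole(_Atomic(uintptr_t)* in_use) {
  uintptr_t m = atomic_load_explicit(in_use, memory_order_relaxed);
  // only retry while the map still reads as empty; a weak CAS may fail
  // spuriously, in which case `m` holds the freshly observed value
  while (m == 0 && !atomic_compare_exchange_weak_explicit(
                       in_use, &m, FIELD_FULL,
                       memory_order_release, memory_order_relaxed)) { /* retry */ }
  return (m == 0);
}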

third-party/mimalloc/src/segment-cache.c (new vendored file, 354 lines added)

@@ -0,0 +1,354 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Implements a cache of segments to avoid expensive OS calls and to reuse
the commit_mask to optimize the commit/decommit calls.
The full memory map of all segments is also implemented here.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include "bitmap.h" // atomic bitmap
//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache
#define MI_CACHE_FIELDS (16)
#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit
#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX)
#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes
typedef struct mi_cache_slot_s {
void* p;
size_t memid;
bool is_pinned;
mi_commit_mask_t commit_mask;
mi_commit_mask_t decommit_mask;
_Atomic(mi_msecs_t) expire;
} mi_cache_slot_t;
static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0
static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available!
static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };
static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free
mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
return NULL;
#else
// only segment blocks
if (size != MI_SEGMENT_SIZE) return NULL;
// numa node determines start field
const int numa_node = _mi_os_numa_node(tld);
size_t start_field = 0;
if (numa_node > 0) {
start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
if (start_field >= MI_CACHE_FIELDS) start_field = 0;
}
// find an available slot
mi_bitmap_index_t bitidx = 0;
bool claimed = false;
if (*large) { // large allowed?
claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx);
if (claimed) *large = true;
}
if (!claimed) {
claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx);
if (claimed) *large = false;
}
if (!claimed) return NULL;
// found a slot
mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
void* p = slot->p;
*memid = slot->memid;
*is_pinned = slot->is_pinned;
*is_zero = false;
*commit_mask = slot->commit_mask;
*decommit_mask = slot->decommit_mask;
slot->p = NULL;
mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);
// mark the slot as free again
mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
_mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
return p;
#endif
}
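Note: the NUMA logic at the top of _mi_segment_cache_pop (mirrored in the push path below) just partitions the 16 cache fields so threads on different nodes start probing disjoint ranges. A small sketch of that computation (cache_start_field is a hypothetical helper):

#include <stddef.h>

#define CACHE_FIELDS 16

// Partition CACHE_FIELDS across `node_count` NUMA nodes; e.g. with 4 nodes,
// node 0 starts probing at field 0, node 1 at 4, node 2 at 8, node 3 at 12.
static size_t cache_start_field(int numa_node, size_t node_count) {
  size_t start_field = 0;
  if (numa_node > 0 && node_count > 0) {
    start_field = (CACHE_FIELDS / node_count) * (size_t)numa_node;
    if (start_field >= CACHE_FIELDS) start_field = 0;  // more nodes than fields: wrap
  }
  return start_field;
}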
static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
{
if (mi_commit_mask_is_empty(cmask)) {
// nothing
}
else if (mi_commit_mask_is_full(cmask)) {
_mi_os_decommit(p, total, stats);
}
else {
// todo: one call to decommit the whole at once?
mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
size_t part = total/MI_COMMIT_MASK_BITS;
size_t idx;
size_t count;
mi_commit_mask_foreach(cmask, idx, count) {
void* start = (uint8_t*)p + (idx*part);
size_t size = count*part;
_mi_os_decommit(start, size, stats);
}
mi_commit_mask_foreach_end()
}
mi_commit_mask_create_empty(cmask);
}
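Note: mi_commit_mask_foreach above visits maximal runs of set bits so that each run costs a single OS call. A sketch of that run iteration, assuming one 64-bit mask (the real mi_commit_mask_t spans several words):

#include <stdint.h>
#include <stddef.h>

// Invoke one "decommit" per maximal run of set bits in `mask`, where the
// range `total` is divided into 64 equal parts (one per bit).
static void foreach_run(uint64_t mask, uint8_t* base, size_t total,
                        void (*decommit)(void* start, size_t size)) {
  const size_t part = total / 64;   // bytes covered by one bit
  size_t idx = 0;
  while (mask != 0) {
    if (mask & 1) {
      size_t count = 0;
      while (mask & 1) { count++; mask >>= 1; }   // extend the run
      decommit(base + idx * part, count * part);  // one call for the whole run
      idx += count;
    }
    else {
      mask >>= 1;
      idx++;
    }
  }
}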
#define MI_MAX_PURGE_PER_PUSH (4)
static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
{
MI_UNUSED(tld);
mi_msecs_t now = _mi_clock_now();
size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start
size_t purged = 0;
for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots
if (idx >= MI_CACHE_MAX) idx = 0; // wrap
mi_cache_slot_t* slot = &cache[idx];
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
if (expire != 0 && now >= expire) { // racy read
// seems expired, first claim it from available
purged++;
mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {
// was available, we claimed it
expire = mi_atomic_loadi64_acquire(&slot->expire);
if (expire != 0 && now >= expire) { // safe read
// still expired, decommit it
mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
_mi_abandoned_await_readers(); // wait until safe to decommit
// decommit committed parts
// TODO: instead of decommit, we could also free to the OS?
mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
mi_commit_mask_create_empty(&slot->decommit_mask);
}
_mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop
}
if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
}
}
}
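Note: the purge loop above uses a deliberately racy relaxed read to filter candidates cheaply, and only trusts the second read taken after the slot is claimed out of cache_available. The pattern, distilled (claim/release/decommit are hypothetical callbacks):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef int64_t msecs_t;

// Check, claim, re-check: the first (relaxed) read may race with a concurrent
// push that re-arms the slot, so it only decides whether claiming is worth
// trying; only the post-claim acquire read is acted upon.
static bool purge_slot(_Atomic(msecs_t)* expire, msecs_t now,
                       bool (*claim_from_available)(void),
                       void (*make_available)(void),
                       void (*decommit)(void)) {
  msecs_t e = atomic_load_explicit(expire, memory_order_relaxed);  // racy read
  if (e == 0 || now < e) return false;          // not (yet) expired
  if (!claim_from_available()) return false;    // lost to a concurrent pop
  e = atomic_load_explicit(expire, memory_order_acquire);          // safe read
  if (e != 0 && now >= e) {
    atomic_store_explicit(expire, (msecs_t)0, memory_order_relaxed);
    decommit();                                 // still expired: decommit it
  }
  make_available();                             // re-publish the slot for pops
  return true;
}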
mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
return false;
#else
// only for normal segment blocks
if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;
// numa node determines start field
int numa_node = _mi_os_numa_node(NULL);
size_t start_field = 0;
if (numa_node > 0) {
start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
if (start_field >= MI_CACHE_FIELDS) start_field = 0;
}
// purge expired entries
mi_segment_cache_purge(tld);
// find an available slot
mi_bitmap_index_t bitidx;
bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
if (!claimed) return false;
mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx));
mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
#if MI_DEBUG>1
if (is_pinned || is_large) {
mi_assert_internal(mi_commit_mask_is_full(commit_mask));
}
#endif
// set the slot
mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
slot->p = start;
slot->memid = memid;
slot->is_pinned = is_pinned;
mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
slot->commit_mask = *commit_mask;
slot->decommit_mask = *decommit_mask;
if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) {
long delay = mi_option_get(mi_option_segment_decommit_delay);
if (delay == 0) {
_mi_abandoned_await_readers(); // wait until safe to decommit
mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
mi_commit_mask_create_empty(&slot->decommit_mask);
}
else {
mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
}
}
// make it available
_mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx);
return true;
#endif
}
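Note: the tail of _mi_segment_cache_push implements the decommit delay: with a delay of zero it decommits eagerly, otherwise it stamps an expiry and leaves the work to mi_segment_cache_purge (above) on a later push. A compact sketch of that decision (names hypothetical):

#include <stdatomic.h>
#include <stdint.h>

typedef int64_t msecs_t;

// On push: decommit immediately when the configured delay is zero, otherwise
// schedule for `delay` ms in the future and let a later purge pass do it.
static void schedule_decommit(_Atomic(msecs_t)* expire, msecs_t now, long delay,
                              void (*decommit_now)(void)) {
  if (delay == 0) {
    decommit_now();      // eager: pay the OS-call cost on this push
  }
  else {
    atomic_store_explicit(expire, now + delay, memory_order_release);
  }
}

Deferring the decommit batches expensive OS calls, at the cost of holding committed memory slightly longer.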
/* -----------------------------------------------------------
The following functions are to reliably find the segment or
block that encompasses any pointer p (or NULL if it is not
in any of our segments).
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
set to 1 if it contains the segment meta data.
----------------------------------------------------------- */
#if (MI_INTPTR_SIZE==8)
#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB
#else
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
#endif
#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE?
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
*bitidx = 0;
return MI_SEGMENT_MAP_WSIZE;
}
else {
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
*bitidx = segindex % MI_INTPTR_BITS;
const size_t mapindex = segindex / MI_INTPTR_BITS;
mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
return mapindex;
}
}
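Note: the constants above are what keep the segment map tiny: at one bit per 64MiB segment, 1TiB of address space needs 16384 bits = 2KiB of map, so the 20TB MI_MAX_ADDRESS fits in roughly 40KiB. A sketch of the index split performed by mi_segment_map_index_of (assuming 64-bit words):

#include <stdint.h>
#include <stddef.h>

#define SEGMENT_SIZE ((uintptr_t)64 << 20)    // 64MiB, per the comment above
#define WORD_BITS    (sizeof(uintptr_t) * 8)  // 64 on 64-bit platforms

// Split a segment address into (word index, bit index) in the map.
static size_t map_index_of(uintptr_t segment, size_t* bitidx) {
  const uintptr_t segindex = segment / SEGMENT_SIZE;  // which 64MiB slot
  *bitidx = segindex % WORD_BITS;                     // bit within the word
  return segindex / WORD_BITS;                        // word in the map
}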
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index==MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index == MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
mi_segment_t* segment = _mi_ptr_segment(p);
if (segment == NULL) return NULL;
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) {
return segment; // yes, allocated by us
}
if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
// TODO: maintain a max/min allocated range for more efficient rejection of invalid pointers?
// search downwards for the first segment in case it is an interior pointer
// could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
// valid huge objects
// note: we could maintain a lowest index to speed up the path for invalid pointers?
size_t lobitidx;
size_t loindex;
uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
if (lobits != 0) {
loindex = index;
lobitidx = mi_bsr(lobits); // lobits != 0
}
else if (index == 0) {
return NULL;
}
else {
mi_assert_internal(index > 0);
uintptr_t lomask = mask;
loindex = index;
do {
loindex--;
lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
} while (lomask != 0 && loindex > 0);
if (lomask == 0) return NULL;
lobitidx = mi_bsr(lomask); // lomask != 0
}
mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
// take difference as the addresses could be larger than the MAX_ADDRESS space.
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
segment = (mi_segment_t*)((uint8_t*)segment - diff);
if (segment == NULL) return NULL;
mi_assert_internal((void*)segment < p);
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok);
if (mi_unlikely(!cookie_ok)) return NULL;
if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
return segment;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
return (_mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
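Note: mi_is_in_heap_region is part of mimalloc's public API, and the downward search above is what lets it accept interior pointers. A small usage example:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  char* p = (char*)mi_malloc(100);
  printf("start:    %d\n", mi_is_in_heap_region(p));       // 1: allocated by mimalloc
  printf("interior: %d\n", mi_is_in_heap_region(p + 50));  // 1: interior pointer resolved
  printf("foreign:  %d\n", mi_is_in_heap_region(&p));      // 0: stack address
  mi_free(p);
  return 0;
}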
/*
// Return the full segment range belonging to a pointer
static void* mi_segment_range_of(const void* p, size_t* size) {
mi_segment_t* segment = _mi_segment_of(p);
if (segment == NULL) {
if (size != NULL) *size = 0;
return NULL;
}
else {
if (size != NULL) *size = segment->segment_size;
return segment;
}
}
*/

File diff suppressed because it is too large


@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@@ -25,7 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "os.c"
#include "bitmap.c"
#include "arena.c"
#include "region.c"
#include "segment-cache.c"
#include "segment.c"
#include "page.c"
#include "heap.c"


@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@@ -105,7 +105,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
mi_stat_add(&stats->normal, &src->normal, 1);
mi_stat_add(&stats->huge, &src->huge, 1);
mi_stat_add(&stats->giant, &src->giant, 1);
mi_stat_add(&stats->large, &src->large, 1);
mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1);
mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1);
@@ -115,7 +115,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_counter_add(&stats->searches, &src->searches, 1);
mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1);
mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
mi_stat_counter_add(&stats->large_count, &src->large_count, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) {
@@ -133,25 +133,29 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
// unit == 0: count as decimal
// unit < 0 : count in binary
static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
char buf[32];
char buf[32]; buf[0] = 0;
int len = 32;
const char* suffix = (unit <= 0 ? " " : "b");
const char* suffix = (unit <= 0 ? " " : "B");
const int64_t base = (unit == 0 ? 1000 : 1024);
if (unit>0) n *= unit;
const int64_t pos = (n < 0 ? -n : n);
if (pos < base) {
snprintf(buf, len, "%d %s ", (int)n, suffix);
if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column
snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix));
}
}
else {
int64_t divider = base;
const char* magnitude = "k";
if (pos >= divider*base) { divider *= base; magnitude = "m"; }
if (pos >= divider*base) { divider *= base; magnitude = "g"; }
int64_t divider = base;
const char* magnitude = "K";
if (pos >= divider*base) { divider *= base; magnitude = "M"; }
if (pos >= divider*base) { divider *= base; magnitude = "G"; }
const int64_t tens = (n / (divider/10));
const long whole = (long)(tens/10);
const long frac1 = (long)(tens%10);
snprintf(buf, len, "%ld.%ld %s%s", whole, (frac1 < 0 ? -frac1 : frac1), magnitude, suffix);
char unitdesc[8];
snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix);
snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc);
}
_mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf);
}
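Note: the reworked mi_printf_amount prints decimal units (K/M/G) when unit == 0 and binary ones (Ki/Mi/Gi, with a B suffix) when counting bytes. A stand-alone sketch of the same whole.tenth formatting (format_amount is a hypothetical helper):

#include <stdint.h>
#include <stdio.h>

// Format `n` bytes as e.g. "1.2 MiB" (binary) or "1.2 MB" (decimal).
static void format_amount(char* buf, size_t len, int64_t n, int binary) {
  const int64_t base = (binary ? 1024 : 1000);
  const char* mags = "KMG";
  if (n < base) { snprintf(buf, len, "%d B", (int)n); return; }
  int64_t divider = base;
  int m = 0;
  while (m < 2 && n >= divider * base) { divider *= base; m++; }
  const int64_t tens = n / (divider / 10);   // value in tenths of the unit
  snprintf(buf, len, "%ld.%ld %c%sB", (long)(tens / 10), (long)(tens % 10),
           mags[m], (binary ? "i" : ""));
}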
@@ -221,7 +225,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char*
static void mi_print_header(mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count ");
_mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count ");
}
#if MI_STAT>1
@@ -296,12 +300,12 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
#endif
#if MI_STAT
mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg);
mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out, arg);
mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg);
mi_stat_count_t total = { 0,0,0,0 };
mi_stat_add(&total, &stats->normal, 1);
mi_stat_add(&total, &stats->large, 1);
mi_stat_add(&total, &stats->huge, 1);
mi_stat_add(&total, &stats->giant, 1);
mi_stat_print(&total, "total", 1, out, arg);
#endif
#if MI_STAT>1
@@ -323,7 +327,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
mi_stat_print(&stats->threads, "threads", -1, out, arg);
mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
_mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count());
_mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count());
mi_msecs_t elapsed;
mi_msecs_t user_time;
@@ -412,10 +416,14 @@ mi_msecs_t _mi_clock_now(void) {
}
#else
#include <time.h>
#ifdef CLOCK_REALTIME
#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)
mi_msecs_t _mi_clock_now(void) {
struct timespec t;
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &t);
#else
clock_gettime(CLOCK_REALTIME, &t);
#endif
return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
}
#else
@@ -479,12 +487,12 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec
*page_faults = (size_t)info.PageFaultCount;
}
#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__)
#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__))
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#if defined(__APPLE__) && defined(__MACH__)
#if defined(__APPLE__)
#include <mach/mach.h>
#endif
@@ -520,7 +528,8 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec
while (get_next_area_info(tid.team, &c, &mem) == B_OK) {
*peak_rss += mem.ram_size;
}
#elif defined(__APPLE__) && defined(__MACH__)
*page_faults = 0;
#elif defined(__APPLE__)
*peak_rss = rusage.ru_maxrss; // BSD reports in bytes
struct mach_task_basic_info info;
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
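Note: the #elif reshuffle above is mostly about ru_maxrss portability: Linux reports it in KiB while macOS reports bytes, and the Mach task info gives the more reliable peak on macOS. A hedged sketch of the common POSIX part:

#include <stdint.h>
#include <sys/resource.h>

// Peak resident set size in bytes from getrusage (POSIX part only).
// Assumption: Linux reports ru_maxrss in KiB, macOS in bytes.
static int64_t peak_rss_bytes(void) {
  struct rusage ru;
  if (getrusage(RUSAGE_SELF, &ru) != 0) return 0;
#if defined(__APPLE__)
  return (int64_t)ru.ru_maxrss;          // already bytes
#else
  return (int64_t)ru.ru_maxrss * 1024;   // KiB -> bytes
#endif
}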


@@ -1,6 +1,9 @@
cmake_minimum_required(VERSION 3.0)
project(mimalloc-test C CXX)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
# Set default build type
if (NOT CMAKE_BUILD_TYPE)
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$")
@@ -13,8 +16,8 @@ if (NOT CMAKE_BUILD_TYPE)
endif()
# Import mimalloc (if installed)
find_package(mimalloc 1.7 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}")
find_package(mimalloc 2.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
message(STATUS "Found mimalloc installed at: ${MIMALLOC_LIBRARY_DIR}")
# overriding with a dynamic library
add_executable(dynamic-override main-override.c)
@@ -26,8 +29,8 @@ target_link_libraries(dynamic-override-cxx PUBLIC mimalloc)
# overriding with a static object file works reliably as the symbols in the
# object file have priority over those in library files
add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o)
target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include)
add_executable(static-override-obj main-override.c ${MIMALLOC_LIBRARY_DIR}/mimalloc.o)
target_include_directories(static-override-obj PUBLIC ${MIMALLOC_INCLUDE_DIR})
target_link_libraries(static-override-obj PUBLIC pthread)


@@ -7,6 +7,173 @@
#include <mimalloc.h>
#include <mimalloc-override.h> // redefines malloc etc.
#include <stdint.h>
#include <stdbool.h>
#define MI_INTPTR_SIZE 8
#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE)
#define MI_BIN_HUGE 100
//#define MI_ALIGN2W
// Bit scan reverse: return the index of the highest bit.
static inline uint8_t mi_bsr32(uint32_t x);
#if defined(_MSC_VER)
#include <windows.h>
#include <intrin.h>
static inline uint8_t mi_bsr32(uint32_t x) {
uint32_t idx;
_BitScanReverse((DWORD*)&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
static inline uint8_t mi_bsr32(uint32_t x) {
return (31 - __builtin_clz(x));
}
#else
static inline uint8_t mi_bsr32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12,
30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13,
};
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return debruijn[(x*0x076be629) >> 27];
}
#endif
/*
// Bit scan reverse: return the index of the highest bit.
uint8_t _mi_bsr(uintptr_t x) {
if (x == 0) return 0;
#if MI_INTPTR_SIZE==8
uint32_t hi = (x >> 32);
return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
#elif MI_INTPTR_SIZE==4
return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
*/
static inline size_t _mi_wsize_from_size(size_t size) {
return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}
// Return the bin for a given field size.
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
extern inline uint8_t _mi_bin8(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
bin = 1;
}
#if defined(MI_ALIGN4W)
else if (wsize <= 4) {
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
}
#elif defined(MI_ALIGN2W)
else if (wsize <= 8) {
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
}
#else
else if (wsize <= 8) {
bin = (uint8_t)wsize;
}
#endif
else if (wsize > MI_LARGE_WSIZE_MAX) {
bin = MI_BIN_HUGE;
}
else {
#if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
}
return bin;
}
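Note: to make the bin formula above concrete, take size = 800 on a 64-bit platform: wsize = 100, decremented to 99, whose highest bit is b = 6, giving bin = ((6 << 2) + ((99 >> 4) & 0x03)) - 3 = 23. A tiny self-check (assuming it sits in this same file so _mi_bin8 is in scope):

#include <assert.h>
#include <stdio.h>

static void bin8_example(void) {
  // size 800 => wsize 100 => after decrement 99 (0b1100011)
  // b = mi_bsr32(99) = 6; (99 >> (6-2)) & 0x03 = 2; bin = (6<<2) + 2 - 3 = 23
  assert(_mi_bin8(800) == 23);
  printf("bin(800 bytes) = %d\n", _mi_bin8(800));
}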
extern inline uint8_t _mi_bin4(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
bin = 1;
}
#if defined(MI_ALIGN4W)
else if (wsize <= 4) {
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
}
#elif defined(MI_ALIGN2W)
else if (wsize <= 8) {
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
}
#else
else if (wsize <= 8) {
bin = (uint8_t)wsize;
}
#endif
else if (wsize > MI_LARGE_WSIZE_MAX) {
bin = MI_BIN_HUGE;
}
else {
uint8_t b = mi_bsr32((uint32_t)wsize);
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
}
return bin;
}
size_t _mi_binx4(size_t bsize) {
if (bsize==0) return 0;
uint8_t b = mi_bsr32((uint32_t)bsize);
if (b <= 1) return bsize;
size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01);
return bin;
}
size_t _mi_binx8(size_t bsize) {
if (bsize<=1) return bsize;
uint8_t b = mi_bsr32((uint32_t)bsize);
if (b <= 2) return bsize;
size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5;
return bin;
}
void mi_bins() {
//printf(" QNULL(1), /* 0 */ \\\n ");
size_t last_bin = 0;
size_t min_bsize = 0;
size_t last_bsize = 0;
for (size_t bsize = 1; bsize < 2*1024; bsize++) {
size_t size = bsize * 64 * 1024;
size_t bin = _mi_binx8(bsize);
if (bin != last_bin) {
printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin);
//printf("QNULL(%6zd), ", wsize);
//if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin);
last_bin = bin;
min_bsize = bsize;
}
last_bsize = bsize;
}
}
static void double_free1();
static void double_free2();
static void corrupt_free();
@@ -17,6 +184,7 @@ static void test_process_info(void);
static void test_reserved(void);
static void negative_stat(void);
int main() {
mi_version();
mi_stats_reset();


@@ -26,23 +26,33 @@ static void msleep(unsigned long msecs) { Sleep(msecs); }
static void msleep(unsigned long msecs) { usleep(msecs * 1000UL); }
#endif
void heap_thread_free_large(); // issue #221
void heap_no_delete(); // issue #202
void heap_late_free(); // issue #204
void padding_shrink(); // issue #209
void various_tests();
void test_mt_shutdown();
void fail_aslr(); // issue #372
static void heap_thread_free_large(); // issue #221
static void heap_no_delete(); // issue #202
static void heap_late_free(); // issue #204
static void padding_shrink(); // issue #209
static void various_tests();
static void test_mt_shutdown();
static void large_alloc(void); // issue #363
static void fail_aslr(); // issue #372
static void tsan_numa_test(); // issue #414
static void strdup_test(); // issue #445
static void bench_alloc_large(void); // issue #xxx
int main() {
mi_stats_reset(); // ignore earlier allocations
heap_thread_free_large();
heap_no_delete();
heap_late_free();
padding_shrink();
various_tests();
heap_thread_free_large();
heap_no_delete();
heap_late_free();
padding_shrink();
various_tests();
large_alloc();
tsan_numa_test();
strdup_test();
//test_mt_shutdown();
//fail_aslr();
bench_alloc_large();
mi_stats_print(NULL);
return 0;
}
@@ -63,7 +73,7 @@ public:
};
void various_tests() {
static void various_tests() {
atexit(free_p);
void* p1 = malloc(78);
void* p2 = mi_malloc_aligned(16, 24);
@@ -71,18 +81,13 @@ void various_tests() {
p1 = malloc(8);
char* s = mi_strdup("hello\n");
//char* s = _strdup("hello\n");
//char* buf = NULL;
//size_t len;
//_dupenv_s(&buf,&len,"MIMALLOC_VERBOSE");
//mi_free(buf);
mi_free(p2);
p2 = malloc(16);
p1 = realloc(p1, 32);
free(p1);
free(p2);
mi_free(s);
Test* t = new Test(42);
delete t;
t = new (std::nothrow) Test(42);
@@ -106,7 +111,7 @@ public:
static Static s = Static();
bool test_stl_allocator1() {
static bool test_stl_allocator1() {
std::vector<int, mi_stl_allocator<int> > vec;
vec.push_back(1);
vec.pop_back();
@@ -115,38 +120,48 @@ bool test_stl_allocator1() {
struct some_struct { int i; int j; double z; };
bool test_stl_allocator2() {
static bool test_stl_allocator2() {
std::vector<some_struct, mi_stl_allocator<some_struct> > vec;
vec.push_back(some_struct());
vec.pop_back();
return vec.size() == 0;
}
// issue 445
static void strdup_test() {
#ifdef _MSC_VER
char* s = _strdup("hello\n");
char* buf = NULL;
size_t len;
_dupenv_s(&buf, &len, "MIMALLOC_VERBOSE");
mi_free(buf);
mi_free(s);
#endif
}
// Issue #202
void heap_no_delete_worker() {
static void heap_no_delete_worker() {
mi_heap_t* heap = mi_heap_new();
void* q = mi_heap_malloc(heap, 1024);
// mi_heap_delete(heap); // uncomment to prevent assertion
}
void heap_no_delete() {
static void heap_no_delete() {
auto t1 = std::thread(heap_no_delete_worker);
t1.join();
}
// Issue #204
volatile void* global_p;
static volatile void* global_p;
void t1main() {
static void t1main() {
mi_heap_t* heap = mi_heap_new();
global_p = mi_heap_malloc(heap, 1024);
mi_heap_delete(heap);
}
void heap_late_free() {
static void heap_late_free() {
auto t1 = std::thread(t1main);
msleep(2000);
@@ -163,7 +178,7 @@ static void alloc0(/* void* arg */)
shared_p = mi_malloc(8);
}
void padding_shrink(void)
static void padding_shrink(void)
{
auto t1 = std::thread(alloc0);
t1.join();
@@ -172,13 +187,13 @@ void padding_shrink(void)
// Issue #221
void heap_thread_free_large_worker() {
static void heap_thread_free_large_worker() {
mi_free(shared_p);
}
void heap_thread_free_large() {
static void heap_thread_free_large() {
for (int i = 0; i < 100; i++) {
shared_p = mi_malloc_aligned(2*1024*1024 + 1, 8);
shared_p = mi_malloc_aligned(2 * 1024 * 1024 + 1, 8);
auto t1 = std::thread(heap_thread_free_large_worker);
t1.join();
}
@@ -186,7 +201,7 @@ void heap_thread_free_large() {
void test_mt_shutdown()
static void test_mt_shutdown()
{
const int threads = 5;
std::vector< std::future< std::vector< char* > > > ts;
@@ -210,10 +225,65 @@ void test_mt_shutdown()
std::cout << "done" << std::endl;
}
// issue #363
using namespace std;
void large_alloc(void)
{
char* a = new char[1ull << 25];
thread th([&] {
delete[] a;
});
th.join();
}
// issue #372
void fail_aslr() {
static void fail_aslr() {
size_t sz = (4ULL << 40); // 4TiB
void* p = malloc(sz);
printf("pointer p: %p: area up to %p\n", p, (uint8_t*)p + sz);
*(int*)0x5FFFFFFF000 = 0; // should segfault
}
}
// issues #414
static void dummy_worker() {
void* p = mi_malloc(0);
mi_free(p);
}
static void tsan_numa_test() {
auto t1 = std::thread(dummy_worker);
dummy_worker();
t1.join();
}
// issue #?
#include <chrono>
#include <random>
#include <iostream>
static void bench_alloc_large(void) {
static constexpr int kNumBuffers = 20;
static constexpr size_t kMinBufferSize = 5 * 1024 * 1024;
static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024;
std::unique_ptr<char[]> buffers[kNumBuffers];
std::random_device rd;
std::mt19937 gen(42); //rd());
std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize);
std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1);
static constexpr int kNumIterations = 2000;
const auto start = std::chrono::steady_clock::now();
for (int i = 0; i < kNumIterations; ++i) {
int buffer_idx = buf_number_distribution(gen);
size_t new_size = size_distribution(gen);
buffers[buffer_idx] = std::make_unique<char[]>(new_size);
}
const auto end = std::chrono::steady_clock::now();
const auto num_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
const auto us_per_allocation = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / kNumIterations;
std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." << std::endl;
std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl;
}


@@ -1,9 +1,12 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Walloc-size-larger-than="
#endif
/*
Testing allocators is difficult as bugs may only surface after particular
@@ -64,15 +67,15 @@ static int failed = 0;
// ---------------------------------------------------------------------------
// Test functions
// ---------------------------------------------------------------------------
bool test_heap1();
bool test_heap2();
bool test_stl_allocator1();
bool test_stl_allocator2();
bool test_heap1(void);
bool test_heap2(void);
bool test_stl_allocator1(void);
bool test_stl_allocator2(void);
// ---------------------------------------------------------------------------
// Main testing
// ---------------------------------------------------------------------------
int main() {
int main(void) {
mi_option_disable(mi_option_verbose);
// ---------------------------------------------------
@@ -83,7 +86,7 @@ int main() {
void* p = mi_malloc(0); mi_free(p);
});
CHECK_BODY("malloc-nomem1",{
result = (mi_malloc(SIZE_MAX/2) == NULL);
result = (mi_malloc((size_t)PTRDIFF_MAX + (size_t)1) == NULL);
});
CHECK_BODY("malloc-null",{
mi_free(NULL);


@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018,2019 Microsoft Research, Daan Leijen
Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license.
-----------------------------------------------------------------------------*/
@@ -25,7 +25,7 @@ terms of the MIT license.
//
// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 10; // scaling factor
static int SCALE = 25; // scaling factor
static int ITER = 50; // N full iterations destructing and re-creating all threads
// static int THREADS = 8; // more repeatable if THREADS <= #processors
@@ -37,13 +37,14 @@ static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
// #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC
#define custom_calloc(n,s) calloc(n,s)
#define custom_calloc(n,s) malloc(n*s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#include <mimalloc.h>
#define custom_calloc(n,s) mi_calloc(n,s)
#define custom_calloc(n,s) mi_malloc(n*s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
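Note: this change deliberately swaps calloc for plain malloc(n*s) to benchmark allocation alone, which also drops calloc's zeroing and its overflow check on n*s; production code should keep the check. A sketch of what that check looks like (checked_calloc is a hypothetical helper):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// calloc-style allocation with an explicit n*s overflow check.
static void* checked_calloc(size_t n, size_t s) {
  if (s != 0 && n > SIZE_MAX / s) return NULL;  // n*s would overflow
  void* p = malloc(n * s);
  if (p != NULL) memset(p, 0, n * s);           // calloc also zeroes
  return p;
}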
@@ -181,14 +182,15 @@ static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
static void test_stress(void) {
uintptr_t r = rand();
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &stress);
run_os_threads(THREADS, &stress);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
// mi_collect(false);
//mi_collect(false);
//mi_debug_show_arenas();
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
@@ -243,15 +245,24 @@ int main(int argc, char** argv) {
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
// mi_stats_reset();
#ifdef STRESS
test_stress();
#else
test_leak();
//mi_reserve_os_memory(512ULL << 20, true, true);
#if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
mi_stats_reset();
#endif
#ifdef STRESS
test_stress();
#else
test_leak();
#endif
// mi_collect(true);
#ifndef USE_STD_MALLOC
#ifndef NDEBUG
mi_collect(true);
//mi_debug_show_arenas();
#endif
mi_stats_print(NULL);
#endif
//bench_end_program();