diff --git a/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch b/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch
new file mode 100644
index 000000000000..9817b1833627
--- /dev/null
+++ b/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch
@@ -0,0 +1,252 @@
+diff --git a/lib/internal.h b/lib/internal.h
+index cce71e4c..a217b3f9 100644
+--- a/lib/internal.h
++++ b/lib/internal.h
+@@ -31,7 +31,7 @@
+    Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
+    Copyright (c) 2018      Yury Gribov <tetra2005@gmail.com>
+    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
+-   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
++   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
+    Licensed under the MIT license:
+ 
+    Permission is  hereby granted,  free of charge,  to any  person obtaining
+@@ -162,7 +162,7 @@ const char *unsignedCharToPrintable(unsigned char c);
+ #endif
+ 
+ extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+-extern unsigned int g_parseAttempts;             // used for testing only
++extern unsigned int g_bytesScanned;              // used for testing only
+ 
+ #ifdef __cplusplus
+ }
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index aaf0fa9c..6de99d99 100644
+--- a/lib/xmlparse.c
++++ b/lib/xmlparse.c
+@@ -38,7 +38,7 @@
+    Copyright (c) 2022      Jann Horn <jannh@google.com>
+    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
+    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
+-   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
++   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
+    Licensed under the MIT license:
+ 
+    Permission is  hereby granted,  free of charge,  to any  person obtaining
+@@ -630,7 +630,7 @@ static unsigned long getDebugLevel(const char *variableName,
+        : ((*((pool)->ptr)++ = c), 1))
+ 
+ XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+-unsigned int g_parseAttempts = 0;                    // used for testing only
++unsigned int g_bytesScanned = 0;                     // used for testing only
+ 
+ struct XML_ParserStruct {
+   /* The first member must be m_userData so that the XML_GetUserData
+@@ -1017,7 +1017,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
+       return XML_ERROR_NONE;
+     }
+   }
+-  g_parseAttempts += 1;
++  g_bytesScanned += (unsigned)have_now;
+   const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+   if (ret == XML_ERROR_NONE) {
+     // if we consumed nothing, remember what we had on this parse attempt.
+diff --git a/tests/basic_tests.c b/tests/basic_tests.c
+index 7112a440..a9cc3861 100644
+--- a/tests/basic_tests.c
++++ b/tests/basic_tests.c
+@@ -5202,13 +5202,7 @@ START_TEST(test_nested_entity_suspend) {
+ END_TEST
+ 
+ /* Regression test for quadratic parsing on large tokens */
+-START_TEST(test_big_tokens_take_linear_time) {
+-  const char *const too_slow_failure_message
+-      = "Compared to the baseline runtime of the first test, this test has a "
+-        "slowdown of more than <max_slowdown>. "
+-        "Please keep increasing the value by 1 until it reliably passes the "
+-        "test on your hardware and open a bug sharing that number with us. "
+-        "Thanks in advance!";
++START_TEST(test_big_tokens_scale_linearly) {
+   const struct {
+     const char *pre;
+     const char *post;
+@@ -5220,65 +5214,57 @@ START_TEST(test_big_tokens_take_linear_time) {
+       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
+   };
+   const int num_cases = sizeof(text) / sizeof(text[0]);
+-  // For the test we need a <max_slowdown> value that is:
+-  // (1) big enough that the test passes reliably (avoiding flaky tests), and
+-  // (2) small enough that the test actually catches regressions.
+-  const int max_slowdown = 15;
+   char aaaaaa[4096];
+   const int fillsize = (int)sizeof(aaaaaa);
+   const int fillcount = 100;
++  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
++  const unsigned max_factor = 4;
++  const unsigned max_scanned = max_factor * approx_bytes;
+ 
+   memset(aaaaaa, 'a', fillsize);
+ 
+   if (! g_reparseDeferralEnabledDefault) {
+     return; // heuristic is disabled; we would get O(n^2) and fail.
+   }
+-#if ! defined(__linux__)
+-  if (CLOCKS_PER_SEC < 100000) {
+-    // Skip this test if clock() doesn't have reasonably good resolution.
+-    // This workaround is primarily targeting Windows and FreeBSD, since
+-    // XSI requires the value to be 1.000.000 (10x the condition here), and
+-    // we want to be very sure that at least one platform in CI can catch
+-    // regressions (through a failing test).
+-    return;
+-  }
+-#endif
+ 
+-  clock_t baseline = 0;
+   for (int i = 0; i < num_cases; ++i) {
+     XML_Parser parser = XML_ParserCreate(NULL);
+     assert_true(parser != NULL);
+     enum XML_Status status;
+-    set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
+-                text[i].pre, text[i].post);
+-    const clock_t start = clock();
++    set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
+ 
+     // parse the start text
++    g_bytesScanned = 0;
+     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
+                                      (int)strlen(text[i].pre), XML_FALSE);
+     if (status != XML_STATUS_OK) {
+       xml_failure(parser);
+     }
++
+     // parse lots of 'a', failing the test early if it takes too long
++    unsigned past_max_count = 0;
+     for (int f = 0; f < fillcount; ++f) {
+       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
+       if (status != XML_STATUS_OK) {
+         xml_failure(parser);
+       }
+-      // i == 0 means we're still calculating the baseline value
+-      if (i > 0) {
+-        const clock_t now = clock();
+-        const clock_t clocks_so_far = now - start;
+-        const int slowdown = clocks_so_far / baseline;
+-        if (slowdown >= max_slowdown) {
+-          fprintf(
+-              stderr,
+-              "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
+-              f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
+-          fail(too_slow_failure_message);
+-        }
++      if (g_bytesScanned > max_scanned) {
++        // We're not done, and have already passed the limit -- the test will
++        // definitely fail. This block allows us to save time by failing early.
++        const unsigned pushed
++            = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
++        fprintf(
++            stderr,
++            "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
++            f + 1, fillcount, pushed, g_bytesScanned,
++            g_bytesScanned / (double)pushed, max_scanned, max_factor);
++        past_max_count++;
++        // We are failing, but allow a few log prints first. If we don't reach
++        // a count of five, the test will fail after the loop instead.
++        assert_true(past_max_count < 5);
+       }
+     }
++
+     // parse the end text
+     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
+                                      (int)strlen(text[i].post), XML_TRUE);
+@@ -5286,18 +5272,14 @@ START_TEST(test_big_tokens_take_linear_time) {
+       xml_failure(parser);
+     }
+ 
+-    // how long did it take in total?
+-    const clock_t end = clock();
+-    const clock_t taken = end - start;
+-    if (i == 0) {
+-      assert_true(taken > 0); // just to make sure we don't div-by-0 later
+-      baseline = taken;
+-    }
+-    const int slowdown = taken / baseline;
+-    if (slowdown >= max_slowdown) {
+-      fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
+-              (int)taken, (int)baseline, slowdown, max_slowdown);
+-      fail(too_slow_failure_message);
++    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
++    if (g_bytesScanned > max_scanned) {
++      fprintf(
++          stderr,
++          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
++          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
++          max_factor);
++      fail("scanned too many bytes");
+     }
+ 
+     XML_ParserFree(parser);
+@@ -5774,19 +5756,17 @@ START_TEST(test_varying_buffer_fills) {
+                 fillsize[2], fillsize[3]);
+     XML_Parser parser = XML_ParserCreate(NULL);
+     assert_true(parser != NULL);
+-    g_parseAttempts = 0;
+ 
+     CharData storage;
+     CharData_Init(&storage);
+     XML_SetUserData(parser, &storage);
+     XML_SetStartElementHandler(parser, start_element_event_handler);
+ 
++    g_bytesScanned = 0;
+     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
+-    int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
+     int offset = 0;
+     while (*fillsize >= 0) {
+       assert_true(offset + *fillsize <= document_length); // or test is invalid
+-      const unsigned attempts_before = g_parseAttempts;
+       const enum XML_Status status
+           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
+       if (status != XML_STATUS_OK) {
+@@ -5796,28 +5776,20 @@ START_TEST(test_varying_buffer_fills) {
+       fillsize++;
+       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
+       worstcase_bytes += offset; // we might've tried to parse all pending bytes
+-      if (g_parseAttempts != attempts_before) {
+-        assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
+-        assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
+-        scanned_bytes += offset; // we *did* try to parse all pending bytes
+-      }
+     }
+     assert_true(storage.count == 1); // the big token should've been parsed
+-    assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
++    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
+     if (g_reparseDeferralEnabledDefault) {
+       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
+-      const int max_bytes_scanned = -*fillsize;
+-      if (scanned_bytes > max_bytes_scanned) {
++      const unsigned max_bytes_scanned = -*fillsize;
++      if (g_bytesScanned > max_bytes_scanned) {
+         fprintf(stderr,
+-                "bytes scanned in parse attempts: actual=%d limit=%d \n",
+-                scanned_bytes, max_bytes_scanned);
++                "bytes scanned in parse attempts: actual=%u limit=%u \n",
++                g_bytesScanned, max_bytes_scanned);
+         fail("too many bytes scanned in parse attempts");
+       }
+-      assert_true(scanned_bytes <= worstcase_bytes);
+-    } else {
+-      // heuristic is disabled; every XML_Parse() will have reparsed
+-      assert_true(scanned_bytes == worstcase_bytes);
+     }
++    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
+ 
+     XML_ParserFree(parser);
+   }
+@@ -6065,7 +6037,7 @@ make_basic_test_case(Suite *s) {
+   tcase_add_test__ifdef_xml_dtd(tc_basic,
+                                 test_pool_integrity_with_unfinished_attr);
+   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
+-  tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
++  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
+   tcase_add_test(tc_basic, test_set_reparse_deferral);
+   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
+   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
diff --git a/pkgs/development/libraries/expat/default.nix b/pkgs/development/libraries/expat/default.nix
index babb19e6cfb0..74c8b0ae609a 100644
--- a/pkgs/development/libraries/expat/default.nix
+++ b/pkgs/development/libraries/expat/default.nix
@@ -26,6 +26,11 @@ in stdenv.mkDerivation rec {
     hash = "sha256-y19ajqIR4cq9Wb4KkzpS48Aswyboak04fY0hjn7kej4=";
   };
 
+  patches = [
+    # Fix tests flakiness on some platforms (like aarch64-darwin), should be released in 2.6.1
+    ./2.6.0-fix-tests-flakiness.patch
+  ];
+
   strictDeps = true;
 
   outputs = [ "out" "dev" ]; # TODO: fix referrers