From ef7665770a53a6f278078d337c6b1f1bafe97dcd Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 4 May 2026 08:34:56 -0700 Subject: [PATCH 1/4] Use adaptive collection threshold for free-threaded GC. --- Include/internal/pycore_interp_structs.h | 10 +- Python/gc_free_threading.c | 344 ++++++++--------------- 2 files changed, 129 insertions(+), 225 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 2d04c173e85abe..dd37463a03818a 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -264,12 +264,10 @@ struct _gc_runtime_state { /* True if gc.freeze() has been used. */ int freeze_active; - /* Memory usage of the process (RSS + swap) after last GC. */ - Py_ssize_t last_mem; - - /* This accumulates the new object count whenever collection is deferred - due to the RSS increase condition not being meet. Reset on collection. */ - Py_ssize_t deferred_count; + /* Adaptive threshold used to decide when to trigger a collection. + Adjusted after each collection based on the fraction of objects found to + be trash. */ + int adaptive_threshold; /* Mutex held for gc_should_collect_mem_usage(). */ PyMutex mutex; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index b4fcd365592aa5..1a380d2f221309 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -17,30 +17,22 @@ #include "pydtrace.h" -// Platform-specific includes for get_process_mem_usage(). -#ifdef _WIN32 - #include - #include // For GetProcessMemoryInfo -#elif defined(__linux__) - #include // For sysconf, getpid -#elif defined(__APPLE__) - #include - #include // Required for TASK_VM_INFO - #include // For sysconf, getpid -#elif defined(__FreeBSD__) - #include - #include - #include // Requires sys/user.h for kinfo_proc definition - #include - #include // For sysconf, getpid - #include // For O_RDONLY - #include // For _POSIX2_LINE_MAX -#elif defined(__OpenBSD__) - #include - #include - #include // For kinfo_proc - #include // For sysconf, getpid -#endif +// Upper bound on the adaptive threshold, expressed as long_lived_total / N +// (where long_lived_total is the count of objects in the mimalloc GC heap). +// Scaling with the heap size keeps the amortized GC cost roughly linear in +// total allocations: when the heap is large we can afford to wait longer +// between passes, since each pass costs O(long_lived_total) for the +// mark-alive walk. At divisor 2, no more than one GC pass fires per heap +// doubling in the no-trash limit. +#define GC_THRESHOLD_MAX_DIVISOR 2 + +// Decay constant for mapping the trash ratio (collected / long_lived_total) +// to a target threshold via 1 / (1 + K * ratio). With K=8: ratio=0.05 maps +// to ~71% of the max range, ratio=0.25 to ~33%, ratio=0.5 to ~20%, +// ratio=1.0 to ~11%. Higher K decays faster. The 1/4-up / 3/4-down step +// applied later does most of the noise filtering, so the exact shape here +// matters less than the monotonicity. +#define GC_THRESHOLD_DECAY_K 8 // enable the "mark alive" pass of GC #define GC_ENABLE_MARK_ALIVE 1 @@ -1690,6 +1682,7 @@ _PyGC_InitState(GCState *gcstate) { // TODO: move to pycore_runtime_init.h once the incremental GC lands. gcstate->young.threshold = 2000; + gcstate->adaptive_threshold = gcstate->young.threshold; } @@ -2016,205 +2009,32 @@ cleanup_worklist(struct worklist *worklist) } } -// Return the memory usage (typically RSS + swap) of the process, in units of -// KB. 
Returns -1 if this operation is not supported or on failure. -static Py_ssize_t -get_process_mem_usage(void) -{ -#ifdef _WIN32 - // Windows implementation using GetProcessMemoryInfo - // Returns WorkingSetSize + PagefileUsage - PROCESS_MEMORY_COUNTERS pmc; - HANDLE hProcess = GetCurrentProcess(); - if (NULL == hProcess) { - // Should not happen for the current process - return -1; - } - - // GetProcessMemoryInfo returns non-zero on success - if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) { - // Values are in bytes, convert to KB. - return (Py_ssize_t)((pmc.WorkingSetSize + pmc.PagefileUsage) / 1024); - } - else { - return -1; - } - -#elif __linux__ - FILE* fp = fopen("/proc/self/status", "r"); - if (fp == NULL) { - return -1; - } - - char line_buffer[256]; - long long rss_kb = -1; - long long swap_kb = -1; - - while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) { - if (rss_kb == -1 && strncmp(line_buffer, "VmRSS:", 6) == 0) { - sscanf(line_buffer + 6, "%lld", &rss_kb); - } - else if (swap_kb == -1 && strncmp(line_buffer, "VmSwap:", 7) == 0) { - sscanf(line_buffer + 7, "%lld", &swap_kb); - } - if (rss_kb != -1 && swap_kb != -1) { - break; // Found both - } - } - fclose(fp); - - if (rss_kb != -1 && swap_kb != -1) { - return (Py_ssize_t)(rss_kb + swap_kb); - } - return -1; - -#elif defined(__APPLE__) - // --- MacOS (Darwin) --- - // Returns phys_footprint (RAM + compressed memory) - task_vm_info_data_t vm_info; - mach_msg_type_number_t count = TASK_VM_INFO_COUNT; - kern_return_t kerr; - - kerr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count); - if (kerr != KERN_SUCCESS) { - return -1; - } - // phys_footprint is in bytes. Convert to KB. - return (Py_ssize_t)(vm_info.phys_footprint / 1024); - -#elif defined(__FreeBSD__) - // NOTE: Returns RSS only. Per-process swap usage isn't readily available - long page_size_kb = sysconf(_SC_PAGESIZE) / 1024; - if (page_size_kb <= 0) { - return -1; - } - - // Using /dev/null for vmcore avoids needing dump file. - // NULL for kernel file uses running kernel. - char errbuf[_POSIX2_LINE_MAX]; // For kvm error messages - kvm_t *kd = kvm_openfiles(NULL, "/dev/null", NULL, O_RDONLY, errbuf); - if (kd == NULL) { - return -1; - } - - // KERN_PROC_PID filters for the specific process ID - // n_procs will contain the number of processes returned (should be 1 or 0) - pid_t pid = getpid(); - int n_procs; - struct kinfo_proc *kp = kvm_getprocs(kd, KERN_PROC_PID, pid, &n_procs); - if (kp == NULL) { - kvm_close(kd); - return -1; - } - - Py_ssize_t rss_kb = -1; - if (n_procs > 0) { - // kp[0] contains the info for our process - // ki_rssize is in pages. Convert to KB. - rss_kb = (Py_ssize_t)kp->ki_rssize * page_size_kb; - } - else { - // Process with PID not found, shouldn't happen for self. - rss_kb = -1; - } - - kvm_close(kd); - return rss_kb; - -#elif defined(__OpenBSD__) - // NOTE: Returns RSS only. Per-process swap usage isn't readily available - long page_size_kb = sysconf(_SC_PAGESIZE) / 1024; - if (page_size_kb <= 0) { - return -1; - } - - struct kinfo_proc kp; - pid_t pid = getpid(); - int mib[6]; - size_t len = sizeof(kp); - - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PID; - mib[3] = pid; - mib[4] = sizeof(struct kinfo_proc); // size of the structure we want - mib[5] = 1; // want 1 structure back - if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1) { - return -1; - } - - if (len > 0) { - // p_vm_rssize is in pages on OpenBSD. Convert to KB. 
- return (Py_ssize_t)kp.p_vm_rssize * page_size_kb; - } - else { - // Process info not returned - return -1; - } -#else - // Unsupported platform - return -1; -#endif -} - static bool -gc_should_collect_mem_usage(GCState *gcstate) -{ - Py_ssize_t mem = get_process_mem_usage(); - if (mem < 0) { - // Reading process memory usage is not support or failed. - return true; - } - int threshold = gcstate->young.threshold; - Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed(&gcstate->deferred_count); - if (deferred > threshold * 40) { - // Too many new container objects since last GC, even though memory use - // might not have increased much. This is intended to avoid resource - // exhaustion if some objects consume resources but don't result in a - // memory usage increase. We use 40x as the factor here because older - // versions of Python would do full collections after roughly every - // 70,000 new container objects. - return true; - } - Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed(&gcstate->last_mem); - Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128); - if ((mem - last_mem) > mem_threshold) { - // The process memory usage has increased too much, do a collection. - return true; - } - else { - // The memory usage has not increased enough, defer the collection and - // clear the young object count so we don't check memory usage again - // on the next call to gc_should_collect(). - PyMutex_Lock(&gcstate->mutex); - int young_count = _Py_atomic_exchange_int(&gcstate->young.count, 0); - _Py_atomic_store_ssize_relaxed(&gcstate->deferred_count, - gcstate->deferred_count + young_count); - PyMutex_Unlock(&gcstate->mutex); - return false; - } -} - -static bool -gc_should_collect(GCState *gcstate) +gc_should_collect(PyThreadState *tstate) { + GCState *gcstate = &tstate->interp->gc; int count = _Py_atomic_load_int_relaxed(&gcstate->young.count); - int threshold = gcstate->young.threshold; + int base = gcstate->young.threshold; + int adaptive = gcstate->adaptive_threshold; int gc_enabled = _Py_atomic_load_int_relaxed(&gcstate->enabled); - if (count <= threshold || threshold == 0 || !gc_enabled) { + if (base == 0 || !gc_enabled) { return false; } if (gcstate->old[0].threshold == 0) { - // A few tests rely on immediate scheduling of the GC so we ignore the - // extra conditions if generations[1].threshold is set to zero. - return true; + // A few tests rely on immediate scheduling of the GC so we ignore + // the adaptive threshold if generations[1].threshold is set to zero + // and just trigger when the base is exceeded. + return count > base; + } + if (count <= adaptive) { + return false; } if (count < gcstate->long_lived_total / 4) { - // Avoid quadratic behavior by scaling threshold to the number of live - // objects. + // Avoid quadratic behavior by scaling the trigger to the number of + // live objects. return false; } - return gc_should_collect_mem_usage(gcstate); + return true; } static void @@ -2231,7 +2051,7 @@ record_allocation(PyThreadState *tstate) _Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count); gc->alloc_count = 0; - if (gc_should_collect(gcstate) && + if (gc_should_collect(tstate) && !_Py_atomic_load_int_relaxed(&gcstate->collecting)) { _Py_ScheduleGC(tstate); @@ -2264,6 +2084,89 @@ record_deallocation(PyThreadState *tstate) } } +// Update the adaptive threshold for the next collection based on how +// much trash this pass found relative to the cost of the pass. 
The +// GC cost is dominated by the mark-alive walk, which is O(objects in +// the mimalloc GC heap) -- that's exactly what long_lived_total +// counts (including untracked and frozen objects in the heap). So +// the productive ratio is collected / long_lived_total: the fraction +// of GC work that actually freed memory. A high ratio means we +// should collect sooner; a low ratio means GC work was largely wasted +// and we can afford to wait longer. We map the ratio through a +// hyperbolic decay to a target in [base, max_threshold]: +// target = base + (max - base) * total / (total + K * collected) +// where max_threshold scales with long_lived_total so that amortized +// GC cost stays linear in total allocations on large heaps. +// +// We adapt the threshold asymmetrically: slowly when raising it and +// quickly when lowering it. The two directions have very different +// failure modes -- raising too aggressively risks heap blowup (and +// possibly OOM in memory-constrained environments like containers), +// while lowering too slowly only costs a few extra GC passes. So we +// err on the side of more frequent collection. When trash appears, +// we snap toward the new (lower) target in a single big step; when +// trash disappears, we creep up gradually so that one fortunate pass +// doesn't push us into a long deferral. +// +// Both updates are weighted moves toward the target rather than +// direct assignments, to avoid "hunting" -- bouncing around due to +// pass-to-pass noise. Up: 1/4 step. Down: 3/4 step. +static void +update_adaptive_threshold(GCState *gcstate, Py_ssize_t collected, + Py_ssize_t total) +{ + int base = gcstate->young.threshold; + if (base <= 0) { + return; + } + Py_ssize_t max_threshold = total / GC_THRESHOLD_MAX_DIVISOR; + if (max_threshold > INT_MAX) { + max_threshold = INT_MAX; + } + if (max_threshold < base) { + // For small heaps the heap-scaled max would be below the + // user-configured base; fall back to base in that case. + max_threshold = base; + } + // Scale total/collected down if needed to keep the multiply below + // from overflowing. Only the ratio matters here, not the scale. + Py_ssize_t r_total = total; + Py_ssize_t r_collected = collected; + while (r_total > ((Py_ssize_t)1 << 30)) { + r_total >>= 1; + r_collected >>= 1; + } + Py_ssize_t denom = r_total + (Py_ssize_t)GC_THRESHOLD_DECAY_K * r_collected; + Py_ssize_t target = denom > 0 + ? base + (max_threshold - base) * r_total / denom + : max_threshold; + int target_i = target > INT_MAX ? INT_MAX : (int)target; + int adaptive = gcstate->adaptive_threshold; + if (adaptive < base) { + // User changed the base via gc.set_threshold; resync. + adaptive = base; + } + if (target_i >= adaptive) { + // Raising the threshold: cautious 1/4 step. + adaptive = (int)(((long long)adaptive * 3 + (long long)target_i) / 4); + } + else { + // Lowering the threshold: aggressive 3/4 step. 
+ adaptive = (int)(((long long)adaptive + (long long)target_i * 3) / 4); + } + if (adaptive < base) { + adaptive = base; + } + else if (adaptive > max_threshold) { + adaptive = (int)max_threshold; + } + gcstate->adaptive_threshold = adaptive; +#if 0 + fprintf(stderr, "gc adapt collected %zd long_lived %zd max %zd target %zd adaptive %d\n", + collected, total, max_threshold, target, adaptive); +#endif +} + static void gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, int generation) { @@ -2275,7 +2178,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, } state->gcstate->young.count = 0; - state->gcstate->deferred_count = 0; for (int i = 1; i <= generation; ++i) { state->gcstate->old[i-1].count = 0; } @@ -2379,10 +2281,14 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // to be freed. delete_garbage(state); - // Store the current memory usage, can be smaller now if breaking cycles - // freed some memory. - Py_ssize_t last_mem = get_process_mem_usage(); - _Py_atomic_store_ssize_relaxed(&state->gcstate->last_mem, last_mem); + // Only update the adaptive threshold for collections triggered by + // hitting the threshold itself. Manual gc.collect() calls and + // shutdown collections are not representative of the steady-state + // trash ratio and would skew the adaptation. + if (state->reason == _Py_GC_REASON_HEAP) { + update_adaptive_threshold(state->gcstate, state->collected, + state->long_lived_total); + } // Append objects with legacy finalizers to the "gc.garbage" list. handle_legacy_finalizers(state); @@ -2423,7 +2329,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) return 0; } - if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) { + if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(tstate)) { // Don't collect if the threshold is not exceeded. _Py_atomic_store_int(&gcstate->collecting, 0); return 0; From b1dd2f83d6d9a457a8b5083d0c88506bb05e4941 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 4 May 2026 15:54:17 -0700 Subject: [PATCH 2/4] More robust threshold calculations. Remove smoothing, set adaptive threshold directly. --- Python/gc_free_threading.c | 154 +++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 74 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 1a380d2f221309..1dd3ea707646fa 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -18,21 +18,37 @@ #include "pydtrace.h" // Upper bound on the adaptive threshold, expressed as long_lived_total / N -// (where long_lived_total is the count of objects in the mimalloc GC heap). -// Scaling with the heap size keeps the amortized GC cost roughly linear in -// total allocations: when the heap is large we can afford to wait longer -// between passes, since each pass costs O(long_lived_total) for the -// mark-alive walk. At divisor 2, no more than one GC pass fires per heap -// doubling in the no-trash limit. +// (where long_lived_total is the count of *surviving* objects in the +// mimalloc GC heap after the most recent pass -- it is decremented as +// unreachable objects are identified). Scaling with the survivor count +// keeps the amortized GC cost roughly linear in total allocations: when +// the live heap is large we can afford to wait longer between passes, +// since each pass costs O(long_lived_total) for the mark-alive walk. 
+#ifndef GC_THRESHOLD_MAX_DIVISOR #define GC_THRESHOLD_MAX_DIVISOR 2 +#endif -// Decay constant for mapping the trash ratio (collected / long_lived_total) -// to a target threshold via 1 / (1 + K * ratio). With K=8: ratio=0.05 maps -// to ~71% of the max range, ratio=0.25 to ~33%, ratio=0.5 to ~20%, -// ratio=1.0 to ~11%. Higher K decays faster. The 1/4-up / 3/4-down step -// applied later does most of the noise filtering, so the exact shape here -// matters less than the monotonicity. +// Decay constant for mapping the trash ratio collected/long_lived_total +// (i.e. trash collected per surviving live object, equivalently C/(N-C) +// in pre-collection terms -- unbounded above) to a target threshold via +// 1 / (1 + K * ratio). With K=8, expressing the input as the fraction +// of pre-collection heap that was trash: 5% trash maps to ~70% of the +// [min, max] range, 20% to ~33%, 50% to ~11%, 75% to ~4%, 90% to +// ~1.4%. Higher K decays faster. The lower endpoint of the range is +// base (so the user's gc.set_threshold value is a hard floor); see +// GC_THRESHOLD_MIN_DIVISOR if you want to change that. +#ifndef GC_THRESHOLD_DECAY_K #define GC_THRESHOLD_DECAY_K 8 +#endif + +// Lower asymptote of the adaptive curve, expressed as base / N. N=1 +// makes the user's threshold a hard floor: the adaptive system +// never collects more often than the user asked via gc.set_threshold. +// Larger N treats base as a pivot, allowing heavy-trash workloads to +// collect more frequently than requested. +#ifndef GC_THRESHOLD_MIN_DIVISOR +#define GC_THRESHOLD_MIN_DIVISOR 1 +#endif // enable the "mark alive" pass of GC #define GC_ENABLE_MARK_ALIVE 1 @@ -2085,86 +2101,76 @@ record_deallocation(PyThreadState *tstate) } // Update the adaptive threshold for the next collection based on how -// much trash this pass found relative to the cost of the pass. The -// GC cost is dominated by the mark-alive walk, which is O(objects in -// the mimalloc GC heap) -- that's exactly what long_lived_total -// counts (including untracked and frozen objects in the heap). So -// the productive ratio is collected / long_lived_total: the fraction -// of GC work that actually freed memory. A high ratio means we -// should collect sooner; a low ratio means GC work was largely wasted -// and we can afford to wait longer. We map the ratio through a -// hyperbolic decay to a target in [base, max_threshold]: -// target = base + (max - base) * total / (total + K * collected) -// where max_threshold scales with long_lived_total so that amortized -// GC cost stays linear in total allocations on large heaps. -// -// We adapt the threshold asymmetrically: slowly when raising it and -// quickly when lowering it. The two directions have very different -// failure modes -- raising too aggressively risks heap blowup (and -// possibly OOM in memory-constrained environments like containers), -// while lowering too slowly only costs a few extra GC passes. So we -// err on the side of more frequent collection. When trash appears, -// we snap toward the new (lower) target in a single big step; when -// trash disappears, we creep up gradually so that one fortunate pass -// doesn't push us into a long deferral. -// -// Both updates are weighted moves toward the target rather than -// direct assignments, to avoid "hunting" -- bouncing around due to -// pass-to-pass noise. Up: 1/4 step. Down: 3/4 step. +// much trash this pass found relative to the cost of the pass. 
static void -update_adaptive_threshold(GCState *gcstate, Py_ssize_t collected, - Py_ssize_t total) -{ +update_adaptive_threshold(GCState *gcstate, long long collected, + long long live) +{ + // The GC cost is dominated by the mark-alive walk, which is O(objects in + // the mimalloc GC heap) -- that's exactly what long_lived_total counts + // (including untracked and frozen objects in the heap). By the time we + // are called it has already been decremented for the objects this pass + // identified as unreachable, so it is the survivor count L (= N - C in + // pre-collection terms). The productive ratio is collected/live = C/L, + // i.e. trash freed per surviving live object; equivalently C/(N-C). This + // is unbounded above: as a pass approaches collecting everything, L + // shrinks toward zero and the ratio grows without bound, which is what we + // want -- a 99%-trash pass should drive the threshold to its floor. A + // high ratio means we should collect sooner; a low ratio means GC work + // was largely wasted and we can afford to wait longer. We map the ratio + // through a hyperbolic decay to a target in [min, max_threshold]: target + // = min + (max - min) * live / (live + K * collected) where max_threshold + // scales with long_lived_total so that amortized GC cost stays linear + // in total allocations on large heaps, and min_threshold = base / + // GC_THRESHOLD_MIN_DIVISOR acts as the curve's lower asymptote and hard + // floor. The default MIN_DIVISOR=1 makes the user's gc.set_threshold + // value a true minimum interval between collections. int base = gcstate->young.threshold; if (base <= 0) { return; } - Py_ssize_t max_threshold = total / GC_THRESHOLD_MAX_DIVISOR; + int min_threshold = base / GC_THRESHOLD_MIN_DIVISOR; + if (min_threshold < 1) { + min_threshold = 1; + } + if (collected < 0) { + collected = 0; + } + if (live < 0) { + live = 0; + } + long long max_threshold = live / GC_THRESHOLD_MAX_DIVISOR; if (max_threshold > INT_MAX) { max_threshold = INT_MAX; } if (max_threshold < base) { - // For small heaps the heap-scaled max would be below the - // user-configured base; fall back to base in that case. max_threshold = base; } - // Scale total/collected down if needed to keep the multiply below + // Scale live/collected down if needed to keep the multiply below // from overflowing. Only the ratio matters here, not the scale. - Py_ssize_t r_total = total; - Py_ssize_t r_collected = collected; - while (r_total > ((Py_ssize_t)1 << 30)) { - r_total >>= 1; - r_collected >>= 1; - } - Py_ssize_t denom = r_total + (Py_ssize_t)GC_THRESHOLD_DECAY_K * r_collected; - Py_ssize_t target = denom > 0 - ? base + (max_threshold - base) * r_total / denom + // Cap at 2^30 so that K*collected and (max-min)*live both fit + // comfortably in long long. + while (live > (1LL << 30)) { + live >>= 1; + collected >>= 1; + } + long long denom = live + GC_THRESHOLD_DECAY_K * collected; + long long target = denom > 0 + ? min_threshold + (max_threshold - min_threshold) * live / denom : max_threshold; - int target_i = target > INT_MAX ? INT_MAX : (int)target; - int adaptive = gcstate->adaptive_threshold; - if (adaptive < base) { - // User changed the base via gc.set_threshold; resync. - adaptive = base; - } - if (target_i >= adaptive) { - // Raising the threshold: cautious 1/4 step. - adaptive = (int)(((long long)adaptive * 3 + (long long)target_i) / 4); - } - else { - // Lowering the threshold: aggressive 3/4 step. 
- adaptive = (int)(((long long)adaptive + (long long)target_i * 3) / 4); - } - if (adaptive < base) { - adaptive = base; + int adaptive = target > INT_MAX ? INT_MAX : (int)target; + if (adaptive < min_threshold) { + adaptive = min_threshold; } else if (adaptive > max_threshold) { adaptive = (int)max_threshold; } + // The new threshold is set directly to the computed target -- no + // smoothing. Software workloads can change abruptly (a program may go + // from zero cyclic trash to millions/sec and back within seconds), and in + // that regime the most recent pass is a better predictor of the next pass + // than a moving average. gcstate->adaptive_threshold = adaptive; -#if 0 - fprintf(stderr, "gc adapt collected %zd long_lived %zd max %zd target %zd adaptive %d\n", - collected, total, max_threshold, target, adaptive); -#endif } static void From 6a12aef2fdee16aefa77c309e2d9f0e98685edde Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 5 May 2026 03:34:17 -0700 Subject: [PATCH 3/4] Remove unneeded 'mutex' member. --- Include/internal/pycore_gc.h | 1 + Include/internal/pycore_interp_structs.h | 3 -- Python/gc_free_threading.c | 41 ++++++++++++++++++++++++ Python/sysmodule.c | 29 +++++++++++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index bfe52f42f1141c..77745266ebee75 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -341,6 +341,7 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar #ifdef Py_GIL_DISABLED extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); +extern Py_ssize_t _PyGC_GetMimallocAllocatedBytes(PyInterpreterState *interp); #endif #ifdef __cplusplus diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index dd37463a03818a..445399859d2dba 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -268,9 +268,6 @@ struct _gc_runtime_state { Adjusted after each collection based on the fraction of objects found to be trash. */ int adaptive_threshold; - - /* Mutex held for gc_should_collect_mem_usage(). */ - PyMutex mutex; #else PyGC_Head *generation0; #endif diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 1dd3ea707646fa..80d923d0bcc468 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -463,6 +463,47 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, return err; } +// Visitor for _PyGC_GetMimallocAllocatedBytes(): called once per heap area +// when visit_blocks=false. Sums area->used * area->block_size. +static bool +mimalloc_used_area_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *arg) +{ + if (block == NULL) { + *(Py_ssize_t *)arg += (Py_ssize_t)(area->used * area->block_size); + } + return true; +} + +// Return the total bytes in use across all mimalloc heaps for all threads in +// the interpreter, plus the per-interp abandoned pool. 
+Py_ssize_t +_PyGC_GetMimallocAllocatedBytes(PyInterpreterState *interp) +{ + Py_ssize_t total = 0; + _PyEval_StopTheWorld(interp); + HEAD_LOCK(&_PyRuntime); + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) { + struct _mimalloc_thread_state *m = + &((_PyThreadStateImpl *)p)->mimalloc; + if (!_Py_atomic_load_int(&m->initialized)) { + continue; + } + for (int h = 0; h < _Py_MIMALLOC_HEAP_COUNT; h++) { + mi_heap_visit_blocks(&m->heaps[h], false, + mimalloc_used_area_visitor, &total); + } + } + mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool; + for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) { + _mi_abandoned_pool_visit_blocks(pool, tag, false, + mimalloc_used_area_visitor, &total); + } + HEAD_UNLOCK(&_PyRuntime); + _PyEval_StartTheWorld(interp); + return total; +} + static inline void gc_visit_stackref(_PyStackRef stackref) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c6447d03369a94..2ced66c85a5a0c 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -19,6 +19,7 @@ Data members: #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_SetAsyncGenFinalizer() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_gc.h" // _PyGC_GetMimallocAllocatedBytes() #include "pycore_import.h" // _PyImport_SetDLOpenFlags() #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() #include "pycore_interpframe.h" // _PyFrame_GetFirstComplete() @@ -2060,6 +2061,32 @@ sys_getallocatedblocks_impl(PyObject *module) return _Py_GetGlobalAllocatedBlocks(); } +PyDoc_STRVAR(sys__get_mimalloc_allocated_bytes__doc__, +"_get_mimalloc_allocated_bytes($module, /)\n" +"--\n" +"\n" +"Return total bytes allocated across all mimalloc heaps in this interpreter.\n" +"\n" +"Free-threaded build only. Stops the world while reading per-thread heap\n" +"structures. Intended for benchmarking: the OS RSS does not reliably reflect\n" +"Python's live memory because mimalloc retains freed pages.\n" +"Raises NotImplementedError on the GIL-enabled build."); + +static PyObject * +sys__get_mimalloc_allocated_bytes(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + Py_ssize_t total = _PyGC_GetMimallocAllocatedBytes(interp); + return PyLong_FromSsize_t(total); +#else + PyErr_SetString(PyExc_NotImplementedError, + "sys._get_mimalloc_allocated_bytes() is only available " + "on the free-threaded build"); + return NULL; +#endif +} + /*[clinic input] sys.getunicodeinternedsize -> Py_ssize_t @@ -2927,6 +2954,8 @@ static PyMethodDef sys_methods[] = { SYS_GETDEFAULTENCODING_METHODDEF SYS_GETDLOPENFLAGS_METHODDEF SYS_GETALLOCATEDBLOCKS_METHODDEF + {"_get_mimalloc_allocated_bytes", sys__get_mimalloc_allocated_bytes, + METH_NOARGS, sys__get_mimalloc_allocated_bytes__doc__}, SYS_GETUNICODEINTERNEDSIZE_METHODDEF SYS_GETFILESYSTEMENCODING_METHODDEF SYS_GETFILESYSTEMENCODEERRORS_METHODDEF From 67a2a6625c9d9f23aa29aafbcd2d0129cfdfaa8a Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 5 May 2026 04:01:32 -0700 Subject: [PATCH 4/4] Add blurb, update docs, remove debug func. Remove the sys._get_mimalloc_allocated_bytes() function. 
--- Doc/library/gc.rst | 10 +- Include/internal/pycore_gc.h | 1 - InternalDocs/garbage_collector.md | 118 ++++++++++++++++++ ...-05-05-03-40-24.gh-issue-148937.2EvYx-.rst | 3 + Python/gc_free_threading.c | 69 +--------- Python/sysmodule.c | 29 ----- 6 files changed, 130 insertions(+), 100 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-05-03-40-24.gh-issue-148937.2EvYx-.rst diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 701af579453ce3..444e7df028c183 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -133,10 +133,12 @@ The :mod:`!gc` module provides the following functions: With the third generation, things are a bit more complicated, see `Collecting the oldest generation `_ for more information. - In the free-threaded build, the increase in process memory usage is also - checked before running the collector. If the memory usage has not increased - by 10% since the last collection and the net number of object allocations - has not exceeded 40 times *threshold0*, the collection is not run. + In the free-threaded build, the effective collection threshold is adapted + based on how much cyclic trash the last collection found. If few trash + cycles were found, the threshold is adjusted higher, up to half the count + of live objects. If many were found, the threshold is adjusted lower, down + to a minimum of *threshold0*. Setting *threshold1* to zero disables this + adaptation and causes *threshold0* to be used directly. See `Garbage collector design `_ for more information. diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 77745266ebee75..bfe52f42f1141c 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -341,7 +341,6 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar #ifdef Py_GIL_DISABLED extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); -extern Py_ssize_t _PyGC_GetMimallocAllocatedBytes(PyInterpreterState *interp); #endif #ifdef __cplusplus diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 0ef45ff8e02bc5..f85113f4cd72db 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -458,6 +458,124 @@ in the total number of objects (the effect of which can be summarized thusly: grows, but we do fewer and fewer of them"). +Adaptive collection threshold (free-threaded build) +=================================================== + +> [!NOTE] +> This section applies only to the free-threaded build. The default +> (GIL) build uses the generational thresholds described above. + +The free-threaded GC is non-generational: every collection scans the entire +heap. It therefore needs a different mechanism than `threshold0` / +`threshold1` to decide when to run. Instead, it maintains an *adaptive* +trigger that scales with the size of the live heap and adjusts itself based +on how much trash recent collections actually found. The logic lives in +`update_adaptive_threshold()` in `Python/gc_free_threading.c`, which is +called after each collection that fired because the threshold was reached +(`reason == _Py_GC_REASON_HEAP`). Manual `gc.collect()` calls and shutdown +collections do not update the adaptive state — they aren't representative of +the steady-state trash rate. + +Every allocation increments `young.count`. A collection is considered when +`count` exceeds `gcstate->adaptive_threshold` (subject to the quadratic +guard below). 
The job of `update_adaptive_threshold()` is to choose a good +value for `adaptive_threshold` for the *next* pass. + +The cost model +-------------- + +A free-threaded GC pass is dominated by the mark-alive walk over the +mimalloc GC heap, whose cost is roughly `O(L)` where `L` is the count of +*surviving* live objects (this is what `long_lived_total` records — by the +time `update_adaptive_threshold()` runs it has already been decremented for +the unreachable objects identified this pass). If `T` is the number of +allocations between passes, the amortized GC cost per allocation is +proportional to `L / T`. To keep amortized cost roughly linear in total +allocations as the program grows, `T` should scale with `L`. This gives an +upper bound: + + T_max = L / GC_THRESHOLD_MAX_DIVISOR + +`T_max` alone is wrong, however: a program churning short-lived cycles +wants GC to run often, not just once per heap doubling. We also have a +user-configured pivot — the value of `gc.set_threshold()`, called `base` +below — and a derived lower bound: + + T_min = base / GC_THRESHOLD_MIN_DIVISOR + +The adaptive threshold lives in `[T_min, T_max]`, and `update_adaptive_threshold()` +chooses where in that range to sit based on recent trash productivity. + +Trash ratio and hyperbolic decay +-------------------------------- + +After a threshold-triggered collection we know two numbers: how many +objects the pass collected, `C`, and the survivor count `L` (so the +pre-collection heap size was `N = L + C`). The trash ratio + + r = C / L + +measures trash freed per surviving live object — equivalently, how many +extra walk units the next pass would do as a multiple of the walk units +already paid for in survivors. A high ratio means the pass paid for +itself; a low ratio means the walk was largely wasted. We use `C/L` +rather than `C/N` because (a) `L` is what the *next* pass will walk, not +`N`, and (b) `C/L` is unbounded above (as `C` approaches `N`, `L` shrinks +toward zero and `r` grows without bound), which lets the curve drive the +threshold all the way to its floor in genuinely high-trash regimes. + +We map `r` to a target threshold via a hyperbolic decay: + + target = T_min + (T_max - T_min) / (1 + K * r) + +with `K = GC_THRESHOLD_DECAY_K`. At `r = 0` (no trash) the target equals +`T_max`; as `r` grows the target decays smoothly toward the asymptote +`T_min`. In the implementation this is rearranged to keep the math in +integers: + + target = T_min + (T_max - T_min) * L / (L + K * C) + +`L` and `C` are scaled down (right-shifted) ahead of the multiply if `L` +exceeds 2^30, since only the ratio matters. If a pass somehow collects +everything (`L == 0`), the rearranged form would have a zero denominator; +in that case we fall back to `T_max`. + +The new threshold is set directly to the computed target — there is no +EMA or weighted step. Software workloads can change abruptly (a program +may go from zero cyclic trash to millions per second and back within +seconds), and in that regime the most recent pass is a better predictor +of the next than a long-history average. + +Tunables +-------- + +Three compile-time `#define`s in `Python/gc_free_threading.c` control the +shape of the curve. All three are `#ifndef`-guarded so a build can +override them with `-DGC_THRESHOLD_*=value`: + +| Macro | Default | Meaning | +|---|---|---| +| `GC_THRESHOLD_MAX_DIVISOR` | 2 | `T_max = L / N`. Larger N collects less often on big heaps. | +| `GC_THRESHOLD_DECAY_K` | 8 | Decay rate of the hyperbolic curve. 
Larger K reaches `T_min` faster. | +| `GC_THRESHOLD_MIN_DIVISOR` | 1 | `T_min = base / N`. N=1 makes the user's `gc.set_threshold` value a hard minimum interval between collections. | + +If `T_max` (i.e. `L / GC_THRESHOLD_MAX_DIVISOR`) falls below `base`, it is +clamped up to `base`: on a small heap the curve runs over `[T_min, base]` +rather than over `[T_min, L/N]` — which would otherwise collapse below +`base` for tiny heaps. + +Quadratic-behavior guard +------------------------ + +Even if `count` exceeds `adaptive_threshold`, GC will not actually fire +unless `count >= long_lived_total / 4` (see `gc_should_collect()`). This +pre-existing guard prevents pathological behavior on heaps that are +growing in pure-non-trash regions: it gives `T` a second floor proportional +to the live heap so that no matter how aggressively the adaptive math +pushes the threshold down, we never collect so often that GC cost +dominates allocation cost. + + Optimization: excluding reachable objects ========================================= diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-05-03-40-24.gh-issue-148937.2EvYx-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-05-03-40-24.gh-issue-148937.2EvYx-.rst new file mode 100644 index 00000000000000..e618641b6fef71 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-05-03-40-24.gh-issue-148937.2EvYx-.rst @@ -0,0 +1,3 @@ +For the free-threaded build, the cyclic GC now adapts the collection +threshold based on how successful the last automatic collection was in +finding trash. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 80d923d0bcc468..0003d8818621c1 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -463,47 +463,6 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, return err; } -// Visitor for _PyGC_GetMimallocAllocatedBytes(): called once per heap area -// when visit_blocks=false. Sums area->used * area->block_size. -static bool -mimalloc_used_area_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, - void *block, size_t block_size, void *arg) -{ - if (block == NULL) { - *(Py_ssize_t *)arg += (Py_ssize_t)(area->used * area->block_size); - } - return true; -} - -// Return the total bytes in use across all mimalloc heaps for all threads in -// the interpreter, plus the per-interp abandoned pool. -Py_ssize_t -_PyGC_GetMimallocAllocatedBytes(PyInterpreterState *interp) -{ - Py_ssize_t total = 0; - _PyEval_StopTheWorld(interp); - HEAD_LOCK(&_PyRuntime); - _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) { - struct _mimalloc_thread_state *m = - &((_PyThreadStateImpl *)p)->mimalloc; - if (!_Py_atomic_load_int(&m->initialized)) { - continue; - } - for (int h = 0; h < _Py_MIMALLOC_HEAP_COUNT; h++) { - mi_heap_visit_blocks(&m->heaps[h], false, - mimalloc_used_area_visitor, &total); - } - } - mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool; - for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) { - _mi_abandoned_pool_visit_blocks(pool, tag, false, - mimalloc_used_area_visitor, &total); - } - HEAD_UNLOCK(&_PyRuntime); - _PyEval_StartTheWorld(interp); - return total; -} - static inline void gc_visit_stackref(_PyStackRef stackref) { @@ -2142,30 +2101,13 @@ record_deallocation(PyThreadState *tstate) } // Update the adaptive threshold for the next collection based on how -// much trash this pass found relative to the cost of the pass. +// much trash this pass found relative to the cost of the pass. 
See +// InternalDocs/garbage_collector.md for additional explaination of this +// calculation. static void update_adaptive_threshold(GCState *gcstate, long long collected, long long live) { - // The GC cost is dominated by the mark-alive walk, which is O(objects in - // the mimalloc GC heap) -- that's exactly what long_lived_total counts - // (including untracked and frozen objects in the heap). By the time we - // are called it has already been decremented for the objects this pass - // identified as unreachable, so it is the survivor count L (= N - C in - // pre-collection terms). The productive ratio is collected/live = C/L, - // i.e. trash freed per surviving live object; equivalently C/(N-C). This - // is unbounded above: as a pass approaches collecting everything, L - // shrinks toward zero and the ratio grows without bound, which is what we - // want -- a 99%-trash pass should drive the threshold to its floor. A - // high ratio means we should collect sooner; a low ratio means GC work - // was largely wasted and we can afford to wait longer. We map the ratio - // through a hyperbolic decay to a target in [min, max_threshold]: target - // = min + (max - min) * live / (live + K * collected) where max_threshold - // scales with long_lived_total so that amortized GC cost stays linear - // in total allocations on large heaps, and min_threshold = base / - // GC_THRESHOLD_MIN_DIVISOR acts as the curve's lower asymptote and hard - // floor. The default MIN_DIVISOR=1 makes the user's gc.set_threshold - // value a true minimum interval between collections. int base = gcstate->young.threshold; if (base <= 0) { return; @@ -2206,11 +2148,6 @@ update_adaptive_threshold(GCState *gcstate, long long collected, else if (adaptive > max_threshold) { adaptive = (int)max_threshold; } - // The new threshold is set directly to the computed target -- no - // smoothing. Software workloads can change abruptly (a program may go - // from zero cyclic trash to millions/sec and back within seconds), and in - // that regime the most recent pass is a better predictor of the next pass - // than a moving average. gcstate->adaptive_threshold = adaptive; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 2ced66c85a5a0c..c6447d03369a94 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -19,7 +19,6 @@ Data members: #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_SetAsyncGenFinalizer() #include "pycore_frame.h" // _PyInterpreterFrame -#include "pycore_gc.h" // _PyGC_GetMimallocAllocatedBytes() #include "pycore_import.h" // _PyImport_SetDLOpenFlags() #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() #include "pycore_interpframe.h" // _PyFrame_GetFirstComplete() @@ -2061,32 +2060,6 @@ sys_getallocatedblocks_impl(PyObject *module) return _Py_GetGlobalAllocatedBlocks(); } -PyDoc_STRVAR(sys__get_mimalloc_allocated_bytes__doc__, -"_get_mimalloc_allocated_bytes($module, /)\n" -"--\n" -"\n" -"Return total bytes allocated across all mimalloc heaps in this interpreter.\n" -"\n" -"Free-threaded build only. Stops the world while reading per-thread heap\n" -"structures. 
Intended for benchmarking: the OS RSS does not reliably reflect\n" -"Python's live memory because mimalloc retains freed pages.\n" -"Raises NotImplementedError on the GIL-enabled build."); - -static PyObject * -sys__get_mimalloc_allocated_bytes(PyObject *module, PyObject *Py_UNUSED(ignored)) -{ -#ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - Py_ssize_t total = _PyGC_GetMimallocAllocatedBytes(interp); - return PyLong_FromSsize_t(total); -#else - PyErr_SetString(PyExc_NotImplementedError, - "sys._get_mimalloc_allocated_bytes() is only available " - "on the free-threaded build"); - return NULL; -#endif -} - /*[clinic input] sys.getunicodeinternedsize -> Py_ssize_t @@ -2954,8 +2927,6 @@ static PyMethodDef sys_methods[] = { SYS_GETDEFAULTENCODING_METHODDEF SYS_GETDLOPENFLAGS_METHODDEF SYS_GETALLOCATEDBLOCKS_METHODDEF - {"_get_mimalloc_allocated_bytes", sys__get_mimalloc_allocated_bytes, - METH_NOARGS, sys__get_mimalloc_allocated_bytes__doc__}, SYS_GETUNICODEINTERNEDSIZE_METHODDEF SYS_GETFILESYSTEMENCODING_METHODDEF SYS_GETFILESYSTEMENCODEERRORS_METHODDEF
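
The decay curve introduced by this series is easy to sanity-check outside the interpreter. Below is a minimal standalone sketch of the target computation performed by update_adaptive_threshold(): it reuses the patch's default constants and the rearranged integer form of the curve, target = T_min + (T_max - T_min) * L / (L + K * C), but the helper name adaptive_target, the driver in main() and the sample (collected, live) pairs are illustrative assumptions, not part of the patch.

/*
 * Standalone sketch of the adaptive-threshold target computation.
 * Mirrors the arithmetic of update_adaptive_threshold() with the default
 * tunables; adaptive_target() and the sample inputs are hypothetical.
 */
#include <stdio.h>
#include <limits.h>

#define GC_THRESHOLD_MAX_DIVISOR 2
#define GC_THRESHOLD_DECAY_K     8
#define GC_THRESHOLD_MIN_DIVISOR 1

/* base: gc.set_threshold() value; collected: trash found by the pass (C);
   live: survivors after the pass (L).  Returns the allocation count that
   should trigger the next collection. */
static long long
adaptive_target(long long base, long long collected, long long live)
{
    long long min_t = base / GC_THRESHOLD_MIN_DIVISOR;
    if (min_t < 1) {
        min_t = 1;
    }
    long long max_t = live / GC_THRESHOLD_MAX_DIVISOR;
    if (max_t > INT_MAX) {
        max_t = INT_MAX;
    }
    if (max_t < base) {
        max_t = base;   /* small heap: curve runs over [min_t, base] */
    }
    /* Scale down so the multiply cannot overflow; only the ratio matters. */
    while (live > (1LL << 30)) {
        live >>= 1;
        collected >>= 1;
    }
    long long denom = live + GC_THRESHOLD_DECAY_K * collected;
    return denom > 0
        ? min_t + (max_t - min_t) * live / denom
        : max_t;
}

int
main(void)
{
    long long base = 2000;   /* default young.threshold */
    /* (collected, live): no trash, 5% trash, 50% trash, 90% trash
       (percentages relative to the pre-collection heap N = L + C). */
    long long cases[4][2] = {
        {0, 1000000}, {50000, 950000}, {500000, 500000}, {900000, 100000},
    };
    for (int i = 0; i < 4; i++) {
        printf("collected=%lld live=%lld -> next threshold %lld\n",
               cases[i][0], cases[i][1],
               adaptive_target(base, cases[i][0], cases[i][1]));
    }
    return 0;
}

With base 2000 the four cases print 500000, 334851, 29555 and 2657, i.e. roughly 100%, 70%, 11% and 1.4% of the [min, max] range, matching the figures quoted in the GC_THRESHOLD_DECAY_K comment above.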