From 2dbd3cc85e2a90cc107f700abb49e221334ddd7e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 13 Apr 2026 20:10:20 +0200 Subject: [PATCH 01/28] f --- include/jemalloc/internal/cache_bin.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index bea3a2fc04..63a3bbd265 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -403,6 +403,11 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; + /* Verify we're not returning NULL from a non-empty bin. */ + if (unlikely(*(void * volatile *)&ret == NULL)) { + safety_check_fail( + "NULL pointer returned from tcache bin alloc\n"); + } return ret; } if (!adjust_low_water) { @@ -418,6 +423,11 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { bin->stack_head = new_head; bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head; *success = true; + /* Verify we're not returning NULL from a non-empty bin. */ + if (unlikely(*(void * volatile *)&ret == NULL)) { + safety_check_fail( + "NULL pointer returned from tcache bin alloc\n"); + } return ret; } *success = false; @@ -508,6 +518,12 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { cache_bin_assert_earlier(bin, bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); + /* Verify the store survived optimization — volatile forces the read. 
*/ + if (unlikely(*(void * volatile *)bin->stack_head == NULL)) { + safety_check_fail( + "NULL detected in tcache bin after store of %p\n", ptr); + } + return true; } From 9d10dd768a899e749ee636a3ec023413c506b3e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Apr 2026 22:08:06 +0200 Subject: [PATCH 02/28] f --- include/jemalloc/internal/bit_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 88c7942e95..9c92df1645 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -19,19 +19,19 @@ */ static inline unsigned ffs_llu(unsigned long long x) { - util_assume(x != 0); + assert(x != 0); return JEMALLOC_INTERNAL_FFSLL(x) - 1; } static inline unsigned ffs_lu(unsigned long x) { - util_assume(x != 0); + assert(x != 0); return JEMALLOC_INTERNAL_FFSL(x) - 1; } static inline unsigned ffs_u(unsigned x) { - util_assume(x != 0); + assert(x != 0); return JEMALLOC_INTERNAL_FFS(x) - 1; } From 8e53f62bf03faa097e7a8ac9181d42924e29d639 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Apr 2026 22:08:22 +0200 Subject: [PATCH 03/28] f --- include/jemalloc/internal/cache_bin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 63a3bbd265..2782e3d635 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -392,7 +392,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. 
*/ - void *ret = *bin->stack_head; + void *ret = *(void * volatile *)bin->stack_head; cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head; void **new_head = bin->stack_head + 1; From c0c674b97df19f7958273055b707592809ad9472 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Apr 2026 22:09:01 +0200 Subject: [PATCH 04/28] f --- include/jemalloc/internal/bitmap.h | 29 +++++++++++++++++++++++++++++ include/jemalloc/internal/edata.h | 3 +++ 2 files changed, 32 insertions(+) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index e0f596fba9..2d2c6833bb 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -224,6 +224,12 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; + /* Verify the store was not eliminated by DSE. */ + if (unlikely(*(volatile bitmap_t *)gp != g)) { + safety_check_fail( + "bitmap_set: leaf store eliminated at group %zu, " + "expected %lx\n", goff, (unsigned long)g); + } assert(bitmap_get(bitmap, binfo, bit)); #ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ @@ -237,6 +243,13 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; + /* Verify tree store was not eliminated. 
*/ + if (unlikely(*(volatile bitmap_t *)gp != g)) { + safety_check_fail( + "bitmap_set: tree store eliminated at " + "level %u group %zu, expected %lx\n", + i, goff, (unsigned long)g); + } if (g != 0) { break; } @@ -320,10 +333,21 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; + if (unlikely(g == 0)) { + safety_check_fail( + "bitmap_sfu: tree root is zero (bitmap full), " + "nlevels %u\n", binfo->nlevels); + } bit = ffs_lu(g); while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; + if (unlikely(g == 0)) { + safety_check_fail( + "bitmap_sfu: tree level %u group is zero " + "at offset %zu\n", i, + binfo->levels[i].group_offset + bit); + } bit = (bit << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); } #else @@ -331,6 +355,11 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { g = bitmap[0]; while (g == 0) { i++; + if (unlikely(i >= BITMAP_BITS2GROUPS(binfo->nbits))) { + safety_check_fail( + "bitmap_sfu: all %u groups are zero " + "(bitmap full)\n", i); + } g = bitmap[i]; } bit = (i << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 06b6c5454f..e23ea2c279 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -14,6 +14,7 @@ #include "jemalloc/internal/slab_data.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/typed_list.h" +#include /* * sizeof(edata_t) is 128 bytes on 64-bit architectures. 
Ensure the alignment @@ -572,6 +573,8 @@ edata_nfree_inc(edata_t *edata) { static inline void edata_nfree_dec(edata_t *edata) { assert(edata_slab_get(edata)); + if (unlikely(edata_nfree_get(edata) == 0)) + __builtin_trap(); edata->e_bits -= ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT); } From 96872891bde6a7cd38dce126be98adfa232f0415 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 00:39:10 +0200 Subject: [PATCH 05/28] f --- include/jemalloc/internal/bitmap.h | 1 + src/arena.c | 74 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 2d2c6833bb..1550ac5677 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/safety_check.h" typedef unsigned long bitmap_t; #define LG_SIZEOF_BITMAP LG_SIZEOF_LONG diff --git a/src/arena.c b/src/arena.c index d7c8cd1fce..c4ae3b657f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -863,6 +863,19 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); + /* + * Verify bitmap_init was not optimized away by LTO. + * An "empty" (all-free) bitmap should have the first group + * set to all-ones (0xff...ff). If it's zero, the memset(0xff) + * inside bitmap_init was eliminated. + */ + if (unlikely(*(volatile bitmap_t *)&slab_data->bitmap[0] == 0)) { + safety_check_fail( + "arena_slab_alloc: bitmap_init was eliminated by " + "the compiler, bitmap[0]=%lx\n", + (unsigned long)slab_data->bitmap[0]); + } + return slab; } @@ -914,6 +927,36 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Try batch-fill from slabcur first. 
*/ edata_t *slabcur = bin->slabcur; if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { + /* + * Debug: verify nfree matches actual bitmap + * free count for the crashing size class. + */ + { + slab_data_t *sd = edata_slab_data_get(slabcur); + unsigned actual_free = 0; + unsigned ngroups = +#ifdef BITMAP_USE_TREE + bin_info->bitmap_info.levels[ + bin_info->bitmap_info.nlevels] + .group_offset; +#else + bin_info->bitmap_info.ngroups; +#endif + for (unsigned gi = 0; gi < ngroups; gi++) { + actual_free += popcount_lu( + sd->bitmap[gi]); + } + if (unlikely(actual_free + != edata_nfree_get(slabcur))) { + safety_check_fail( + "arena_cache_bin_fill_small: " + "nfree/bitmap mismatch for " + "binind %u: nfree=%u actual=%u" + "\n", binind, + edata_nfree_get(slabcur), + actual_free); + } + } /* * Use up the free slots if the total filled <= nfill_max. * Otherwise, fallback to nfill_min for a more conservative @@ -926,6 +969,37 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bin_slab_reg_alloc_batch( slabcur, bin_info, cnt, &arr->ptr[filled]); + + /* Debug: verify consistency after batch alloc. 
*/ + { + slab_data_t *sd = edata_slab_data_get( + slabcur); + unsigned actual_free = 0; + unsigned ngroups = +#ifdef BITMAP_USE_TREE + bin_info->bitmap_info.levels[ + bin_info->bitmap_info.nlevels] + .group_offset; +#else + bin_info->bitmap_info.ngroups; +#endif + for (unsigned gi = 0; gi < ngroups; gi++) { + actual_free += popcount_lu( + sd->bitmap[gi]); + } + if (unlikely(actual_free + != edata_nfree_get(slabcur))) { + safety_check_fail( + "arena_cache_bin_fill_small: " + "post-batch nfree/bitmap " + "mismatch for binind %u: " + "nfree=%u actual=%u cnt=%u\n", + binind, + edata_nfree_get(slabcur), + actual_free, cnt); + } + } + made_progress = true; filled += cnt; continue; From 7f3e654923c5ca977e1bca91d56a43148d26bb1c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 01:01:29 +0200 Subject: [PATCH 06/28] f --- include/jemalloc/internal/bin_inlines.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h index f429116986..fb56e0edd3 100644 --- a/include/jemalloc/internal/bin_inlines.h +++ b/include/jemalloc/internal/bin_inlines.h @@ -83,6 +83,29 @@ bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); edata_nfree_inc(slab); + /* Debug: verify nfree/bitmap consistency after free. 
*/ + { + unsigned actual_free = 0; + unsigned ngroups = +#ifdef BITMAP_USE_TREE + bin_info->bitmap_info.levels[ + bin_info->bitmap_info.nlevels].group_offset; +#else + bin_info->bitmap_info.ngroups; +#endif + for (unsigned gi = 0; gi < ngroups; gi++) { + actual_free += popcount_lu(slab_data->bitmap[gi]); + } + if (unlikely(actual_free != edata_nfree_get(slab))) { + safety_check_fail( + "bin_dalloc_locked_step: post-free " + "nfree/bitmap mismatch for binind %u " + "regind %zu: nfree=%u actual=%u\n", + binind, regind, + edata_nfree_get(slab), actual_free); + } + } + if (config_stats) { info->ndalloc++; } From 2ba908af8c508d10261391b3672a5e78476665b1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 09:43:46 +0200 Subject: [PATCH 07/28] f --- include/jemalloc/internal/bin_inlines.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h index fb56e0edd3..680f5dccf9 100644 --- a/include/jemalloc/internal/bin_inlines.h +++ b/include/jemalloc/internal/bin_inlines.h @@ -80,7 +80,25 @@ bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, /* Freeing an unallocated pointer can cause assertion failure. */ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + /* Debug: snapshot bitmap group before unset. */ + size_t goff_dbg = regind >> LG_BITMAP_GROUP_NBITS; + bitmap_t before_dbg = *(volatile bitmap_t *)&slab_data->bitmap[goff_dbg]; + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + + /* Debug: verify the bit was actually flipped. 
*/ + bitmap_t after_dbg = *(volatile bitmap_t *)&slab_data->bitmap[goff_dbg]; + bitmap_t expected_bit = ZU(1) << (regind & BITMAP_GROUP_NBITS_MASK); + if (unlikely((before_dbg | expected_bit) != after_dbg)) { + safety_check_fail( + "bitmap_unset lost: binind %u regind %zu " + "goff %zu before %lx after %lx expected_bit %lx\n", + binind, regind, goff_dbg, + (unsigned long)before_dbg, + (unsigned long)after_dbg, + (unsigned long)expected_bit); + } + edata_nfree_inc(slab); /* Debug: verify nfree/bitmap consistency after free. */ From db14f6a38fe4d393a5276ee2e5f6f3efa69825ac Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 12:54:14 +0200 Subject: [PATCH 08/28] f --- include/jemalloc/internal/cache_bin.h | 33 ++++++++++++++++----------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2782e3d635..646f4995ba 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -403,10 +403,19 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; - /* Verify we're not returning NULL from a non-empty bin. */ - if (unlikely(*(void * volatile *)&ret == NULL)) { - safety_check_fail( - "NULL pointer returned from tcache bin alloc\n"); + /* Check for duplicate: ret should not still be in the bin. */ + { + cache_bin_sz_t remain = + cache_bin_ncached_get_internal(bin); + unsigned scan = remain < 200 ? 
remain : 200; + for (unsigned di = 0; di < scan; di++) { + if (unlikely(new_head[di] == ret)) { + safety_check_fail( + "tcache alloc returned duplicate " + "ptr %p (also at pos %u)\n", + ret, di); + } + } } return ret; } @@ -477,14 +486,12 @@ cache_bin_full(cache_bin_t *bin) { */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { - if (!config_debug || opt_debug_double_free_max_scan == 0) { - return false; - } - + /* + * Force-enabled double-free scan to debug LTO miscompilation + * that causes the same pointer to appear in a tcache bin twice. + */ cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); - unsigned max_scan = opt_debug_double_free_max_scan < ncached - ? opt_debug_double_free_max_scan - : ncached; + unsigned max_scan = ncached < 200 ? ncached : 200; void **cur = bin->stack_head; void **limit = cur + max_scan; @@ -492,8 +499,8 @@ cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { if (*cur == ptr) { safety_check_fail( "Invalid deallocation detected: double free of " - "pointer %p\n", - ptr); + "pointer %p (found at position %zu in tcache)\n", + ptr, (size_t)(cur - bin->stack_head)); return true; } } From aaba34b27d514e07d8c3274646dae3403482e254 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 13:19:01 +0200 Subject: [PATCH 09/28] f --- include/jemalloc/internal/cache_bin.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 646f4995ba..9f5c0f541e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -403,20 +403,6 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; - /* Check for duplicate: ret should not still be in the bin. 
*/ - { - cache_bin_sz_t remain = - cache_bin_ncached_get_internal(bin); - unsigned scan = remain < 200 ? remain : 200; - for (unsigned di = 0; di < scan; di++) { - if (unlikely(new_head[di] == ret)) { - safety_check_fail( - "tcache alloc returned duplicate " - "ptr %p (also at pos %u)\n", - ret, di); - } - } - } return ret; } if (!adjust_low_water) { From ce7704e64f61eb2110a3c687de4f36390e18953a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 13:36:33 +0200 Subject: [PATCH 10/28] f --- include/jemalloc/internal/bin_inlines.h | 27 ++++++++++++++++++------- include/jemalloc/internal/div.h | 9 +++++---- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h index 680f5dccf9..14ab1c998f 100644 --- a/include/jemalloc/internal/bin_inlines.h +++ b/include/jemalloc/internal/bin_inlines.h @@ -29,18 +29,31 @@ bin_slab_regind_impl( size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) - % (uintptr_t)bin_infos[binind].reg_size - == 0); + if (unlikely((uintptr_t)ptr < (uintptr_t)edata_addr_get(slab) + || (uintptr_t)ptr >= (uintptr_t)edata_past_get(slab))) { + safety_check_fail( + "bin_slab_regind: ptr %p outside slab [%p, %p)\n", + ptr, edata_addr_get(slab), edata_past_get(slab)); + } diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + if (unlikely(diff % (uintptr_t)bin_infos[binind].reg_size != 0)) { + safety_check_fail( + "bin_slab_regind: ptr %p not aligned to reg_size " + "%zu (diff=%zu)\n", + ptr, bin_infos[binind].reg_size, diff); + } + /* Avoid doing division with a variable divisor. 
*/ regind = div_compute(div_info, diff); - assert(regind < bin_infos[binind].nregs); + + if (unlikely(regind >= bin_infos[binind].nregs)) { + safety_check_fail( + "bin_slab_regind: regind %zu >= nregs %u for " + "binind %u\n", + regind, bin_infos[binind].nregs, binind); + } return regind; } diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h index 56d5f463fa..a99d7f2bc1 100644 --- a/include/jemalloc/internal/div.h +++ b/include/jemalloc/internal/div.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/safety_check.h" /* * This module does the division that computes the index of a region in a slab, @@ -25,7 +26,10 @@ void div_init(div_info_t *div_info, size_t divisor); static inline size_t div_compute(div_info_t *div_info, size_t n) { - assert(n <= (uint32_t)-1); + if (unlikely(n > (uint32_t)-1)) { + safety_check_fail( + "div_compute: n=%zu exceeds uint32 range\n", n); + } /* * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, * the compilers I tried were all smart enough to turn this into the @@ -33,9 +37,6 @@ div_compute(div_info_t *div_info, size_t n) { * mul; mov edx eax; on x86, umull on arm, etc.). 
*/ size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; -#ifdef JEMALLOC_DEBUG - assert(i * div_info->d == n); -#endif return i; } From b46dba9a990658072ca3b7c2e3e17a2e8cec764d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 13:40:24 +0200 Subject: [PATCH 11/28] f --- include/jemalloc/internal/cache_bin.h | 33 ++++++++++++--------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 9f5c0f541e..21806e9859 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -403,6 +403,20 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; + /* Check for duplicate: ret should not still be in the bin. */ + { + cache_bin_sz_t remain = + cache_bin_ncached_get_internal(bin); + unsigned scan = remain < 200 ? remain : 200; + for (unsigned di = 0; di < scan; di++) { + if (unlikely(new_head[di] == ret)) { + safety_check_fail( + "tcache alloc returned duplicate " + "ptr %p (also at pos %u)\n", + ret, di); + } + } + } return ret; } if (!adjust_low_water) { @@ -472,24 +486,7 @@ cache_bin_full(cache_bin_t *bin) { */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { - /* - * Force-enabled double-free scan to debug LTO miscompilation - * that causes the same pointer to appear in a tcache bin twice. - */ - cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); - unsigned max_scan = ncached < 200 ? 
ncached : 200; - - void **cur = bin->stack_head; - void **limit = cur + max_scan; - for (; cur < limit; cur++) { - if (*cur == ptr) { - safety_check_fail( - "Invalid deallocation detected: double free of " - "pointer %p (found at position %zu in tcache)\n", - ptr, (size_t)(cur - bin->stack_head)); - return true; - } - } + /* Disabled for test — checking if alloc-side scan alone prevents crash. */ return false; } From f406318c2e6d4fbf8fc352c6528a48087b3ecdc9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 13:46:34 +0200 Subject: [PATCH 12/28] f --- include/jemalloc/internal/cache_bin.h | 40 +++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 21806e9859..a171801f8d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -403,20 +403,6 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; - /* Check for duplicate: ret should not still be in the bin. */ - { - cache_bin_sz_t remain = - cache_bin_ncached_get_internal(bin); - unsigned scan = remain < 200 ? remain : 200; - for (unsigned di = 0; di < scan; di++) { - if (unlikely(new_head[di] == ret)) { - safety_check_fail( - "tcache alloc returned duplicate " - "ptr %p (also at pos %u)\n", - ret, di); - } - } - } return ret; } if (!adjust_low_water) { @@ -486,7 +472,26 @@ cache_bin_full(cache_bin_t *bin) { */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { - /* Disabled for test — checking if alloc-side scan alone prevents crash. */ + if (!config_debug || opt_debug_double_free_max_scan == 0) { + return false; + } + + cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); + unsigned max_scan = opt_debug_double_free_max_scan < ncached + ? 
opt_debug_double_free_max_scan + : ncached; + + void **cur = bin->stack_head; + void **limit = cur + max_scan; + for (; cur < limit; cur++) { + if (*cur == ptr) { + safety_check_fail( + "Invalid deallocation detected: double free of " + "pointer %p\n", + ptr); + return true; + } + } return false; } @@ -509,9 +514,10 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { (cache_bin_sz_t)(uintptr_t)bin->stack_head); /* Verify the store survived optimization — volatile forces the read. */ - if (unlikely(*(void * volatile *)bin->stack_head == NULL)) { + if (unlikely(*(void * volatile *)bin->stack_head != ptr)) { safety_check_fail( - "NULL detected in tcache bin after store of %p\n", ptr); + "tcache push store lost: wrote %p, read back %p\n", + ptr, *bin->stack_head); } return true; From 430de40aa67a59be365051ba5e8eef83619a358a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 14:20:28 +0200 Subject: [PATCH 13/28] f --- src/tcache.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/tcache.c b/src/tcache.c index 10fa7c2130..f3f8bb0a7c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -664,6 +664,27 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { + /* Debug: scan for duplicates in the tcache bin before flushing. 
*/ + { + cache_bin_sz_t ncached = + cache_bin_ncached_get_local(cache_bin); + void **head = cache_bin->stack_head; + for (cache_bin_sz_t i = 0; i < ncached; i++) { + for (cache_bin_sz_t j = i + 1; j < ncached; j++) { + if (unlikely(head[i] == head[j])) { + safety_check_fail( + "tcache_bin_flush_small: " + "duplicate ptr %p at positions " + "%u and %u (binind %u, " + "ncached %u)\n", + head[i], (unsigned)i, + (unsigned)j, binind, + (unsigned)ncached); + } + } + } + } + tcache_nfill_small_burst_reset(tcache->tcache_slow, binind); tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, /* small */ true); From a672160a227803c16bad74c092043a48cfd13069 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 14:41:32 +0200 Subject: [PATCH 14/28] f --- include/jemalloc/internal/bitmap.h | 16 ++++++++++++---- src/tcache.c | 21 --------------------- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 1550ac5677..ab3767d30c 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -333,7 +333,14 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; + /* + * Use volatile reads to prevent LTO from hoisting/caching bitmap + * loads across iterations when bitmap_sfu is called in a loop + * (e.g. bin_slab_reg_alloc_batch). Without volatile, the compiler + * may reuse a stale bitmap value from before bitmap_set's store, + * causing the same bit to be returned twice. 
+ */ + g = *(volatile bitmap_t *)&bitmap[binfo->levels[i].group_offset]; if (unlikely(g == 0)) { safety_check_fail( "bitmap_sfu: tree root is zero (bitmap full), " @@ -342,7 +349,8 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { bit = ffs_lu(g); while (i > 0) { i--; - g = bitmap[binfo->levels[i].group_offset + bit]; + g = *(volatile bitmap_t *)&bitmap[ + binfo->levels[i].group_offset + bit]; if (unlikely(g == 0)) { safety_check_fail( "bitmap_sfu: tree level %u group is zero " @@ -353,7 +361,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { } #else i = 0; - g = bitmap[0]; + g = *(volatile bitmap_t *)&bitmap[0]; while (g == 0) { i++; if (unlikely(i >= BITMAP_BITS2GROUPS(binfo->nbits))) { @@ -361,7 +369,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { "bitmap_sfu: all %u groups are zero " "(bitmap full)\n", i); } - g = bitmap[i]; + g = *(volatile bitmap_t *)&bitmap[i]; } bit = (i << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); #endif diff --git a/src/tcache.c b/src/tcache.c index f3f8bb0a7c..10fa7c2130 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -664,27 +664,6 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - /* Debug: scan for duplicates in the tcache bin before flushing. 
*/ - { - cache_bin_sz_t ncached = - cache_bin_ncached_get_local(cache_bin); - void **head = cache_bin->stack_head; - for (cache_bin_sz_t i = 0; i < ncached; i++) { - for (cache_bin_sz_t j = i + 1; j < ncached; j++) { - if (unlikely(head[i] == head[j])) { - safety_check_fail( - "tcache_bin_flush_small: " - "duplicate ptr %p at positions " - "%u and %u (binind %u, " - "ncached %u)\n", - head[i], (unsigned)i, - (unsigned)j, binind, - (unsigned)ncached); - } - } - } - } - tcache_nfill_small_burst_reset(tcache->tcache_slow, binind); tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, /* small */ true); From 6ce5f6e7e379be72b018f09ebba5bd335e856631 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 14:52:21 +0200 Subject: [PATCH 15/28] f --- src/arena.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index c4ae3b657f..918addd9e9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -970,7 +970,48 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bin_slab_reg_alloc_batch( slabcur, bin_info, cnt, &arr->ptr[filled]); - /* Debug: verify consistency after batch alloc. */ + /* + * Debug: check for duplicate pointers in the + * batch just returned, and also against + * previously filled entries. + */ + { + for (unsigned di = 0; di < cnt; di++) { + void *p = arr->ptr[filled + di]; + /* Check within this batch. */ + for (unsigned dj = di + 1; + dj < cnt; dj++) { + if (unlikely(p == + arr->ptr[filled + dj])) { + safety_check_fail( + "batch alloc " + "dup: ptr %p " + "at %u and %u " + "(binind %u " + "cnt %u)\n", + p, di, dj, + binind, cnt); + } + } + /* Check against earlier fills. 
*/ + for (unsigned dk = 0; + dk < filled; dk++) { + if (unlikely(p == + arr->ptr[dk])) { + safety_check_fail( + "refill dup: " + "ptr %p at " + "batch %u and " + "prev %u " + "(binind %u)\n", + p, di, dk, + binind); + } + } + } + } + + /* Debug: verify nfree/bitmap consistency. */ { slab_data_t *sd = edata_slab_data_get( slabcur); From 0e3299aa7f0a6297c88e0f8c02ec327380893a40 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 15:28:49 +0200 Subject: [PATCH 16/28] f --- include/jemalloc/internal/cache_bin.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a171801f8d..45c55e45ab 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -723,8 +723,15 @@ static inline void cache_bin_finish_flush( cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { unsigned rem = cache_bin_ncached_get_local(bin) - nflushed; - memmove( - bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); + /* + * Use volatile pointers to prevent LTO from optimizing this + * memmove based on built-in memcpy/memmove/memset knowledge. + * Misoptimization here can leave stale flushed pointers in the + * bin, causing duplicate allocations. + */ + volatile void *dst = bin->stack_head + nflushed; + volatile void *src = bin->stack_head; + memmove((void *)dst, (void *)src, rem * sizeof(void *)); bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); /* Reset the bin stats as it's merged during flush. 
*/ From 25554611041d05cc74e44f4c172899aa0be8e62d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 20:10:42 +0200 Subject: [PATCH 17/28] f --- include/jemalloc/internal/cache_bin.h | 15 ++++----------- include/jemalloc/internal/safety_check.h | 4 ++++ src/safety_check.c | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 45c55e45ab..368b8cc300 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -418,11 +418,6 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { bin->stack_head = new_head; bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head; *success = true; - /* Verify we're not returning NULL from a non-empty bin. */ - if (unlikely(*(void * volatile *)&ret == NULL)) { - safety_check_fail( - "NULL pointer returned from tcache bin alloc\n"); - } return ret; } *success = false; @@ -513,12 +508,10 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { cache_bin_assert_earlier(bin, bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); - /* Verify the store survived optimization — volatile forces the read. */ - if (unlikely(*(void * volatile *)bin->stack_head != ptr)) { - safety_check_fail( - "tcache push store lost: wrote %p, read back %p\n", - ptr, *bin->stack_head); - } + tcache_debug_check_bin_after_push( + bin->stack_head, + cache_bin_ncached_get_internal(bin), + ptr); return true; } diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 2b4b2d0e73..05c11ca2e2 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -16,6 +16,10 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. 
*/ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); +/* Debug: scan tcache bin for duplicate after push. */ +JEMALLOC_NOINLINE void tcache_debug_check_bin_after_push( + void **stack_head, unsigned ncached, void *ptr); + #define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC diff --git a/src/safety_check.c b/src/safety_check.c index d052718dae..5908345a7d 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -51,3 +51,24 @@ safety_check_fail(const char *format, ...) { safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug( buf); } + +/* + * Debug: scan the tcache bin for duplicate pointers after a push. + * noinline so LTO cannot optimize through this call — it must remain + * an opaque barrier to prevent the optimizer from reordering/merging + * the tcache push with surrounding code. + */ +JEMALLOC_NOINLINE void +tcache_debug_check_bin_after_push(void **stack_head, unsigned ncached, + void *ptr) { + for (unsigned i = 1; i < ncached; i++) { + if (stack_head[i] == ptr) { + safety_check_fail( + "tcache duplicate detected on push: " + "ptr %p already at position %u " + "(ncached %u)\n", + ptr, i, ncached); + return; + } + } +} From 906940bcb7803f8372ba5458d35cea1809acba48 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 21:45:12 +0200 Subject: [PATCH 18/28] f --- include/jemalloc/internal/cache_bin.h | 2 + include/jemalloc/internal/safety_check.h | 4 +- src/safety_check.c | 134 +++++++++++++++++++++-- 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 368b8cc300..2e14161086 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -403,6 +403,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; + tcache_debug_on_pop(ret); return ret; } if 
(!adjust_low_water) { @@ -418,6 +419,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { bin->stack_head = new_head; bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head; *success = true; + tcache_debug_on_pop(ret); return ret; } *success = false; diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 05c11ca2e2..b32b20d124 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -16,9 +16,11 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); -/* Debug: scan tcache bin for duplicate after push. */ +/* Debug: scan tcache bin for duplicate after push, record backtrace. */ JEMALLOC_NOINLINE void tcache_debug_check_bin_after_push( void **stack_head, unsigned ncached, void *ptr); +/* Debug: remove backtrace record when pointer is popped from tcache. */ +JEMALLOC_NOINLINE void tcache_debug_on_pop(void *ptr); #define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC diff --git a/src/safety_check.c b/src/safety_check.c index 5908345a7d..7e41308820 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -53,22 +53,140 @@ safety_check_fail(const char *format, ...) { } /* - * Debug: scan the tcache bin for duplicate pointers after a push. - * noinline so LTO cannot optimize through this call — it must remain - * an opaque barrier to prevent the optimizer from reordering/merging - * the tcache push with surrounding code. + * Debug: per-pointer backtrace tracker for tcache pushes. + * When a duplicate is detected, prints both the current and original + * stack traces. 
+ */ +#include <execinfo.h> + +#define TCACHE_BT_FRAMES 15 +#define TCACHE_BT_TABLE_SIZE 16384 /* must be power of 2 */ +#define TCACHE_BT_TABLE_MASK (TCACHE_BT_TABLE_SIZE - 1) + +typedef struct { + void *ptr; + void *frames[TCACHE_BT_FRAMES]; + int nframes; +} tcache_bt_entry_t; + +static __thread tcache_bt_entry_t tcache_bt_table[TCACHE_BT_TABLE_SIZE]; + +static unsigned +tcache_bt_hash(void *ptr) { + uintptr_t v = (uintptr_t)ptr; + v ^= v >> 16; + v *= 0x45d9f3b; + v ^= v >> 16; + return (unsigned)(v & TCACHE_BT_TABLE_MASK); +} + +static void +tcache_bt_record(void *ptr) { + unsigned idx = tcache_bt_hash(ptr); + for (unsigned i = 0; i < 64; i++) { + unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; + if (tcache_bt_table[slot].ptr == NULL + || tcache_bt_table[slot].ptr == ptr) { + tcache_bt_table[slot].ptr = ptr; + tcache_bt_table[slot].nframes = backtrace( + tcache_bt_table[slot].frames, TCACHE_BT_FRAMES); + return; + } + } +} + +static tcache_bt_entry_t * +tcache_bt_find(void *ptr) { + unsigned idx = tcache_bt_hash(ptr); + for (unsigned i = 0; i < 64; i++) { + unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; + if (tcache_bt_table[slot].ptr == ptr) { + return &tcache_bt_table[slot]; + } + if (tcache_bt_table[slot].ptr == NULL) { + return NULL; + } + } + return NULL; +} + +static void +tcache_bt_remove(void *ptr) { + unsigned idx = tcache_bt_hash(ptr); + for (unsigned i = 0; i < 64; i++) { + unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; + if (tcache_bt_table[slot].ptr == ptr) { + tcache_bt_table[slot].ptr = NULL; + return; + } + if (tcache_bt_table[slot].ptr == NULL) { + return; + } + } +} + +static void +tcache_bt_print(const char *label, tcache_bt_entry_t *entry) { + char buf[256]; + malloc_snprintf(buf, sizeof(buf), + " %s backtrace (%d frames):\n", label, entry->nframes); + malloc_write(buf); + for (int i = 0; i < entry->nframes; i++) { + malloc_snprintf(buf, sizeof(buf), + " #%d: %p\n", i, entry->frames[i]); + malloc_write(buf); + } +} + +/* + * Called after every
tcache push to record the backtrace and check + * for duplicates. */ JEMALLOC_NOINLINE void tcache_debug_check_bin_after_push(void **stack_head, unsigned ncached, void *ptr) { + /* Check for duplicate in the bin. */ for (unsigned i = 1; i < ncached; i++) { if (stack_head[i] == ptr) { - safety_check_fail( - "tcache duplicate detected on push: " - "ptr %p already at position %u " - "(ncached %u)\n", + char buf[256]; + malloc_snprintf(buf, sizeof(buf), + "tcache duplicate on push: ptr %p at " + "position %u (ncached %u)\n", ptr, i, ncached); + malloc_write(buf); + + /* Print the original push backtrace. */ + tcache_bt_entry_t *orig = tcache_bt_find(ptr); + if (orig != NULL) { + tcache_bt_print("ORIGINAL push", orig); + } else { + malloc_write(" (original backtrace " + "not found)\n"); + } + + /* Print current backtrace. */ + tcache_bt_entry_t current; + current.nframes = backtrace( + current.frames, TCACHE_BT_FRAMES); + tcache_bt_print("DUPLICATE push", &current); + + safety_check_fail( + "tcache duplicate detected: ptr %p\n", ptr); return; } } + + /* No duplicate — record this push. */ + tcache_bt_record(ptr); +} + +/* + * Called when a pointer is popped from tcache (allocation). + * Removes the backtrace record so it can be re-recorded on next push. + */ +JEMALLOC_NOINLINE void +tcache_debug_on_pop(void *ptr) { + if (ptr != NULL) { + tcache_bt_remove(ptr); + } } From a8aafde7bf8410e2bc48ad290d364d74919cdb4d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 21:50:54 +0200 Subject: [PATCH 19/28] f --- src/safety_check.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index 7e41308820..e7166bdac1 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -58,9 +58,10 @@ safety_check_fail(const char *format, ...) { * stack traces.
*/ #include <execinfo.h> +#include <sys/mman.h> -#define TCACHE_BT_FRAMES 15 -#define TCACHE_BT_TABLE_SIZE 16384 /* must be power of 2 */ +#define TCACHE_BT_FRAMES 8 +#define TCACHE_BT_TABLE_SIZE (1 << 16) /* 65536, must be power of 2 */ #define TCACHE_BT_TABLE_MASK (TCACHE_BT_TABLE_SIZE - 1) typedef struct { @@ -68,8 +69,20 @@ typedef struct { void *frames[TCACHE_BT_FRAMES]; int nframes; } tcache_bt_entry_t; +/* 8 + 64 + 4 = 76 bytes per entry, 65536 entries = ~5MB per thread */ -static __thread tcache_bt_entry_t tcache_bt_table[TCACHE_BT_TABLE_SIZE]; +static __thread tcache_bt_entry_t *tcache_bt_table; + +static void +tcache_bt_ensure_table(void) { + if (likely(tcache_bt_table != NULL)) { + return; + } + /* Use mmap to avoid re-entering jemalloc. */ + size_t sz = TCACHE_BT_TABLE_SIZE * sizeof(tcache_bt_entry_t); + tcache_bt_table = (tcache_bt_entry_t *)mmap(NULL, sz, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +} static unsigned tcache_bt_hash(void *ptr) { @@ -82,6 +95,8 @@ tcache_bt_hash(void *ptr) { static void tcache_bt_record(void *ptr) { + tcache_bt_ensure_table(); + if (tcache_bt_table == MAP_FAILED) return; unsigned idx = tcache_bt_hash(ptr); for (unsigned i = 0; i < 64; i++) { unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; @@ -97,6 +112,8 @@ tcache_bt_record(void *ptr) { static tcache_bt_entry_t * tcache_bt_find(void *ptr) { + if (tcache_bt_table == NULL || tcache_bt_table == MAP_FAILED) + return NULL; unsigned idx = tcache_bt_hash(ptr); for (unsigned i = 0; i < 64; i++) { unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; @@ -112,6 +129,8 @@ tcache_bt_find(void *ptr) { static void tcache_bt_remove(void *ptr) { + if (tcache_bt_table == NULL || tcache_bt_table == MAP_FAILED) + return; unsigned idx = tcache_bt_hash(ptr); for (unsigned i = 0; i < 64; i++) { unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; From 287f44776084ab64bd63ca70df714b5bc651cc9c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 22:24:15 +0200 Subject: [PATCH 20/28] f ---
include/jemalloc/internal/safety_check.h | 4 ++-- src/safety_check.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index b32b20d124..a5b377fa36 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -17,10 +17,10 @@ typedef void (*safety_check_abort_hook_t)(const char *message); void safety_check_set_abort(safety_check_abort_hook_t abort_fn); /* Debug: scan tcache bin for duplicate after push, record backtrace. */ -JEMALLOC_NOINLINE void tcache_debug_check_bin_after_push( +void tcache_debug_check_bin_after_push( void **stack_head, unsigned ncached, void *ptr); /* Debug: remove backtrace record when pointer is popped from tcache. */ -JEMALLOC_NOINLINE void tcache_debug_on_pop(void *ptr); +void tcache_debug_on_pop(void *ptr); #define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC diff --git a/src/safety_check.c b/src/safety_check.c index e7166bdac1..abd627e608 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -161,7 +161,7 @@ tcache_bt_print(const char *label, tcache_bt_entry_t *entry) { * Called after every tcache push to record the backtrace and check * for duplicates. */ -JEMALLOC_NOINLINE void +void tcache_debug_check_bin_after_push(void **stack_head, unsigned ncached, void *ptr) { /* Check for duplicate in the bin. */ @@ -203,7 +203,7 @@ tcache_debug_check_bin_after_push(void **stack_head, unsigned ncached, * Called when a pointer is popped from tcache (allocation). * Removes the backtrace record so it can be re-recorded on next push. 
*/ -JEMALLOC_NOINLINE void +void tcache_debug_on_pop(void *ptr) { if (ptr != NULL) { tcache_bt_remove(ptr); From 173ef1186ad188d90ac850fbd46073b8924cd550 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Apr 2026 22:50:23 +0200 Subject: [PATCH 21/28] f --- include/jemalloc/internal/cache_bin.h | 5 +- include/jemalloc/internal/safety_check.h | 9 +-- src/safety_check.c | 79 ++++++++++++------------ src/tcache.c | 4 ++ 4 files changed, 51 insertions(+), 46 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2e14161086..2dcbcda644 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -510,10 +510,7 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { cache_bin_assert_earlier(bin, bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); - tcache_debug_check_bin_after_push( - bin->stack_head, - cache_bin_ncached_get_internal(bin), - ptr); + tcache_debug_bt_record(ptr); return true; } diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index a5b377fa36..dd5f02bf33 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -16,11 +16,12 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); -/* Debug: scan tcache bin for duplicate after push, record backtrace. */ -void tcache_debug_check_bin_after_push( - void **stack_head, unsigned ncached, void *ptr); -/* Debug: remove backtrace record when pointer is popped from tcache. */ +/* Debug: record backtrace on tcache push. */ +void tcache_debug_bt_record(void *ptr); +/* Debug: remove backtrace record on tcache pop. */ void tcache_debug_on_pop(void *ptr); +/* Debug: scan for duplicates during tcache flush, print backtraces. 
*/ +void tcache_debug_check_flush(void **ptrs, unsigned nflush); #define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC diff --git a/src/safety_check.c b/src/safety_check.c index abd627e608..b7ad28f6fa 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -158,50 +158,16 @@ tcache_bt_print(const char *label, tcache_bt_entry_t *entry) { } /* - * Called after every tcache push to record the backtrace and check - * for duplicates. + * Record backtrace on every tcache push. Inlineable by LTO — just a + * hash table insert, no scan, no barrier effect. */ void -tcache_debug_check_bin_after_push(void **stack_head, unsigned ncached, - void *ptr) { - /* Check for duplicate in the bin. */ - for (unsigned i = 1; i < ncached; i++) { - if (stack_head[i] == ptr) { - char buf[256]; - malloc_snprintf(buf, sizeof(buf), - "tcache duplicate on push: ptr %p at " - "position %u (ncached %u)\n", - ptr, i, ncached); - malloc_write(buf); - - /* Print the original push backtrace. */ - tcache_bt_entry_t *orig = tcache_bt_find(ptr); - if (orig != NULL) { - tcache_bt_print("ORIGINAL push", orig); - } else { - malloc_write(" (original backtrace " - "not found)\n"); - } - - /* Print current backtrace. */ - tcache_bt_entry_t current; - current.nframes = backtrace( - current.frames, TCACHE_BT_FRAMES); - tcache_bt_print("DUPLICATE push", &current); - - safety_check_fail( - "tcache duplicate detected: ptr %p\n", ptr); - return; - } - } - - /* No duplicate — record this push. */ +tcache_debug_bt_record(void *ptr) { tcache_bt_record(ptr); } /* - * Called when a pointer is popped from tcache (allocation). - * Removes the backtrace record so it can be re-recorded on next push. + * Remove backtrace record on tcache pop. Inlineable by LTO. */ void tcache_debug_on_pop(void *ptr) { @@ -209,3 +175,40 @@ tcache_debug_on_pop(void *ptr) { tcache_bt_remove(ptr); } } + +/* + * Called during tcache flush to scan for duplicates.
+ * This runs in tcache.c (not inlined into callers), so it won't + * affect LTO optimization of the push/pop fast paths. + */ +void +tcache_debug_check_flush(void **ptrs, unsigned nflush) { + for (unsigned i = 0; i < nflush; i++) { + for (unsigned j = i + 1; j < nflush; j++) { + if (ptrs[i] == ptrs[j]) { + char buf[256]; + malloc_snprintf(buf, sizeof(buf), + "tcache duplicate in flush: ptr %p " + "at positions %u and %u " + "(nflush %u)\n", + ptrs[i], i, j, nflush); + malloc_write(buf); + + /* Print first push backtrace. */ + tcache_bt_entry_t *orig = + tcache_bt_find(ptrs[i]); + if (orig != NULL) { + tcache_bt_print("push", orig); + } else { + malloc_write( + " (backtrace not found)\n"); + } + + safety_check_fail( + "tcache duplicate: ptr %p\n", + ptrs[i]); + return; + } + } + } +} diff --git a/src/tcache.c b/src/tcache.c index 10fa7c2130..de23050298 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -664,6 +664,10 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { + /* Debug: scan entire bin for duplicates before flush. 
*/ + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + tcache_debug_check_flush(cache_bin->stack_head, ncached); + tcache_nfill_small_burst_reset(tcache->tcache_slow, binind); tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, /* small */ true); From 3ed74f8bb0d9121cd72f1ea89fb7af846f87b445 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 Apr 2026 00:08:04 +0200 Subject: [PATCH 22/28] f --- include/jemalloc/internal/safety_check.h | 2 ++ src/safety_check.c | 7 +------ src/tcache.c | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index dd5f02bf33..5f503b72e0 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -16,6 +16,8 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); +/* Debug: pre-allocate backtrace table (call from tcache_init). */ +void tcache_bt_ensure_table(void); /* Debug: record backtrace on tcache push. */ void tcache_debug_bt_record(void *ptr); /* Debug: remove backtrace record on tcache pop. 
*/ diff --git a/src/safety_check.c b/src/safety_check.c index b7ad28f6fa..709cd5ef47 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -73,7 +73,7 @@ typedef struct { static __thread tcache_bt_entry_t *tcache_bt_table; -static void +void tcache_bt_ensure_table(void) { if (likely(tcache_bt_table != NULL)) { return; @@ -95,8 +95,6 @@ tcache_bt_hash(void *ptr) { static void tcache_bt_record(void *ptr) { - tcache_bt_ensure_table(); - if (tcache_bt_table == MAP_FAILED) return; unsigned idx = tcache_bt_hash(ptr); for (unsigned i = 0; i < 64; i++) { unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; @@ -112,7 +110,6 @@ tcache_bt_record(void *ptr) { static tcache_bt_entry_t * tcache_bt_find(void *ptr) { - if (tcache_bt_table == NULL || tcache_bt_table == MAP_FAILED) return NULL; unsigned idx = tcache_bt_hash(ptr); for (unsigned i = 0; i < 64; i++) { @@ -129,8 +126,6 @@ tcache_bt_find(void *ptr) { static void tcache_bt_remove(void *ptr) { - if (tcache_bt_table == NULL || tcache_bt_table == MAP_FAILED) - return; unsigned idx = tcache_bt_hash(ptr); for (unsigned i = 0; i < 64; i++) { unsigned slot = (idx + i) & TCACHE_BT_TABLE_MASK; diff --git a/src/tcache.c b/src/tcache.c index de23050298..b4aa974d69 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -821,6 +821,7 @@ tcache_default_settings_init(tcache_slow_t *tcache_slow) { static void tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, void *mem, const cache_bin_info_t *tcache_bin_info) { + tcache_bt_ensure_table(); tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; From c4612d7edef94073b01d28aae104cfd549337bf9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 Apr 2026 16:10:54 +0200 Subject: [PATCH 23/28] f --- src/tcache.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/tcache.c b/src/tcache.c index b4aa974d69..80a8e0083f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -621,6 +621,24 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t 
*tcache, assert(filled >= nfill_min && filled <= nfill_max); assert(cache_bin_ncached_get_local(cache_bin) == filled); + /* Debug: scan refilled bin for duplicate pointers. */ + { + void **head = cache_bin->stack_head; + for (cache_bin_sz_t di = 0; di < filled; di++) { + for (cache_bin_sz_t dj = di + 1; dj < filled; dj++) { + if (head[di] == head[dj]) { + safety_check_fail( + "tcache refill duplicate: " + "ptr %p at %u and %u " + "(binind %u filled %u)\n", + head[di], (unsigned)di, + (unsigned)dj, binind, + (unsigned)filled); + } + } + } + } + tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); From 9a9fd621d6a0733910758f98db1f9f80994a572a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 Apr 2026 16:17:03 +0200 Subject: [PATCH 24/28] f --- include/jemalloc/internal/cache_bin.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2dcbcda644..01b0c0179f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -510,8 +510,6 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { cache_bin_assert_earlier(bin, bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); - tcache_debug_bt_record(ptr); - return true; } From 24c306f748a49f3cb0fa4c332dad2e0222edfbfe Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 Apr 2026 18:26:21 +0200 Subject: [PATCH 25/28] f --- include/jemalloc/internal/emap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 8869235648..d7e3e00b53 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -171,7 +171,7 @@ JEMALLOC_ALWAYS_INLINE bool emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { if (!config_debug) { /* For assertions only. 
*/ - return false; + return true; } /* From ad700bcea1da180259badc7e44c6676b05ea1a11 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 Apr 2026 18:26:32 +0200 Subject: [PATCH 26/28] f --- include/jemalloc/internal/emap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index d7e3e00b53..966805922f 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -157,7 +157,6 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { JEMALLOC_ALWAYS_INLINE bool emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { - assert(config_debug); emap_assert_mapped(tsdn, emap, edata); EMAP_DECLARE_RTREE_CTX; From c3e6bb7021cfb8d7f4fe979cb814fe1d5b7e73d3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 Apr 2026 18:47:11 +0200 Subject: [PATCH 27/28] Fix assertion failure in `arena_choose_huge` with `oversize_threshold:0` When huge arena is disabled (e.g. `oversize_threshold:0`), `arena_init_huge` updates the global `oversize_threshold` but not `a0`'s per-arena `pac.oversize_threshold`, which was set to `OVERSIZE_THRESHOLD_DEFAULT` (8MB) before conf parsing. This causes allocations >= 8MB through `a0` to enter `arena_choose_huge` where `huge_arena_ind` is still 0, hitting `assert(!malloc_initialized())`. Update `a0`'s threshold in the disabled path, matching what the enabled path already does. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/arena.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/arena.c b/src/arena.c index 918addd9e9..bc26f7f403 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2063,6 +2063,9 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { || opt_oversize_threshold < SC_LARGE_MINCLASS) { opt_oversize_threshold = 0; oversize_threshold = SC_LARGE_MAXCLASS + PAGE; + /* a0 was created before conf init with the default threshold. 
*/ + atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, + oversize_threshold, ATOMIC_RELAXED); huge_enabled = false; } else { /* Reserve the index for the huge arena. */ From 3451645bc3a304fba1ae5b2d1f62822482b8afdf Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 Apr 2026 00:21:05 +0200 Subject: [PATCH 28/28] f --- include/jemalloc/internal/tcache_inlines.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 6bd1b339ea..d2387c33ee 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -202,6 +202,12 @@ tcache_dalloc_small( cache_bin_sz_t max = cache_bin_ncached_max_get(bin); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); + /* + * Compiler barrier: force reload of bin->stack_head after + * flush. Without this, LTO may cache stack_head from before + * the flush and use a stale value in the second dalloc_easy. + */ + __asm__ volatile("" : "+m"(*bin)); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); }