diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h index f429116986..14ab1c998f 100644 --- a/include/jemalloc/internal/bin_inlines.h +++ b/include/jemalloc/internal/bin_inlines.h @@ -29,18 +29,31 @@ bin_slab_regind_impl( size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) - % (uintptr_t)bin_infos[binind].reg_size - == 0); + if (unlikely((uintptr_t)ptr < (uintptr_t)edata_addr_get(slab) + || (uintptr_t)ptr >= (uintptr_t)edata_past_get(slab))) { + safety_check_fail( + "bin_slab_regind: ptr %p outside slab [%p, %p)\n", + ptr, edata_addr_get(slab), edata_past_get(slab)); + } diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + if (unlikely(diff % (uintptr_t)bin_infos[binind].reg_size != 0)) { + safety_check_fail( + "bin_slab_regind: ptr %p not aligned to reg_size " + "%zu (diff=%zu)\n", + ptr, bin_infos[binind].reg_size, diff); + } + /* Avoid doing division with a variable divisor. */ regind = div_compute(div_info, diff); - assert(regind < bin_infos[binind].nregs); + + if (unlikely(regind >= bin_infos[binind].nregs)) { + safety_check_fail( + "bin_slab_regind: regind %zu >= nregs %u for " + "binind %u\n", + regind, bin_infos[binind].nregs, binind); + } return regind; } @@ -80,9 +93,50 @@ bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, /* Freeing an unallocated pointer can cause assertion failure. */ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + /* Debug: snapshot bitmap group before unset. 
*/ + size_t goff_dbg = regind >> LG_BITMAP_GROUP_NBITS; + bitmap_t before_dbg = *(volatile bitmap_t *)&slab_data->bitmap[goff_dbg]; + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + + /* Debug: verify the bit was actually flipped. */ + bitmap_t after_dbg = *(volatile bitmap_t *)&slab_data->bitmap[goff_dbg]; + bitmap_t expected_bit = ZU(1) << (regind & BITMAP_GROUP_NBITS_MASK); + if (unlikely((before_dbg | expected_bit) != after_dbg)) { + safety_check_fail( + "bitmap_unset lost: binind %u regind %zu " + "goff %zu before %lx after %lx expected_bit %lx\n", + binind, regind, goff_dbg, + (unsigned long)before_dbg, + (unsigned long)after_dbg, + (unsigned long)expected_bit); + } + edata_nfree_inc(slab); + /* Debug: verify nfree/bitmap consistency after free. */ + { + unsigned actual_free = 0; + unsigned ngroups = +#ifdef BITMAP_USE_TREE + bin_info->bitmap_info.levels[ + bin_info->bitmap_info.nlevels].group_offset; +#else + bin_info->bitmap_info.ngroups; +#endif + for (unsigned gi = 0; gi < ngroups; gi++) { + actual_free += popcount_lu(slab_data->bitmap[gi]); + } + if (unlikely(actual_free != edata_nfree_get(slab))) { + safety_check_fail( + "bin_dalloc_locked_step: post-free " + "nfree/bitmap mismatch for binind %u " + "regind %zu: nfree=%u actual=%u\n", + binind, regind, + edata_nfree_get(slab), actual_free); + } + } + if (config_stats) { info->ndalloc++; } diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 88c7942e95..9c92df1645 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -19,19 +19,19 @@ */ static inline unsigned ffs_llu(unsigned long long x) { - util_assume(x != 0); + assert(x != 0); return JEMALLOC_INTERNAL_FFSLL(x) - 1; } static inline unsigned ffs_lu(unsigned long x) { - util_assume(x != 0); + assert(x != 0); return JEMALLOC_INTERNAL_FFSL(x) - 1; } static inline unsigned ffs_u(unsigned x) { - util_assume(x != 0); + assert(x != 0); return 
JEMALLOC_INTERNAL_FFS(x) - 1; } diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index e0f596fba9..ab3767d30c 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/safety_check.h" typedef unsigned long bitmap_t; #define LG_SIZEOF_BITMAP LG_SIZEOF_LONG @@ -224,6 +225,12 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; + /* Verify the store was not eliminated by DSE. */ + if (unlikely(*(volatile bitmap_t *)gp != g)) { + safety_check_fail( + "bitmap_set: leaf store eliminated at group %zu, " + "expected %lx\n", goff, (unsigned long)g); + } assert(bitmap_get(bitmap, binfo, bit)); #ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ @@ -237,6 +244,13 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; + /* Verify tree store was not eliminated. */ + if (unlikely(*(volatile bitmap_t *)gp != g)) { + safety_check_fail( + "bitmap_set: tree store eliminated at " + "level %u group %zu, expected %lx\n", + i, goff, (unsigned long)g); + } if (g != 0) { break; } @@ -319,19 +333,43 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; + /* + * Use volatile reads to prevent LTO from hoisting/caching bitmap + * loads across iterations when bitmap_sfu is called in a loop + * (e.g. bin_slab_reg_alloc_batch). Without volatile, the compiler + * may reuse a stale bitmap value from before bitmap_set's store, + * causing the same bit to be returned twice. 
+ */ + g = *(volatile bitmap_t *)&bitmap[binfo->levels[i].group_offset]; + if (unlikely(g == 0)) { + safety_check_fail( + "bitmap_sfu: tree root is zero (bitmap full), " + "nlevels %u\n", binfo->nlevels); + } bit = ffs_lu(g); while (i > 0) { i--; - g = bitmap[binfo->levels[i].group_offset + bit]; + g = *(volatile bitmap_t *)&bitmap[ + binfo->levels[i].group_offset + bit]; + if (unlikely(g == 0)) { + safety_check_fail( + "bitmap_sfu: tree level %u group is zero " + "at offset %zu\n", i, + binfo->levels[i].group_offset + bit); + } bit = (bit << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); } #else i = 0; - g = bitmap[0]; + g = *(volatile bitmap_t *)&bitmap[0]; while (g == 0) { i++; - g = bitmap[i]; + if (unlikely(i >= BITMAP_BITS2GROUPS(binfo->nbits))) { + safety_check_fail( + "bitmap_sfu: all %u groups are zero " + "(bitmap full)\n", i); + } + g = *(volatile bitmap_t *)&bitmap[i]; } bit = (i << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); #endif diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index bea3a2fc04..01b0c0179f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -392,7 +392,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. 
*/ - void *ret = *bin->stack_head; + void *ret = *(void * volatile *)bin->stack_head; cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head; void **new_head = bin->stack_head + 1; @@ -403,6 +403,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { if (likely(low_bits != bin->low_bits_low_water)) { bin->stack_head = new_head; *success = true; + tcache_debug_on_pop(ret); return ret; } if (!adjust_low_water) { @@ -418,6 +419,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { bin->stack_head = new_head; bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head; *success = true; + tcache_debug_on_pop(ret); return ret; } *success = false; @@ -711,8 +713,15 @@ static inline void cache_bin_finish_flush( cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { unsigned rem = cache_bin_ncached_get_local(bin) - nflushed; - memmove( - bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); + /* + * Use volatile pointers to prevent LTO from optimizing this + * memmove based on built-in memcpy/memmove/memset knowledge. + * Misoptimization here can leave stale flushed pointers in the + * bin, causing duplicate allocations. + */ + volatile void *dst = bin->stack_head + nflushed; + volatile void *src = bin->stack_head; + memmove((void *)dst, (void *)src, rem * sizeof(void *)); bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); /* Reset the bin stats as it's merged during flush. 
*/ diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h index 56d5f463fa..a99d7f2bc1 100644 --- a/include/jemalloc/internal/div.h +++ b/include/jemalloc/internal/div.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/safety_check.h" /* * This module does the division that computes the index of a region in a slab, @@ -25,7 +26,10 @@ void div_init(div_info_t *div_info, size_t divisor); static inline size_t div_compute(div_info_t *div_info, size_t n) { - assert(n <= (uint32_t)-1); + if (unlikely(n > (uint32_t)-1)) { + safety_check_fail( + "div_compute: n=%zu exceeds uint32 range\n", n); + } /* * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, * the compilers I tried were all smart enough to turn this into the @@ -33,9 +37,6 @@ div_compute(div_info_t *div_info, size_t n) { * mul; mov edx eax; on x86, umull on arm, etc.). */ size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; -#ifdef JEMALLOC_DEBUG - assert(i * div_info->d == n); -#endif return i; } diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 06b6c5454f..e23ea2c279 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -14,6 +14,7 @@ #include "jemalloc/internal/slab_data.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/typed_list.h" +#include /* * sizeof(edata_t) is 128 bytes on 64-bit architectures. 
Ensure the alignment @@ -572,6 +573,8 @@ edata_nfree_inc(edata_t *edata) { static inline void edata_nfree_dec(edata_t *edata) { assert(edata_slab_get(edata)); + if (unlikely(edata_nfree_get(edata) == 0)) + __builtin_trap(); edata->e_bits -= ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 8869235648..966805922f 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -157,7 +157,6 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { JEMALLOC_ALWAYS_INLINE bool emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { - assert(config_debug); emap_assert_mapped(tsdn, emap, edata); EMAP_DECLARE_RTREE_CTX; @@ -171,7 +170,7 @@ JEMALLOC_ALWAYS_INLINE bool emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { if (!config_debug) { /* For assertions only. */ - return false; + return true; } /* diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 2b4b2d0e73..5f503b72e0 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -16,6 +16,15 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); +/* Debug: pre-allocate backtrace table (call from tcache_init). */ +void tcache_bt_ensure_table(void); +/* Debug: record backtrace on tcache push. */ +void tcache_debug_bt_record(void *ptr); +/* Debug: remove backtrace record on tcache pop. */ +void tcache_debug_on_pop(void *ptr); +/* Debug: scan for duplicates during tcache flush, print backtraces. 
*/ +void tcache_debug_check_flush(void **ptrs, unsigned nflush); + #define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 6bd1b339ea..d2387c33ee 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -202,6 +202,12 @@ tcache_dalloc_small( cache_bin_sz_t max = cache_bin_ncached_max_get(bin); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); + /* + * Compiler barrier: force reload of bin->stack_head after + * flush. Without this, LTO may cache stack_head from before + * the flush and use a stale value in the second dalloc_easy. + */ + __asm__ volatile("" : "+m"(*bin)); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } diff --git a/src/arena.c b/src/arena.c index d7c8cd1fce..bc26f7f403 100644 --- a/src/arena.c +++ b/src/arena.c @@ -863,6 +863,19 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); + /* + * Verify bitmap_init was not optimized away by LTO. + * An "empty" (all-free) bitmap should have the first group + * set to all-ones (0xff...ff). If it's zero, the memset(0xff) + * inside bitmap_init was eliminated. + */ + if (unlikely(*(volatile bitmap_t *)&slab_data->bitmap[0] == 0)) { + safety_check_fail( + "arena_slab_alloc: bitmap_init was eliminated by " + "the compiler, bitmap[0]=%lx\n", + (unsigned long)slab_data->bitmap[0]); + } + return slab; } @@ -914,6 +927,36 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Try batch-fill from slabcur first. */ edata_t *slabcur = bin->slabcur; if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { + /* + * Debug: verify nfree matches actual bitmap + * free count for the crashing size class. 
+ */ + { + slab_data_t *sd = edata_slab_data_get(slabcur); + unsigned actual_free = 0; + unsigned ngroups = +#ifdef BITMAP_USE_TREE + bin_info->bitmap_info.levels[ + bin_info->bitmap_info.nlevels] + .group_offset; +#else + bin_info->bitmap_info.ngroups; +#endif + for (unsigned gi = 0; gi < ngroups; gi++) { + actual_free += popcount_lu( + sd->bitmap[gi]); + } + if (unlikely(actual_free + != edata_nfree_get(slabcur))) { + safety_check_fail( + "arena_cache_bin_fill_small: " + "nfree/bitmap mismatch for " + "binind %u: nfree=%u actual=%u" + "\n", binind, + edata_nfree_get(slabcur), + actual_free); + } + } /* * Use up the free slots if the total filled <= nfill_max. * Otherwise, fallback to nfill_min for a more conservative @@ -926,6 +969,78 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bin_slab_reg_alloc_batch( slabcur, bin_info, cnt, &arr->ptr[filled]); + + /* + * Debug: check for duplicate pointers in the + * batch just returned, and also against + * previously filled entries. + */ + { + for (unsigned di = 0; di < cnt; di++) { + void *p = arr->ptr[filled + di]; + /* Check within this batch. */ + for (unsigned dj = di + 1; + dj < cnt; dj++) { + if (unlikely(p == + arr->ptr[filled + dj])) { + safety_check_fail( + "batch alloc " + "dup: ptr %p " + "at %u and %u " + "(binind %u " + "cnt %u)\n", + p, di, dj, + binind, cnt); + } + } + /* Check against earlier fills. */ + for (unsigned dk = 0; + dk < filled; dk++) { + if (unlikely(p == + arr->ptr[dk])) { + safety_check_fail( + "refill dup: " + "ptr %p at " + "batch %u and " + "prev %u " + "(binind %u)\n", + p, di, dk, + binind); + } + } + } + } + + /* Debug: verify nfree/bitmap consistency. 
*/ + { + slab_data_t *sd = edata_slab_data_get( + slabcur); + unsigned actual_free = 0; + unsigned ngroups = +#ifdef BITMAP_USE_TREE + bin_info->bitmap_info.levels[ + bin_info->bitmap_info.nlevels] + .group_offset; +#else + bin_info->bitmap_info.ngroups; +#endif + for (unsigned gi = 0; gi < ngroups; gi++) { + actual_free += popcount_lu( + sd->bitmap[gi]); + } + if (unlikely(actual_free + != edata_nfree_get(slabcur))) { + safety_check_fail( + "arena_cache_bin_fill_small: " + "post-batch nfree/bitmap " + "mismatch for binind %u: " + "nfree=%u actual=%u cnt=%u\n", + binind, + edata_nfree_get(slabcur), + actual_free, cnt); + } + } + made_progress = true; filled += cnt; continue; @@ -1948,6 +2063,9 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { || opt_oversize_threshold < SC_LARGE_MINCLASS) { opt_oversize_threshold = 0; oversize_threshold = SC_LARGE_MAXCLASS + PAGE; + /* a0 was created before conf init with the default threshold. */ + atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, + oversize_threshold, ATOMIC_RELAXED); huge_enabled = false; } else { /* Reserve the index for the huge arena. */ diff --git a/src/safety_check.c b/src/safety_check.c index d052718dae..709cd5ef47 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -51,3 +51,196 @@ safety_check_fail(const char *format, ...) { safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug( buf); }
+
+/*
+ * Debug: per-pointer backtrace tracker for tcache pushes.
+ * Each thread keeps an open-addressing table that maps a cached pointer to
+ * the backtrace captured when it was recorded.  When a flush finds the same
+ * pointer twice, the recorded backtrace (if any) is printed before failing.
+ */
+#include <execinfo.h>
+#include <sys/mman.h>
+
+#define TCACHE_BT_FRAMES 8
+#define TCACHE_BT_TABLE_SIZE (1 << 16) /* 65536, must be power of 2 */
+#define TCACHE_BT_TABLE_MASK (TCACHE_BT_TABLE_SIZE - 1)
+/* Longest linear-probe run tried before an operation gives up. */
+#define TCACHE_BT_MAX_PROBE 64
+
+typedef struct {
+	void *ptr;
+	void *frames[TCACHE_BT_FRAMES];
+	int nframes;
+} tcache_bt_entry_t;
+/* 8 + 64 + 4 = 76 bytes per entry, 65536 entries = ~5MB per thread */
+
+/* Thread-local table; stays NULL until tcache_bt_ensure_table() succeeds. */
+static __thread tcache_bt_entry_t *tcache_bt_table;
+
+/*
+ * Map the calling thread's table on first use.  If mmap fails the table
+ * stays NULL and the tracker is a no-op for this thread.
+ */
+void
+tcache_bt_ensure_table(void) {
+	if (likely(tcache_bt_table != NULL)) {
+		return;
+	}
+	/* Use mmap to avoid re-entering jemalloc. */
+	size_t sz = TCACHE_BT_TABLE_SIZE * sizeof(tcache_bt_entry_t);
+	void *mem = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	/* mmap reports failure as MAP_FAILED, which is not NULL. */
+	if (mem != MAP_FAILED) {
+		tcache_bt_table = (tcache_bt_entry_t *)mem;
+	}
+}
+
+/* Mix the pointer's bits and reduce the result to a table index. */
+static unsigned
+tcache_bt_hash(void *ptr) {
+	uintptr_t v = (uintptr_t)ptr;
+	v ^= v >> 16;
+	v *= 0x45d9f3b;
+	v ^= v >> 16;
+	return (unsigned)(v & TCACHE_BT_TABLE_MASK);
+}
+
+/*
+ * Store ptr together with the current backtrace in the first empty or
+ * matching slot of its probe run.  If every probed slot is taken by another
+ * pointer, ptr is silently left unrecorded.
+ */
+static void
+tcache_bt_record(void *ptr) {
+	/* No table on this thread (never initialized, or mmap failed). */
+	if (unlikely(tcache_bt_table == NULL)) {
+		return;
+	}
+	unsigned idx = tcache_bt_hash(ptr);
+	for (unsigned i = 0; i < TCACHE_BT_MAX_PROBE; i++) {
+		tcache_bt_entry_t *entry =
+		    &tcache_bt_table[(idx + i) & TCACHE_BT_TABLE_MASK];
+		if (entry->ptr == NULL || entry->ptr == ptr) {
+			entry->ptr = ptr;
+			entry->nframes = backtrace(entry->frames,
+			    TCACHE_BT_FRAMES);
+			return;
+		}
+	}
+}
+
+/*
+ * Return the entry recorded for ptr, or NULL when the probe run reaches an
+ * empty slot, is exhausted, or there is no table.
+ */
+static tcache_bt_entry_t *
+tcache_bt_find(void *ptr) {
+	if (unlikely(tcache_bt_table == NULL)) {
+		return NULL;
+	}
+	unsigned idx = tcache_bt_hash(ptr);
+	for (unsigned i = 0; i < TCACHE_BT_MAX_PROBE; i++) {
+		tcache_bt_entry_t *entry =
+		    &tcache_bt_table[(idx + i) & TCACHE_BT_TABLE_MASK];
+		if (entry->ptr == ptr) {
+			return entry;
+		}
+		if (entry->ptr == NULL) {
+			return NULL;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Forget ptr by clearing its slot.
+ * NOTE(review): a cleared slot also ends the probe run for any entry that was
+ * stored past it, so such entries can be missed by later lookups; a
+ * tombstone marker would avoid that.
+ */
+static void
+tcache_bt_remove(void *ptr) {
+	if (unlikely(tcache_bt_table == NULL)) {
+		return;
+	}
+	unsigned idx = tcache_bt_hash(ptr);
+	for (unsigned i = 0; i < TCACHE_BT_MAX_PROBE; i++) {
+		tcache_bt_entry_t *entry =
+		    &tcache_bt_table[(idx + i) & TCACHE_BT_TABLE_MASK];
+		if (entry->ptr == ptr) {
+			entry->ptr = NULL;
+			return;
+		}
+		if (entry->ptr == NULL) {
+			return;
+		}
+	}
+}
+
+/* Write the frames stored in entry, one per line, under the given label. */
+static void
+tcache_bt_print(const char *label, tcache_bt_entry_t *entry) {
+	char buf[256];
+	malloc_snprintf(buf, sizeof(buf),
+	    " %s backtrace (%d frames):\n", label, entry->nframes);
+	malloc_write(buf);
+	for (int i = 0; i < entry->nframes; i++) {
+		malloc_snprintf(buf, sizeof(buf),
+		    " #%d: %p\n", i, entry->frames[i]);
+		malloc_write(buf);
+	}
+}
+
+/*
+ * Public entry point for recording ptr; meant to be called on tcache push.
+ */
+void
+tcache_debug_bt_record(void *ptr) {
+	tcache_bt_record(ptr);
+}
+
+/*
+ * Public entry point for dropping ptr's record on tcache pop; NULL is ignored.
+ */
+void
+tcache_debug_on_pop(void *ptr) {
+	if (ptr != NULL) {
+		tcache_bt_remove(ptr);
+	}
+}
+
+/*
+ * Scan ptrs[0..nflush) for a repeated pointer.  On the first duplicate, write
+ * its positions, print the backtrace recorded for it (if one is found) and
+ * call safety_check_fail().  The pairwise scan is quadratic in nflush.
+ */
+void
+tcache_debug_check_flush(void **ptrs, unsigned nflush) {
+	for (unsigned i = 0; i < nflush; i++) {
+		for (unsigned j = i + 1; j < nflush; j++) {
+			if (ptrs[i] != ptrs[j]) {
+				continue;
+			}
+			char buf[256];
+			malloc_snprintf(buf, sizeof(buf),
+			    "tcache duplicate in flush: ptr %p "
+			    "at positions %u and %u "
+			    "(nflush %u)\n",
+			    ptrs[i], i, j, nflush);
+			malloc_write(buf);
+
+			/* Print first push backtrace. */
+			tcache_bt_entry_t *orig = tcache_bt_find(ptrs[i]);
+			if (orig != NULL) {
+				tcache_bt_print("push", orig);
+			} else {
+				malloc_write(" (backtrace not found)\n");
+			}
+
+			safety_check_fail("tcache duplicate: ptr %p\n",
+			    ptrs[i]);
+			return;
+		}
+	}
+}
diff --git a/src/tcache.c b/src/tcache.c index 10fa7c2130..80a8e0083f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -621,6 +621,24 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(filled >= nfill_min && filled <= nfill_max); assert(cache_bin_ncached_get_local(cache_bin) == filled); + /* Debug: scan refilled bin for duplicate pointers. 
*/ + { + void **head = cache_bin->stack_head; + for (cache_bin_sz_t di = 0; di < filled; di++) { + for (cache_bin_sz_t dj = di + 1; dj < filled; dj++) { + if (head[di] == head[dj]) { + safety_check_fail( + "tcache refill duplicate: " + "ptr %p at %u and %u " + "(binind %u filled %u)\n", + head[di], (unsigned)di, + (unsigned)dj, binind, + (unsigned)filled); + } + } + } + } + tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); @@ -664,6 +682,10 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { + /* Debug: scan entire bin for duplicates before flush. */ + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + tcache_debug_check_flush(cache_bin->stack_head, ncached); + tcache_nfill_small_burst_reset(tcache->tcache_slow, binind); tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, /* small */ true); @@ -817,6 +839,7 @@ tcache_default_settings_init(tcache_slow_t *tcache_slow) { static void tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, void *mem, const cache_bin_info_t *tcache_bin_info) { + tcache_bt_ensure_table(); tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache;