Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions src/iso_alloc_profiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,92 @@ INTERNAL_HIDDEN uint64_t _iso_alloc_zone_leak_detector(iso_alloc_zone_t *zone, b
uint32_t was_used = 0;
int64_t bms = zone->bitmap_size / sizeof(bitmap_index_t);

#if USE_NEON
/* Process the bitmap two qwords at a time. The common case is that a
* qword is entirely zero (every chunk in those 32 slots is free and
* never used) — vectorising the zero-check lets us skip both qwords at
* once. For each non-zero qword we popcount the was_used pattern in
* O(1) and walk only the chunks whose low bit is set with ctz, instead
* of scanning all 32 pairs. */
int64_t i = 0;
const int64_t bms_pair = bms & ~(int64_t) 1;

for(; i < bms_pair; i += 2) {
int64x2_t v = vld1q_s64((const int64_t *) &bm[i]);
const uint64_t lane0 = (uint64_t) vgetq_lane_s64(v, 0);
const uint64_t lane1 = (uint64_t) vgetq_lane_s64(v, 1);

if((lane0 | lane1) == 0) {
continue;
}

const uint64_t lanes[2] = {lane0, lane1};
for(int k = 0; k < 2; k++) {
uint64_t bts = lanes[k];
if(bts == 0) {
continue;
}

/* was_used: chunks encoded 01 (low=0, high=1). The mask of
* odd-bit positions whose paired even bit is clear is
* (~bts) & (bts >> 1) & USED_BIT_VECTOR. */
uint64_t was_used_mask = (~bts) & (bts >> 1) & USED_BIT_VECTOR;
was_used += __builtin_popcountll(was_used_mask);

/* Chunks with low bit set are either in-use (10) or canary
* (11). Walk just those positions to verify canaries. */
uint64_t in_use_low = bts & USED_BIT_VECTOR;
while(in_use_low) {
int j = __builtin_ctzll(in_use_low);
in_use_low &= in_use_low - 1;

int64_t bit_two = GET_BIT(bts, (j + 1));
bit_slot_t bit_slot = (((bitmap_index_t)(i + k)) * BITS_PER_QWORD) + j;
const void *leak = (zone->user_pages_start + ((bit_slot >> 1) * zone->chunk_size));

if(bit_two == 1 && (check_canary_no_abort(zone, leak) != ERR)) {
continue;
} else {
in_use++;

if(profile == false) {
LOG("Leaked chunk (%d) in zone[%d] of %d bytes detected at 0x%p (bit position = %d)", in_use, zone->index, zone->chunk_size, leak, bit_slot);
}
}
}
}
}

for(; i < bms; i++) {
uint64_t bts = (uint64_t) bm[i];
if(bts == 0) {
continue;
}

uint64_t was_used_mask = (~bts) & (bts >> 1) & USED_BIT_VECTOR;
was_used += __builtin_popcountll(was_used_mask);

uint64_t in_use_low = bts & USED_BIT_VECTOR;
while(in_use_low) {
int j = __builtin_ctzll(in_use_low);
in_use_low &= in_use_low - 1;

int64_t bit_two = GET_BIT(bts, (j + 1));
bit_slot_t bit_slot = ((bitmap_index_t) i * BITS_PER_QWORD) + j;
const void *leak = (zone->user_pages_start + ((bit_slot >> 1) * zone->chunk_size));

if(bit_two == 1 && (check_canary_no_abort(zone, leak) != ERR)) {
continue;
} else {
in_use++;

if(profile == false) {
LOG("Leaked chunk (%d) in zone[%d] of %d bytes detected at 0x%p (bit position = %d)", in_use, zone->index, zone->chunk_size, leak, bit_slot);
}
}
}
}
#else
for(bitmap_index_t i = 0; i < bms; i++) {
for(int j = 0; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {

Expand Down Expand Up @@ -161,6 +247,7 @@ INTERNAL_HIDDEN uint64_t _iso_alloc_zone_leak_detector(iso_alloc_zone_t *zone, b
}
}
}
#endif

if(profile == false) {
LOG("Zone[%d] Total number of %d byte chunks(%d) used and free'd (%d) (%d percent), in use = %d", zone->index, zone->chunk_size, zone->chunk_count,
Expand Down
54 changes: 53 additions & 1 deletion src/iso_alloc_sanity.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,58 @@ INTERNAL_HIDDEN void _verify_zone(iso_alloc_zone_t *zone) {
}
}

for(bitmap_index_t i = 0; i < zone->max_bitmap_idx; i++) {
const bitmap_index_t max = zone->max_bitmap_idx;

#if USE_NEON
/* A chunk needs its canary verified when the high bit of its 2-bit
* slot is set (was-used-now-free=01 or canary=11). The mask of all
* odd bit positions is ~USED_BIT_VECTOR. We process two qwords at a
* time and quick-reject any pair that has no odd bits set, then walk
* just the set odd-bit positions with ctz instead of checking all 32. */
const uint64_t CANARY_BIT_VECTOR = ~(uint64_t) USED_BIT_VECTOR;
bitmap_index_t i = 0;
const bitmap_index_t max_pair = max & ~(bitmap_index_t) 1;

for(; i < max_pair; i += 2) {
int64x2_t v = vld1q_s64((const int64_t *) &bm[i]);
const uint64_t lo = (uint64_t) vgetq_lane_s64(v, 0);
const uint64_t hi = (uint64_t) vgetq_lane_s64(v, 1);

if(((lo | hi) & CANARY_BIT_VECTOR) == 0) {
continue;
}

uint64_t mask0 = lo & CANARY_BIT_VECTOR;
while(mask0) {
int j = __builtin_ctzll(mask0);
mask0 &= mask0 - 1;
bit_slot = ((bit_slot_t) i << BITS_PER_QWORD_SHIFT) + j;
const void *p = POINTER_FROM_BITSLOT(zone, bit_slot);
check_canary(zone, p);
}

uint64_t mask1 = hi & CANARY_BIT_VECTOR;
while(mask1) {
int j = __builtin_ctzll(mask1);
mask1 &= mask1 - 1;
bit_slot = ((bit_slot_t)(i + 1) << BITS_PER_QWORD_SHIFT) + j;
const void *p = POINTER_FROM_BITSLOT(zone, bit_slot);
check_canary(zone, p);
}
}

for(; i < max; i++) {
uint64_t mask = (uint64_t) bm[i] & CANARY_BIT_VECTOR;
while(mask) {
int j = __builtin_ctzll(mask);
mask &= mask - 1;
bit_slot = ((bit_slot_t) i << BITS_PER_QWORD_SHIFT) + j;
const void *p = POINTER_FROM_BITSLOT(zone, bit_slot);
check_canary(zone, p);
}
}
#else
for(bitmap_index_t i = 0; i < max; i++) {
bit_slot_t bsl = bm[i];
for(int64_t j = 1; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
/* If this bit is set it is either a free chunk or
Expand All @@ -116,6 +167,7 @@ INTERNAL_HIDDEN void _verify_zone(iso_alloc_zone_t *zone) {
}
}
}
#endif

MASK_ZONE_PTRS(zone);
}
Expand Down
Loading