diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py
index 5dc876105277..aecf1894080e 100644
--- a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py
+++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py
@@ -19,7 +19,6 @@
 import os
 import random
 import time
-from io import BytesIO
 
 import pytest
 
@@ -114,6 +113,26 @@ def _download_time_based_json(client, filename, params):
     return total_bytes_downloaded
 
 
+# _DummyListBuffer is used instead of io.BytesIO to avoid GIL contention
+# during profiling. io.BytesIO.write() holds the GIL while copying data,
+# which introduces significant noise and bottlenecks in performance tests
+# with high concurrency or large data transfers.
+# This buffer simply collects chunks in a list and tracks the total size.
+class _DummyListBuffer:
+    def __init__(self):
+        self.chunks = []
+        self.size = 0
+
+    def write(self, data):
+        self.chunks.append(data)
+        nbytes = len(data)
+        self.size += nbytes
+        return nbytes
+
+    def getvalue(self):
+        return b"".join(self.chunks)
+
+
 async def _download_time_based_async(client, filename, params):
     mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
     await mrd.open()
@@ -138,17 +157,17 @@ async def _worker_coro():
                 offset = random.randint(
                     0, params.file_size_bytes - params.chunk_size_bytes
                 )
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
         else:  # seq
             for _ in range(params.num_ranges):
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
                 offset += params.chunk_size_bytes
                 if offset + params.chunk_size_bytes > params.file_size_bytes:
                     offset = 0  # Reset offset if end of file is reached
 
         await mrd.download_ranges(ranges)
 
-        bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges)
+        bytes_in_buffers = sum(r[2].size for r in ranges)
         assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges
 
         if not is_warming_up: