From 7fc3100299d8e5f8c692b079c9e8b7732559eceb Mon Sep 17 00:00:00 2001 From: zhixiangli Date: Wed, 29 Apr 2026 01:19:44 +0000 Subject: [PATCH 1/2] perf: use _DummyListBuffer in test_reads.py to avoid GIL contention --- .../time_based/reads/test_reads.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py index 5dc876105277..ae4e38c6adec 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py @@ -114,6 +114,24 @@ def _download_time_based_json(client, filename, params): return total_bytes_downloaded +# _DummyListBuffer is used instead of io.BytesIO to avoid GIL contention +# during profiling. io.BytesIO.write() holds the GIL while copying data, +# which introduces significant noise and bottlenecks in performance tests +# with high concurrency or large data transfers. +# This buffer simply collects chunks in a list and tracks the total size. +class _DummyListBuffer: + def __init__(self): + self.chunks = [] + self.size = 0 + + def write(self, data): + self.chunks.append(data) + self.size += len(data) + + def getvalue(self): + return b"".join(self.chunks) + + async def _download_time_based_async(client, filename, params): mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename) await mrd.open() @@ -138,17 +156,17 @@ async def _worker_coro(): offset = random.randint( 0, params.file_size_bytes - params.chunk_size_bytes ) - ranges.append((offset, params.chunk_size_bytes, BytesIO())) + ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer())) else: # seq for _ in range(params.num_ranges): - ranges.append((offset, params.chunk_size_bytes, BytesIO())) + ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer())) offset += params.chunk_size_bytes if offset + params.chunk_size_bytes > params.file_size_bytes: offset = 0 # Reset offset if end of file is reached await mrd.download_ranges(ranges) - bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges) + bytes_in_buffers = sum(r[2].size for r in ranges) assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges if not is_warming_up: From 918ac98d1610fec2d6f58ff40ce56212bb107674 Mon Sep 17 00:00:00 2001 From: zhixiangli Date: Wed, 29 Apr 2026 02:16:40 +0000 Subject: [PATCH 2/2] test(perf): return number of bytes written in _DummyListBuffer.write Also fixed a pre-existing lint error (unused import io.BytesIO). --- .../perf/microbenchmarks/time_based/reads/test_reads.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py index ae4e38c6adec..aecf1894080e 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py @@ -19,7 +19,6 @@ import os import random import time -from io import BytesIO import pytest @@ -126,7 +125,9 @@ def __init__(self): def write(self, data): self.chunks.append(data) - self.size += len(data) + nbytes = len(data) + self.size += nbytes + return nbytes def getvalue(self): return b"".join(self.chunks)