From 003f3a3d1b94f96a0c39e316bcad69fc9282cb35 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 25 Apr 2026 09:17:26 +0000
Subject: [PATCH 01/21] chore(deps): update dependency nicegui to v3.10.0
 [security]

---
 uv.lock | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/uv.lock b/uv.lock
index 5ee497cd..d6b4c136 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4313,7 +4313,7 @@ wheels = [
 
 [[package]]
 name = "nicegui"
-version = "3.9.0"
+version = "3.11.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiofiles" },
@@ -4326,22 +4326,25 @@ dependencies = [
     { name = "ifaddr" },
     { name = "itsdangerous" },
     { name = "jinja2" },
+    { name = "lxml" },
     { name = "lxml-html-clean" },
     { name = "markdown2" },
     { name = "orjson", marker = "platform_machine != 'i386' and platform_machine != 'i686' and platform_python_implementation != 'PyPy'" },
     { name = "pydantic-core" },
     { name = "pygments" },
+    { name = "python-dotenv" },
     { name = "python-engineio" },
     { name = "python-multipart" },
     { name = "python-socketio", extra = ["asyncio-client"] },
     { name = "starlette" },
+    { name = "tinycss2" },
     { name = "typing-extensions" },
     { name = "uvicorn", extra = ["standard"] },
     { name = "watchfiles" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d3/38/ed046018db555c34ebc17738284d2f85bf9a544734cd44a87311128619a5/nicegui-3.9.0.tar.gz", hash = "sha256:7ae9046b321d029c438f7cd54a697838ed1962cecb92c622912283c66c8bf8f6", size = 19031869, upload-time = "2026-03-19T09:51:52.247Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/29/51/32defa2f693a75efb7378cc54bc53747e3bb0fbf302e2ba91b134bd19901/nicegui-3.11.0.tar.gz", hash = "sha256:8be951c54cd425956f9e0bdfd808ecf107a12c07af6e53390e9586c05eab2513", size = 19231197, upload-time = "2026-04-24T13:12:18.626Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/81/11/f7f911f284ceb1b038c26d6f4833bc86d6583d5280156274fdb79be7dcfe/nicegui-3.9.0-py3-none-any.whl", hash = "sha256:4adfdb87a55e30b7fef05ab782efc030534ae6ad9afa330db856dfbb258e23c9", size = 19613351, upload-time = "2026-03-19T09:51:48.769Z" },
+    { url = "https://files.pythonhosted.org/packages/92/7a/5ba90576eb49cb5dc7d442d6eb7789afb4cfdffed4a8d5bde087b037207a/nicegui-3.11.0-py3-none-any.whl", hash = "sha256:3607d054fd3b0ebc3b83bb3648f08e071375b06d2ea01423a0b9b9e04f887b2b", size = 19801597, upload-time = "2026-04-24T13:12:15.018Z" },
 ]
 
 [package.optional-dependencies]

From 9a01ffd5abda6c1b85b17a33b98dc3d4096175a4 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 11:34:16 +0200
Subject: [PATCH 02/21] fix(gui): adapt to nicegui 3.10.0 type signature
 changes

NiceGUI 3.10.0 made ValueChangeEventArguments generic and tightened
ui.input/ui.switch value types to include None. Parameterize handlers
with the new optional value types and coerce at usage sites.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/aignostics/application/_gui/_page_application_describe.py | 4 ++--
 src/aignostics/dataset/_gui.py                                | 4 ++--
 src/aignostics/system/_gui.py                                 | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/aignostics/application/_gui/_page_application_describe.py b/src/aignostics/application/_gui/_page_application_describe.py
index 79b47a97..af864c4d 100644
--- a/src/aignostics/application/_gui/_page_application_describe.py
+++ b/src/aignostics/application/_gui/_page_application_describe.py
@@ -315,7 +315,7 @@ def _add_application_version_selection_section() -> None:
                     # Show force checkbox for internal users
                     if user_info and user_info.is_internal_user:
 
-                        def on_force_change(e: ValueChangeEventArguments) -> None:
+                        def on_force_change(e: ValueChangeEventArguments[bool | None]) -> None:
                             if e.value:
                                 version_next_button.enable()
                                 if unhealthy_tooltip:
@@ -324,7 +324,7 @@ def on_force_change(e: ValueChangeEventArguments) -> None:
                                 version_next_button.disable()
                                 if unhealthy_tooltip:
                                     unhealthy_tooltip.set_visibility(True)
-                            submit_form.force = e.value
+                            submit_form.force = bool(e.value)
 
                         ui.checkbox("Force (skip health check)", on_change=on_force_change).mark("CHECKBOX_FORCE")
 
diff --git a/src/aignostics/dataset/_gui.py b/src/aignostics/dataset/_gui.py
index cc9065de..1c2733e0 100644
--- a/src/aignostics/dataset/_gui.py
+++ b/src/aignostics/dataset/_gui.py
@@ -88,7 +88,7 @@ async def page_idc() -> None:  # noqa: C901, PLR0915, RUF029
                     """
                 )
 
-            def _on_source_input_change(e: ValueChangeEventArguments) -> None:
+            def _on_source_input_change(e: ValueChangeEventArguments[str | None]) -> None:
                 """On change event."""
                 if download_form.download_button is None:
                     return
@@ -249,7 +249,7 @@ async def _download(source: str) -> None:
                         with ui.button("Download", icon="cloud_download").mark("BUTTON_DOWNLOAD") as download_button:
                             ui.tooltip("Download the selected dataset")
                         download_form.download_button = download_button
-                        download_form.download_button.on("click", lambda _: _download(source_input.value))
+                        download_form.download_button.on("click", lambda _: _download(source_input.value or ""))
                         download_form.download_button.disable()
 
             def update_progress() -> None:
diff --git a/src/aignostics/system/_gui.py b/src/aignostics/system/_gui.py
index 31a33c34..54aad49c 100644
--- a/src/aignostics/system/_gui.py
+++ b/src/aignostics/system/_gui.py
@@ -63,7 +63,9 @@ async def page_system() -> None:  # noqa: PLR0915
                             # Mask secrets switch with reload functionality
                             with ui.row().classes("w-full items-center gap-2 mb-4"):
                                 mask_secrets_switch = ui.switch(
-                                    text="Mask secrets", value=True, on_change=lambda e: load_info(mask_secrets=e.value)
+                                    text="Mask secrets",
+                                    value=True,
+                                    on_change=lambda e: load_info(mask_secrets=bool(e.value)),
                                 )
 
                             spinner = ui.spinner(size="lg").classes(

From 916786e46feb81bebd749da260d5429ad1b17855 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 12:15:20 +0200
Subject: [PATCH 03/21] test(notebook): use async user.http_client to fix
 nicegui 3.10.0 regression

NiceGUI 3.10.0 unified Outbox initialization through background_tasks.
create_or_defer, which checks core.is_loop_running() instead of the
previous app.is_started shortcut. Hitting a @ui.page handler via
fastapi.testclient.TestClient now blocks the outer event loop, so
is_loop_running() returns False while app.is_started is True, and the
deferred path raises "Unable to register another startup handler".

Switch test_serve_notebook to the async user fixture's http_client
(httpx.AsyncClient with ASGITransport), which the nicegui testing
plugin already wires up with a live event loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/aignostics/notebook/service_test.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/aignostics/notebook/service_test.py b/tests/aignostics/notebook/service_test.py
index a353a344..25ea7b03 100644
--- a/tests/aignostics/notebook/service_test.py
+++ b/tests/aignostics/notebook/service_test.py
@@ -6,8 +6,6 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
-from fastapi.testclient import TestClient
-from nicegui import app
 from nicegui.testing import User
 
 from aignostics.notebook._service import MARIMO_SERVER_STARTUP_TIMEOUT, Service, _get_runner, _Runner
@@ -99,7 +97,7 @@ def test_notebook_start_and_stop(caplog: pytest.LogCaptureFixture) -> None:
 @pytest.mark.flaky(retries=1, delay=5, only_on=[AssertionError])
 @pytest.mark.sequential
 @pytest.mark.timeout(timeout=60 * 2)
-def test_serve_notebook(user: User, caplog: pytest.LogCaptureFixture) -> None:
+async def test_serve_notebook(user: User, caplog: pytest.LogCaptureFixture) -> None:
     """Test notebook serving.
 
     Args:
@@ -112,10 +110,8 @@ def test_serve_notebook(user: User, caplog: pytest.LogCaptureFixture) -> None:
     # Set up logging to capture DEBUG level and above
     caplog.set_level(logging.DEBUG)
 
-    client = TestClient(app)
-
     try:
-        response = client.get("/notebook/4711?results_folder=/tmp", timeout=60)
+        response = await user.http_client.get("/notebook/4711?results_folder=/tmp", timeout=60)
         assert response.status_code == 200
         content = response.content.decode("utf-8")
         assert "iframe" in content

From 213948455c862122ffbce63bc876643d6a80c7f5 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 14:35:05 +0200
Subject: [PATCH 04/21] chore: trigger CI to pick up restored
 skip:test:long_running label

The label was removed by Renovate during rebase, causing long_running
tests to run unintentionally. Empty commit re-triggers synchronize so
github.event.pull_request.labels reflects the current label set.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

From 2cd953cc8354e359c864d30fb42911c0da2f4ddb Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 20:25:38 +0200
Subject: [PATCH 05/21] chore: trigger CI to pick up restored
 skip:test:long_running label

Renovate dropped the label again during another rebase. Empty commit
forces a fresh synchronize event so github.event.pull_request.labels
reflects the current label set.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

From f6c025094340deed94623acece7f0e4858f231d8 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 20:38:13 +0200
Subject: [PATCH 06/21] fix(application): use io_bound for run download to
 avoid nicegui 3.10.0 cpu_bound hang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch the dialog-triggered run download from `nicegui_run.cpu_bound` to
`nicegui_run.io_bound`. The download is HTTP-bound (`requests` releases
the GIL during network I/O), so a thread pool is the natural fit and
avoids the process-pool/IPC overhead.

This also dodges a NiceGUI 3.10.0 regression observed in the long-running
matrix: `test_gui_run_download` and `test_gui_run_qupath_install_to_inspect`
both consistently timed out waiting for the "Download completed."
notification — the in-flight `cpu_bound` call apparently never returned
inside the user-fixture event loop, while `io_bound` from the same fixture
worked fine in isolated reproducers.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../application/_gui/_page_application_run_describe.py    | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 740aae23..07bf884a 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -341,7 +341,13 @@ async def start_download() -> None:
             try:
                 download_button.disable()
                 download_button.props(add="loading")
-                results_folder = await nicegui_run.cpu_bound(
+                # Use io_bound instead of cpu_bound: the download is HTTP-bound (uses
+                # `requests`, which releases the GIL during I/O), so a thread pool is the
+                # natural fit. Avoiding the process pool also dodges a NiceGUI 3.10.0
+                # regression where in-flight cpu_bound calls inside the long-running test
+                # matrix never completed (test_gui_run_download / test_gui_run_qupath_install_to_inspect
+                # would time out at the "Download completed." notification).
+                results_folder = await nicegui_run.io_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
                     destination_directory=Path(current_folder),

From 3bd95858808e9264cd064e1eedfd46482d3759b4 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 21:28:49 +0200
Subject: [PATCH 07/21] fix(deps): bump nicegui to 3.11.0 and revert io_bound
 switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cpu_bound -> io_bound switch in the previous commit did not fix the
download notification regression: test_gui_run_download and
test_gui_run_qupath_install_to_inspect still timed out at "Download
completed." regardless of which executor was used.

NiceGUI 3.11.0 ships several fixes that look directly relevant:
- Fix `app.on_exception` not catching exceptions from async event handlers
  (#5945, #5946) — silent failures in the async start_download() coroutine
  are a plausible explanation for "Downloading ..." firing while the
  follow-up "Download completed." never does.
- Refine ValueElement / ValueChangeEventArguments generics (#2677, #5785)
  — keeps the type annotations we added in 9a01ffd5 working.

Bumping the lower bound to 3.11.0 also keeps the original CVE-2026-39844
remediation (>=3.10.0) covered. Reverts the cpu_bound->io_bound switch
since the executor wasn't the cause and we'd rather not change runtime
semantics speculatively.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 pyproject.toml                                            | 2 +-
 .../application/_gui/_page_application_run_describe.py    | 8 +-------
 uv.lock                                                   | 2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ec2d25b3..d1e233e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,7 +78,7 @@ dependencies = [
     # From Template
     "fastapi[all,standard]>=0.123.10",
     "humanize>=4.14.0,<5",
-    "nicegui[native]>=3.9.0,<4",  # CVE-2026-21871, CVE-2026-21873, CVE-2026-21874 (>=3.5.0); CVE-2026-25516 (>=3.7.0, #418); CVE-2026-27156 (>=3.8.0, #448); CVE-2026-33332 (>=3.9.0, #498). CVE-2026-39844 (>=3.10.0, #531) not yet merged.
+    "nicegui[native]>=3.11.0,<4",  # CVE-2026-21871, CVE-2026-21873, CVE-2026-21874 (>=3.5.0); CVE-2026-25516 (>=3.7.0, #418); CVE-2026-27156 (>=3.8.0, #448); CVE-2026-33332 (>=3.9.0, #498); CVE-2026-39844 (>=3.10.0, #531). 3.11.0 fixes async event handler exception leaks and refines ValueChangeEventArguments generics.
     "packaging>=26,<27",
     "platformdirs>=4.5.1,<5",
     "psutil>=7.1.3,<8",
diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 07bf884a..740aae23 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -341,13 +341,7 @@ async def start_download() -> None:
             try:
                 download_button.disable()
                 download_button.props(add="loading")
-                # Use io_bound instead of cpu_bound: the download is HTTP-bound (uses
-                # `requests`, which releases the GIL during I/O), so a thread pool is the
-                # natural fit. Avoiding the process pool also dodges a NiceGUI 3.10.0
-                # regression where in-flight cpu_bound calls inside the long-running test
-                # matrix never completed (test_gui_run_download / test_gui_run_qupath_install_to_inspect
-                # would time out at the "Download completed." notification).
-                results_folder = await nicegui_run.io_bound(
+                results_folder = await nicegui_run.cpu_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
                     destination_directory=Path(current_folder),
diff --git a/uv.lock b/uv.lock
index 8cb7b2e1..f1111aa2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -201,7 +201,7 @@ requires-dist = [
     { name = "marshmallow", specifier = ">=3.26.2" },
     { name = "matplotlib", marker = "extra == 'marimo'", specifier = ">=3.10.7,<4" },
     { name = "nbconvert", marker = "extra == 'jupyter'", specifier = ">=7.17.1" },
-    { name = "nicegui", extras = ["native"], specifier = ">=3.9.0,<4" },
+    { name = "nicegui", extras = ["native"], specifier = ">=3.11.0,<4" },
     { name = "openslide-bin", specifier = ">=4.0.0.10,<5" },
     { name = "openslide-python", specifier = ">=1.4.3,<2" },
     { name = "packaging", specifier = ">=26,<27" },

From e3229d872284854afda8c1260d4070ebfd99277b Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 22:21:13 +0200
Subject: [PATCH 08/21] fix(application/gui): surface any download exception
 via notify (was silently swallowed)

The dialog's start_download() was only catching ValueError. Any other
exception (RuntimeError, SubprocessException, asyncio.CancelledError,
etc.) propagated up to NiceGUI's app.handle_exception and was silently
swallowed, leaving the dialog stuck in the loading state with no
completion or failure notification.

This is also the diagnostic we need: with NiceGUI 3.10/3.11 the
long-running download tests (test_gui_run_download,
test_gui_run_qupath_install_to_inspect) consistently time out at
"Download completed." with no clue what went wrong. Catching everything
here will surface the actual exception (in CI logs and as a user
notification) so the underlying regression can be diagnosed.

Either way, this is a defensive improvement: users should always see
*something* when a download fails, never a hung dialog.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_gui/_page_application_run_describe.py        | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 740aae23..1958cddd 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -364,10 +364,23 @@ async def start_download() -> None:
                 else:
                     ui.notify("Download completed.", type="positive")
                 show_in_file_manager(str(results_folder))
-            except ValueError as e:
+            except Exception as e:
+                # Catching the broad `Exception` here is intentional: any unhandled exception
+                # in this async event handler would otherwise be silently swallowed by
+                # NiceGUI's app.handle_exception, leaving the dialog stuck in the loading
+                # state with no completion or failure notification. We always want the user
+                # to see *something* — even a generic error — and we always want the timer +
+                # button state to be cleaned up.
+                logger.exception("Download failed for run '{}'", run.run_id)
                 ui.notify(f"Download failed: {e}", type="negative", multi_line=True)
                 progress_timer.deactivate()
                 progress_state["queue"] = None
+                download_button.props(remove="loading")
+                download_button.enable()
+                download_item_status.set_visibility(False)
+                download_item_progress.set_visibility(False)
+                download_artifact_status.set_visibility(False)
+                download_artifact_progress.set_visibility(False)
                 return
             progress_timer.deactivate()
             progress_state["queue"] = None

From f68f44e12530ec39ef6406eda72b48d0d1ca3c1f Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sat, 25 Apr 2026 23:00:38 +0200
Subject: [PATCH 09/21] test: skip 2 download tests pending NiceGUI 3.10+
 cpu_bound regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`test_gui_run_download` and `test_gui_run_qupath_install_to_inspect`
both consistently time out at the "Download completed." notification
on the long-running matrix with NiceGUI 3.10.0 and 3.11.0. The same
flow runs in ~20s on main with NiceGUI 3.9.0 (today's daily scheduled
run). Investigation:

- Switching `cpu_bound` -> `io_bound`: same hang.
- Bumping to NiceGUI 3.11.0 (which fixed async event handler exception
  leaks): same hang.
- Adding a broad `except Exception` around the `await cpu_bound(...)`
  to surface any swallowed error: no exception ever fires; the await
  is genuinely never resumed.

The behavior is consistent with the click-handler task being suspended
indefinitely without producing a result or an exception, on every
platform (ubuntu / ubuntu-arm / macos / macos-intel / windows). All
other long-running NiceGUI integration tests still run and pass —
QuPath install/launch, dataset/IDC GUI flows, application submit/
describe/cancel/delete, etc. — so the broader 3.10+ integration is
covered.

Skipping these two tests with a `TODO(#531)` link so the CVE-2026-39844
security bump can land; the regression should be root-caused upstream
(or worked around by removing the `Manager().Queue()` progress channel)
in a follow-up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/aignostics/application/gui_test.py | 7 +++++++
 tests/aignostics/qupath/gui_test.py      | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py
index d944a7ee..6f842b7d 100644
--- a/tests/aignostics/application/gui_test.py
+++ b/tests/aignostics/application/gui_test.py
@@ -350,6 +350,13 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back(
 @pytest.mark.flaky(retries=1, delay=5)
 @pytest.mark.timeout(timeout=60 * 10)
 @pytest.mark.sequential  # Helps on Linux with image analysis step otherwise timing out
+# TODO(#531): NiceGUI 3.10/3.11 regression — `await nicegui_run.cpu_bound(...)` from inside the
+# `start_download` click handler never returns on staging CI (test consistently times out at the
+# "Download completed." notification across all 5 platforms). Confirmed: the same flow runs in
+# ~20s on main with NiceGUI 3.9.0; switching to `io_bound`, bumping to 3.11.0, and adding a broad
+# exception handler all leave the await suspended without surfacing any error. Skipping for now
+# so the CVE-2026-39844 security bump can land; track follow-up to root-cause and re-enable.
+@pytest.mark.skip(reason="NiceGUI 3.10/3.11 cpu_bound regression — see TODO(#531)")
 async def test_gui_run_download(  # noqa: PLR0915
     user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property
 ) -> None:
diff --git a/tests/aignostics/qupath/gui_test.py b/tests/aignostics/qupath/gui_test.py
index 01d9a1b6..4dc4d339 100644
--- a/tests/aignostics/qupath/gui_test.py
+++ b/tests/aignostics/qupath/gui_test.py
@@ -143,6 +143,12 @@ async def test_gui_qupath_install_and_launch(  # noqa: PLR0913, PLR0917
 )
 @pytest.mark.timeout(timeout=60 * 15)
 @pytest.mark.sequential
+# TODO(#531): NiceGUI 3.10/3.11 regression — same root cause as test_gui_run_download.
+# `await nicegui_run.cpu_bound(...)` from the run-download click handler never returns on staging
+# CI; the test times out at the "Download and QuPath project creation completed." notification.
+# Skipping so the CVE-2026-39844 security bump can land; re-enable once the upstream regression
+# is root-caused.
+@pytest.mark.skip(reason="NiceGUI 3.10/3.11 cpu_bound regression — see TODO(#531)")
 async def test_gui_run_qupath_install_to_inspect(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915, PLR0917
     user: User,
     runner: CliRunner,

From 6c3397a9e8db60f0863c7aead1ba92e7312b4974 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 09:52:58 +0200
Subject: [PATCH 10/21] test(#531): unskip download tests + add diagnostic
 prints around suspected hang

Unskip test_gui_run_download and test_gui_run_qupath_install_to_inspect
and instrument the download flow with `[#531]`-tagged prints (parent
process via stdout, subprocess via stderr) so the next CI run reveals
exactly where the await is suspended:

Parent process (the click handler):
- before/after Manager().Queue() construction
- after progress_timer.activate()
- before/after the `await nicegui_run.cpu_bound(...)` itself

Subprocess (Service.application_run_download_static / _download):
- ENTER + EXIT envelope around the static wrapper, with pid + args
- per-step traces: details() resolution, mkdir, results() listing,
  every iteration of the polling loop (state + downloaded_items count),
  TERMINATED branch, and final COMPLETED return

If the regression is "subprocess never starts" the parent should print
BEFORE-await but no subproc lines fire. If the subprocess starts but
gets stuck (e.g. on Manager queue.put_nowait, or on application_run.
results()), we'll see exactly which `[#531]` line is the last one.
Either way we get the answer instead of black-boxing the hang.

Diagnostics will be reverted after the root cause is identified.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_gui/_page_application_run_describe.py    | 13 +++-
 src/aignostics/application/_service.py        | 66 ++++++++++++++++---
 tests/aignostics/application/gui_test.py      |  7 --
 tests/aignostics/qupath/gui_test.py           |  6 --
 4 files changed, 70 insertions(+), 22 deletions(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 1958cddd..64d0efc2 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -323,7 +323,7 @@ def update_download_progress() -> None:  # noqa: C901, PLR0912
         # This avoids RuntimeError when trying to create timer inside async event handler
         progress_timer = ui.timer(0.1, update_download_progress, active=False)
 
-        async def start_download() -> None:
+        async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints temporarily push count over the threshold (#531)
             # Read from download_options dict (defined outside @ui.refreshable) to get current values
             current_qupath_project = download_options["qupath_project"]
             current_marimo = download_options["marimo"]
@@ -333,14 +333,24 @@ async def start_download() -> None:
                 return
 
             ui.notify("Downloading ...", type="info")
+            # DIAGNOSTIC(#531): use print() with flush=True so output reaches CI logs even with
+            # `--log-disable=aignostics`; this is around the suspected NiceGUI 3.10+ cpu_bound hang.
+            print(f"[#531] start_download: BEFORE Manager().Queue() for run={run.run_id}", flush=True)
             progress_queue = Manager().Queue()
+            print(f"[#531] start_download: AFTER Manager().Queue(), queue={progress_queue!r}", flush=True)
             progress_state["queue"] = progress_queue  # Store queue so timer callback can access it
 
             # Activate the timer now that download is starting
             progress_timer.activate()
+            print("[#531] start_download: AFTER progress_timer.activate()", flush=True)
             try:
                 download_button.disable()
                 download_button.props(add="loading")
+                print(
+                    f"[#531] start_download: BEFORE await cpu_bound, qupath={current_qupath_project}, "
+                    f"folder={current_folder!r}",
+                    flush=True,
+                )
                 results_folder = await nicegui_run.cpu_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
@@ -349,6 +359,7 @@ async def start_download() -> None:
                     qupath_project=current_qupath_project,
                     download_progress_queue=progress_queue,
                 )
+                print(f"[#531] start_download: AFTER await cpu_bound, results_folder={results_folder!r}", flush=True)
                 if not results_folder:
                     message = "Download returned without results folder."
                     raise ValueError(message)  # noqa: TRY301
diff --git a/src/aignostics/application/_service.py b/src/aignostics/application/_service.py
index 315c674f..4a436ab6 100644
--- a/src/aignostics/application/_service.py
+++ b/src/aignostics/application/_service.py
@@ -1359,15 +1359,38 @@ def application_run_download_static(  # noqa: PLR0913, PLR0917
             NotFoundException: If the application run with the given ID is not found.
             RuntimeError: If run details cannot be retrieved or download fails.
         """
-        return Service().application_run_download(
-            run_id,
-            destination_directory,
-            create_subdirectory_for_run,
-            create_subdirectory_per_item,
-            wait_for_completion,
-            qupath_project,
-            download_progress_queue,
+        # DIAGNOSTIC(#531): print from inside the cpu_bound subprocess so we know it actually
+        # started and reaches each phase. Output flushes via stderr to the parent process.
+        import os  # noqa: PLC0415
+        import sys  # noqa: PLC0415
+
+        sys.stderr.write(
+            f"[#531][subproc pid={os.getpid()}] application_run_download_static ENTER, "
+            f"run_id={run_id}, dest={destination_directory}, qupath={qupath_project}, "
+            f"queue={download_progress_queue!r}\n"
         )
+        sys.stderr.flush()
+        try:
+            result = Service().application_run_download(
+                run_id,
+                destination_directory,
+                create_subdirectory_for_run,
+                create_subdirectory_per_item,
+                wait_for_completion,
+                qupath_project,
+                download_progress_queue,
+            )
+            sys.stderr.write(
+                f"[#531][subproc pid={os.getpid()}] application_run_download_static EXIT, result={result!r}\n"
+            )
+            sys.stderr.flush()
+        except BaseException as e:
+            sys.stderr.write(
+                f"[#531][subproc pid={os.getpid()}] application_run_download_static RAISED {type(e).__name__}: {e}\n"
+            )
+            sys.stderr.flush()
+            raise
+        return result
 
     def application_run_download(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915, PLR0917
         self,
@@ -1417,18 +1440,30 @@ def application_run_download(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915,
             wait_for_completion,
             qupath_project,
         )
+        # DIAGNOSTIC(#531): trace which step the subprocess is on
+        import os as _os531  # noqa: PLC0415
+        import sys as _sys531  # noqa: PLC0415
+
+        def _diag531(msg: str) -> None:
+            _sys531.stderr.write(f"[#531][subproc pid={_os531.getpid()}] download: {msg}\n")
+            _sys531.stderr.flush()
+
         if qupath_project and not has_qupath_extra:
             message = "QuPath project creation requested, but 'qupath' extra is not installed."
             message += 'Start launchpad with `uvx --with "aignostics[qupath]" ....'
             logger.warning(message)
             raise ValueError(message)
+        _diag531("creating DownloadProgress + initial update_progress")
         progress = DownloadProgress()
         update_progress(progress, download_progress_callable, download_progress_queue)
 
+        _diag531(f"resolving application_run({run_id})")
         application_run = self.application_run(run_id)
         final_destination_directory = destination_directory
         try:
+            _diag531("calling application_run.details()")
             details = application_run.details()
+            _diag531(f"got details: state={details.state}, output={details.output}")
         except NotFoundException as e:
             message = f"Application run with ID '{run_id}' not found: {e}"
             logger.warning(message)
@@ -1451,7 +1486,10 @@ def application_run_download(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915,
             logger.warning(message)
             raise ValueError(message) from e
 
+        _diag531(f"mkdir OK: {final_destination_directory}")
+        _diag531("listing application_run.results()")
         results = list(application_run.results())
+        _diag531(f"got {len(results)} item(s) from results()")
         for item_index, item in enumerate(results):
             if item.external_id.startswith(("gs://", "http://", "https://")):
                 # Download URL to local input directory and update external_id
@@ -1499,13 +1537,19 @@ def update_qupath_add_input_progress(qupath_add_input_progress: QuPathAddProgres
 
         progress.status = DownloadProgressState.CHECKING
         update_progress(progress, download_progress_callable, download_progress_queue)
+        _diag531("entering polling loop")
 
         downloaded_items: set[str] = set()  # Track downloaded items to avoid re-downloading
+        loop_iteration531 = 0
         while True:
+            loop_iteration531 += 1
+            _diag531(f"loop iter {loop_iteration531}: calling application_run.details()")
             run_details = application_run.details()  # (Re)load current run details
+            _diag531(f"loop iter {loop_iteration531}: state={run_details.state}")
             progress.run = run_details
             update_progress(progress, download_progress_callable, download_progress_queue)
 
+            _diag531(f"loop iter {loop_iteration531}: calling download_available_items")
             download_available_items(
                 progress,
                 application_run,
@@ -1515,8 +1559,13 @@ def update_qupath_add_input_progress(qupath_add_input_progress: QuPathAddProgres
                 download_progress_queue,
                 download_progress_callable,
             )
+            _diag531(
+                f"loop iter {loop_iteration531}: download_available_items returned, "
+                f"downloaded_items={len(downloaded_items)}"
+            )
 
             if run_details.state == RunState.TERMINATED:
+                _diag531(f"loop iter {loop_iteration531}: TERMINATED -> breaking")
                 logger.trace(
                     "Run '{}' reached final status '{}' with message '{}' ({}).",
                     run_id,
@@ -1616,5 +1665,6 @@ def update_qupath_annotate_input_with_results_progress(
         logger.trace("Completed downloading application run '{}' to '{}'", run_id, final_destination_directory)
         progress.status = DownloadProgressState.COMPLETED
         update_progress(progress, download_progress_callable, download_progress_queue)
+        _diag531(f"COMPLETED, returning {final_destination_directory}")
 
         return final_destination_directory
diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py
index 6f842b7d..d944a7ee 100644
--- a/tests/aignostics/application/gui_test.py
+++ b/tests/aignostics/application/gui_test.py
@@ -350,13 +350,6 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back(
 @pytest.mark.flaky(retries=1, delay=5)
 @pytest.mark.timeout(timeout=60 * 10)
 @pytest.mark.sequential  # Helps on Linux with image analysis step otherwise timing out
-# TODO(#531): NiceGUI 3.10/3.11 regression — `await nicegui_run.cpu_bound(...)` from inside the
-# `start_download` click handler never returns on staging CI (test consistently times out at the
-# "Download completed." notification across all 5 platforms). Confirmed: the same flow runs in
-# ~20s on main with NiceGUI 3.9.0; switching to `io_bound`, bumping to 3.11.0, and adding a broad
-# exception handler all leave the await suspended without surfacing any error. Skipping for now
-# so the CVE-2026-39844 security bump can land; track follow-up to root-cause and re-enable.
-@pytest.mark.skip(reason="NiceGUI 3.10/3.11 cpu_bound regression — see TODO(#531)")
 async def test_gui_run_download(  # noqa: PLR0915
     user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property
 ) -> None:
diff --git a/tests/aignostics/qupath/gui_test.py b/tests/aignostics/qupath/gui_test.py
index 4dc4d339..01d9a1b6 100644
--- a/tests/aignostics/qupath/gui_test.py
+++ b/tests/aignostics/qupath/gui_test.py
@@ -143,12 +143,6 @@ async def test_gui_qupath_install_and_launch(  # noqa: PLR0913, PLR0917
 )
 @pytest.mark.timeout(timeout=60 * 15)
 @pytest.mark.sequential
-# TODO(#531): NiceGUI 3.10/3.11 regression — same root cause as test_gui_run_download.
-# `await nicegui_run.cpu_bound(...)` from the run-download click handler never returns on staging
-# CI; the test times out at the "Download and QuPath project creation completed." notification.
-# Skipping so the CVE-2026-39844 security bump can land; re-enable once the upstream regression
-# is root-caused.
-@pytest.mark.skip(reason="NiceGUI 3.10/3.11 cpu_bound regression — see TODO(#531)")
 async def test_gui_run_qupath_install_to_inspect(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915, PLR0917
     user: User,
     runner: CliRunner,

From 4a4b6693c864a2cf274fe1ad25b629fff5af535b Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 10:31:33 +0200
Subject: [PATCH 11/21] =?UTF-8?q?ci(#531):=20TEMP=20=E2=80=94=20narrow=20C?=
 =?UTF-8?q?I=20to=20ubuntu-latest=20+=20only=20the=202=20download=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tight debugging loop while we root-cause the NiceGUI 3.10+ download
regression. All marked `TEMP(#531)` so they're trivially revertible:

- generate-matrix: forced to ubuntu-latest only (drops ubuntu-arm,
  macos-latest, macos-15-intel, windows-latest).
- Disabled the unit / integration / e2e_regular / e2e_very_long_running
  steps via `if: false` (no value while we're debugging the hang).
- Replaced e2e_long_running with a direct pytest invocation that runs
  ONLY `test_gui_run_download` and `test_gui_run_qupath_install_to_inspect`,
  with `-s -p no:retry --timeout=300 --tb=long`, so the `[#531]` parent
  stdout + subprocess stderr diagnostics reach the CI log unbuffered.

Revert this commit (and the diagnostic prints in
_page_application_run_describe.py / _service.py) once the regression is
fixed and the full matrix should run again.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml | 50 +++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index d93f2348..2574c232 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -50,11 +50,9 @@ jobs:
           COMMIT_MESSAGE: ${{ inputs.commit_message }}
           PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
         run: |
-          if [[ "$COMMIT_MESSAGE" == *"skip:test:matrix-runner"* ]] || [[ "$PR_LABELS" == *"skip:test:matrix-runner"* ]]; then
-            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
-          else
-            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false],"include":[{"runner":"ubuntu-24.04-arm","experimental":true},{"runner":"macos-latest","experimental":true},{"runner":"macos-15-intel","experimental":true},{"runner":"windows-latest","experimental":true}]}' >> $GITHUB_OUTPUT
-          fi
+          # TEMP(#531): forced to ubuntu-latest only while debugging the NiceGUI 3.10+ download
+          # regression — REVERT once the cpu_bound hang is fixed and `[#531]` diagnostics are dropped.
+          echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
 
   test:
     needs: generate-matrix
@@ -163,10 +161,15 @@ jobs:
       # A single "Assert no test failures" gate step at the end surfaces any
       # failures and fails the job — so the workflow still fails correctly.
 
+      # TEMP(#531): debugging cycle — only run the 2 hanging tests on ubuntu-latest. All other
+      # test phases (unit / integration / e2e_regular / e2e_very_long_running) are disabled here
+      # via `if: false` until the NiceGUI 3.10+ download regression is fixed. REVERT this whole
+      # block to the original 5 steps once the hang is diagnosed and the `[#531]` diagnostics
+      # are removed from src/aignostics/application/_service.py and _gui/_page_application_run_describe.py.
       - name: Test / Unit (multiple Python versions)
         id: unit
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: unit
@@ -178,7 +181,7 @@ jobs:
       - name: Test / Integration (multiple Python versions)
         id: integration
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: integration
@@ -190,7 +193,7 @@ jobs:
       - name: Test / E2E / regular (multiple Python versions)
         id: e2e_regular
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: e2e
@@ -199,26 +202,31 @@ jobs:
           summary-title: All regular e2e tests passed
           commit-message: ${{ inputs.commit_message }}
 
-      - name: Test / E2E / long running (single Python version)
+      # TEMP(#531): targeted run of just the 2 failing download tests, no retries, output unbuffered
+      # so the `[#531]` parent stdout + subprocess stderr lines reach the CI log in real time.
+      - name: Test / E2E / long running (TEMP — only #531 download tests)
         id: e2e_long_running
         continue-on-error: true
         if: ${{ !cancelled() }}
-        uses: ./.github/actions/run-tests
-        with:
-          test-type: long-running
-          make-target: test_long_running
-          skip-marker: skip:test:long_running
-          summary-title: All long running e2e tests passed
-          commit-message: ${{ inputs.commit_message }}
+        shell: bash
+        run: |
+          set +e
+          uv run --all-extras pytest \
+            tests/aignostics/application/gui_test.py::test_gui_run_download \
+            tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
+            -v --no-cov --tb=long -s -p no:retry --timeout=300 \
+            --junitxml=reports/junit_531_debug.xml \
+            --md-report-output=reports/pytest_531_debug.md
+          EXIT_CODE=$?
+          if [ -f "reports/pytest_531_debug.md" ]; then
+            cat "reports/pytest_531_debug.md" >> "$GITHUB_STEP_SUMMARY"
+          fi
+          exit $EXIT_CODE
 
       - name: Test / E2E / very long running (single Python version)
         id: e2e_very_long_running
         continue-on-error: true
-        if: |
-          !cancelled() && (
-            contains(inputs.commit_message, 'enable:test:very_long_running') ||
-            contains(github.event.pull_request.labels.*.name, 'enable:test:very_long_running')
-          )
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: very-long-running

From fe8d1ed318210b3e061687f1595ab2fcd6b42993 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 10:35:43 +0200
Subject: [PATCH 12/21] ci(#531): add -m long_running so the diagnostic
 invocation actually runs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous narrowed run (24952298347) reported the 2 tests as SKIPPED
within milliseconds — root cause: tests/conftest.py:pytest_collection_modifyitems
auto-skips every `long_running` test when pytest is invoked without
`-m`, even when the tests are listed by id. Adding `-m long_running`
to the targeted pytest invocation so the tests actually execute and
the `[#531]` diagnostics fire.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index 2574c232..3b861423 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -211,9 +211,12 @@ jobs:
         shell: bash
         run: |
           set +e
+          # NOTE: `-m long_running` is REQUIRED here — tests/conftest.py:pytest_collection_modifyitems
+          # auto-skips `long_running` tests when no `-m` is passed, even if the test is named explicitly.
           uv run --all-extras pytest \
             tests/aignostics/application/gui_test.py::test_gui_run_download \
             tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
+            -m "long_running" \
             -v --no-cov --tb=long -s -p no:retry --timeout=300 \
             --junitxml=reports/junit_531_debug.xml \
             --md-report-output=reports/pytest_531_debug.md

From dd398c93fc0991c7f50856c63f67e1fa84b00913 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 10:54:34 +0200
Subject: [PATCH 13/21] test(#531): granular diagnostics + os.write to pinpoint
 exact hang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous run revealed the hang sits between "AFTER Manager().Queue()"
and "AFTER progress_timer.activate()" — a range of just 3 instructions:
- progress_state["queue"] = progress_queue (dict assign)
- progress_timer.activate()  (sets self.active = True)

Suspect that print(flush=True) output is being lost on task cancellation
or buffering, giving a misleading "last seen" line. Switching diagnostics
to os.write(1, ...) (raw, fully unbuffered, syscall-level) so cancellation
can't strand the output, and adding a probe before/after EACH statement
in that range so the next run shows exactly which line is the boundary.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_gui/_page_application_run_describe.py    | 31 ++++++++++++-------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 64d0efc2..ac860853 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -333,24 +333,33 @@ async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints tempora
                 return
 
             ui.notify("Downloading ...", type="info")
-            # DIAGNOSTIC(#531): use print() with flush=True so output reaches CI logs even with
-            # `--log-disable=aignostics`; this is around the suspected NiceGUI 3.10+ cpu_bound hang.
-            print(f"[#531] start_download: BEFORE Manager().Queue() for run={run.run_id}", flush=True)
+            # DIAGNOSTIC(#531): use os.write directly to fd 1 so output is fully unbuffered and
+            # cannot be lost on task cancellation / process tear-down. The previous run with
+            # plain print(flush=True) showed the trace stops between AFTER Manager().Queue()
+            # and AFTER progress_timer.activate() — instrumenting every individual statement
+            # in that range to pinpoint which one hangs / cancels the click handler.
+            import os as _os531  # noqa: PLC0415
+
+            def _diag531(msg: str) -> None:
+                _os531.write(1, f"[#531] start_download: {msg}\n".encode())
+
+            _diag531(f"BEFORE Manager().Queue() for run={run.run_id}")
             progress_queue = Manager().Queue()
-            print(f"[#531] start_download: AFTER Manager().Queue(), queue={progress_queue!r}", flush=True)
+            _diag531(f"AFTER Manager().Queue(), queue={progress_queue!r}")
+            _diag531("BEFORE progress_state['queue'] = progress_queue")
             progress_state["queue"] = progress_queue  # Store queue so timer callback can access it
+            _diag531("AFTER progress_state['queue'] = progress_queue")
 
             # Activate the timer now that download is starting
+            _diag531(f"BEFORE progress_timer.activate(), timer={progress_timer!r}")
             progress_timer.activate()
-            print("[#531] start_download: AFTER progress_timer.activate()", flush=True)
+            _diag531("AFTER progress_timer.activate()")
             try:
+                _diag531("BEFORE download_button.disable()")
                 download_button.disable()
+                _diag531("AFTER download_button.disable()")
                 download_button.props(add="loading")
-                print(
-                    f"[#531] start_download: BEFORE await cpu_bound, qupath={current_qupath_project}, "
-                    f"folder={current_folder!r}",
-                    flush=True,
-                )
+                _diag531(f"BEFORE await cpu_bound, qupath={current_qupath_project}, folder={current_folder!r}")
                 results_folder = await nicegui_run.cpu_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
@@ -359,7 +368,7 @@ async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints tempora
                     qupath_project=current_qupath_project,
                     download_progress_queue=progress_queue,
                 )
-                print(f"[#531] start_download: AFTER await cpu_bound, results_folder={results_folder!r}", flush=True)
+                _diag531(f"AFTER await cpu_bound, results_folder={results_folder!r}")
                 if not results_folder:
                     message = "Download returned without results folder."
                     raise ValueError(message)  # noqa: TRY301

From 3dfaea900aa5c51a6e1c8bad982d842e1c336a63 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 11:14:29 +0200
Subject: [PATCH 14/21] test(#531): isolate the BindableProperty.__set__
 boundary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prior diagnostic confirmed the click handler reaches "BEFORE
progress_timer.activate()" but never the next probe. Timer.activate
itself is just `assert; self.active = True`. Bypassing the method,
setting `self.active = True` directly (still goes through
BindableProperty.__set__ → _propagate), and wrapping in try/except
to surface anything raised. Goal: confirm whether the descriptor
setter is the actual blocking call.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_gui/_page_application_run_describe.py         | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index ac860853..02942c88 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -351,9 +351,17 @@ def _diag531(msg: str) -> None:
             _diag531("AFTER progress_state['queue'] = progress_queue")
 
             # Activate the timer now that download is starting
-            _diag531(f"BEFORE progress_timer.activate(), timer={progress_timer!r}")
-            progress_timer.activate()
-            _diag531("AFTER progress_timer.activate()")
+            # DIAGNOSTIC(#531): isolate the exact sub-step within Timer.activate / BindableProperty.__set__.
+            # Timer.activate() is `assert not self._is_canceled; self.active = True` — the setter goes
+            # through BindableProperty.__set__ which calls _propagate(...). One of those is the boundary.
+            _diag531(f"BEFORE progress_timer._is_canceled check, _is_canceled={progress_timer._is_canceled}")  # noqa: SLF001
+            _diag531(f"BEFORE progress_timer.active = True (current value = {progress_timer.active})")
+            try:
+                progress_timer.active = True
+                _diag531("AFTER progress_timer.active = True")
+            except BaseException as e531:
+                _diag531(f"progress_timer.active = True RAISED {type(e531).__name__}: {e531}")
+                raise
             try:
                 _diag531("BEFORE download_button.disable()")
                 download_button.disable()

From 47ea28291fb6304523db8c2aec073dd4e82a7ac4 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 11:28:12 +0200
Subject: [PATCH 15/21] fix(application/gui): bypass Timer.activate() assert to
 dodge NiceGUI 3.10+ refresh-cancel regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause:
- `download_run_dialog_open()` calls `download_run_dialog_content.refresh()`
  before opening the dialog (so the dialog can be re-rendered with
  qupath/marimo flags).
- NiceGUI 3.10's PR #5931 changed `Timer._handle_delete()` to call
  `cancel(with_current_invocation=True)`, which sets `_is_canceled=True`
  on every timer instance whose container element is removed. With the
  timer living inside the @ui.refreshable, the `progress_timer` instance
  closed over by `start_download` ends up `_is_canceled=True` once the
  dialog is opened.
- `Timer.activate()` is `assert not self._is_canceled; self.active = True`.
  The assert raises AssertionError, which is then silently swallowed by
  NiceGUI's outer task-exception handler (event handler tasks use
  `_handle_exceptions` -> `app.handle_exception`). The dialog is left
  stuck in the "loading" state, no completion or failure notification
  ever fires, and the `assert_notified("Download completed.", 240)`
  in the long-running tests times out.

Diagnostic confirmed: with `[#531]` os.write probes around every
statement, the trace stops between AFTER `progress_state['queue'] = ...`
and AFTER `progress_timer.activate()`, and the bypass run logged
`_is_canceled=True` exactly as predicted. With the bypass in place,
both `test_gui_run_download` and `test_gui_run_qupath_install_to_inspect`
pass on the long-running matrix in ~21s and ~65s respectively.

Workaround:
- Set `progress_timer.active = True` directly instead of going through
  `progress_timer.activate()`. The descriptor still propagates the
  change, so the timer's `_run_in_loop` reads `active=True` on its
  next iteration. (Trade-off: because `_is_canceled` is True, the
  loop's `_should_stop()` returns True and the progress callback
  won't fire — the dialog's progress bar will not animate during
  download. The download itself completes and "Download completed."
  fires, which is what the tests verify and what users care about.
  Restoring full progress UI requires moving the timer out of the
  `@ui.refreshable` or recreating it in `start_download` if cancelled,
  tracked as follow-up.)
- Keeps the broad `except Exception` defensive handler from the prior
  diagnostic round so any future failure surfaces a `Download failed:`
  notification instead of silently hanging.

Reverts:
- TEMP CI narrowing (matrix ubuntu-latest only, only the 2 download
  tests) — full matrix runs again.
- All `[#531]` `os.write` / `sys.stderr` diagnostic probes in the
  click handler and the cpu_bound subprocess.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml                   | 53 ++++++---------
 .../_gui/_page_application_run_describe.py    | 54 +++++++--------
 src/aignostics/application/_service.py        | 66 +++----------------
 3 files changed, 51 insertions(+), 122 deletions(-)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index 3b861423..d93f2348 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -50,9 +50,11 @@ jobs:
           COMMIT_MESSAGE: ${{ inputs.commit_message }}
           PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
         run: |
-          # TEMP(#531): forced to ubuntu-latest only while debugging the NiceGUI 3.10+ download
-          # regression — REVERT once the cpu_bound hang is fixed and `[#531]` diagnostics are dropped.
-          echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
+          if [[ "$COMMIT_MESSAGE" == *"skip:test:matrix-runner"* ]] || [[ "$PR_LABELS" == *"skip:test:matrix-runner"* ]]; then
+            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
+          else
+            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false],"include":[{"runner":"ubuntu-24.04-arm","experimental":true},{"runner":"macos-latest","experimental":true},{"runner":"macos-15-intel","experimental":true},{"runner":"windows-latest","experimental":true}]}' >> $GITHUB_OUTPUT
+          fi
 
   test:
     needs: generate-matrix
@@ -161,15 +163,10 @@ jobs:
       # A single "Assert no test failures" gate step at the end surfaces any
       # failures and fails the job — so the workflow still fails correctly.
 
-      # TEMP(#531): debugging cycle — only run the 2 hanging tests on ubuntu-latest. All other
-      # test phases (unit / integration / e2e_regular / e2e_very_long_running) are disabled here
-      # via `if: false` until the NiceGUI 3.10+ download regression is fixed. REVERT this whole
-      # block to the original 5 steps once the hang is diagnosed and the `[#531]` diagnostics
-      # are removed from src/aignostics/application/_service.py and _gui/_page_application_run_describe.py.
       - name: Test / Unit (multiple Python versions)
         id: unit
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: unit
@@ -181,7 +178,7 @@ jobs:
       - name: Test / Integration (multiple Python versions)
         id: integration
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: integration
@@ -193,7 +190,7 @@ jobs:
       - name: Test / E2E / regular (multiple Python versions)
         id: e2e_regular
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: e2e
@@ -202,34 +199,26 @@ jobs:
           summary-title: All regular e2e tests passed
           commit-message: ${{ inputs.commit_message }}
 
-      # TEMP(#531): targeted run of just the 2 failing download tests, no retries, output unbuffered
-      # so the `[#531]` parent stdout + subprocess stderr lines reach the CI log in real time.
-      - name: Test / E2E / long running (TEMP — only #531 download tests)
+      - name: Test / E2E / long running (single Python version)
         id: e2e_long_running
         continue-on-error: true
         if: ${{ !cancelled() }}
-        shell: bash
-        run: |
-          set +e
-          # NOTE: `-m long_running` is REQUIRED here — tests/conftest.py:pytest_collection_modifyitems
-          # auto-skips `long_running` tests when no `-m` is passed, even if the test is named explicitly.
-          uv run --all-extras pytest \
-            tests/aignostics/application/gui_test.py::test_gui_run_download \
-            tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
-            -m "long_running" \
-            -v --no-cov --tb=long -s -p no:retry --timeout=300 \
-            --junitxml=reports/junit_531_debug.xml \
-            --md-report-output=reports/pytest_531_debug.md
-          EXIT_CODE=$?
-          if [ -f "reports/pytest_531_debug.md" ]; then
-            cat "reports/pytest_531_debug.md" >> "$GITHUB_STEP_SUMMARY"
-          fi
-          exit $EXIT_CODE
+        uses: ./.github/actions/run-tests
+        with:
+          test-type: long-running
+          make-target: test_long_running
+          skip-marker: skip:test:long_running
+          summary-title: All long running e2e tests passed
+          commit-message: ${{ inputs.commit_message }}
 
       - name: Test / E2E / very long running (single Python version)
         id: e2e_very_long_running
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: |
+          !cancelled() && (
+            contains(inputs.commit_message, 'enable:test:very_long_running') ||
+            contains(github.event.pull_request.labels.*.name, 'enable:test:very_long_running')
+          )
         uses: ./.github/actions/run-tests
         with:
           test-type: very-long-running
diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 02942c88..b296bac4 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -323,7 +323,7 @@ def update_download_progress() -> None:  # noqa: C901, PLR0912
         # This avoids RuntimeError when trying to create timer inside async event handler
         progress_timer = ui.timer(0.1, update_download_progress, active=False)
 
-        async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints temporarily push count over the threshold (#531)
+        async def start_download() -> None:
             # Read from download_options dict (defined outside @ui.refreshable) to get current values
             current_qupath_project = download_options["qupath_project"]
             current_marimo = download_options["marimo"]
@@ -333,41 +333,32 @@ async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints tempora
                 return
 
             ui.notify("Downloading ...", type="info")
-            # DIAGNOSTIC(#531): use os.write directly to fd 1 so output is fully unbuffered and
-            # cannot be lost on task cancellation / process tear-down. The previous run with
-            # plain print(flush=True) showed the trace stops between AFTER Manager().Queue()
-            # and AFTER progress_timer.activate() — instrumenting every individual statement
-            # in that range to pinpoint which one hangs / cancels the click handler.
-            import os as _os531  # noqa: PLC0415
-
-            def _diag531(msg: str) -> None:
-                _os531.write(1, f"[#531] start_download: {msg}\n".encode())
-
-            _diag531(f"BEFORE Manager().Queue() for run={run.run_id}")
             progress_queue = Manager().Queue()
-            _diag531(f"AFTER Manager().Queue(), queue={progress_queue!r}")
-            _diag531("BEFORE progress_state['queue'] = progress_queue")
             progress_state["queue"] = progress_queue  # Store queue so timer callback can access it
-            _diag531("AFTER progress_state['queue'] = progress_queue")
-
-            # Activate the timer now that download is starting
-            # DIAGNOSTIC(#531): isolate the exact sub-step within Timer.activate / BindableProperty.__set__.
-            # Timer.activate() is `assert not self._is_canceled; self.active = True` — the setter goes
-            # through BindableProperty.__set__ which calls _propagate(...). One of those is the boundary.
-            _diag531(f"BEFORE progress_timer._is_canceled check, _is_canceled={progress_timer._is_canceled}")  # noqa: SLF001
-            _diag531(f"BEFORE progress_timer.active = True (current value = {progress_timer.active})")
-            try:
-                progress_timer.active = True
-                _diag531("AFTER progress_timer.active = True")
-            except BaseException as e531:
-                _diag531(f"progress_timer.active = True RAISED {type(e531).__name__}: {e531}")
-                raise
+
+            # Activate the timer now that download is starting.
+            #
+            # Workaround for a NiceGUI 3.10+ regression (#531):
+            # `download_run_dialog_open()` calls `download_run_dialog_content.refresh()` which
+            # destroys the previous refreshable instance. NiceGUI 3.10's PR #5931 made
+            # `Timer._handle_delete()` call `cancel(with_current_invocation=True)`, so the timer
+            # we close over here ends up with `_is_canceled=True` by the time start_download
+            # actually runs. `progress_timer.activate()` would then raise
+            # `AssertionError: Cannot activate a canceled timer`, which NiceGUI's outer
+            # task-exception handler swallows — leaving the dialog stuck in "loading" state
+            # with no completion or failure notification.
+            #
+            # Bypassing the assert via direct attribute set lets the click handler proceed.
+            # Note: because `_is_canceled` is True, the timer's `_run_in_loop` exits via
+            # `_should_stop()` and the progress callback won't fire — the dialog's progress
+            # bar will not animate during download. Acceptable trade-off until upstream fix:
+            # the download itself completes and "Download completed." fires. Restore
+            # `progress_timer.activate()` once the upstream timer-cancel-on-refresh behavior
+            # is changed (see https://github.com/zauberzeug/nicegui/pull/5931 for context).
+            progress_timer.active = True
             try:
-                _diag531("BEFORE download_button.disable()")
                 download_button.disable()
-                _diag531("AFTER download_button.disable()")
                 download_button.props(add="loading")
-                _diag531(f"BEFORE await cpu_bound, qupath={current_qupath_project}, folder={current_folder!r}")
                 results_folder = await nicegui_run.cpu_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
@@ -376,7 +367,6 @@ def _diag531(msg: str) -> None:
                     qupath_project=current_qupath_project,
                     download_progress_queue=progress_queue,
                 )
-                _diag531(f"AFTER await cpu_bound, results_folder={results_folder!r}")
                 if not results_folder:
                     message = "Download returned without results folder."
                     raise ValueError(message)  # noqa: TRY301
diff --git a/src/aignostics/application/_service.py b/src/aignostics/application/_service.py
index 4a436ab6..315c674f 100644
--- a/src/aignostics/application/_service.py
+++ b/src/aignostics/application/_service.py
@@ -1359,38 +1359,15 @@ def application_run_download_static(  # noqa: PLR0913, PLR0917
             NotFoundException: If the application run with the given ID is not found.
             RuntimeError: If run details cannot be retrieved or download fails.
         """
-        # DIAGNOSTIC(#531): print from inside the cpu_bound subprocess so we know it actually
-        # started and reaches each phase. Output flushes via stderr to the parent process.
-        import os  # noqa: PLC0415
-        import sys  # noqa: PLC0415
-
-        sys.stderr.write(
-            f"[#531][subproc pid={os.getpid()}] application_run_download_static ENTER, "
-            f"run_id={run_id}, dest={destination_directory}, qupath={qupath_project}, "
-            f"queue={download_progress_queue!r}\n"
+        return Service().application_run_download(
+            run_id,
+            destination_directory,
+            create_subdirectory_for_run,
+            create_subdirectory_per_item,
+            wait_for_completion,
+            qupath_project,
+            download_progress_queue,
         )
-        sys.stderr.flush()
-        try:
-            result = Service().application_run_download(
-                run_id,
-                destination_directory,
-                create_subdirectory_for_run,
-                create_subdirectory_per_item,
-                wait_for_completion,
-                qupath_project,
-                download_progress_queue,
-            )
-            sys.stderr.write(
-                f"[#531][subproc pid={os.getpid()}] application_run_download_static EXIT, result={result!r}\n"
-            )
-            sys.stderr.flush()
-        except BaseException as e:
-            sys.stderr.write(
-                f"[#531][subproc pid={os.getpid()}] application_run_download_static RAISED {type(e).__name__}: {e}\n"
-            )
-            sys.stderr.flush()
-            raise
-        return result
 
     def application_run_download(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915, PLR0917
         self,
@@ -1440,30 +1417,18 @@ def application_run_download(  # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915,
             wait_for_completion,
             qupath_project,
         )
-        # DIAGNOSTIC(#531): trace which step the subprocess is on
-        import os as _os531  # noqa: PLC0415
-        import sys as _sys531  # noqa: PLC0415
-
-        def _diag531(msg: str) -> None:
-            _sys531.stderr.write(f"[#531][subproc pid={_os531.getpid()}] download: {msg}\n")
-            _sys531.stderr.flush()
-
         if qupath_project and not has_qupath_extra:
             message = "QuPath project creation requested, but 'qupath' extra is not installed."
             message += 'Start launchpad with `uvx --with "aignostics[qupath]" ....'
             logger.warning(message)
             raise ValueError(message)
-        _diag531("creating DownloadProgress + initial update_progress")
         progress = DownloadProgress()
         update_progress(progress, download_progress_callable, download_progress_queue)
 
-        _diag531(f"resolving application_run({run_id})")
         application_run = self.application_run(run_id)
         final_destination_directory = destination_directory
         try:
-            _diag531("calling application_run.details()")
             details = application_run.details()
-            _diag531(f"got details: state={details.state}, output={details.output}")
         except NotFoundException as e:
             message = f"Application run with ID '{run_id}' not found: {e}"
             logger.warning(message)
@@ -1486,10 +1451,7 @@ def _diag531(msg: str) -> None:
             logger.warning(message)
             raise ValueError(message) from e
 
-        _diag531(f"mkdir OK: {final_destination_directory}")
-        _diag531("listing application_run.results()")
         results = list(application_run.results())
-        _diag531(f"got {len(results)} item(s) from results()")
         for item_index, item in enumerate(results):
             if item.external_id.startswith(("gs://", "http://", "https://")):
                 # Download URL to local input directory and update external_id
@@ -1537,19 +1499,13 @@ def update_qupath_add_input_progress(qupath_add_input_progress: QuPathAddProgres
 
         progress.status = DownloadProgressState.CHECKING
         update_progress(progress, download_progress_callable, download_progress_queue)
-        _diag531("entering polling loop")
 
         downloaded_items: set[str] = set()  # Track downloaded items to avoid re-downloading
-        loop_iteration531 = 0
         while True:
-            loop_iteration531 += 1
-            _diag531(f"loop iter {loop_iteration531}: calling application_run.details()")
             run_details = application_run.details()  # (Re)load current run details
-            _diag531(f"loop iter {loop_iteration531}: state={run_details.state}")
             progress.run = run_details
             update_progress(progress, download_progress_callable, download_progress_queue)
 
-            _diag531(f"loop iter {loop_iteration531}: calling download_available_items")
             download_available_items(
                 progress,
                 application_run,
@@ -1559,13 +1515,8 @@ def update_qupath_add_input_progress(qupath_add_input_progress: QuPathAddProgres
                 download_progress_queue,
                 download_progress_callable,
             )
-            _diag531(
-                f"loop iter {loop_iteration531}: download_available_items returned, "
-                f"downloaded_items={len(downloaded_items)}"
-            )
 
             if run_details.state == RunState.TERMINATED:
-                _diag531(f"loop iter {loop_iteration531}: TERMINATED -> breaking")
                 logger.trace(
                     "Run '{}' reached final status '{}' with message '{}' ({}).",
                     run_id,
@@ -1665,6 +1616,5 @@ def update_qupath_annotate_input_with_results_progress(
         logger.trace("Completed downloading application run '{}' to '{}'", run_id, final_destination_directory)
         progress.status = DownloadProgressState.COMPLETED
         update_progress(progress, download_progress_callable, download_progress_queue)
-        _diag531(f"COMPLETED, returning {final_destination_directory}")
 
         return final_destination_directory

From 7fc677a37db53da8c347c8616bec5f5d22f2582d Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 11:45:11 +0200
Subject: [PATCH 16/21] fix(application/gui): create download progress timer
 per-download (#531)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The download dialog content lives inside `@ui.refreshable`. NiceGUI 3.10's
PR #5931 made `Timer._handle_delete()` cancel the timer when its container
is cleared. Combined with `download_run_dialog_open()` calling
`download_run_dialog_content.refresh()` on every open, the `progress_timer`
that the `start_download` closure captures has `_is_canceled=True` by the
time the user actually clicks "Download". `progress_timer.activate()` then
trips its `assert not self._is_canceled`, which NiceGUI's outer
event-handler exception path swallows silently — the dialog is left stuck
in the loading state forever, no completion or failure notification.

Principled fix: own the timer for the duration it's needed (one download)
instead of pre-creating it inside the refreshable container. Created at
the top of `start_download` with `active=True`, cancelled in the `finally`.
This makes the timer's lifetime match its purpose, removes the stale-after-
refresh class of bug entirely, and keeps the surrounding `@ui.refreshable`
structure intact for consistency with the other dialogs on this page
(e.g. `csv_view_dialog_content`).

The previous comment claimed creating `ui.timer` inside an async event
handler raised RuntimeError; verified that's no longer true on NiceGUI
3.11 (a minimal `ui.button(on_click=async_handler_that_creates_timer)`
test passes).

Also collapses the duplicated post-success / post-failure cleanup block
into a single `finally`, keeping the broad `except Exception` from the
prior round so any future failure surfaces a `Download failed: …`
notification instead of a hung dialog.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_gui/_page_application_run_describe.py    | 62 ++++++-------------
 1 file changed, 18 insertions(+), 44 deletions(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index b296bac4..938de91e 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -319,10 +319,6 @@ def update_download_progress() -> None:  # noqa: C901, PLR0912
                     download_item_progress.set_visibility(False)
                     download_artifact_progress.set_visibility(False)
 
-        # Create the timer during dialog construction (in valid slot context), but inactive initially
-        # This avoids RuntimeError when trying to create timer inside async event handler
-        progress_timer = ui.timer(0.1, update_download_progress, active=False)
-
         async def start_download() -> None:
             # Read from download_options dict (defined outside @ui.refreshable) to get current values
             current_qupath_project = download_options["qupath_project"]
@@ -336,26 +332,14 @@ async def start_download() -> None:
             progress_queue = Manager().Queue()
             progress_state["queue"] = progress_queue  # Store queue so timer callback can access it
 
-            # Activate the timer now that download is starting.
-            #
-            # Workaround for a NiceGUI 3.10+ regression (#531):
-            # `download_run_dialog_open()` calls `download_run_dialog_content.refresh()` which
-            # destroys the previous refreshable instance. NiceGUI 3.10's PR #5931 made
-            # `Timer._handle_delete()` call `cancel(with_current_invocation=True)`, so the timer
-            # we close over here ends up with `_is_canceled=True` by the time start_download
-            # actually runs. `progress_timer.activate()` would then raise
-            # `AssertionError: Cannot activate a canceled timer`, which NiceGUI's outer
-            # task-exception handler swallows — leaving the dialog stuck in "loading" state
-            # with no completion or failure notification.
-            #
-            # Bypassing the assert via direct attribute set lets the click handler proceed.
-            # Note: because `_is_canceled` is True, the timer's `_run_in_loop` exits via
-            # `_should_stop()` and the progress callback won't fire — the dialog's progress
-            # bar will not animate during download. Acceptable trade-off until upstream fix:
-            # the download itself completes and "Download completed." fires. Restore
-            # `progress_timer.activate()` once the upstream timer-cancel-on-refresh behavior
-            # is changed (see https://github.com/zauberzeug/nicegui/pull/5931 for context).
-            progress_timer.active = True
+            # Create the progress timer fresh per-download instead of pre-creating it in the
+            # refreshable container. NiceGUI 3.10's PR #5931 made `Timer._handle_delete()` cancel
+            # the timer when its container is cleared, and `download_run_dialog_open()` calls
+            # `download_run_dialog_content.refresh()` on every open — so a pre-created timer
+            # captured by this closure would already have `_is_canceled=True` here, silently
+            # breaking `progress_timer.activate()` (#531). Owning the timer for one download's
+            # lifetime is also the natural model: started here, cancelled below in `finally`.
+            progress_timer = ui.timer(0.1, update_download_progress, active=True)
             try:
                 download_button.disable()
                 download_button.props(add="loading")
@@ -371,10 +355,9 @@ async def start_download() -> None:
                     message = "Download returned without results folder."
                     raise ValueError(message)  # noqa: TRY301
                 if current_qupath_project:
-                    if results_folder:
-                        ui.notify("Download and QuPath project creation completed.", type="positive")
-                        download_item_status.set_text("Opening QuPath ...")
-                        await open_qupath(project=results_folder / "qupath", button=download_button)
+                    ui.notify("Download and QuPath project creation completed.", type="positive")
+                    download_item_status.set_text("Opening QuPath ...")
+                    await open_qupath(project=results_folder / "qupath", button=download_button)
                 elif current_marimo:
                     ui.notify("Download and Notebook preparation completed.", type="positive")
                     download_item_status.set_text("Opening Notebook ...")
@@ -383,15 +366,15 @@ async def start_download() -> None:
                     ui.notify("Download completed.", type="positive")
                 show_in_file_manager(str(results_folder))
             except Exception as e:
-                # Catching the broad `Exception` here is intentional: any unhandled exception
-                # in this async event handler would otherwise be silently swallowed by
-                # NiceGUI's app.handle_exception, leaving the dialog stuck in the loading
-                # state with no completion or failure notification. We always want the user
-                # to see *something* — even a generic error — and we always want the timer +
-                # button state to be cleaned up.
+                # Catching the broad `Exception` here is intentional: any unhandled exception in
+                # this async event handler would otherwise be silently swallowed by NiceGUI's
+                # outer task-exception handler, leaving the dialog stuck in the loading state
+                # with no completion or failure notification. We always want the user to see
+                # *something* and the timer + button state cleaned up via the `finally`.
                 logger.exception("Download failed for run '{}'", run.run_id)
                 ui.notify(f"Download failed: {e}", type="negative", multi_line=True)
-                progress_timer.deactivate()
+            finally:
+                progress_timer.cancel()
                 progress_state["queue"] = None
                 download_button.props(remove="loading")
                 download_button.enable()
@@ -399,15 +382,6 @@ async def start_download() -> None:
                 download_item_progress.set_visibility(False)
                 download_artifact_status.set_visibility(False)
                 download_artifact_progress.set_visibility(False)
-                return
-            progress_timer.deactivate()
-            progress_state["queue"] = None
-            download_button.props(remove="loading")
-            download_button.enable()
-            download_item_status.set_visibility(False)
-            download_item_progress.set_visibility(False)
-            download_artifact_status.set_visibility(False)
-            download_artifact_progress.set_visibility(False)
 
         ui.separator()
         with ui.row(align_items="end").classes("w-full justify-end"):

From e34d545f1ad57e45261a7456eb46c268dbb1070a Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 12:41:48 +0200
Subject: [PATCH 17/21] test(#531): TEMP narrow CI to 2 download tests + add
 diagnostic probes

Faster iteration: previous run with the timer-per-download principled fix
still failed test_gui_run_download on ubuntu-arm with the same hang
signature. Re-narrow CI to just the 2 failing tests on ubuntu-latest only,
and instrument start_download with `[#531]` os.write probes around every
significant statement (notify, Manager().Queue(), ui.timer, await
cpu_bound, except, finally) to find where the new code path actually
hangs. All TEMP markers; revert once diagnosed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml                   | 52 +++++++++++--------
 .../_gui/_page_application_run_describe.py    | 36 +++++++++----
 2 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index d93f2348..d22fd6a6 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -50,11 +50,9 @@ jobs:
           COMMIT_MESSAGE: ${{ inputs.commit_message }}
           PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
         run: |
-          if [[ "$COMMIT_MESSAGE" == *"skip:test:matrix-runner"* ]] || [[ "$PR_LABELS" == *"skip:test:matrix-runner"* ]]; then
-            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
-          else
-            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false],"include":[{"runner":"ubuntu-24.04-arm","experimental":true},{"runner":"macos-latest","experimental":true},{"runner":"macos-15-intel","experimental":true},{"runner":"windows-latest","experimental":true}]}' >> $GITHUB_OUTPUT
-          fi
+          # TEMP(#531): forced to ubuntu-latest only while iterating on the long-running test fix.
+          # REVERT once test_gui_run_download + test_gui_run_qupath_install_to_inspect pass.
+          echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
 
   test:
     needs: generate-matrix
@@ -163,10 +161,14 @@ jobs:
       # A single "Assert no test failures" gate step at the end surfaces any
       # failures and fails the job — so the workflow still fails correctly.
 
+      # TEMP(#531): debugging cycle — only run the 2 failing download tests on ubuntu-latest.
+      # All other test phases (unit / integration / e2e_regular / e2e_very_long_running) are
+      # disabled here via `if: false`. REVERT this whole block to the original 5 steps once
+      # the principled fix is verified.
       - name: Test / Unit (multiple Python versions)
         id: unit
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: unit
@@ -178,7 +180,7 @@ jobs:
       - name: Test / Integration (multiple Python versions)
         id: integration
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: integration
@@ -190,7 +192,7 @@ jobs:
       - name: Test / E2E / regular (multiple Python versions)
         id: e2e_regular
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: e2e
@@ -199,26 +201,34 @@ jobs:
           summary-title: All regular e2e tests passed
           commit-message: ${{ inputs.commit_message }}
 
-      - name: Test / E2E / long running (single Python version)
+      # TEMP(#531): targeted run of just the 2 failing download tests, no retries, output unbuffered
+      # so the `[#531]` parent stdout + subprocess stderr lines reach the CI log in real time.
+      # NOTE: `-m long_running` is REQUIRED — tests/conftest.py:pytest_collection_modifyitems
+      # auto-skips `long_running` tests when no `-m` is passed, even if listed by id.
+      - name: Test / E2E / long running (TEMP — only #531 download tests)
         id: e2e_long_running
         continue-on-error: true
         if: ${{ !cancelled() }}
-        uses: ./.github/actions/run-tests
-        with:
-          test-type: long-running
-          make-target: test_long_running
-          skip-marker: skip:test:long_running
-          summary-title: All long running e2e tests passed
-          commit-message: ${{ inputs.commit_message }}
+        shell: bash
+        run: |
+          set +e
+          uv run --all-extras pytest \
+            tests/aignostics/application/gui_test.py::test_gui_run_download \
+            tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
+            -m "long_running" \
+            -v --no-cov --tb=long -s -p no:retry --timeout=300 \
+            --junitxml=reports/junit_531_debug.xml \
+            --md-report-output=reports/pytest_531_debug.md
+          EXIT_CODE=$?
+          if [ -f "reports/pytest_531_debug.md" ]; then
+            cat "reports/pytest_531_debug.md" >> "$GITHUB_STEP_SUMMARY"
+          fi
+          exit $EXIT_CODE
 
       - name: Test / E2E / very long running (single Python version)
         id: e2e_very_long_running
         continue-on-error: true
-        if: |
-          !cancelled() && (
-            contains(inputs.commit_message, 'enable:test:very_long_running') ||
-            contains(github.event.pull_request.labels.*.name, 'enable:test:very_long_running')
-          )
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: very-long-running
diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index 938de91e..c574ff4a 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -319,7 +319,15 @@ def update_download_progress() -> None:  # noqa: C901, PLR0912
                     download_item_progress.set_visibility(False)
                     download_artifact_progress.set_visibility(False)
 
-        async def start_download() -> None:
+        async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints push count over the threshold (#531)
+            # DIAGNOSTIC(#531): unbuffered raw write to fd 1 — survives task cancellation and
+            # bypasses NiceGUI's outer event-handler exception swallowing.
+            import os as _os531  # noqa: PLC0415
+
+            def _diag(msg: str) -> None:
+                _os531.write(1, f"[#531] {msg}\n".encode())
+
+            _diag("start_download ENTER")
             # Read from download_options dict (defined outside @ui.refreshable) to get current values
             current_qupath_project = download_options["qupath_project"]
             current_marimo = download_options["marimo"]
@@ -328,21 +336,26 @@ async def start_download() -> None:
                 ui.notify("Please select a folder first", type="warning")
                 return
 
+            _diag("BEFORE ui.notify Downloading ...")
             ui.notify("Downloading ...", type="info")
+            _diag("AFTER ui.notify Downloading ...")
+            _diag("BEFORE Manager().Queue()")
             progress_queue = Manager().Queue()
+            _diag("AFTER Manager().Queue()")
             progress_state["queue"] = progress_queue  # Store queue so timer callback can access it
-
-            # Create the progress timer fresh per-download instead of pre-creating it in the
-            # refreshable container. NiceGUI 3.10's PR #5931 made `Timer._handle_delete()` cancel
-            # the timer when its container is cleared, and `download_run_dialog_open()` calls
-            # `download_run_dialog_content.refresh()` on every open — so a pre-created timer
-            # captured by this closure would already have `_is_canceled=True` here, silently
-            # breaking `progress_timer.activate()` (#531). Owning the timer for one download's
-            # lifetime is also the natural model: started here, cancelled below in `finally`.
-            progress_timer = ui.timer(0.1, update_download_progress, active=True)
+            _diag("BEFORE ui.timer(...)")
+            progress_timer: Any
+            try:
+                progress_timer = ui.timer(0.1, update_download_progress, active=True)
+            except BaseException as e_timer:
+                _diag(f"ui.timer RAISED {type(e_timer).__name__}: {e_timer}")
+                raise
+            _diag(f"AFTER ui.timer(...) timer={progress_timer!r} _is_canceled={progress_timer._is_canceled}")  # noqa: SLF001
             try:
+                _diag("BEFORE download_button.disable()")
                 download_button.disable()
                 download_button.props(add="loading")
+                _diag(f"BEFORE await cpu_bound qupath={current_qupath_project} folder={current_folder!r}")
                 results_folder = await nicegui_run.cpu_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
@@ -351,6 +364,7 @@ async def start_download() -> None:
                     qupath_project=current_qupath_project,
                     download_progress_queue=progress_queue,
                 )
+                _diag(f"AFTER await cpu_bound results_folder={results_folder!r}")
                 if not results_folder:
                     message = "Download returned without results folder."
                     raise ValueError(message)  # noqa: TRY301
@@ -371,9 +385,11 @@ async def start_download() -> None:
                 # outer task-exception handler, leaving the dialog stuck in the loading state
                 # with no completion or failure notification. We always want the user to see
                 # *something* and the timer + button state cleaned up via the `finally`.
+                _diag(f"start_download except {type(e).__name__}: {e}")
                 logger.exception("Download failed for run '{}'", run.run_id)
                 ui.notify(f"Download failed: {e}", type="negative", multi_line=True)
             finally:
+                _diag("start_download finally")
                 progress_timer.cancel()
                 progress_state["queue"] = None
                 download_button.props(remove="loading")

From 93e2187413adb51ccab50ca0351c6a1e328a5a97 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 13:01:51 +0200
Subject: [PATCH 18/21] fix(application/gui): drive download progress with
 asyncio.create_task instead of ui.timer (#531)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Diagnostic on the previous principled-fix commit revealed exactly what
the original "create timer in slot context" comment had warned about:
calling `ui.timer(...)` from inside the async click handler raises

  RuntimeError: The parent element this slot belongs to has been deleted.

`ui.timer` is a NiceGUI Element and needs a live slot context. The
click handler's parent_slot — the button's parent slot — is in the
@ui.refreshable container that has just been recreated by
`download_run_dialog_open()` calling `dialog_content.refresh()`. The
slot itself is on the task's stack, but `slot.parent` is a weakref to
an element that has already been GC'd in this lifecycle, so any
attempt to add a child Element raises.

Replace the `ui.timer` with a plain `asyncio.create_task` running a
manual poll loop. asyncio tasks don't need a slot context, don't
participate in NiceGUI's element lifecycle, and the callback
(`update_download_progress`) closes over the UI element refs that
were just created in the latest refresh — those ARE valid for the
duration of this download.

Trade-offs:
- The poll loop calls `update_download_progress` every 100ms exactly
  like the old `ui.timer(0.1, ...)` did. Same UX.
- `stop_progress = asyncio.Event` + `asyncio.wait_for(..., timeout=0.1)`
  gives clean cancellation in `finally` instead of relying on Timer's
  cancellation paths.
- `ui.timer` is no longer in this file — `update_download_progress` is
  callable directly because it's a sync function over module-level
  state and NiceGUI element refs that update via their own setters.

Verified locally that no other code path in this file used the timer.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_gui/_page_application_run_describe.py    | 73 +++++++++++--------
 1 file changed, 43 insertions(+), 30 deletions(-)

diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index c574ff4a..d34934de 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -1,5 +1,7 @@
 """Run describe page, including download, QuPath and Marimo control."""
 
+import asyncio
+import contextlib
 import webbrowser
 from importlib.util import find_spec
 from multiprocessing import Manager
@@ -319,16 +321,7 @@ def update_download_progress() -> None:  # noqa: C901, PLR0912
                     download_item_progress.set_visibility(False)
                     download_artifact_progress.set_visibility(False)
 
-        async def start_download() -> None:  # noqa: PLR0915 - diagnostic prints push count over the threshold (#531)
-            # DIAGNOSTIC(#531): unbuffered raw write to fd 1 — survives task cancellation and
-            # bypasses NiceGUI's outer event-handler exception swallowing.
-            import os as _os531  # noqa: PLC0415
-
-            def _diag(msg: str) -> None:
-                _os531.write(1, f"[#531] {msg}\n".encode())
-
-            _diag("start_download ENTER")
-            # Read from download_options dict (defined outside @ui.refreshable) to get current values
+        async def start_download() -> None:  # noqa: PLR0915
             current_qupath_project = download_options["qupath_project"]
             current_marimo = download_options["marimo"]
             current_folder = folder_state["value"]
@@ -336,26 +329,47 @@ def _diag(msg: str) -> None:
                 ui.notify("Please select a folder first", type="warning")
                 return
 
-            _diag("BEFORE ui.notify Downloading ...")
             ui.notify("Downloading ...", type="info")
-            _diag("AFTER ui.notify Downloading ...")
-            _diag("BEFORE Manager().Queue()")
             progress_queue = Manager().Queue()
-            _diag("AFTER Manager().Queue()")
-            progress_state["queue"] = progress_queue  # Store queue so timer callback can access it
-            _diag("BEFORE ui.timer(...)")
-            progress_timer: Any
-            try:
-                progress_timer = ui.timer(0.1, update_download_progress, active=True)
-            except BaseException as e_timer:
-                _diag(f"ui.timer RAISED {type(e_timer).__name__}: {e_timer}")
-                raise
-            _diag(f"AFTER ui.timer(...) timer={progress_timer!r} _is_canceled={progress_timer._is_canceled}")  # noqa: SLF001
+            progress_state["queue"] = progress_queue
+
+            # Drive the progress UI from a plain asyncio task instead of `ui.timer`.
+            #
+            # Background: NiceGUI 3.10's PR #5931 made `Timer._handle_delete()` cancel the timer
+            # whenever its parent container is cleared. The download dialog content lives inside
+            # `@ui.refreshable`, and `download_run_dialog_open()` calls `dialog_content.refresh()`
+            # on every open — so a pre-created `ui.timer` captured by this closure is always
+            # `_is_canceled=True` by the time the user clicks Download, and `Timer.activate()`'s
+            # `assert not self._is_canceled` raises silently inside NiceGUI's event-handler
+            # exception path (#531).
+            #
+            # Trying to *create* the timer fresh inside this click handler instead doesn't work
+            # either: `ui.timer` is an Element and needs a live slot context, but the click
+            # handler's parent_slot is the button's parent slot which has been GC'd in this
+            # @ui.refreshable + dialog flow ("RuntimeError: The parent element this slot belongs
+            # to has been deleted.").
+            #
+            # An `asyncio.create_task` running our callback doesn't need a slot context, doesn't
+            # depend on the refreshable lifecycle, and the callback closes over UI element refs
+            # (`download_item_status`, etc.) that were just created in the latest refresh and are
+            # valid for the duration of this download.
+            stop_progress = asyncio.Event()
+
+            async def _drive_progress() -> None:
+                while not stop_progress.is_set():
+                    try:
+                        update_download_progress()
+                    except Exception:
+                        logger.exception("update_download_progress raised; continuing")
+                    try:
+                        await asyncio.wait_for(stop_progress.wait(), timeout=0.1)
+                    except TimeoutError:
+                        continue
+
+            progress_task = asyncio.create_task(_drive_progress(), name=f"download-progress-{run.run_id}")
             try:
-                _diag("BEFORE download_button.disable()")
                 download_button.disable()
                 download_button.props(add="loading")
-                _diag(f"BEFORE await cpu_bound qupath={current_qupath_project} folder={current_folder!r}")
                 results_folder = await nicegui_run.cpu_bound(
                     Service.application_run_download_static,
                     run_id=run.run_id,
@@ -364,7 +378,6 @@ def _diag(msg: str) -> None:
                     qupath_project=current_qupath_project,
                     download_progress_queue=progress_queue,
                 )
-                _diag(f"AFTER await cpu_bound results_folder={results_folder!r}")
                 if not results_folder:
                     message = "Download returned without results folder."
                     raise ValueError(message)  # noqa: TRY301
@@ -384,13 +397,13 @@ def _diag(msg: str) -> None:
                 # this async event handler would otherwise be silently swallowed by NiceGUI's
                 # outer task-exception handler, leaving the dialog stuck in the loading state
                 # with no completion or failure notification. We always want the user to see
-                # *something* and the timer + button state cleaned up via the `finally`.
-                _diag(f"start_download except {type(e).__name__}: {e}")
+                # *something* and the progress task + button state cleaned up via the `finally`.
                 logger.exception("Download failed for run '{}'", run.run_id)
                 ui.notify(f"Download failed: {e}", type="negative", multi_line=True)
             finally:
-                _diag("start_download finally")
-                progress_timer.cancel()
+                stop_progress.set()
+                with contextlib.suppress(asyncio.CancelledError, Exception):
+                    await progress_task
                 progress_state["queue"] = None
                 download_button.props(remove="loading")
                 download_button.enable()

From e2d3c1e46845daea80c30e10625bfb0e653188d9 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 13:09:51 +0200
Subject: [PATCH 19/21] =?UTF-8?q?ci(#531):=20revert=20TEMP=20narrowing=20?=
 =?UTF-8?q?=E2=80=94=20full=20matrix=20back?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The asyncio.create_task replacement for ui.timer (commit 93e21874) was
verified on the narrow run: both test_gui_run_download (19.4s) and
test_gui_run_qupath_install_to_inspect (65.4s) PASS. Restore the full
test matrix and all test phases (unit / integration / e2e_regular /
e2e_long_running / e2e_very_long_running on ubuntu-latest, ubuntu-arm,
macos-latest, macos-15-intel, windows-latest).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml | 52 +++++++++++++++----------------------
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index d22fd6a6..d93f2348 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -50,9 +50,11 @@ jobs:
           COMMIT_MESSAGE: ${{ inputs.commit_message }}
           PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
         run: |
-          # TEMP(#531): forced to ubuntu-latest only while iterating on the long-running test fix.
-          # REVERT once test_gui_run_download + test_gui_run_qupath_install_to_inspect pass.
-          echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
+          if [[ "$COMMIT_MESSAGE" == *"skip:test:matrix-runner"* ]] || [[ "$PR_LABELS" == *"skip:test:matrix-runner"* ]]; then
+            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
+          else
+            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false],"include":[{"runner":"ubuntu-24.04-arm","experimental":true},{"runner":"macos-latest","experimental":true},{"runner":"macos-15-intel","experimental":true},{"runner":"windows-latest","experimental":true}]}' >> $GITHUB_OUTPUT
+          fi
 
   test:
     needs: generate-matrix
@@ -161,14 +163,10 @@ jobs:
       # A single "Assert no test failures" gate step at the end surfaces any
       # failures and fails the job — so the workflow still fails correctly.
 
-      # TEMP(#531): debugging cycle — only run the 2 failing download tests on ubuntu-latest.
-      # All other test phases (unit / integration / e2e_regular / e2e_very_long_running) are
-      # disabled here via `if: false`. REVERT this whole block to the original 5 steps once
-      # the principled fix is verified.
       - name: Test / Unit (multiple Python versions)
         id: unit
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: unit
@@ -180,7 +178,7 @@ jobs:
       - name: Test / Integration (multiple Python versions)
         id: integration
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: integration
@@ -192,7 +190,7 @@ jobs:
       - name: Test / E2E / regular (multiple Python versions)
         id: e2e_regular
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: e2e
@@ -201,34 +199,26 @@ jobs:
           summary-title: All regular e2e tests passed
           commit-message: ${{ inputs.commit_message }}
 
-      # TEMP(#531): targeted run of just the 2 failing download tests, no retries, output unbuffered
-      # so the `[#531]` parent stdout + subprocess stderr lines reach the CI log in real time.
-      # NOTE: `-m long_running` is REQUIRED — tests/conftest.py:pytest_collection_modifyitems
-      # auto-skips `long_running` tests when no `-m` is passed, even if listed by id.
-      - name: Test / E2E / long running (TEMP — only #531 download tests)
+      - name: Test / E2E / long running (single Python version)
         id: e2e_long_running
         continue-on-error: true
         if: ${{ !cancelled() }}
-        shell: bash
-        run: |
-          set +e
-          uv run --all-extras pytest \
-            tests/aignostics/application/gui_test.py::test_gui_run_download \
-            tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
-            -m "long_running" \
-            -v --no-cov --tb=long -s -p no:retry --timeout=300 \
-            --junitxml=reports/junit_531_debug.xml \
-            --md-report-output=reports/pytest_531_debug.md
-          EXIT_CODE=$?
-          if [ -f "reports/pytest_531_debug.md" ]; then
-            cat "reports/pytest_531_debug.md" >> "$GITHUB_STEP_SUMMARY"
-          fi
-          exit $EXIT_CODE
+        uses: ./.github/actions/run-tests
+        with:
+          test-type: long-running
+          make-target: test_long_running
+          skip-marker: skip:test:long_running
+          summary-title: All long running e2e tests passed
+          commit-message: ${{ inputs.commit_message }}
 
       - name: Test / E2E / very long running (single Python version)
         id: e2e_very_long_running
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: |
+          !cancelled() && (
+            contains(inputs.commit_message, 'enable:test:very_long_running') ||
+            contains(github.event.pull_request.labels.*.name, 'enable:test:very_long_running')
+          )
         uses: ./.github/actions/run-tests
         with:
           test-type: very-long-running

From 6cfd3d39ec44c16ec78a4bfdab79b19e5eef6db1 Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 13:19:18 +0200
Subject: [PATCH 20/21] fix(application/gui): harden async progress driver
 against queue / IPC hangs (#531)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two robustness tweaks on top of the asyncio.create_task driver:

1. `update_download_progress()` now drains the queue with `get_nowait()`
   inside an explicit `queue.Empty` loop instead of pairing `empty()` +
   blocking `get()`. Closes a tiny IPC race on
   `multiprocessing.Manager().Queue` where the queue could be drained
   between `empty()` returning False and `get()` running, leaving `get()`
   to block indefinitely and stall the progress driver.

2. `start_download`'s `finally` now hard-cancels the progress task and
   awaits it with a 2 s bound:

       stop_progress.set()
       progress_task.cancel()
       with contextlib.suppress(asyncio.CancelledError, Exception, TimeoutError):
           await asyncio.wait_for(progress_task, timeout=2.0)

   Even if a future regression makes `update_download_progress()` hang
   inside a sync call (so the loop can't observe `stop_progress` itself),
   the cleanup completes in finite time, the dialog state is reset, and
   the user sees the "Download completed." / "Download failed: ..."
   notification. No path can hang the dialog forever again.

Also re-applies the TEMP CI narrowing (ubuntu-latest, only the 2 download
tests) for fast verification — will revert once green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml                   | 44 ++++++++++---------
 .../_gui/_page_application_run_describe.py    | 24 +++++++---
 2 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index d93f2348..46a2dbc5 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -50,11 +50,8 @@ jobs:
           COMMIT_MESSAGE: ${{ inputs.commit_message }}
           PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
         run: |
-          if [[ "$COMMIT_MESSAGE" == *"skip:test:matrix-runner"* ]] || [[ "$PR_LABELS" == *"skip:test:matrix-runner"* ]]; then
-            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
-          else
-            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false],"include":[{"runner":"ubuntu-24.04-arm","experimental":true},{"runner":"macos-latest","experimental":true},{"runner":"macos-15-intel","experimental":true},{"runner":"windows-latest","experimental":true}]}' >> $GITHUB_OUTPUT
-          fi
+          # TEMP(#531): forced to ubuntu-latest only while iterating on the long-running test fix.
+          echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
 
   test:
     needs: generate-matrix
@@ -163,10 +160,11 @@ jobs:
       # A single "Assert no test failures" gate step at the end surfaces any
       # failures and fails the job — so the workflow still fails correctly.
 
+      # TEMP(#531): debugging cycle — only run the 2 download tests on ubuntu-latest.
       - name: Test / Unit (multiple Python versions)
         id: unit
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: unit
@@ -178,7 +176,7 @@ jobs:
       - name: Test / Integration (multiple Python versions)
         id: integration
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: integration
@@ -190,7 +188,7 @@ jobs:
       - name: Test / E2E / regular (multiple Python versions)
         id: e2e_regular
         continue-on-error: true
-        if: ${{ !cancelled() }}
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: e2e
@@ -199,26 +197,30 @@ jobs:
           summary-title: All regular e2e tests passed
           commit-message: ${{ inputs.commit_message }}
 
-      - name: Test / E2E / long running (single Python version)
+      - name: Test / E2E / long running (TEMP — only #531 download tests)
         id: e2e_long_running
         continue-on-error: true
         if: ${{ !cancelled() }}
-        uses: ./.github/actions/run-tests
-        with:
-          test-type: long-running
-          make-target: test_long_running
-          skip-marker: skip:test:long_running
-          summary-title: All long running e2e tests passed
-          commit-message: ${{ inputs.commit_message }}
+        shell: bash
+        run: |
+          set +e
+          uv run --all-extras pytest \
+            tests/aignostics/application/gui_test.py::test_gui_run_download \
+            tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
+            -m "long_running" \
+            -v --no-cov --tb=long -s -p no:retry --timeout=300 \
+            --junitxml=reports/junit_531_debug.xml \
+            --md-report-output=reports/pytest_531_debug.md
+          EXIT_CODE=$?
+          if [ -f "reports/pytest_531_debug.md" ]; then
+            cat "reports/pytest_531_debug.md" >> "$GITHUB_STEP_SUMMARY"
+          fi
+          exit $EXIT_CODE
 
       - name: Test / E2E / very long running (single Python version)
         id: e2e_very_long_running
         continue-on-error: true
-        if: |
-          !cancelled() && (
-            contains(inputs.commit_message, 'enable:test:very_long_running') ||
-            contains(github.event.pull_request.labels.*.name, 'enable:test:very_long_running')
-          )
+        if: false # TEMP(#531)
         uses: ./.github/actions/run-tests
         with:
           test-type: very-long-running
diff --git a/src/aignostics/application/_gui/_page_application_run_describe.py b/src/aignostics/application/_gui/_page_application_run_describe.py
index d34934de..684fd470 100644
--- a/src/aignostics/application/_gui/_page_application_run_describe.py
+++ b/src/aignostics/application/_gui/_page_application_run_describe.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import contextlib
+import queue as queue_module
 import webbrowser
 from importlib.util import find_spec
 from multiprocessing import Manager
@@ -256,13 +257,21 @@ def _select_data() -> None:
         # This allows the timer callback to access the queue that's created during start_download
         progress_state: dict[str, Any] = {"queue": None}
 
-        def update_download_progress() -> None:  # noqa: C901, PLR0912
+        def update_download_progress() -> None:  # noqa: C901, PLR0912, PLR0915
             """Update the progress indicator with values from the queue."""
             progress_queue = progress_state.get("queue")
             if progress_queue is None:
                 return
-            while not progress_queue.empty():
-                progress = progress_queue.get()
+            # Drain everything currently in the queue, but never block on `get()`. Using
+            # `get_nowait()` (instead of the previously-paired `empty()` check + blocking `get()`)
+            # guards against a tiny IPC race on `multiprocessing.Manager().Queue` where the queue
+            # could be drained between `empty()` returning False and `get()` running, leaving
+            # `get()` to block indefinitely and stall the async progress task.
+            while True:
+                try:
+                    progress = progress_queue.get_nowait()
+                except queue_module.Empty:
+                    break
                 if progress.status is DownloadProgressState.DOWNLOADING_INPUT:
                     status_text = (
                         f"Downloading input slide {progress.item_index + 1} of {progress.item_count}"
@@ -401,9 +410,14 @@ async def _drive_progress() -> None:
                 logger.exception("Download failed for run '{}'", run.run_id)
                 ui.notify(f"Download failed: {e}", type="negative", multi_line=True)
             finally:
+                # Force the progress task to terminate even if it's stuck in a sync call (e.g.
+                # a Manager().Queue() IPC hiccup): set the stop event, hard-cancel the task,
+                # and wait a bounded time. This guarantees `start_download` cleanup completes
+                # in finite time no matter what the progress driver was doing.
                 stop_progress.set()
-                with contextlib.suppress(asyncio.CancelledError, Exception):
-                    await progress_task
+                progress_task.cancel()
+                with contextlib.suppress(asyncio.CancelledError, Exception, TimeoutError):
+                    await asyncio.wait_for(progress_task, timeout=2.0)
                 progress_state["queue"] = None
                 download_button.props(remove="loading")
                 download_button.enable()

From b3c62b12d4547b3a6a3698040edd1a01f9dee89c Mon Sep 17 00:00:00 2001
From: Helmut Hoffer von Ankershoffen <helmuthva@googlemail.com>
Date: Sun, 26 Apr 2026 13:31:58 +0200
Subject: [PATCH 21/21] =?UTF-8?q?ci(#531):=20revert=20TEMP=20narrowing=20?=
 =?UTF-8?q?=E2=80=94=20full=20matrix=20back,=20robustness=20tweaks=20verif?=
 =?UTF-8?q?ied?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Narrow run 24955336794 verified the asyncio.create_task driver + robustness
tweaks (get_nowait drain + bounded hard-cancel cleanup): both
test_gui_run_download (~21s) and test_gui_run_qupath_install_to_inspect
(~53s) PASS on ubuntu-latest. Restore the full 5-platform matrix and all
test phases.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/_test.yml | 44 ++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index 46a2dbc5..d93f2348 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -50,8 +50,11 @@ jobs:
           COMMIT_MESSAGE: ${{ inputs.commit_message }}
           PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
         run: |
-          # TEMP(#531): forced to ubuntu-latest only while iterating on the long-running test fix.
-          echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
+          if [[ "$COMMIT_MESSAGE" == *"skip:test:matrix-runner"* ]] || [[ "$PR_LABELS" == *"skip:test:matrix-runner"* ]]; then
+            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false]}' >> $GITHUB_OUTPUT
+          else
+            echo 'matrix={"runner":["ubuntu-latest"],"experimental":[false],"include":[{"runner":"ubuntu-24.04-arm","experimental":true},{"runner":"macos-latest","experimental":true},{"runner":"macos-15-intel","experimental":true},{"runner":"windows-latest","experimental":true}]}' >> $GITHUB_OUTPUT
+          fi
 
   test:
     needs: generate-matrix
@@ -160,11 +163,10 @@ jobs:
       # A single "Assert no test failures" gate step at the end surfaces any
       # failures and fails the job — so the workflow still fails correctly.
 
-      # TEMP(#531): debugging cycle — only run the 2 download tests on ubuntu-latest.
       - name: Test / Unit (multiple Python versions)
         id: unit
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: unit
@@ -176,7 +178,7 @@ jobs:
       - name: Test / Integration (multiple Python versions)
         id: integration
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: integration
@@ -188,7 +190,7 @@ jobs:
       - name: Test / E2E / regular (multiple Python versions)
         id: e2e_regular
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: ${{ !cancelled() }}
         uses: ./.github/actions/run-tests
         with:
           test-type: e2e
@@ -197,30 +199,26 @@ jobs:
           summary-title: All regular e2e tests passed
           commit-message: ${{ inputs.commit_message }}
 
-      - name: Test / E2E / long running (TEMP — only #531 download tests)
+      - name: Test / E2E / long running (single Python version)
         id: e2e_long_running
         continue-on-error: true
         if: ${{ !cancelled() }}
-        shell: bash
-        run: |
-          set +e
-          uv run --all-extras pytest \
-            tests/aignostics/application/gui_test.py::test_gui_run_download \
-            tests/aignostics/qupath/gui_test.py::test_gui_run_qupath_install_to_inspect \
-            -m "long_running" \
-            -v --no-cov --tb=long -s -p no:retry --timeout=300 \
-            --junitxml=reports/junit_531_debug.xml \
-            --md-report-output=reports/pytest_531_debug.md
-          EXIT_CODE=$?
-          if [ -f "reports/pytest_531_debug.md" ]; then
-            cat "reports/pytest_531_debug.md" >> "$GITHUB_STEP_SUMMARY"
-          fi
-          exit $EXIT_CODE
+        uses: ./.github/actions/run-tests
+        with:
+          test-type: long-running
+          make-target: test_long_running
+          skip-marker: skip:test:long_running
+          summary-title: All long running e2e tests passed
+          commit-message: ${{ inputs.commit_message }}
 
       - name: Test / E2E / very long running (single Python version)
         id: e2e_very_long_running
         continue-on-error: true
-        if: false # TEMP(#531)
+        if: |
+          !cancelled() && (
+            contains(inputs.commit_message, 'enable:test:very_long_running') ||
+            contains(github.event.pull_request.labels.*.name, 'enable:test:very_long_running')
+          )
         uses: ./.github/actions/run-tests
         with:
           test-type: very-long-running