From eaa55aa1181507e23c1ada7404f0da1fbc17e262 Mon Sep 17 00:00:00 2001 From: Todd Short Date: Thu, 23 Apr 2026 16:30:51 -0400 Subject: [PATCH] fix(catalogd): follow-up fixes after HA PR merge Two fixes on top of the catalogd HA work (#2674): 1. serverutil: wrap http.Server.Serve error with the catalog listen address so the failure message is self-diagnosing in aggregated manager logs. 2. e2e: fix CatalogdHA gate to override node-count check with actual catalogd replica count. The node-count heuristic fires on any multi-node cluster (e.g. OpenShift standard e2e on AWS), causing the @CatalogdHA scenario to run even when catalogd has only 1 replica, which makes the step "catalogd is ready to reconcile resources" fail with exit status 1. After olmNamespace is populated, query the catalogd deployment's spec.replicas and, whenever a numeric replica count is returned, set the gate to (replicas >= 2), overriding the earlier node-count result. If the query fails or the value is empty or non-numeric, the node-count result is kept. For upgrade scenarios the detectOLMDeployment early-return keeps the node-count value intact. 
Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Todd Short --- internal/catalogd/serverutil/serverutil.go | 2 +- test/e2e/steps/hooks.go | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/internal/catalogd/serverutil/serverutil.go b/internal/catalogd/serverutil/serverutil.go index c29b5742f..72e1e5652 100644 --- a/internal/catalogd/serverutil/serverutil.go +++ b/internal/catalogd/serverutil/serverutil.go @@ -120,7 +120,7 @@ func (r *catalogServerRunnable) Start(ctx context.Context) error { }() if err := r.server.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) { - return err + return fmt.Errorf("catalog server on %q failed: %w", r.cfg.CatalogAddr, err) } return nil } diff --git a/test/e2e/steps/hooks.go b/test/e2e/steps/hooks.go index 5325a79f2..b7062ba89 100644 --- a/test/e2e/steps/hooks.go +++ b/test/e2e/steps/hooks.go @@ -148,6 +148,23 @@ func BeforeSuite() { } olmNamespace = olm.Namespace + // Refine CatalogdHA based on actual catalogd replica count now that + // olmNamespace is known. The node-count check above can fire on any + // multi-node cluster even when catalogd runs with only 1 replica. + // Override the gate: HA scenarios require ≥2 catalogd replicas. + // If the deployment is not found (kubectl error), or the replica + // count is empty/non-numeric, the gate keeps whatever the node-count + // check set. + if repOut, err := k8sClient("get", "deployments", "-n", olmNamespace, + "-l", "app.kubernetes.io/name=catalogd", + "-o", "jsonpath={.items[0].spec.replicas}"); err == nil { + if repOut = strings.TrimSpace(repOut); repOut != "" { + if replicas, err := strconv.Atoi(repOut); err == nil { + featureGates[catalogdHAFeature] = replicas >= 2 + } + } + } + featureGatePattern := regexp.MustCompile(`--feature-gates=([[:alnum:]]+)=(true|false)`) for _, c := range olm.Spec.Template.Spec.Containers { if c.Name == "manager" {