diff --git a/test/e2e/steps/ha_steps.go b/test/e2e/steps/ha_steps.go index 285917850..1d5d9844c 100644 --- a/test/e2e/steps/ha_steps.go +++ b/test/e2e/steps/ha_steps.go @@ -31,7 +31,7 @@ func CatalogdLeaderPodIsForceDeleted(ctx context.Context) error { } logger.Info("Force-deleting catalogd leader pod", "pod", leaderPod) - if _, err := k8sClient("delete", "pod", leaderPod, "-n", olmNamespace, + if _, err := k8sClient("delete", "pod", leaderPod, "-n", namespaceForComponent("catalogd"), "--force", "--grace-period=0"); err != nil { return fmt.Errorf("failed to force-delete catalogd leader pod %q: %w", leaderPod, err) } @@ -46,7 +46,7 @@ func NewCatalogdLeaderIsElected(ctx context.Context) error { oldLeader := sc.leaderPods["catalogd"] waitFor(ctx, func() bool { - holder, err := k8sClient("get", "lease", leaseNames["catalogd"], "-n", olmNamespace, + holder, err := k8sClient("get", "lease", leaseNames["catalogd"], "-n", namespaceForComponent("catalogd"), "-o", "jsonpath={.spec.holderIdentity}") if err != nil || holder == "" { return false diff --git a/test/e2e/steps/hooks.go b/test/e2e/steps/hooks.go index b7062ba89..d34ccd31e 100644 --- a/test/e2e/steps/hooks.go +++ b/test/e2e/steps/hooks.go @@ -108,22 +108,33 @@ func RegisterHooks(sc *godog.ScenarioContext) { sc.After(ScenarioCleanup) } -func detectOLMDeployment() (*appsv1.Deployment, error) { +// detectOLMDeployments returns the operator-controller deployment (first) and the catalogd +// deployment (second) found via the app.kubernetes.io/part-of=olm label across all namespaces. +// The catalogd return value may be nil when OLM is not yet installed (upgrade scenarios +// install it in a Background step). +func detectOLMDeployments() (*appsv1.Deployment, *appsv1.Deployment, error) { raw, err := k8sClient("get", "deployments", "-A", "-l", "app.kubernetes.io/part-of=olm", "-o", "jsonpath={.items}") if err != nil { - return nil, err + return nil, nil, err } dl := []appsv1.Deployment{} if err := json.Unmarshal([]byte(raw), &dl); err != nil { - return nil, fmt.Errorf("failed to unmarshal OLM deployments: %v", err) + return nil, nil, fmt.Errorf("failed to unmarshal OLM deployments: %v", err) } - for _, d := range dl { - if d.Name == olmDeploymentName { - return &d, nil + var operatorController, catalogd *appsv1.Deployment + for i := range dl { + switch dl[i].Name { + case olmDeploymentName: + operatorController = &dl[i] + case catalogdDeploymentName: + catalogd = &dl[i] } } - return nil, fmt.Errorf("failed to detect OLM Deployment") + if operatorController == nil { + return nil, nil, fmt.Errorf("failed to detect OLM Deployment") + } + return operatorController, catalogd, nil } func BeforeSuite() { @@ -141,27 +152,29 @@ func BeforeSuite() { featureGates[catalogdHAFeature] = true } - olm, err := detectOLMDeployment() + olm, catalogdDep, err := detectOLMDeployments() if err != nil { logger.Info("OLM deployments not found; skipping feature gate detection (upgrade scenarios will install OLM in Background)") return } olmNamespace = olm.Namespace + componentNamespaces["operator-controller"] = olmNamespace - // Refine CatalogdHA based on actual catalogd replica count now that - // olmNamespace is known. The node-count check above can fire on any - // multi-node cluster even when catalogd runs with only 1 replica. - // Override the gate: HA scenarios require ≥2 catalogd replicas. - // If the deployment is not found (kubectl error), or the replica - // count is empty/non-numeric, the gate keeps whatever the node-count - // check set. 
- if repOut, err := k8sClient("get", "deployments", "-n", olmNamespace, - "-l", "app.kubernetes.io/name=catalogd", - "-o", "jsonpath={.items[0].spec.replicas}"); err == nil { - if repOut = strings.TrimSpace(repOut); repOut != "" { - if replicas, err := strconv.Atoi(repOut); err == nil { - featureGates[catalogdHAFeature] = replicas >= 2 - } + // Catalogd may be in a different namespace than operator-controller. + catalogdNS := olmNamespace + if catalogdDep != nil { + catalogdNS = catalogdDep.Namespace + } + componentNamespaces["catalogd"] = catalogdNS + + // Refine CatalogdHA based on actual catalogd replica count now that catalogdNS is + // known. The node-count check above can fire on any multi-node cluster even when + // catalogd runs with only 1 replica. Override the gate: HA scenarios require ≥2 + // catalogd replicas. Fall back to whatever the node-count check set when catalogd + // was not found or the replica count is not parseable. + if catalogdDep != nil { + if replicas := catalogdDep.Spec.Replicas; replicas != nil { + featureGates[catalogdHAFeature] = *replicas >= 2 } } diff --git a/test/e2e/steps/steps.go b/test/e2e/steps/steps.go index a91bfbacb..0a9bf2115 100644 --- a/test/e2e/steps/steps.go +++ b/test/e2e/steps/steps.go @@ -50,13 +50,15 @@ import ( ) const ( - olmDeploymentName = "operator-controller-controller-manager" - timeout = 5 * time.Minute - tick = 1 * time.Second + olmDeploymentName = "operator-controller-controller-manager" + catalogdDeploymentName = "catalogd-controller-manager" + timeout = 5 * time.Minute + tick = 1 * time.Second ) var ( olmNamespace = "olmv1-system" + componentNamespaces = map[string]string{} // keyed by component label (e.g. "catalogd"); falls back to olmNamespace kubeconfigPath string k8sCli string deployImageRegistry = sync.OnceValue(func() error { @@ -214,6 +216,15 @@ func init() { } } +// namespaceForComponent returns the namespace for the named OLM component. +// Falls back to olmNamespace when the component has no dedicated entry (e.g. upstream single-namespace deployments). +func namespaceForComponent(component string) string { + if ns, ok := componentNamespaces[component]; ok { + return ns + } + return olmNamespace +} + func k8sClient(args ...string) (string, error) { cmd := exec.Command(k8sCli, args...) logger.V(1).Info("Running", "command", strings.Join(cmd.Args, " ")) diff --git a/test/e2e/steps/upgrade_steps.go b/test/e2e/steps/upgrade_steps.go index 0c82437a2..47aab69d2 100644 --- a/test/e2e/steps/upgrade_steps.go +++ b/test/e2e/steps/upgrade_steps.go @@ -29,7 +29,7 @@ var ( if err != nil { return err } - olm, err := detectOLMDeployment() + olm, _, err := detectOLMDeployments() if err != nil { return err } @@ -82,19 +82,20 @@ func OLMIsUpgraded(ctx context.Context) error { // then checks the leader election lease and stores the leader pod name in the scenario context. 
func ComponentIsReadyToReconcile(ctx context.Context, component string) error { sc := scenarioCtx(ctx) + ns := namespaceForComponent(component) // Wait for deployment rollout to complete - depName, err := k8sClient("get", "deployments", "-n", olmNamespace, + depName, err := k8sClient("get", "deployments", "-n", ns, "-l", fmt.Sprintf("app.kubernetes.io/name=%s", component), "-o", "jsonpath={.items[0].metadata.name}") if err != nil { - return fmt.Errorf("failed to find deployment for component %s: %w", component, err) + return fmt.Errorf("failed to find deployment for component %s in namespace %s: %w", component, ns, err) } if depName == "" { - return fmt.Errorf("failed to find deployment for component %s: no matching deployments found", component) + return fmt.Errorf("failed to find deployment for component %s in namespace %s: no matching deployments found", component, ns) } if _, err := k8sClient("rollout", "status", fmt.Sprintf("deployment/%s", depName), - "-n", olmNamespace, fmt.Sprintf("--timeout=%s", timeout)); err != nil { + "-n", ns, fmt.Sprintf("--timeout=%s", timeout)); err != nil { return fmt.Errorf("deployment rollout failed for %s: %w", component, err) } @@ -106,7 +107,7 @@ func ComponentIsReadyToReconcile(ctx context.Context, component string) error { // Leader election can take up to LeaseDuration (137s) + RetryPeriod (26s) ≈ 163s in the worst case waitFor(ctx, func() bool { - output, err := k8sClient("get", "lease", leaseName, "-n", olmNamespace, + output, err := k8sClient("get", "lease", leaseName, "-n", ns, "-o", "jsonpath={.spec.holderIdentity}") if err != nil || output == "" { return false @@ -129,9 +130,9 @@ var resourceTypeToComponent = map[string]string{ // reconcileEndingCheck returns a function that checks whether the leader pod's logs // contain a "reconcile ending" entry for the given resource name. -func reconcileEndingCheck(leaderPod, resourceName string) func() bool { +func reconcileEndingCheck(namespace, leaderPod, resourceName string) func() bool { return func() bool { - logs, err := k8sClient("logs", leaderPod, "-n", olmNamespace, "--all-containers=true", "--tail=1000") + logs, err := k8sClient("logs", leaderPod, "-n", namespace, "--all-containers=true", "--tail=1000") if err != nil { return false } @@ -160,13 +161,13 @@ func ClusterExtensionIsReconciled(ctx context.Context) error { } leaderPod := sc.leaderPods[component] - waitFor(ctx, reconcileEndingCheck(leaderPod, resourceName)) + waitFor(ctx, reconcileEndingCheck(namespaceForComponent(component), leaderPod, resourceName)) return nil } // clusterCatalogUnpackedAfterPodCreation returns a check function that verifies the // ClusterCatalog is serving and its lastUnpacked timestamp is after the leader pod's creation. 
-func clusterCatalogUnpackedAfterPodCreation(resourceName, leaderPod string) func() bool { +func clusterCatalogUnpackedAfterPodCreation(namespace, resourceName, leaderPod string) func() bool { return func() bool { catalogJSON, err := k8sClient("get", "clustercatalog", resourceName, "-o", "json") if err != nil { @@ -190,7 +191,7 @@ func clusterCatalogUnpackedAfterPodCreation(resourceName, leaderPod string) func return false } - podJSON, err := k8sClient("get", "pod", leaderPod, "-n", olmNamespace, "-o", "json") + podJSON, err := k8sClient("get", "pod", leaderPod, "-n", namespace, "-o", "json") if err != nil { return false } @@ -229,8 +230,9 @@ func allResourcesAreReconciled(ctx context.Context, resourceType string) error { return fmt.Errorf("no %s resources found", resourceType) } + ns := namespaceForComponent(component) for _, name := range resourceNames { - waitFor(ctx, reconcileEndingCheck(leaderPod, name)) + waitFor(ctx, reconcileEndingCheck(ns, leaderPod, name)) } return nil @@ -254,11 +256,12 @@ func ScenarioCatalogIsReconciled(ctx context.Context, catalogUserName string) er return fmt.Errorf("leader pod not found for component %s", component) } - waitFor(ctx, reconcileEndingCheck(leaderPod, catalogName)) + ns := namespaceForComponent(component) + waitFor(ctx, reconcileEndingCheck(ns, leaderPod, catalogName)) // Also verify that lastUnpacked is after the leader pod's creation. // This mitigates flakiness caused by https://github.com/operator-framework/operator-controller/issues/1626 - waitFor(ctx, clusterCatalogUnpackedAfterPodCreation(catalogName, leaderPod)) + waitFor(ctx, clusterCatalogUnpackedAfterPodCreation(ns, catalogName, leaderPod)) return nil }
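
The snippet below is not part of the patch; it is a minimal, self-contained sketch (with made-up namespace values) of the namespace plumbing the patch introduces in test/e2e/steps: BeforeSuite records each detected deployment's namespace in componentNamespaces, and namespaceForComponent falls back to olmNamespace for any component without a dedicated entry, so single-namespace upstream layouts keep working unchanged.

package main

import "fmt"

// Illustrative stand-ins for the package-level variables in test/e2e/steps/steps.go.
var (
	olmNamespace        = "olmv1-system"
	componentNamespaces = map[string]string{}
)

// namespaceForComponent mirrors the helper added by this patch: return the
// component's recorded namespace when present, otherwise fall back to olmNamespace.
func namespaceForComponent(component string) string {
	if ns, ok := componentNamespaces[component]; ok {
		return ns
	}
	return olmNamespace
}

func main() {
	// In the real suite, BeforeSuite fills these from the deployments found via
	// the app.kubernetes.io/part-of=olm label; the values here are hypothetical.
	componentNamespaces["operator-controller"] = "olmv1-system"
	componentNamespaces["catalogd"] = "catalogd-system"

	fmt.Println(namespaceForComponent("catalogd"))             // "catalogd-system"
	fmt.Println(namespaceForComponent("operator-controller"))  // "olmv1-system"
	fmt.Println(namespaceForComponent("some-other-component")) // falls back to "olmv1-system"
}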