From d393a109948678a8fff9d8b3a801dad6bb4b9334 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 11 May 2026 13:11:52 +0200 Subject: [PATCH 1/4] confd: skip neighbor/address flush for interfaces in container netns When an interface has been handed off to a container it lives in another netns, so `ip neigh/addr flush dev FOO` fails on the host. The failure aborts dagger, and since interfaces_change() runs before containers_change() in change_cb(), the container delete path is never reached -- the stale container keeps the interface trapped in its netns, breaking the next reconfiguration. Guard both the neighbor and address flush exit scripts with `if_nametoindex(ifname)` -- true exactly when the interface is in the host netns, false for both "in a container" and "already gone". This replaces the pre-existing `!cni_find(ifname) && if_nametoindex( ifname)` guard at the addr site: cni_find() added no information for this check and would popen(container find) for nothing when the interface had been deleted entirely. Also harden wrap() in /usr/sbin/container so a stale setup pidfile doesn't short-circuit Finit's stop attempt -- kill the setup PID and still ask podman to stop the container. Fixes #1493 Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 7 ++++--- src/confd/src/ip.c | 28 +++++++++++++++++--------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index 4bc99d9a7..f79353d7d 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -652,7 +652,9 @@ wrap() if [ "$cmd" = "stop" ]; then # The setup phase may run forever in the background trying to fetch - # the image. It saves its PID in /run/containers/${name}.pid + # the image. It saves its PID in /run/containers/${name}.pid. 
Kill + # any in-flight setup, then fall through to podman stop -- a stale + # pidfile is not proof the container isn't running. if [ -f "$pidfile" ]; then pid=$(cat "$pidfile") @@ -663,10 +665,9 @@ wrap() fi rm -f "$pidfile" - return 0 fi - # Only the 'podman stop' command takes -i and --timeout + # Only the 'podman stop' command takes -i (ignore missing) and --timeout args="-i --timeout $timeout" fi diff --git a/src/confd/src/ip.c b/src/confd/src/ip.c index a83c10170..463b3b1e1 100644 --- a/src/confd/src/ip.c +++ b/src/confd/src/ip.c @@ -295,11 +295,18 @@ int netdag_gen_ip_neighs(struct dagger *net, FILE *ip, const char *proto, int err = 0; if (!ipconf || !lydx_is_enabled(ipconf, "enabled")) { - FILE *fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush-neigh.sh"); + FILE *fp; + + /* Skip if interface is currently in another netns (container) see #1493 */ + if (!if_nametoindex(ifname)) + return 0; + + fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush-neigh.sh"); if (fp) { fprintf(fp, "ip -%c neigh flush dev %s nud permanent\n", proto[3], ifname); fclose(fp); } + return 0; } @@ -323,15 +330,18 @@ int netdag_gen_ip_addrs(struct dagger *net, FILE *ip, const char *proto, const char *ifname = lydx_get_cattr(dif, "name"); if (!ipconf || !lydx_is_enabled(ipconf, "enabled")) { - if (!cni_find(ifname) && if_nametoindex(ifname)) { - FILE *fp; - - fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush.sh"); - if (fp) { - fprintf(fp, "ip -%c addr flush dev %s\n", proto[3], ifname); - fclose(fp); - } + FILE *fp; + + /* Skip if interface is currently in another netns (container) see #1493 */ + if (!if_nametoindex(ifname)) + return 0; + + fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush.sh"); + if (fp) { + fprintf(fp, "ip -%c addr flush dev %s\n", proto[3], ifname); + fclose(fp); } + return 0; } From 3be640409a859f36f9a5dc167a275361fab4a042 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 11 May 2026 18:27:01 +0200 
Subject: [PATCH 2/4] confd: cache progress description and reprint on final status The \r-overwrite of an in-progress line breaks when something else writes to stderr between conout(3) and the final call, leaving blank "[ OK ]" lines. Cache the description and reprint it whole, so the final status line is robust to intervening output. Signed-off-by: Joachim Wiberg --- src/confd/src/main.c | 53 ++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/confd/src/main.c b/src/confd/src/main.c index 8f9863391..f679d9df6 100644 --- a/src/confd/src/main.c +++ b/src/confd/src/main.c @@ -74,27 +74,38 @@ struct plugin { }; static sig_atomic_t pump_running = 1; -static int restart; /* set when sentinel found; suppresses conout() */ +static int restart; /* set when sentinel found; suppresses progress output */ int debug = 0; -/* Finit style progress output on console */ +/* + * Finit-style progress output. conout(3, "fmt", ...) marks a step pending + * and caches the description; conout(0/1/2, NULL) finalises with OK/FAIL/ + * WARN, reprinting the cached description so the line survives intervening + * stderr writes (sysrepo logs, NOTE/ERROR, ...). + */ static void conout(int rc, const char *fmt, ...) { - const char *sta = "%s\e[1m[\e[1;%dm%s\e[0m\e[1m]\e[0m %s"; const char *msg[] = { " OK ", "FAIL", "WARN", " ⋯ " }; - const char *cr = rc == 3 ? "" : "\r"; - const int col[] = { 32, 31, 33, 33 }; - char buf[80]; - va_list ap; + const int col[] = { 32, 31, 33, 33 }; + static char desc[80]; if (restart) return; - snprintf(buf, sizeof(buf), sta, cr, col[rc], msg[rc], fmt); - va_start(ap, fmt); - vfprintf(stderr, buf, ap); - va_end(ap); + if (fmt) { + va_list ap; + + va_start(ap, fmt); + vsnprintf(desc, sizeof(desc), fmt, ap); + va_end(ap); + } + + fprintf(stderr, "\r\e[K\e[1m[\e[1;%dm%s\e[0m\e[1m]\e[0m %s%s", + col[rc], msg[rc], desc, rc == 3 ? 
"" : "\n"); + + if (rc != 3) + desc[0] = '\0'; } static void version_print(void) @@ -527,7 +538,7 @@ static void maybe_enable_test_mode(void) conout(3, "Enabling test mode"); rc = systemf("sysrepoctl -c infix-test -e test-mode-enable"); - conout(rc ? 1 : 0, "\n"); + conout(!!rc, NULL); } } @@ -711,7 +722,7 @@ int main(int argc, char **argv) gen_pid = fork(); if (gen_pid < 0) { ERRNO("Failed to fork gen-config"); - conout(1, "\n"); + conout(1, NULL); goto cleanup; } if (gen_pid == 0) @@ -733,10 +744,10 @@ int main(int argc, char **argv) waitpid(gen_pid, &status, 0); if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { ERROR("gen-config failed (status=%d)", status); - conout(1, "\n"); + conout(1, NULL); goto cleanup; } - conout(0, "\n"); + conout(0, NULL); /* Phase 4: Install factory defaults into all datastores */ NOTE("Loading factory-default datastore from %s ...", factory_path); @@ -744,10 +755,10 @@ int main(int argc, char **argv) r = sr_install_factory_config(conn, factory_path); if (r != SR_ERR_OK) { ERROR("sr_install_factory_config failed: %s", sr_strerror(r)); - conout(1, "\n"); + conout(1, NULL); goto cleanup; } - conout(0, "\n"); + conout(0, NULL); } /* Phase 5: Start running-datastore session */ @@ -780,7 +791,7 @@ int main(int argc, char **argv) if (r) { ERROR("Plugin \"%s\" initialization failed (%s).", plugins[i].name, sr_strerror(r)); if (fatal_fail) { - conout(1, "\n"); + conout(1, NULL); goto cleanup; } } else { @@ -788,7 +799,7 @@ int main(int argc, char **argv) plugins[i].initialized = 1; } } - conout(0, "\n"); + conout(0, NULL); /* Phase 8: Collect subscription contexts from plugins */ for (i = 0; i < plugin_count; i++) { @@ -816,10 +827,10 @@ int main(int argc, char **argv) failure_path, test_path, timeout_ms)) { kill(pump_pid, SIGTERM); waitpid(pump_pid, NULL, 0); - conout(1, "\n"); + conout(1, NULL); goto cleanup; } - conout(0, "\n"); + conout(0, NULL); /* Phase 11: Stop event pump — bootstrap is done */ kill(pump_pid, SIGTERM); From 
e4203b0c9ec9048f58f5aa6a25439e80f6c48487 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Tue, 12 May 2026 10:45:16 +0200 Subject: [PATCH 3/4] package/finit: backport stale-pidfile and death-log fixes Backport two fixes addressing a critical failure reported by a customer: an unclean dbus-daemon exit leaves a lingering /run/messagebus.pid, the daemon then refuses to start, and Finit's restart loop gives up. 0001 service: clean stale pidfile after unclean daemon exit Drop a daemon-owned (pid:!) pidfile when it still names the just-reaped PID and that PID is no longer alive. 0002 service: log signal name and core dumps in death message "by signal: 9" -> "killed by SIGKILL", with ", core dumped" when applicable. Stronger breadcrumb for sudden deaths. Signed-off-by: Joachim Wiberg --- ...ale-pidfile-after-unclean-daemon-exi.patch | 86 +++++++++++++++++++ ...al-name-and-core-dumps-in-death-mess.patch | 50 +++++++++++ 2 files changed, 136 insertions(+) create mode 100644 package/finit/0001-service-clean-stale-pidfile-after-unclean-daemon-exi.patch create mode 100644 package/finit/0002-service-log-signal-name-and-core-dumps-in-death-mess.patch diff --git a/package/finit/0001-service-clean-stale-pidfile-after-unclean-daemon-exi.patch b/package/finit/0001-service-clean-stale-pidfile-after-unclean-daemon-exi.patch new file mode 100644 index 000000000..459bb1161 --- /dev/null +++ b/package/finit/0001-service-clean-stale-pidfile-after-unclean-daemon-exi.patch @@ -0,0 +1,86 @@ +From 4a53f610cd05c2aba3da770384460f7e66488ff5 Mon Sep 17 00:00:00 2001 +From: Joachim Wiberg +Date: Mon, 11 May 2026 13:55:11 +0200 +Subject: [PATCH 1/2] service: clean stale pidfile after unclean daemon exit +Organization: Wires + +With `pid:!/path` Finit does not manage the file -- the daemon +creates it on start and removes it on graceful exit. If the daemon +dies before cleanup (SIGKILL, OOM, segfault, exit during startup) +the file lingers and can block the next instance from starting, +e.g. 
dbus-daemon refuses with EEXIST and the restart loop fails. + +Remove the file when it still names the just-reaped PID and that +PID is no longer alive (the liveness check guards against reuse). +Called from service_cleanup(), and from service_monitor()'s +forking+starting branch where cleanup was previously skipped. + +Signed-off-by: Joachim Wiberg +--- + src/service.c | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/src/service.c b/src/service.c +index 7ed4fceb..e930c4fd 100644 +--- a/src/service.c ++++ b/src/service.c +@@ -1120,6 +1120,35 @@ static void service_notify_stop(svc_t *svc) + } + } + ++/* ++ * Drop a daemon-owned (pid:!) pidfile if it still names the just-reaped ++ * PID and that PID is gone. The liveness check guards against reuse. ++ */ ++static void service_clean_pidfile(svc_t *svc, pid_t reaped) ++{ ++ pid_t pid; ++ char *fn; ++ ++ if (reaped <= 1) ++ return; ++ ++ fn = pid_file(svc); ++ if (!fn) ++ return; ++ ++ pid = pid_file_read(fn); ++ if (pid != reaped || pid_alive(pid)) ++ return; ++ ++ if (remove(fn) && errno != ENOENT) { ++ logit(LOG_CRIT, "Failed removing stale service %s pidfile %s", ++ svc_ident(svc, NULL, 0), fn); ++ return; ++ } ++ ++ dbg("Removed stale service %s pidfile %s", svc_ident(svc, NULL, 0), fn); ++} ++ + /* + * Clean up any lingering state from dead/killed services + */ +@@ -1137,6 +1166,8 @@ static void service_cleanup(svc_t *svc) + if (remove(fn) && errno != ENOENT) + logit(LOG_CRIT, "Failed removing service %s pidfile %s", + svc_ident(svc, NULL, 0), fn); ++ } else if (svc->pidfile[0] == '!') { ++ service_clean_pidfile(svc, svc->pid); + } + + /* +@@ -2405,7 +2436,10 @@ void service_monitor(pid_t lost, int status) + if (svc_is_forking(svc)) { + /* Likely start script exiting */ + if (svc_is_starting(svc)) { +- svc->pid = 0; /* Expect no more activity from this one */ ++ /* Daemon died before clearing 'starting'; drop any stale pidfile. 
*/ ++ service_clean_pidfile(svc, lost); ++ svc->oldpid = lost; /* So service_retry() logs the real PID */ ++ svc->pid = 0; /* Expect no more activity from this one */ + goto cont; + } + +-- +2.43.0 + diff --git a/package/finit/0002-service-log-signal-name-and-core-dumps-in-death-mess.patch b/package/finit/0002-service-log-signal-name-and-core-dumps-in-death-mess.patch new file mode 100644 index 000000000..60ec0375d --- /dev/null +++ b/package/finit/0002-service-log-signal-name-and-core-dumps-in-death-mess.patch @@ -0,0 +1,50 @@ +From 30f2ca3b2e64bce7db1e2d9dcb37a06d53e0b6bf Mon Sep 17 00:00:00 2001 +From: Joachim Wiberg +Date: Mon, 11 May 2026 17:08:25 +0200 +Subject: [PATCH 2/2] service: log signal name and core dumps in death message +Organization: Wires + +Replace the bare signal number ("by signal: 9") with the symbolic +name ("killed by SIGKILL") and annotate when the kernel wrote a +core:("killed by SIGSEGV, core dumped"). Makes the restart line +self-explanatory and gives operators a strong breadcrumb when a +daemon dies unexpectedly. + +Signed-off-by: Joachim Wiberg +--- + src/service.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/src/service.c b/src/service.c +index e930c4fd..127e0099 100644 +--- a/src/service.c ++++ b/src/service.c +@@ -2828,13 +2828,18 @@ static void service_retry(svc_t *svc) + timeout = ((*restart_cnt) <= (svc->restart_max / 2)) ? 2000 : 5000; + /* If a longer timeout was specified in the conf, use that instead. */ + svc->restart_tmo = max(svc->restart_tmo, timeout); +- logit(LOG_CONSOLE|LOG_WARNING, "Service %s[%d] died (%s%d), restarting (retry in %d msec) (attempt: %d/%d)", +- svc_ident(svc, NULL, 0), svc->oldpid, +- WIFEXITED(svc->status) ? "with exit status: " : "by signal: ", +- WIFEXITED(svc->status) ? 
WEXITSTATUS(svc->status) : WTERMSIG(svc->status), +- svc->restart_tmo, +- *restart_cnt, +- svc->restart_max); ++ if (WIFEXITED(svc->status)) ++ logit(LOG_CONSOLE|LOG_WARNING, ++ "Service %s[%d] died (exit status: %d), restarting (retry in %d msec) (attempt: %d/%d)", ++ svc_ident(svc, NULL, 0), svc->oldpid, WEXITSTATUS(svc->status), ++ svc->restart_tmo, *restart_cnt, svc->restart_max); ++ else ++ logit(LOG_CONSOLE|LOG_WARNING, ++ "Service %s[%d] died (killed by %s%s), restarting (retry in %d msec) (attempt: %d/%d)", ++ svc_ident(svc, NULL, 0), svc->oldpid, ++ sig_name(WTERMSIG(svc->status)), ++ WCOREDUMP(svc->status) ? ", core dumped" : "", ++ svc->restart_tmo, *restart_cnt, svc->restart_max); + + svc_unblock(svc); + service_step(svc); +-- +2.43.0 + From f3183cd911f0a4e6f8258a0dddb9c12c73f04211 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Tue, 12 May 2026 11:06:19 +0200 Subject: [PATCH 4/4] doc: update changelog [skip ci] Signed-off-by: Joachim Wiberg --- doc/ChangeLog.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/ChangeLog.md b/doc/ChangeLog.md index b4e0c88d4..f3352da27 100644 --- a/doc/ChangeLog.md +++ b/doc/ChangeLog.md @@ -3,6 +3,21 @@ Change Log All notable changes to the project are documented in this file. +[v26.05.0][UNRELEASED] +------------------------- + +### Changes + +- Upgrade Linux kernel to 6.18.29 (LTS) + +### Fixes + +- Fix #1493: container with a physical interface not properly removed + when switching to a configuration without containers +- Handle unclean daemon exits better, e.g., `dbus-daemon` crashing and + leaving a stale pidfile behind, causing it to refuse to be restarted +- Fix occasional blank or garbled `[ OK ]` lines at startup + [v26.04.0][] - 2026-04-30 -------------------------