Skip to content
Merged

Misc #1501

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions board/common/rootfs/usr/sbin/container
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,9 @@ wrap()

if [ "$cmd" = "stop" ]; then
# The setup phase may run forever in the background trying to fetch
# the image. It saves its PID in /run/containers/${name}.pid
# the image. It saves its PID in /run/containers/${name}.pid. Kill
# any in-flight setup, then fall through to podman stop -- a stale
# pidfile is not proof the container isn't running.
if [ -f "$pidfile" ]; then
pid=$(cat "$pidfile")

Expand All @@ -663,10 +665,9 @@ wrap()
fi

rm -f "$pidfile"
return 0
fi

# Only the 'podman stop' command takes -i and --timeout
# Only the 'podman stop' command takes -i (ignore missing) and --timeout
args="-i --timeout $timeout"
fi

Expand Down
15 changes: 15 additions & 0 deletions doc/ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@ Change Log

All notable changes to the project are documented in this file.

[v26.05.0][UNRELEASED]
-------------------------

### Changes

- Upgrade Linux kernel to 6.18.29 (LTS)

Comment thread
troglobit marked this conversation as resolved.
### Fixes

- Fix #1493: container with a physical interface not properly removed
when switching to a configuration without containers
- Handle unclean daemon exits better, e.g., `dbus-daemon` crashing and
leaving a stale pidfile behind, which made the daemon refuse to start again
- Fix occasional blank or garbled `[ OK ]` lines at startup

[v26.04.0][] - 2026-04-30
-------------------------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
From 4a53f610cd05c2aba3da770384460f7e66488ff5 Mon Sep 17 00:00:00 2001
From: Joachim Wiberg <troglobit@gmail.com>
Date: Mon, 11 May 2026 13:55:11 +0200
Subject: [PATCH 1/2] service: clean stale pidfile after unclean daemon exit
Organization: Wires

With `pid:!/path` Finit does not manage the file -- the daemon
creates it on start and removes it on graceful exit. If the daemon
dies before cleanup (SIGKILL, OOM, segfault, exit during startup)
the file lingers and can block the next instance from starting,
e.g. dbus-daemon refuses with EEXIST and the restart loop fails.

Remove the file when it still names the just-reaped PID and that
PID is no longer alive (the liveness check guards against reuse).
Called from service_cleanup(), and from service_monitor()'s
forking+starting branch where cleanup was previously skipped.

Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
---
src/service.c | 36 +++++++++++++++++++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/service.c b/src/service.c
index 7ed4fceb..e930c4fd 100644
--- a/src/service.c
+++ b/src/service.c
@@ -1120,6 +1120,35 @@ static void service_notify_stop(svc_t *svc)
}
}

+/*
+ * Drop a daemon-owned (pid:!) pidfile if it still names the just-reaped
+ * PID and that PID is gone. The liveness check guards against reuse.
+ */
+static void service_clean_pidfile(svc_t *svc, pid_t reaped)
+{
+ pid_t pid;
+ char *fn;
+
+ if (reaped <= 1)
+ return;
+
+ fn = pid_file(svc);
+ if (!fn)
+ return;
+
+ pid = pid_file_read(fn);
+ if (pid != reaped || pid_alive(pid))
+ return;
+
+ if (remove(fn) && errno != ENOENT) {
+ logit(LOG_CRIT, "Failed removing stale service %s pidfile %s",
+ svc_ident(svc, NULL, 0), fn);
+ return;
+ }
+
+ dbg("Removed stale service %s pidfile %s", svc_ident(svc, NULL, 0), fn);
+}
+
/*
* Clean up any lingering state from dead/killed services
*/
@@ -1137,6 +1166,8 @@ static void service_cleanup(svc_t *svc)
if (remove(fn) && errno != ENOENT)
logit(LOG_CRIT, "Failed removing service %s pidfile %s",
svc_ident(svc, NULL, 0), fn);
+ } else if (svc->pidfile[0] == '!') {
+ service_clean_pidfile(svc, svc->pid);
}

/*
@@ -2405,7 +2436,10 @@ void service_monitor(pid_t lost, int status)
if (svc_is_forking(svc)) {
/* Likely start script exiting */
if (svc_is_starting(svc)) {
- svc->pid = 0; /* Expect no more activity from this one */
+ /* Daemon died before clearing 'starting'; drop any stale pidfile. */
+ service_clean_pidfile(svc, lost);
+ svc->oldpid = lost; /* So service_retry() logs the real PID */
+ svc->pid = 0; /* Expect no more activity from this one */
goto cont;
}

--
2.43.0

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
From 30f2ca3b2e64bce7db1e2d9dcb37a06d53e0b6bf Mon Sep 17 00:00:00 2001
From: Joachim Wiberg <troglobit@gmail.com>
Date: Mon, 11 May 2026 17:08:25 +0200
Subject: [PATCH 2/2] service: log signal name and core dumps in death message
Organization: Wires

Replace the bare signal number ("by signal: 9") with the symbolic
name ("killed by SIGKILL") and annotate when the kernel wrote a
core ("killed by SIGSEGV, core dumped"). Makes the restart line
self-explanatory and gives operators a strong breadcrumb when a
daemon dies unexpectedly.

Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
---
src/service.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/service.c b/src/service.c
index e930c4fd..127e0099 100644
--- a/src/service.c
+++ b/src/service.c
@@ -2828,13 +2828,18 @@ static void service_retry(svc_t *svc)
timeout = ((*restart_cnt) <= (svc->restart_max / 2)) ? 2000 : 5000;
/* If a longer timeout was specified in the conf, use that instead. */
svc->restart_tmo = max(svc->restart_tmo, timeout);
- logit(LOG_CONSOLE|LOG_WARNING, "Service %s[%d] died (%s%d), restarting (retry in %d msec) (attempt: %d/%d)",
- svc_ident(svc, NULL, 0), svc->oldpid,
- WIFEXITED(svc->status) ? "with exit status: " : "by signal: ",
- WIFEXITED(svc->status) ? WEXITSTATUS(svc->status) : WTERMSIG(svc->status),
- svc->restart_tmo,
- *restart_cnt,
- svc->restart_max);
+ if (WIFEXITED(svc->status))
+ logit(LOG_CONSOLE|LOG_WARNING,
+ "Service %s[%d] died (exit status: %d), restarting (retry in %d msec) (attempt: %d/%d)",
+ svc_ident(svc, NULL, 0), svc->oldpid, WEXITSTATUS(svc->status),
+ svc->restart_tmo, *restart_cnt, svc->restart_max);
+ else
+ logit(LOG_CONSOLE|LOG_WARNING,
+ "Service %s[%d] died (killed by %s%s), restarting (retry in %d msec) (attempt: %d/%d)",
+ svc_ident(svc, NULL, 0), svc->oldpid,
+ sig_name(WTERMSIG(svc->status)),
+ WCOREDUMP(svc->status) ? ", core dumped" : "",
+ svc->restart_tmo, *restart_cnt, svc->restart_max);

svc_unblock(svc);
service_step(svc);
--
2.43.0

28 changes: 19 additions & 9 deletions src/confd/src/ip.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,18 @@ int netdag_gen_ip_neighs(struct dagger *net, FILE *ip, const char *proto,
int err = 0;

if (!ipconf || !lydx_is_enabled(ipconf, "enabled")) {
FILE *fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush-neigh.sh");
FILE *fp;

/* Skip if interface is currently in another netns (container), see #1493 */
if (!if_nametoindex(ifname))
return 0;

fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush-neigh.sh");
if (fp) {
fprintf(fp, "ip -%c neigh flush dev %s nud permanent\n", proto[3], ifname);
fclose(fp);
}

return 0;
}

Expand All @@ -323,15 +330,18 @@ int netdag_gen_ip_addrs(struct dagger *net, FILE *ip, const char *proto,
const char *ifname = lydx_get_cattr(dif, "name");

if (!ipconf || !lydx_is_enabled(ipconf, "enabled")) {
if (!cni_find(ifname) && if_nametoindex(ifname)) {
FILE *fp;

fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush.sh");
if (fp) {
fprintf(fp, "ip -%c addr flush dev %s\n", proto[3], ifname);
fclose(fp);
}
FILE *fp;

/* Skip if interface is currently in another netns (container), see #1493 */
if (!if_nametoindex(ifname))
return 0;

fp = dagger_fopen_net_exit(net, ifname, NETDAG_EXIT_PRE, "flush.sh");
if (fp) {
fprintf(fp, "ip -%c addr flush dev %s\n", proto[3], ifname);
fclose(fp);
}

return 0;
}

Expand Down
53 changes: 32 additions & 21 deletions src/confd/src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,27 +74,38 @@ struct plugin {
};

static sig_atomic_t pump_running = 1;
static int restart; /* set when sentinel found; suppresses conout() */
static int restart; /* set when sentinel found; suppresses progress output */
int debug = 0;


/* Finit style progress output on console */
/*
* Finit-style progress output. conout(3, "fmt", ...) marks a step pending
* and caches the description; conout(0/1/2, NULL) finalises with OK/FAIL/
* WARN, reprinting the cached description so the line survives intervening
* stderr writes (sysrepo logs, NOTE/ERROR, ...).
*/
static void conout(int rc, const char *fmt, ...)
{
const char *sta = "%s\e[1m[\e[1;%dm%s\e[0m\e[1m]\e[0m %s";
const char *msg[] = { " OK ", "FAIL", "WARN", " ⋯ " };
const char *cr = rc == 3 ? "" : "\r";
const int col[] = { 32, 31, 33, 33 };
char buf[80];
va_list ap;
const int col[] = { 32, 31, 33, 33 };
static char desc[80];

if (restart)
return;

snprintf(buf, sizeof(buf), sta, cr, col[rc], msg[rc], fmt);
va_start(ap, fmt);
vfprintf(stderr, buf, ap);
va_end(ap);
if (fmt) {
va_list ap;

va_start(ap, fmt);
vsnprintf(desc, sizeof(desc), fmt, ap);
va_end(ap);
}

fprintf(stderr, "\r\e[K\e[1m[\e[1;%dm%s\e[0m\e[1m]\e[0m %s%s",
col[rc], msg[rc], desc, rc == 3 ? "" : "\n");

if (rc != 3)
desc[0] = '\0';
}

static void version_print(void)
Expand Down Expand Up @@ -527,7 +538,7 @@ static void maybe_enable_test_mode(void)

conout(3, "Enabling test mode");
rc = systemf("sysrepoctl -c infix-test -e test-mode-enable");
conout(rc ? 1 : 0, "\n");
conout(!!rc, NULL);
}
}

Expand Down Expand Up @@ -711,7 +722,7 @@ int main(int argc, char **argv)
gen_pid = fork();
if (gen_pid < 0) {
ERRNO("Failed to fork gen-config");
conout(1, "\n");
conout(1, NULL);
goto cleanup;
}
if (gen_pid == 0)
Expand All @@ -733,21 +744,21 @@ int main(int argc, char **argv)
waitpid(gen_pid, &status, 0);
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
ERROR("gen-config failed (status=%d)", status);
conout(1, "\n");
conout(1, NULL);
goto cleanup;
}
conout(0, "\n");
conout(0, NULL);

/* Phase 4: Install factory defaults into all datastores */
NOTE("Loading factory-default datastore from %s ...", factory_path);
conout(3, "Loading factory-default datastore");
r = sr_install_factory_config(conn, factory_path);
if (r != SR_ERR_OK) {
ERROR("sr_install_factory_config failed: %s", sr_strerror(r));
conout(1, "\n");
conout(1, NULL);
goto cleanup;
}
conout(0, "\n");
conout(0, NULL);
}

/* Phase 5: Start running-datastore session */
Expand Down Expand Up @@ -780,15 +791,15 @@ int main(int argc, char **argv)
if (r) {
ERROR("Plugin \"%s\" initialization failed (%s).", plugins[i].name, sr_strerror(r));
if (fatal_fail) {
conout(1, "\n");
conout(1, NULL);
goto cleanup;
}
} else {
NOTE("Plugin \"%s\" initialized.", plugins[i].name);
plugins[i].initialized = 1;
}
}
conout(0, "\n");
conout(0, NULL);

/* Phase 8: Collect subscription contexts from plugins */
for (i = 0; i < plugin_count; i++) {
Expand Down Expand Up @@ -816,10 +827,10 @@ int main(int argc, char **argv)
failure_path, test_path, timeout_ms)) {
kill(pump_pid, SIGTERM);
waitpid(pump_pid, NULL, 0);
conout(1, "\n");
conout(1, NULL);
goto cleanup;
}
conout(0, "\n");
conout(0, NULL);

/* Phase 11: Stop event pump — bootstrap is done */
kill(pump_pid, SIGTERM);
Expand Down