From 9e60ec8ef947f7e330f0f756a3c11f244f6255a2 Mon Sep 17 00:00:00 2001 From: Omer Kocaoglu Date: Wed, 10 Jun 2026 22:23:03 -0400 Subject: [PATCH 1/2] fix(connect): close TOCTOU race in connect sync lock acquisition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sync lock was a stat-then-write: two processes racing past the existence check could both acquire, both load the same LastSequence, and both write duplicate entries into .context/hub/. Replace the pair with the atomic io.SafeTryLock (O_CREATE|O_EXCL, single syscall) and release via io.SafeUnlock — the same primitive the dream pass already uses, preserving the os.ErrExist contract for callers. - LockSentinel removed: the lock is the file's existence, not its content, and the racy write was the constant's only consumer. - state_test.go regression-pins the contract: 16-way contention yields exactly one winner, release frees the next sync, a corrupt state file does not leak the lock, and the lock path stays at /hub/.sync.lock. Closes #93. Spec: specs/fix-connect-sync-lock-toctou.md Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Omer Kocaoglu --- internal/cli/connection/core/sync/state.go | 18 ++- .../cli/connection/core/sync/state_test.go | 153 ++++++++++++++++++ internal/config/hub/doc.go | 2 +- internal/config/hub/hub.go | 2 - specs/fix-connect-sync-lock-toctou.md | 87 ++++++++++ 5 files changed, 251 insertions(+), 11 deletions(-) create mode 100644 internal/cli/connection/core/sync/state_test.go create mode 100644 specs/fix-connect-sync-lock-toctou.md diff --git a/internal/cli/connection/core/sync/state.go b/internal/cli/connection/core/sync/state.go index f7ab4de97..36095e72f 100644 --- a/internal/cli/connection/core/sync/state.go +++ b/internal/cli/connection/core/sync/state.go @@ -41,18 +41,20 @@ func loadState() (state, func(), error) { return s, nil, mkErr } - // Acquire lock: fail if another sync is running. - if _, statErr := os.Stat(lockPath); statErr == nil { - return s, nil, os.ErrExist + // Acquire lock: fail if another sync is running. The + // O_CREATE|O_EXCL create-or-fail is a single syscall, so + // there is no check-to-write window for a concurrent sync + // to slip through. + acquired, lockErr := io.SafeTryLock(lockPath, fs.PermFile) + if lockErr != nil { + return s, nil, lockErr } - if writeErr := io.SafeWriteFile( - lockPath, []byte(cfgHub.LockSentinel), fs.PermFile, - ); writeErr != nil { - return s, nil, writeErr + if !acquired { + return s, nil, os.ErrExist } release := func() { - if rmErr := os.Remove(lockPath); rmErr != nil { + if rmErr := io.SafeUnlock(lockPath); rmErr != nil { logWarn.Warn(cfgWarn.Remove, lockPath, rmErr) } } diff --git a/internal/cli/connection/core/sync/state_test.go b/internal/cli/connection/core/sync/state_test.go new file mode 100644 index 000000000..dedafe601 --- /dev/null +++ b/internal/cli/connection/core/sync/state_test.go @@ -0,0 +1,153 @@ +// / ctx: https://ctx.ist +// ,'`./ do you remember? +// `.,'\ +// \ Copyright 2026-present Context contributors. +// SPDX-License-Identifier: Apache-2.0 + +package sync + +import ( + "errors" + "os" + "path/filepath" + "testing" + + "github.com/ActiveMemory/ctx/internal/config/fs" + cfgHub "github.com/ActiveMemory/ctx/internal/config/hub" + "github.com/ActiveMemory/ctx/internal/io" + "github.com/ActiveMemory/ctx/internal/testutil/testctx" +) + +// declareContext positions the test in a temp project with a +// materialized .context/ directory and returns its path. +func declareContext(t *testing.T) string { + t.Helper() + dir := t.TempDir() + ctxDir := testctx.Declare(t, dir) + if mkErr := os.MkdirAll(ctxDir, fs.PermExec); mkErr != nil { + t.Fatal(mkErr) + } + return ctxDir +} + +func TestLoadState_RejectsConcurrentSyncs(t *testing.T) { + declareContext(t) + + const attempts = 16 + + type outcome struct { + release func() + err error + } + + start := make(chan struct{}) + results := make(chan outcome, attempts) + for i := 0; i < attempts; i++ { + go func() { + <-start + _, release, lockErr := loadState() + results <- outcome{release: release, err: lockErr} + }() + } + close(start) + + // Winners must not release until every attempt has + // finished, or a late goroutine could legitimately + // re-acquire the freed lock and skew the count. + var winners []func() + var contended int + for i := 0; i < attempts; i++ { + got := <-results + switch { + case got.err == nil: + winners = append(winners, got.release) + case errors.Is(got.err, os.ErrExist): + contended++ + default: + t.Fatalf("unexpected error: %v", got.err) + } + } + + if len(winners) != 1 { + t.Errorf("winners = %d, want exactly 1", len(winners)) + } + if contended != attempts-1 { + t.Errorf( + "contended = %d, want %d", contended, attempts-1, + ) + } + for _, release := range winners { + release() + } +} + +func TestLoadState_ReleaseRemovesLock(t *testing.T) { + ctxDir := declareContext(t) + lockPath := filepath.Join( + ctxDir, cfgHub.DirHub, cfgHub.FileSyncLock, + ) + + _, release, lockErr := loadState() + if lockErr != nil { + t.Fatalf("first loadState: %v", lockErr) + } + if _, statErr := os.Stat(lockPath); statErr != nil { + t.Fatalf("lock file should exist while held: %v", statErr) + } + + release() + + if _, statErr := os.Stat(lockPath); !os.IsNotExist(statErr) { + t.Errorf( + "lock file should be gone after release: %v", statErr, + ) + } + + // The next sync must be able to proceed. + _, release, lockErr = loadState() + if lockErr != nil { + t.Fatalf("loadState after release: %v", lockErr) + } + release() +} + +func TestLoadState_ReleasesLockOnCorruptState(t *testing.T) { + ctxDir := declareContext(t) + hubDir := filepath.Join(ctxDir, cfgHub.DirHub) + if mkErr := io.SafeMkdirAll(hubDir, fs.PermKeyDir); mkErr != nil { + t.Fatal(mkErr) + } + statePath := filepath.Join(hubDir, cfgHub.FileSyncState) + if writeErr := os.WriteFile( + statePath, []byte("{not json"), fs.PermFile, + ); writeErr != nil { + t.Fatal(writeErr) + } + + _, _, loadErr := loadState() + if loadErr == nil { + t.Fatal("loadState should fail on corrupt state") + } + + lockPath := filepath.Join(hubDir, cfgHub.FileSyncLock) + if _, statErr := os.Stat(lockPath); !os.IsNotExist(statErr) { + t.Errorf( + "lock must not leak after a failed load: %v", statErr, + ) + } +} + +func TestLoadState_LockFileLocation(t *testing.T) { + ctxDir := declareContext(t) + + _, release, lockErr := loadState() + if lockErr != nil { + t.Fatalf("loadState: %v", lockErr) + } + defer release() + + want := filepath.Join(ctxDir, cfgHub.DirHub, cfgHub.FileSyncLock) + if _, statErr := os.Stat(want); statErr != nil { + t.Errorf("lock file not at %s: %v", want, statErr) + } +} diff --git a/internal/config/hub/doc.go b/internal/config/hub/doc.go index 0eb87a683..d22c657ab 100644 --- a/internal/config/hub/doc.go +++ b/internal/config/hub/doc.go @@ -59,7 +59,7 @@ // connection config files // - FilePID ("hub.pid"), FileAdminToken, // DirHubData: daemon management files -// - JSONIndent, LockSentinel, SuffixPluralMD: +// - JSONIndent, SuffixPluralMD: // formatting and naming helpers // // # Raft Cluster Configuration diff --git a/internal/config/hub/hub.go b/internal/config/hub/hub.go index 0d8fe4e70..16e7cc15d 100644 --- a/internal/config/hub/hub.go +++ b/internal/config/hub/hub.go @@ -167,8 +167,6 @@ const ( FileConnect = ".connect.enc" // JSONIndent is the indentation string for JSON marshaling. JSONIndent = " " - // LockSentinel is the content written to lock files. - LockSentinel = "lock" // SuffixPluralMD is the suffix for typed hub markdown // filenames (e.g. "decisions.md"). SuffixPluralMD = "s.md" diff --git a/specs/fix-connect-sync-lock-toctou.md b/specs/fix-connect-sync-lock-toctou.md new file mode 100644 index 000000000..021f9edd5 --- /dev/null +++ b/specs/fix-connect-sync-lock-toctou.md @@ -0,0 +1,87 @@ +# Fix Connect Sync Lock TOCTOU Race + +`internal/cli/connection/core/sync.loadState` guarded +`ctx connect sync` against concurrent execution with a +check-then-write pattern (`os.Stat` followed by +`io.SafeWriteFile`), not a real lock. Upstream issue: +[ActiveMemory/ctx#93](https://github.com/ActiveMemory/ctx/issues/93). + +## Problem + +The window between the existence check and the write is +unbounded, and the pattern is racy by construction: + +```go +// Acquire lock: fail if another sync is running. +if _, statErr := os.Stat(lockPath); statErr == nil { + return s, nil, os.ErrExist +} +if writeErr := io.SafeWriteFile( + lockPath, []byte(cfgHub.LockSentinel), fs.PermFile, +); writeErr != nil { + return s, nil, writeErr +} +``` + +Two processes racing past the `os.Stat` (hook-triggered plus +manual invocation; cron plus interactive run; two terminals) +can both decide the lock is free, both write the lock file, +both load the same `LastSequence`, both fetch the same entries +from the hub, and both write duplicate content into +`.context/hub/`. (Issue #93 says `.context/shared/`; that is +the pre-rename path — the renderer joins `cfgHub.DirHub` at +`internal/cli/connection/core/render/render.go:32`.) + +The doc comment ("Acquires a lock file to prevent concurrent +access.") overstated what the code did. + +The package had no test coverage at all: no test exercised +the contention path, the release path, or the lock location. + +## Solution + +Replace the stat-then-write with the atomic create-or-fail +primitive that already exists for exactly this purpose: +`io.SafeTryLock` (`O_CREATE|O_EXCL` in a single syscall) and +its counterpart `io.SafeUnlock`. Both landed with the dream +consolidator and have prior art at +`internal/cli/dream/core/pass/pass.go:62-70`. + +1. `internal/cli/connection/core/sync/state.go` — swap the + `os.Stat` / `io.SafeWriteFile` pair for one + `io.SafeTryLock` call. `acquired == false` maps to the + existing `os.ErrExist` contract so `Run`'s caller-facing + behavior is unchanged. The `release` closure delegates to + `io.SafeUnlock`, keeping the warn-on-failure logging. +2. `internal/config/hub/hub.go` — delete `LockSentinel`. Its + only consumer was the racy write; `SafeTryLock` creates an + empty lock file (the lock is the file's existence, not its + content), so the constant is dead. Leaving it would be a + broken window. +3. `internal/config/hub/doc.go` — drop `LockSentinel` from + the package-doc constant inventory. +4. `internal/cli/connection/core/sync/state_test.go` — new; + regression-pins the contract: + - `TestLoadState_RejectsConcurrentSyncs`: N goroutines + race `loadState`; exactly one acquires, the rest observe + `os.ErrExist`. + - `TestLoadState_ReleaseRemovesLock`: after `release()`, + the lock file is gone and a subsequent `loadState` + succeeds. + - `TestLoadState_ReleasesLockOnCorruptState`: a corrupt + `.sync-state.json` makes `loadState` fail *after* + acquisition; the lock must not leak. + - `TestLoadState_LockFileLocation`: the lock lives at + `/hub/.sync.lock` (pins `DirHub` + + `FileSyncLock` composition). + +## Out of Scope + +- Stale-lock detection (PID-in-lockfile, age-based cleanup). + A crashed process still leaves a stale lock; the issue + explicitly defers this to a follow-up. +- `flock`-based locking (issue's Option B). Rejected for now: + `syscall.Flock` is Unix-only and `SafeTryLock` already + matches the existing lockfile-as-sentinel model. +- The hubsync hook's silent error suppression + (ActiveMemory/ctx#100) — adjacent code, separate issue. From 45d4ae2fd85b68df85986ec5b3f2e1d3b178df94 Mon Sep 17 00:00:00 2001 From: Omer Kocaoglu Date: Wed, 10 Jun 2026 22:38:13 -0400 Subject: [PATCH 2/2] fix(hack): guard empty-array expansion in lint-drift for bash 3.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stock macOS ships bash 3.2 (GPLv2 freeze), which treats "${arr[@]}" on an empty array as an unbound variable under set -u — bash 4.4+ does not. drift_grep expands its exclude array unconditionally and several call sites pass no excludes, so make lint-style (and therefore make audit, the mandatory pre-PR gate) aborted on every stock Mac before running a single drift check. Guard with the parameter-expansion alternate form, ${arr[@]+"${arr[@]}"}: empty expands to zero words, populated reproduces the original quoted expansion, bash 4+ behavior unchanged. The other hack/ array expansions are safe today (count-guarded or hardcoded non-empty); details in the spec. Captures the gotcha in LEARNINGS.md so the next session does not re-debug it. Spec: specs/fix-lint-drift-bash32-empty-array.md Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Omer Kocaoglu --- .context/LEARNINGS.md | 11 ++++ hack/lint-drift.sh | 6 +- specs/fix-lint-drift-bash32-empty-array.md | 68 ++++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 specs/fix-lint-drift-bash32-empty-array.md diff --git a/.context/LEARNINGS.md b/.context/LEARNINGS.md index 9ee7c25f8..59cde2983 100644 --- a/.context/LEARNINGS.md +++ b/.context/LEARNINGS.md @@ -17,6 +17,7 @@ DO NOT UPDATE FOR: | Date | Learning | |----|--------| +| 2026-06-10 | Stock macOS bash 3.2 treats empty-array expansion as unbound under set -u | | 2026-06-07 | ctx-dream design principles (consolidated) | | 2026-06-07 | internal/audit & compliance gates for new code (consolidated) | | 2026-06-07 | Error handling: sentinels, unwrapping, and silent discards (consolidated) | @@ -102,6 +103,16 @@ DO NOT UPDATE FOR: --- +## [2026-06-10-223128] Stock macOS bash 3.2 treats empty-array expansion as unbound under set -u + +**Context**: make audit aborted at hack/lint-drift.sh line 39 on a stock Mac (bash 3.2.57) with 'exclude_args[@]: unbound variable' while gating the #93 TOCTOU fix; the script works fine on Linux bash 4+ + +**Lesson**: bash 3.2 (what every stock macOS ships, GPLv2 freeze) treats "${arr[@]}" on an empty array as an unbound-variable error under set -u; bash 4.4+ does not. Any 'set -u' script that expands a possibly-empty array breaks for every Mac contributor + +**Application**: Guard with ${arr[@]+"${arr[@]}"} (parameter-expansion alternate form) wherever a possibly-empty array is expanded in hack/ scripts; test hack/ scripts with /bin/bash, not just homebrew bash + +--- + ## [2026-06-07-170001] ctx-dream design principles (consolidated) **Consolidated from**: 6 entries (2026-06-06 to 2026-06-07) diff --git a/hack/lint-drift.sh b/hack/lint-drift.sh index d01b85b5e..8a5d5e8de 100755 --- a/hack/lint-drift.sh +++ b/hack/lint-drift.sh @@ -36,7 +36,11 @@ drift_grep() { for ex in "$@"; do exclude_args+=(--exclude="$ex") done - grep -rn --include='*.go' --exclude='*_test.go' "${exclude_args[@]}" \ + # ${arr[@]+...} guards the empty-array expansion: bash 3.2 + # (stock macOS) treats "${arr[@]}" on an empty array as unbound + # under `set -u` and aborts the script. + grep -rn --include='*.go' --exclude='*_test.go' \ + ${exclude_args[@]+"${exclude_args[@]}"} \ -E "$pattern" internal/ 2>/dev/null || true } diff --git a/specs/fix-lint-drift-bash32-empty-array.md b/specs/fix-lint-drift-bash32-empty-array.md new file mode 100644 index 000000000..42dcb4623 --- /dev/null +++ b/specs/fix-lint-drift-bash32-empty-array.md @@ -0,0 +1,68 @@ +# Fix lint-drift Empty-Array Expansion on Bash 3.2 + +`hack/lint-drift.sh` aborted on stock macOS before running a +single check, which silently broke `make audit` (and therefore +the contributing guide's mandatory pre-PR gate) for every Mac +contributor. + +## Problem + +The `drift_grep` helper builds its `--exclude` flags in an +array and expands it unconditionally: + +```bash +local exclude_args=() +for ex in "$@"; do + exclude_args+=(--exclude="$ex") +done +grep -rn --include='*.go' --exclude='*_test.go' "${exclude_args[@]}" \ + -E "$pattern" internal/ 2>/dev/null || true +``` + +The script runs under `set -euo pipefail`. On bash 4.4+ an +empty `"${arr[@]}"` expands to zero words; on bash 3.2 — the +newest bash Apple ships, frozen at the GPLv2 boundary — the +same expansion is an **unbound variable** error under `set -u`: + +``` +./hack/lint-drift.sh: line 39: exclude_args[@]: unbound variable +``` + +Several `drift_grep` call sites pass no exclude globs (checks +2, 3, and 8), so the script dies on its first such call and +`make lint-style` → `make audit` fail before any drift check +executes. + +## Solution + +Guard the expansion with the parameter-expansion alternate +form, the canonical bash-3.2-safe idiom: + +```bash +${exclude_args[@]+"${exclude_args[@]}"} +``` + +When the array is empty the outer expansion produces nothing; +when populated it reproduces the original quoted expansion +verbatim. Behavior on bash 4+ is unchanged. A comment at the +call site documents why the guard exists so a future cleanup +doesn't "simplify" it back. + +Verified: `make audit` passes end-to-end on macOS +bash 3.2.57 with the guard in place. + +## Out of Scope + +- Hardening the other `hack/` scripts' array expansions. A + `grep -rn '\[@\]' hack/` sweep plus empirical bash 3.2 + checks show the remaining sites are all safe today: + `lint-shellcheck.sh` exits on a `${#TARGETS[@]}` count + guard (count expansion does not trip `set -u` on 3.2) + before its element expansion; `build-all.sh` and + `detect-ai-typography.sh` expand arrays populated from + hardcoded non-empty literals. Those are latent-only + hazards (someone emptying a config array), not failures, + and belong to a separate sweep if ever. +- Requiring bash 4+ (e.g. a version check or `#!/usr/bin/env + bash4`). Contributors should not need a homebrew bash to run + the project's own audit gate.