diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml index e7ced65..eeb2454 100644 --- a/.github/workflows/nightly.yaml +++ b/.github/workflows/nightly.yaml @@ -43,8 +43,9 @@ jobs: VERSION="nightly-$(date -u +%Y-%m-%d)" PKG="github.com/NodeOps-app/createos-cli/internal/pkg/version" CFG="github.com/NodeOps-app/createos-cli/internal/config" + TEL="github.com/NodeOps-app/createos-cli/internal/telemetry" COMMIT="${{ github.sha }}" - LDFLAGS="-s -w -X ${PKG}.Version=${VERSION} -X ${PKG}.Channel=nightly -X ${PKG}.Commit=${COMMIT} -X ${CFG}.OAuthClientID=${{ secrets.OAUTH_CLIENT_ID }}" + LDFLAGS="-s -w -X ${PKG}.Version=${VERSION} -X ${PKG}.Channel=nightly -X ${PKG}.Commit=${COMMIT} -X ${CFG}.OAuthClientID=${{ secrets.OAUTH_CLIENT_ID }} -X ${TEL}.PostHogAPIKey=${{ secrets.POSTHOG_API_KEY }} -X ${TEL}.PostHogHost=https://us.i.posthog.com" if [ "${{ matrix.goos }}" = "windows" ]; then BINARY="createos-${{ matrix.goos }}-${{ matrix.goarch }}.exe" else diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0fa088d..5cf884d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -43,8 +43,9 @@ jobs: VERSION="${{ github.ref_name }}" PKG="github.com/NodeOps-app/createos-cli/internal/pkg/version" CFG="github.com/NodeOps-app/createos-cli/internal/config" + TEL="github.com/NodeOps-app/createos-cli/internal/telemetry" COMMIT="${{ github.sha }}" - LDFLAGS="-s -w -X ${PKG}.Version=${VERSION} -X ${PKG}.Channel=stable -X ${PKG}.Commit=${COMMIT} -X ${CFG}.OAuthClientID=${{ secrets.OAUTH_CLIENT_ID }}" + LDFLAGS="-s -w -X ${PKG}.Version=${VERSION} -X ${PKG}.Channel=stable -X ${PKG}.Commit=${COMMIT} -X ${CFG}.OAuthClientID=${{ secrets.OAUTH_CLIENT_ID }} -X ${TEL}.PostHogAPIKey=${{ secrets.POSTHOG_API_KEY }} -X ${TEL}.PostHogHost=https://us.i.posthog.com" if [ "${{ matrix.goos }}" = "windows" ]; then BINARY="createos-${{ matrix.goos }}-${{ matrix.goarch }}.exe" else diff --git a/CLAUDE.md b/CLAUDE.md index 
bfc7e30..5fb80cf 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -24,6 +24,8 @@ When adding a new command: 1. Create the file under `cmd//` 2. Register it in the group's `NewXxxCommand()` subcommands slice 3. Add it to the manual list in `root.go` Action (the home screen) in alphabetical order +4. Telemetry is automatic — @docs/telemetry.md + ## API Client diff --git a/README.md b/README.md index 2be3e1a..e7a0c05 100644 --- a/README.md +++ b/README.md @@ -467,3 +467,12 @@ createos environments list --project -o json - OAuth session tokens are stored at `~/.createos/.oauth` with `600` permissions (readable only by you). - Debug mode masks your token in output — only the first 6 and last 4 characters are shown. - Never share your token or commit it to version control. + +## Telemetry + +The CLI sends usage telemetry (commands run, version, OS, error categories) +to help us improve the product. Before you run `createos login`, events are +anonymous and tied only to a one-way machine hash. After login, events are +associated with your account ID and may include the project ID for +project-scoped commands. No file paths, command output, or secrets are +collected. To disable, set `CREATEOS_DO_NOT_TRACK=1` in your environment. diff --git a/cmd/auth/login.go b/cmd/auth/login.go index 548fe41..6b7f552 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -8,8 +8,10 @@ import ( "github.com/pterm/pterm" "github.com/urfave/cli/v2" + "github.com/NodeOps-app/createos-cli/internal/api" "github.com/NodeOps-app/createos-cli/internal/config" internaloauth "github.com/NodeOps-app/createos-cli/internal/oauth" + "github.com/NodeOps-app/createos-cli/internal/telemetry" "github.com/NodeOps-app/createos-cli/internal/terminal" ) @@ -18,6 +20,90 @@ const ( oauthCallbackURI = "http://localhost:65341/callback" ) +// captureLoginFailure emits an auth_event with success=false. Called BEFORE +// identity is rebound, so distinct_id is still the machine_id_hash — that's +// expected and correct. 
+func captureLoginFailure(method string, err error) { + if telemetry.Default == nil { + return + } + reason := "" + if err != nil { + reason = err.Error() + } + telemetry.Default.Capture("auth_event", map[string]any{ + "action": "login", + "method": method, + "success": false, + "failure_reason": reason, + }) +} + +// bindIdentityAndCapture runs the post-credential-save identity flow: +// fetch /me, persist Identity (preserving AliasedForUserID for same user), +// rebind telemetry distinct_id to user_id, then emit success auth_event. +// All identity fetching is best-effort — a failure here must NOT fail login. +// +// If /me fails OR SaveIdentity fails, we DELETE any pre-existing .identity +// file and skip RebindIdentity — otherwise stale identity from a previous +// account on this machine would mis-attribute the new login's telemetry. +func bindIdentityAndCapture(apiClient *api.APIClient, method string) { + identityFresh := false + var personProps map[string]any + if apiClient != nil { + if u, err := apiClient.GetUser(); err == nil && u != nil && u.ID != "" { + id := config.Identity{UserID: u.ID} + if existing, _ := config.LoadIdentity(); existing != nil && existing.UserID == u.ID { + id.AliasedForUserID = existing.AliasedForUserID + } + if saveErr := config.SaveIdentity(id); saveErr == nil { + identityFresh = true + personProps = userToPersonProps(u) + } + } + // Silent on /me failure — login still succeeds without user_id. + } + + if !identityFresh { + // /me or SaveIdentity failed; drop any stale identity so that a later + // command does not attribute events to a previous user_id. 
+ _ = config.DeleteIdentity() + } + + if telemetry.Default != nil { + if identityFresh { + telemetry.Default.SetPersonProperties(personProps) + telemetry.Default.RebindIdentity() + } + telemetry.Default.Capture("auth_event", map[string]any{ + "action": "login", + "method": method, + "success": true, + }) + } +} + +// userToPersonProps maps the API User struct to PostHog Person-level +// properties. These go ONLY to the Person record via Identify; they are +// NOT included in any Capture event payload. Pointer fields are dereferenced +// only when non-nil. +func userToPersonProps(u *api.User) map[string]any { + p := map[string]any{ + "email": u.Email, + } + if u.DisplayName != nil && *u.DisplayName != "" { + p["name"] = *u.DisplayName + } + if u.Username != nil && *u.Username != "" { + p["username"] = *u.Username + } + if u.CreatedAt != "" { + // signup_date is immutable — Client uses $set_once for this key. + p["signup_date"] = u.CreatedAt + } + return p +} + // NewLoginCommand creates the login command. 
func NewLoginCommand() *cli.Command { return &cli.Command{ @@ -34,15 +120,21 @@ func NewLoginCommand() *cli.Command { // --token flag: API key flow (works in both TTY and non-TTY) if token := c.String("token"); token != "" { if err := config.SaveToken(token); err != nil { - return fmt.Errorf("could not save your token: %w", err) + wrapped := fmt.Errorf("could not save your token: %w", err) + captureLoginFailure("token", wrapped) + return wrapped } + client := api.NewClient(token, c.String("api-url"), c.Bool("debug")) + bindIdentityAndCapture(&client, "token") pterm.Success.Println("You're signed in.") return nil } // Non-interactive (CI/script): require --token flag if !terminal.IsInteractive() { - return fmt.Errorf("non-interactive mode: use --token flag to sign in\n\n Example:\n createos login --token ") + err := fmt.Errorf("non-interactive mode: use --token flag to sign in\n\n Example:\n createos login --token ") + captureLoginFailure("token", err) + return err } // Interactive: let user choose auth method @@ -54,30 +146,40 @@ func NewLoginCommand() *cli.Command { WithOptions(options). Show("How would you like to sign in?") if err != nil { - return fmt.Errorf("sign in cancelled") + cancel := fmt.Errorf("sign in cancelled") + // Method unknown at this point — choice was never made. Pick + // "browser" since that's the default selection in the picker. 
+ captureLoginFailure("browser", cancel) + return cancel } if selected == options[1] { - return loginWithAPIToken() + return loginWithAPIToken(c) } - return loginWithBrowser() + return loginWithBrowser(c) }, } } -func loginWithAPIToken() error { +func loginWithAPIToken(c *cli.Context) error { token, err := pterm.DefaultInteractiveTextInput.WithMask("*").Show("Paste your API token") if err != nil || token == "" { - return fmt.Errorf("sign in cancelled") + cancel := fmt.Errorf("sign in cancelled") + captureLoginFailure("token", cancel) + return cancel } if err := config.SaveToken(token); err != nil { - return fmt.Errorf("could not save your token: %w", err) + wrapped := fmt.Errorf("could not save your token: %w", err) + captureLoginFailure("token", wrapped) + return wrapped } + client := api.NewClient(token, c.String("api-url"), c.Bool("debug")) + bindIdentityAndCapture(&client, "token") pterm.Success.Println("You're signed in.") return nil } -func loginWithBrowser() error { +func loginWithBrowser(c *cli.Context) error { pterm.Info.Println("Starting browser login...") port := oauthCallbackPort @@ -86,17 +188,23 @@ func loginWithBrowser() error { pterm.Info.Println("Fetching authorization server info...") meta, err := internaloauth.FetchServerMetadata(config.OAuthIssuerURL) if err != nil { - return fmt.Errorf("could not reach authorization server: %w", err) + wrapped := fmt.Errorf("could not reach authorization server: %w", err) + captureLoginFailure("browser", wrapped) + return wrapped } pkce, err := internaloauth.GeneratePKCE() if err != nil { - return fmt.Errorf("could not generate security parameters: %w", err) + wrapped := fmt.Errorf("could not generate security parameters: %w", err) + captureLoginFailure("browser", wrapped) + return wrapped } state, err := internaloauth.GenerateState() if err != nil { - return fmt.Errorf("could not generate state: %w", err) + wrapped := fmt.Errorf("could not generate state: %w", err) + captureLoginFailure("browser", wrapped) + 
return wrapped } authURL := internaloauth.BuildAuthURL( @@ -120,11 +228,15 @@ func loginWithBrowser() error { code, returnedState, err := internaloauth.StartCallbackServer(port) if err != nil { - return fmt.Errorf("login was not completed: %w", err) + wrapped := fmt.Errorf("login was not completed: %w", err) + captureLoginFailure("browser", wrapped) + return wrapped } if returnedState != state { - return fmt.Errorf("invalid state parameter — possible CSRF attack, login aborted") + wrapped := fmt.Errorf("invalid state parameter — possible CSRF attack, login aborted") + captureLoginFailure("browser", wrapped) + return wrapped } pterm.Info.Println("Completing sign in...") @@ -136,7 +248,9 @@ func loginWithBrowser() error { pkce.Verifier, ) if err != nil { - return fmt.Errorf("could not complete sign in: %w", err) + wrapped := fmt.Errorf("could not complete sign in: %w", err) + captureLoginFailure("browser", wrapped) + return wrapped } expiresAt := time.Now().Unix() + int64(tokenResp.ExpiresIn) @@ -150,9 +264,14 @@ func loginWithBrowser() error { TokenEndpoint: meta.TokenEndpoint, } if err := config.SaveOAuthSession(session); err != nil { - return fmt.Errorf("could not save your session: %w", err) + wrapped := fmt.Errorf("could not save your session: %w", err) + captureLoginFailure("browser", wrapped) + return wrapped } + client := api.NewClientWithAccessToken(tokenResp.AccessToken, c.String("api-url"), c.Bool("debug")) + bindIdentityAndCapture(&client, "browser") + fmt.Println() pterm.Success.Println("You're signed in.") return nil diff --git a/cmd/auth/logout.go b/cmd/auth/logout.go index c367d00..ac59a79 100644 --- a/cmd/auth/logout.go +++ b/cmd/auth/logout.go @@ -6,6 +6,7 @@ import ( "github.com/urfave/cli/v2" "github.com/NodeOps-app/createos-cli/internal/config" + "github.com/NodeOps-app/createos-cli/internal/telemetry" ) // NewLogoutCommand creates the logout command @@ -24,6 +25,16 @@ func NewLogoutCommand() *cli.Command { if err := config.DeleteOAuthSession(); 
err != nil { return fmt.Errorf("could not clear your session: %w", err) } + // Phase 4: clear identity binding so the next login can re-Alias + // against the post-login user_id without inheriting stale state. + _ = config.DeleteIdentity() + + if telemetry.Default != nil { + telemetry.Default.Capture("auth_event", map[string]any{ + "action": "logout", + "success": true, + }) + } fmt.Println("You've been signed out successfully.") return nil diff --git a/cmd/root/root.go b/cmd/root/root.go index a937bcc..0f25488 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -3,6 +3,7 @@ package root import ( "fmt" + "io" "net/url" "time" @@ -34,6 +35,7 @@ import ( internaloauth "github.com/NodeOps-app/createos-cli/internal/oauth" "github.com/NodeOps-app/createos-cli/internal/output" "github.com/NodeOps-app/createos-cli/internal/pkg/version" + "github.com/NodeOps-app/createos-cli/internal/telemetry" ) // NewApp creates and configures the root CLI application. @@ -64,6 +66,14 @@ func NewApp() *cli.App { }, }, Before: func(c *cli.Context) error { + // Telemetry stashes (Phase 2) — must run BEFORE any validation that + // may return early, so the finalizer always sees a start time and + // the resolved subcommand name. NO event emission here: + // c.Command.FullName() returns "createos" at this point because + // urfave/cli has not yet dispatched into the subcommand. 
+ c.App.Metadata["telemetry_start"] = time.Now() + c.App.Metadata["telemetry_arg_first"] = c.Args().First() + // Store the output format in metadata c.App.Metadata[output.FormatKey] = output.DetectFormat(c) @@ -106,6 +116,14 @@ func NewApp() *cli.App { session.RefreshToken, ) if err != nil { + if telemetry.Default != nil { + telemetry.Default.Capture("auth_event", map[string]any{ + "action": "refresh", + "method": "refresh", + "success": false, + "failure_reason": err.Error(), + }) + } return fmt.Errorf("your session has expired and could not be renewed — run 'createos login' to sign in again") } session.AccessToken = refreshed.AccessToken @@ -116,8 +134,23 @@ func NewApp() *cli.App { session.ExpiresAt = time.Now().Unix() + int64(refreshed.ExpiresIn) } if err := config.SaveOAuthSession(*session); err != nil { + if telemetry.Default != nil { + telemetry.Default.Capture("auth_event", map[string]any{ + "action": "refresh", + "method": "refresh", + "success": false, + "failure_reason": err.Error(), + }) + } return fmt.Errorf("could not save refreshed session: %w", err) } + if telemetry.Default != nil { + telemetry.Default.Capture("auth_event", map[string]any{ + "action": "refresh", + "method": "refresh", + "success": true, + }) + } } client := api.NewClientWithAccessToken(session.AccessToken, c.String("api-url"), c.Bool("debug")) c.App.Metadata[api.ClientKey] = &client @@ -198,5 +231,48 @@ func NewApp() *cli.App { }, } + // Phase 3: store app pointer so HelpPrinter override (which has no + // cli.Context) can reach App.Metadata via CurrentApp(). + currentAppPtr.Store(app) + + // Phase 2 telemetry wiring. Wrap every subcommand Action AND the + // home-screen Action so command_invoked fires with the correct + // FullName() AFTER cli/v2 has dispatched into the subcommand. + wrapActions(app.Commands) + wrapAppAction(app) + + // Phase 3: HelpPrinter / VersionPrinter overrides emit command_invoked + // for `--help` and `--version` paths where the Action never runs. 
We + // defer to the original printer so user output is never delayed by + // telemetry. NO Flush — Close is terminal and runs in finalizeTelemetry. + originalHelp := cli.HelpPrinter + cli.HelpPrinter = func(w io.Writer, templ string, data interface{}) { + defer originalHelp(w, templ, data) + if !helpEmittedThisProcess.CompareAndSwap(false, true) { + return + } + client := telemetry.Default + if client == nil { + return + } + props := map[string]any{"command": "--help"} + if appRef := CurrentApp(); appRef != nil { + appRef.Metadata["telemetry_invoked_props"] = props + } + client.Capture("command_invoked", props) + } + + originalVersion := cli.VersionPrinter + cli.VersionPrinter = func(c *cli.Context) { + defer originalVersion(c) + client := telemetry.Default + if client == nil { + return + } + props := map[string]any{"command": "--version"} + c.App.Metadata["telemetry_invoked_props"] = props + client.Capture("command_invoked", props) + } + return app } diff --git a/cmd/root/telemetry.go b/cmd/root/telemetry.go new file mode 100644 index 0000000..1f573bb --- /dev/null +++ b/cmd/root/telemetry.go @@ -0,0 +1,223 @@ +// Package root — telemetry wiring helpers (Phase 2). +package root + +import ( + "context" + "os" + "strings" + "sync/atomic" + "time" + + "github.com/urfave/cli/v2" + + "github.com/NodeOps-app/createos-cli/internal/config" + "github.com/NodeOps-app/createos-cli/internal/telemetry" +) + +// helpEmittedThisProcess is a process-wide guard so the HelpPrinter override +// (Phase 3) and the Action wrapper don't double-emit command_invoked when both +// fire (e.g. `createos projects --help`). +var helpEmittedThisProcess atomic.Bool + +// currentAppPtr holds a process-wide pointer to the active *cli.App so the +// HelpPrinter override (which has no cli.Context access) can stash invoked +// props into App.Metadata for the finalizer to pair with. 
+var currentAppPtr atomic.Pointer[cli.App] + +// CurrentApp returns the active app pointer set by NewApp, or nil if not set. +func CurrentApp() *cli.App { return currentAppPtr.Load() } + +// valueFlags lists global flags that take a value. Must be kept in sync with +// NewApp's Flags slice. Bools (--debug, -d) take no value, so omitted. +var valueFlags = map[string]bool{ + "--api-url": true, + "--output": true, + "-o": true, +} + +// buildInvokedProps collects properties for a command_invoked event from the +// active cli.Context. +func buildInvokedProps(c *cli.Context) map[string]any { + props := map[string]any{ + "command": commandPath(c), + "flags": telemetry.FlagsFromContext(c), + "arg_count": c.Args().Len(), + } + if pid := resolveProjectID(c); pid != "" { + props["project_id"] = pid + } + return props +} + +// commandPath returns the space-joined command name including all parent +// subcommand names (e.g. "projects list"). urfave/cli/v2 v2.27.7's +// c.Command.FullName() returns only the leaf name, so we walk c.Lineage() +// from root → leaf, skipping the synthesized root command whose Name +// equals the App.Name. +func commandPath(c *cli.Context) string { + if c == nil || c.Command == nil { + return "" + } + lineage := c.Lineage() + rootName := "" + if c.App != nil { + rootName = c.App.Name + } + parts := make([]string, 0, len(lineage)) + for i := len(lineage) - 1; i >= 0; i-- { + ctx := lineage[i] + if ctx.Command == nil || ctx.Command.Name == "" { + continue + } + // Skip the synthesized root command unless it is the only entry + // (i.e. this IS the home-screen Action and "createos" is the name). + if ctx.Command.Name == rootName && len(lineage) > 1 { + continue + } + parts = append(parts, ctx.Command.Name) + } + if len(parts) == 0 { + return c.Command.Name + } + return strings.Join(parts, " ") +} + +// resolveProjectID picks (in order): --project flag, --project-id flag, then +// the linked .createos.json's ProjectID. Empty string when none resolve. 
+func resolveProjectID(c *cli.Context) string { + if v := c.String("project"); v != "" { + return v + } + if v := c.String("project-id"); v != "" { + return v + } + if cfg, err := config.FindProjectConfig(); err == nil && cfg != nil { + return cfg.ProjectID + } + return "" +} + +// wrapActions wraps every command's Action so we capture command_invoked and +// stash invoked props into App.Metadata for the finalizer to pair with. +func wrapActions(cmds []*cli.Command) { + for _, cmd := range cmds { + if original := cmd.Action; original != nil { + orig := original + cmd.Action = func(c *cli.Context) error { + props := buildInvokedProps(c) + c.App.Metadata["telemetry_invoked_props"] = props + if !c.Bool("help") { + if telemetry.Default != nil { + telemetry.Default.Capture("command_invoked", props) + } + } + return orig(c) + } + } + wrapActions(cmd.Subcommands) + } +} + +// wrapAppAction wraps the App-level Action (the home screen). +func wrapAppAction(app *cli.App) { + if app.Action == nil { + return + } + original := app.Action + app.Action = func(c *cli.Context) error { + props := buildInvokedProps(c) + c.App.Metadata["telemetry_invoked_props"] = props + if !c.Bool("help") { + if telemetry.Default != nil { + telemetry.Default.Capture("command_invoked", props) + } + } + return original(c) + } +} + +// coarseCommandFromArgs extracts the first positional token from os.Args[1:], +// skipping flag tokens AND their values. Used by the finalizer ONLY when +// App.Before did not run (cli/v2 framework rejected args before Before fired). +func coarseCommandFromArgs(args []string) string { + if len(args) < 2 { + return "" + } + skipNext := false + for _, a := range args[1:] { + if skipNext { + skipNext = false + continue + } + if a == "" { + continue + } + if strings.HasPrefix(a, "-") { + if eq := strings.IndexByte(a, '='); eq >= 0 { + // --flag=value form — value consumed inline, no skip needed. 
+ _ = a[:eq] + } else if valueFlags[a] { + skipNext = true + } + continue + } + return a + } + return "" +} + +// finalizeTelemetry runs after app.Run returns. It emits command_completed or +// command_failed (paired with the command_invoked from wrapActions or the +// HelpPrinter override), then bounds-flushes the posthog client. +// +// Architecture: we deliberately do NOT use App.After — it cannot see the +// Action's return error and fires on Before-failure paths where it would +// mislabel errors as success. +func finalizeTelemetry(app *cli.App, err error) { + client := telemetry.Default + if client == nil { + return + } + + props, ok := app.Metadata["telemetry_invoked_props"].(map[string]any) + if !ok || props == nil { + // Action wrapper never ran. Build a coarse fallback. Prefer the value + // stashed by App.Before (cli/v2 has already paired flags and values + // there) over re-parsing os.Args. + cmd, _ := app.Metadata["telemetry_arg_first"].(string) + if cmd == "" { + cmd = coarseCommandFromArgs(os.Args) + } + props = map[string]any{ + "command": cmd, + "invoked_emitted": false, + } + } else { + props["invoked_emitted"] = true + } + + if start, ok := app.Metadata["telemetry_start"].(time.Time); ok { + props["duration_ms"] = time.Since(start).Milliseconds() + } + + if err != nil { + cat, status := telemetry.CategorizeError(err) + props["error_category"] = cat + props["error_message"] = err.Error() + props["success"] = false + if status != 0 { + props["api_status_code"] = status + } + client.Capture("command_failed", props) + } else { + props["success"] = true + client.Capture("command_completed", props) + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + client.Shutdown(ctx) + cancel() +} + +// FinalizeTelemetry is the exported entry-point used by main.go. 
+func FinalizeTelemetry(app *cli.App, err error) { finalizeTelemetry(app, err) } diff --git a/cmd/upgrade/upgrade.go b/cmd/upgrade/upgrade.go index fbeee61..f7488d2 100644 --- a/cmd/upgrade/upgrade.go +++ b/cmd/upgrade/upgrade.go @@ -20,6 +20,7 @@ import ( "golang.org/x/mod/semver" "github.com/NodeOps-app/createos-cli/internal/pkg/version" + "github.com/NodeOps-app/createos-cli/internal/telemetry" ) const ( @@ -55,17 +56,52 @@ func NewUpgradeCommand() *cli.Command { } } -func runUpgrade() error { +func runUpgrade() (retErr error) { pterm.Info.Printf("Current version: %s (channel: %s)\n", version.Version, version.Channel) + fromVersion := version.Version + // toVersion is "latest" until the release tag is resolved. Captured by + // the deferred emit below if we fail before resolution. + toVersion := "latest" + // upgradeAttempted is set true only when we cross the "no-op / already + // up-to-date" boundary and actually try to swap the binary. Successful + // no-op paths must NOT emit upgrade_event. + upgradeAttempted := false + upgradeSucceeded := false + + // Single deferred emit handles every return path. Note: the runUpgrade + // Action returns normally to main.go which calls FinalizeTelemetry + + // telemetry.Shutdown before os.Exit, so the event flushes naturally — no + // Flush() call needed here. 
+ defer func() { + if !upgradeAttempted { + return + } + if telemetry.Default == nil { + return + } + props := map[string]any{ + "from_version": fromVersion, + "to_version": toVersion, + "success": upgradeSucceeded, + } + if !upgradeSucceeded && retErr != nil { + props["failure_reason"] = retErr.Error() + } + telemetry.Default.Capture("upgrade_event", props) + }() + release, err := fetchLatestRelease() if err != nil { + upgradeAttempted = true return fmt.Errorf("could not check for updates: %w", err) } + toVersion = release.TagName if version.Channel == "nightly" { remoteCommit, err := fetchNightlyCommit(release) if err != nil { + upgradeAttempted = true return fmt.Errorf("could not check nightly commit: %w", err) } if remoteCommit == version.Commit { @@ -85,6 +121,9 @@ func runUpgrade() error { } } + // Past this point we are committed to attempting the upgrade. + upgradeAttempted = true + pterm.Info.Printf("New version available: %s\n", release.TagName) assetName := binaryAssetName() @@ -143,6 +182,7 @@ func runUpgrade() error { return fmt.Errorf("could not replace binary: %w", err) } + upgradeSucceeded = true pterm.Success.Printf("Upgraded to %s. Run 'createos version' to confirm.\n", release.TagName) return nil } diff --git a/docs/telemetry.md b/docs/telemetry.md new file mode 100644 index 0000000..f9803b5 --- /dev/null +++ b/docs/telemetry.md @@ -0,0 +1,59 @@ +## Telemetry + +The CLI ships PostHog telemetry via `internal/telemetry`. **Most new commands need ZERO telemetry code** — `command_invoked` / `command_completed` / `command_failed` are emitted automatically by the Action wrapper in `cmd/root/telemetry.go` and the `main.go` finalizer. Just write your command's `Action` and it will be tracked. + +### When you DO need to touch telemetry + +Add a `telemetry.Default.Capture(...)` call ONLY when a command represents a discrete domain event distinct from "command invoked/completed". 
Examples already in the codebase: + +- `auth_event{action: login|logout|refresh, method, success}` — login/logout/refresh in `cmd/auth/`, `cmd/root/root.go` Before hook. +- `upgrade_event{from_version, to_version, success, failure_reason}` — `cmd/upgrade/`. + +If you're adding a similar high-value lifecycle event (e.g. `deployment_event`, `vm_event`), follow the same pattern: +```go +if telemetry.Default != nil { + telemetry.Default.Capture("<event_name>", map[string]any{ + "action": "<action>", + "success": true, + // domain-specific props (NO secrets, NO file paths, NO emails) + }) +} +``` + +### Hard rules (do NOT break) + +- ❌ **Never** import `github.com/posthog/posthog-go` outside `internal/telemetry/`. +- ❌ **Never** add a `Flush(timeout)` method to `internal/telemetry/Client`. posthog-go's `Close()` is terminal and there is no non-terminal flush primitive. +- ❌ **Never** include user email, file paths, command output, tokens, or any flag value matching the redact denylist (token/password/secret/key/credential/bearer/auth) in event Properties. The Action wrapper auto-redacts flag values via `internal/telemetry/redact.go`; preserve that behavior — if you add a new sensitive flag alias, ensure `internal/telemetry/redact.go::denyKeywords` covers it (canonical name OR alias). +- ❌ **Never** call `apiClient.GetUser()` outside of `cmd/auth/login.go`'s `bindIdentityAndCapture`. Identity binding happens once at login, not per-command. +- ❌ **Never** persist anything to `~/.createos/.identity` beyond `{user_id, aliased_for_user_id}` — no email, name, or other PII. The file is intentionally minimal; PostHog Person properties (email, name, signup_date) are sent in-memory via `Client.SetPersonProperties` and never touch disk. +- ❌ **Never** emit command-lifecycle telemetry (`command_invoked` / `command_completed` / `command_failed`) from `App.Before` (subcommand name not yet resolved) or `App.After` (cannot see Action error). Use the Action wrapper or the `main.go` finalizer. The only event emitted from the `Before` hook is `auth_event{action: refresh}`, which does not depend on the command name. +- ❌ **Never** call `telemetry.Default.Capture` from a hot loop or per-iteration code path.
Events are coarse-grained — at most two per CLI invocation (`command_invoked` plus `command_completed` or `command_failed`), plus a handful of domain lifecycle events. The free monthly quota is 1M events. + +### When adding a new sensitive flag + +If you add a flag whose value should be redacted from telemetry (any new auth/secret-bearing flag): +- Pick a name where the canonical OR any alias contains a denylist keyword (`token`, `secret`, etc.) — e.g. `--api-token`, `--ssh-key`. The redact path canonicalizes via `c.Lineage()` so any alias matching the denylist redacts the whole flag. +- If the flag name doesn't naturally contain a denylist keyword (e.g. a credential called `--cookie`), add the new keyword to `internal/telemetry/redact.go::denyKeywords`. + +### When adding a new project-scoped command + +The Action wrapper auto-attaches `project_id` to events when: +- the command has a `--project` or `--project-id` flag, OR +- a `.createos.json` exists in cwd / parent dirs (`config.FindProjectConfig`). + +If your command resolves project ID via a different mechanism (e.g. positional arg only, or a custom env var), update `cmd/root/telemetry.go::resolveProjectID` so the project_id property is set correctly. + +### Verifying your changes + +After wiring telemetry, smoke test against a staging key: +```bash +go build -ldflags="-X github.com/NodeOps-app/createos-cli/internal/telemetry.PostHogAPIKey=<staging-api-key> \ + -X github.com/NodeOps-app/createos-cli/internal/telemetry.PostHogHost=https://us.i.posthog.com" \ + -o /tmp/createos-test . +/tmp/createos-test <command> +# wait ~10s for posthog-go batch flush + 3s Shutdown +# then query PostHog HogQL: SELECT event, properties FROM events WHERE timestamp > now() - INTERVAL 5 MINUTE +``` + +Run the anti-pattern grep audit from the plan (`docs/superpowers/plans/2026-05-01-posthog-telemetry-plan.md` §Phase 7) before merging.
diff --git a/go.mod b/go.mod index c0b3189..a36bd8e 100644 --- a/go.mod +++ b/go.mod @@ -5,11 +5,14 @@ go 1.25.0 require ( github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 + github.com/denisbrodbeck/machineid v1.0.1 github.com/go-resty/resty/v2 v2.17.2 + github.com/posthog/posthog-go v1.12.4 github.com/pterm/pterm v0.12.83 github.com/urfave/cli/v2 v2.27.7 golang.org/x/crypto v0.50.0 golang.org/x/mod v0.35.0 + golang.org/x/sys v0.43.0 golang.org/x/term v0.42.0 ) @@ -26,7 +29,10 @@ require ( github.com/containerd/console v1.0.5 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect + github.com/goccy/go-json v0.10.5 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gookit/color v1.6.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/lithammer/fuzzysearch v1.1.8 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -40,6 +46,5 @@ require ( github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect golang.org/x/net v0.52.0 // indirect - golang.org/x/sys v0.43.0 // indirect golang.org/x/text v0.36.0 // indirect ) diff --git a/go.sum b/go.sum index 93d8422..2231730 100644 --- a/go.sum +++ b/go.sum @@ -40,16 +40,26 @@ github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6N github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/denisbrodbeck/machineid v1.0.1 h1:geKr9qtkB876mXguW2X6TU4ZynleN6ezuMSRhl4D7AQ= +github.com/denisbrodbeck/machineid v1.0.1/go.mod h1:dJUwb7PTidGDeYyUBmXZ2GphQBbjJCrnectwCyxcUSI= 
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= github.com/go-resty/resty/v2 v2.17.2 h1:FQW5oHYcIlkCNrMD2lloGScxcHJ0gkjshV3qcQAyHQk= github.com/go-resty/resty/v2 v2.17.2/go.mod h1:kCKZ3wWmwJaNc7S29BRtUhJwy7iqmn+2mLtQrOyQlVA= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gookit/assert v0.1.1 h1:lh3GcawXe/p+cU7ESTZ5Ui3Sm/x8JWpIis4/1aF0mY0= github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj0VX2E= github.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ= github.com/gookit/color v1.5.0/go.mod h1:43aQb+Zerm/BWh2GnrgOQm7ffz7tvQXEKV6BFMl7wAo= github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA= github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.10/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= @@ -75,7 +85,10 @@ github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELU 
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posthog/posthog-go v1.12.4 h1:fAqTAGgLQFZADRg3AJvSSoP0hwEIs940OE8AmL8/H8c= +github.com/posthog/posthog-go v1.12.4/go.mod h1:xsVOW9YImilUcazwPNEq4PJDqEZf2KeCS758zXjwkPg= github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI= github.com/pterm/pterm v0.12.29/go.mod h1:WI3qxgvoQFFGKGjGnJR849gU0TsEOvKn5Q8LlY1U7lg= github.com/pterm/pterm v0.12.30/go.mod h1:MOqLIyMOgmTDz9yorcYbcw+HsgoZo3BQfg2wtl3HEFE= @@ -96,6 +109,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs= @@ -165,3 +180,5 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/config/identity.go b/internal/config/identity.go new file mode 100644 index 0000000..4ae7f63 --- /dev/null +++ b/internal/config/identity.go @@ -0,0 +1,85 @@ +package config + +import ( + "encoding/json" + "errors" + "os" + "path/filepath" +) + +const identityFile = ".identity" + +// Identity is the post-login user binding stored in ~/.createos/.identity. +// +// Kept in its own file (NOT inside OAuthSession or .token) so that the two +// existing auth paths — OAuth JSON and flat-string token — stay untouched. +// AliasedForUserID is set after RebindIdentity has emitted PostHog Alias for +// this user_id; the marker keeps the alias one-shot per (machine, user) +// pair. +type Identity struct { + UserID string `json:"user_id"` + AliasedForUserID string `json:"aliased_for_user_id,omitempty"` +} + +// identityPath returns the path to ~/.createos/.identity. +func identityPath() (string, error) { + dir, err := configPath() + if err != nil { + return "", err + } + return filepath.Join(dir, identityFile), nil +} + +// SaveIdentity writes the identity file (mode 0600) under ~/.createos. +func SaveIdentity(id Identity) error { + dir, err := configPath() + if err != nil { + return err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return err + } + path, err := identityPath() + if err != nil { + return err + } + data, err := json.Marshal(id) + if err != nil { + return err + } + return os.WriteFile(path, data, 0600) +} + +// LoadIdentity reads ~/.createos/.identity. Returns (nil, nil) when the file +// is absent (matches the LoadOAuthSession contract). 
+func LoadIdentity() (*Identity, error) { + path, err := identityPath() + if err != nil { + return nil, err + } + data, err := os.ReadFile(path) // #nosec G304 -- path is from identityPath() under ~/.createos/ + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + var id Identity + if err := json.Unmarshal(data, &id); err != nil { + return nil, err + } + return &id, nil +} + +// DeleteIdentity removes ~/.createos/.identity. Nil on os.ErrNotExist. +func DeleteIdentity() error { + path, err := identityPath() + if err != nil { + return err + } + err = os.Remove(path) + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err +} diff --git a/internal/telemetry/client.go b/internal/telemetry/client.go new file mode 100644 index 0000000..83536f3 --- /dev/null +++ b/internal/telemetry/client.go @@ -0,0 +1,231 @@ +package telemetry + +import ( + "context" + "sync" + "time" + + "github.com/posthog/posthog-go" + + "github.com/NodeOps-app/createos-cli/internal/config" +) + +// Default is the package-global Client. Written once by Init. Other packages +// MUST tolerate Default == nil (call sites use the nil-receiver no-op +// pattern). +var Default *Client + +// initOnce guards Init so re-entry is a cheap no-op. +var initOnce sync.Once + +// Client wraps the posthog-go client with our distinct_id / user_id state. +// +// Thread-safety: posthog-go's Enqueue is itself goroutine-safe. The mutex +// here only guards the userID/distinctID swap inside RebindIdentity so that +// a concurrent Capture sees a consistent pair. 
+type Client struct {
+	inner         posthog.Client     // underlying posthog-go client; nil on the disabled sentinel built by Init
+	machineIDHash string             // anonymous distinct_id; never overwritten after Init
+	globalProps   posthog.Properties // merged into every Capture payload
+
+	mu          sync.Mutex     // held for post-Init writes to the fields below
+	userID      string         // empty pre-login
+	distinctID  string         // == machineIDHash pre-login, == userID post-login
+	disabled    bool           // true on the Init sentinel and after Shutdown
+	personProps map[string]any // sent on Identify only; never on Capture events
+}
+
+// silentLogger satisfies posthog.Logger but drops all output. Telemetry must
+// never write to stderr/stdout.
+type silentLogger struct{}
+
+func (silentLogger) Logf(string, ...interface{})   {}
+func (silentLogger) Errorf(string, ...interface{}) {}
+func (silentLogger) Debugf(string, ...interface{}) {}
+func (silentLogger) Warnf(string, ...interface{})  {}
+
+// Init constructs Default. Idempotent — second call is a no-op.
+//
+// On disabled (no key, or CREATEOS_DO_NOT_TRACK=1), Default is set to a
+// non-nil sentinel client whose methods are all no-ops. Callers can deref
+// without nil-checks, but the package-global is still nil-checked for the
+// belt-and-braces case where Init was never reached at all.
+//
+// Only the first call can return a non-nil error (the one reported by
+// posthog.NewWithConfig); later calls skip the once-guarded body and return
+// nil. Even on error, Default is left as a usable disabled sentinel.
+func Init(ctx context.Context) error {
+	var initErr error
+	initOnce.Do(func() {
+		if IsDisabled() {
+			Default = &Client{disabled: true}
+			return
+		}
+
+		inner, err := posthog.NewWithConfig(effectiveKey(), posthog.Config{
+			Endpoint:  effectiveHost(),
+			BatchSize: 20,
+			Interval:  5 * time.Second,
+			Logger:    silentLogger{},
+		})
+		if err != nil {
+			// Fail-soft: degrade to disabled rather than surfacing a
+			// telemetry error to the user.
+			Default = &Client{disabled: true}
+			initErr = err
+			return
+		}
+
+		c := &Client{
+			inner:         inner,
+			machineIDHash: ResolveDistinctID(),
+			globalProps:   GlobalProperties(ctx),
+		}
+		// Start anonymous; RebindIdentity below switches to the user ID
+		// when a persisted identity exists.
+		c.distinctID = c.machineIDHash
+		Default = c
+
+		// Apply any persisted identity (alias + identify + switch distinctID).
+		c.RebindIdentity()
+	})
+	return initErr
+}
+
+// Capture enqueues a custom event. Safe no-op when c is nil or disabled.
+func (c *Client) Capture(event string, props map[string]any) {
+	if c == nil || c.inner == nil {
+		return
+	}
+
+	// Snapshot all mutable state in one critical section. Shutdown flips
+	// c.disabled while holding c.mu, so reading the flag without the lock
+	// would be a data race.
+	c.mu.Lock()
+	disabled := c.disabled
+	distinctID := c.distinctID
+	userID := c.userID
+	c.mu.Unlock()
+
+	// posthog-go validates DistinctId is non-empty; drop silently before
+	// paying for a payload that would be rejected anyway.
+	if disabled || distinctID == "" {
+		return
+	}
+
+	// Merge order matters: caller props override global props, and user_id is
+	// written last so it always reflects the bound identity.
+	merged := make(posthog.Properties, len(c.globalProps)+len(props)+1)
+	for k, v := range c.globalProps {
+		merged[k] = v
+	}
+	for k, v := range props {
+		merged[k] = v
+	}
+	if userID != "" {
+		merged["user_id"] = userID
+	}
+
+	// Best-effort: a failed enqueue must never surface to the CLI user.
+	_ = c.inner.Enqueue(posthog.Capture{
+		DistinctId: distinctID,
+		Event:      event,
+		Properties: merged,
+	})
+}
+
+// SetPersonProperties stores PostHog Person-level properties (email, name,
+// signup_date, ...) that will be attached to the next Identify event sent
+// by RebindIdentity. The map is held in memory only — it is never persisted
+// to disk and never appears on Capture event payloads.
+//
+// The input map is copied, so later mutation by the caller has no effect on
+// the stored properties. Safe no-op when c is nil or disabled.
+//
+// Callers (the login flow) MUST call this BEFORE RebindIdentity for the
+// props to land on the corresponding Identify.
+func (c *Client) SetPersonProperties(props map[string]any) {
+	if c == nil {
+		return
+	}
+	// Copy outside the lock; only the pointer swap needs to be guarded.
+	cp := make(map[string]any, len(props))
+	for k, v := range props {
+		cp[k] = v
+	}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	// Checked under the lock because Shutdown writes c.disabled under c.mu.
+	if c.disabled {
+		return
+	}
+	c.personProps = cp
+}
+
+// RebindIdentity reads the on-disk Identity and aligns the client state.
+// Aliasing is one-shot per (machine, user) pair and the distinct_id switch is
+// idempotent, but every call that finds a bound user enqueues a fresh
+// Identify event.
+//
+// CRITICAL: alias is ALWAYS emitted from machineIDHash, never from the
+// current distinctID (which may already point to a previous user_id from
+// this same process — aliasing user→user collapses the wrong nodes).
+func (c *Client) RebindIdentity() {
+	if c == nil || c.inner == nil {
+		return
+	}
+
+	// Read mutable state under the lock: Shutdown writes c.disabled and
+	// SetPersonProperties swaps c.personProps while holding c.mu. The stored
+	// map is a private copy that is only ever replaced wholesale, so it is
+	// safe to iterate after the lock is released.
+	c.mu.Lock()
+	disabled := c.disabled
+	personProps := c.personProps
+	c.mu.Unlock()
+	if disabled {
+		return
+	}
+
+	id, err := config.LoadIdentity()
+	if err != nil || id == nil || id.UserID == "" {
+		return
+	}
+
+	// One-shot alias from the anonymous machine hash to the user ID. The
+	// marker is persisted ONLY when the alias was actually queued; recording
+	// it after a failed enqueue (or with an empty machine hash, which
+	// posthog-go rejects) would permanently skip the merge of pre-login
+	// history into the user.
+	if id.AliasedForUserID != id.UserID && c.machineIDHash != "" {
+		alias := posthog.Alias{
+			DistinctId: c.machineIDHash,
+			Alias:      id.UserID,
+		}
+		if err := c.inner.Enqueue(alias); err == nil {
+			id.AliasedForUserID = id.UserID
+			// Best-effort: if the marker cannot be saved the alias is
+			// simply re-sent on a later run.
+			_ = config.SaveIdentity(*id)
+		}
+	}
+
+	// Only these environment-level global props are mirrored onto the person.
+	identifyProps := posthog.NewProperties()
+	for _, key := range []string{"is_ci", "ci_provider", "channel"} {
+		if v, ok := c.globalProps[key]; ok {
+			identifyProps.Set(key, v)
+		}
+	}
+
+	// Attach Person props (email, name, signup_date, ...) supplied by the
+	// login flow. They go ONLY to PostHog's person record via Identify; they
+	// are NOT included in any Capture event payload.
+	setOnce := map[string]any{}
+	for k, v := range personProps {
+		switch k {
+		case "signup_date", "created_at":
+			// Immutable Person fields — only set on first identify per
+			// person, never overwritten on subsequent logins.
+			setOnce[k] = v
+		default:
+			identifyProps.Set(k, v)
+		}
+	}
+	if len(setOnce) > 0 {
+		// NOTE(review): confirm posthog-go forwards a "$set_once" key nested
+		// in Identify.Properties as a set-once operation rather than as a
+		// literal person property — TODO verify against the pinned version.
+		identifyProps.Set("$set_once", setOnce)
+	}
+
+	// Best-effort: telemetry failures are never surfaced to the user.
+	_ = c.inner.Enqueue(posthog.Identify{
+		DistinctId: id.UserID,
+		Properties: identifyProps,
+	})
+
+	// Switch both fields together so Capture never sees a mismatched pair.
+	c.mu.Lock()
+	c.userID = id.UserID
+	c.distinctID = id.UserID
+	c.mu.Unlock()
+}
+
+// Shutdown best-effort flushes pending events.
+//
+// posthog-go has no non-terminal Flush primitive; Close() is the only way to
+// drain the batch and it is TERMINAL — further Enqueue calls return errors.
+// We bound the wait on the caller's ctx (typically a 500ms deadline) and
+// disable the client locally so subsequent Capture calls are cheap no-ops.
+func (c *Client) Shutdown(ctx context.Context) {
+	if c == nil || c.inner == nil {
+		return
+	}
+
+	// Claim the shutdown atomically BEFORE closing: the flag is tested and
+	// set in one critical section so that a repeated or concurrent Shutdown
+	// cannot call the terminal Close() twice, and so the client already
+	// counts as disabled while the flush is in flight.
+	c.mu.Lock()
+	alreadyDisabled := c.disabled
+	c.disabled = true
+	c.mu.Unlock()
+	if alreadyDisabled {
+		return
+	}
+
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		// Best-effort flush; a Close error is never surfaced to the user.
+		_ = c.inner.Close()
+	}()
+
+	select {
+	case <-done:
+	case <-ctx.Done():
+		// Deadline hit: stop waiting. The goroutine above keeps running
+		// until Close returns; nothing here cancels it.
+	}
+}
diff --git a/internal/telemetry/disabled.go b/internal/telemetry/disabled.go
new file mode 100644
index 0000000..d2b4b33
--- /dev/null
+++ b/internal/telemetry/disabled.go
@@ -0,0 +1,48 @@
+// Package telemetry sends anonymous usage data to PostHog.
+//
+// All events are best-effort and never block the CLI. The package is a no-op
+// when disabled (empty PostHog API key or CREATEOS_DO_NOT_TRACK=1).
+//
+// IMPORTANT: do not import this package outside its own files except via the
+// thin wiring in main.go and cmd/root. Do not log to stdout/stderr from any
+// path here — telemetry must stay silent.
+package telemetry
+
+import "os"
+
+// PostHogAPIKey is injected at build time via -ldflags. Empty by default,
+// which disables telemetry entirely.
+var PostHogAPIKey = ""
+
+// PostHogHost is the PostHog ingestion endpoint. Overridable via -ldflags.
+var PostHogHost = "https://us.i.posthog.com"
+
+const (
+	envOptOut = "CREATEOS_DO_NOT_TRACK"
+	envKey    = "CREATEOS_POSTHOG_KEY"
+	envHost   = "CREATEOS_POSTHOG_HOST"
+)
+
+// IsDisabled reports whether telemetry should be a no-op for this process.
+func IsDisabled() bool {
+	if os.Getenv(envOptOut) == "1" {
+		return true
+	}
+	return effectiveKey() == ""
+}
+
+// effectiveKey returns the PostHog API key — env var wins over ldflag.
+func effectiveKey() string {
+	if v := os.Getenv(envKey); v != "" {
+		return v
+	}
+	return PostHogAPIKey
+}
+
+// effectiveHost returns the PostHog endpoint — env var wins over ldflag.
+func effectiveHost() string { + if v := os.Getenv(envHost); v != "" { + return v + } + return PostHogHost +} diff --git a/internal/telemetry/distinct_id.go b/internal/telemetry/distinct_id.go new file mode 100644 index 0000000..f312c34 --- /dev/null +++ b/internal/telemetry/distinct_id.go @@ -0,0 +1,35 @@ +package telemetry + +import ( + "crypto/sha256" + "encoding/hex" + "os" + "os/user" + + "github.com/denisbrodbeck/machineid" +) + +// ResolveDistinctID returns a stable anonymous identifier for this machine. +// +// Resolution order (per spec §7.1): +// 1. machineid.ProtectedID("createos-cli") — HMAC-SHA256 over the OS machine ID. +// 2. sha256(hostname + "|" + username) — fallback for containers / locked-down envs. +// 3. "" — give up (caller should treat empty distinct_id as a no-op). +// +// The result is NEVER cached to disk — recompute each run. +func ResolveDistinctID() string { + if id, err := machineid.ProtectedID("createos-cli"); err == nil && id != "" { + return id + } + + host, _ := os.Hostname() + username := "" + if u, err := user.Current(); err == nil && u != nil { + username = u.Username + } + if host == "" && username == "" { + return "" + } + sum := sha256.Sum256([]byte(host + "|" + username)) + return hex.EncodeToString(sum[:]) +} diff --git a/internal/telemetry/errors.go b/internal/telemetry/errors.go new file mode 100644 index 0000000..b304389 --- /dev/null +++ b/internal/telemetry/errors.go @@ -0,0 +1,99 @@ +// Package telemetry provides event capture and error categorization. +// +// Error categorization uses string-matching needles to map locally-raised CLI +// errors to a category. The list is intentionally small and acknowledged-fragile; +// the CLI's user-facing error strings are stable enough (UX-tested) that flips +// between buckets are rare. When error messages change in cmd/auth/login.go, +// cmd/root/root.go, or anywhere else, update the corresponding needle below. +// +// Order of checks (first match wins): +// 1. 
*api.APIError → category by HTTP status. +// 2. context.DeadlineExceeded / net.Error → "network". +// 3. local sentinel substrings → "user_input" first, then "auth". +// user_input is checked first because some validation messages contain +// auth-shaped phrases (e.g. "use --token flag to sign in"). +// 4. default → "unknown". +package telemetry + +import ( + "context" + "errors" + "net" + "strings" + + "github.com/NodeOps-app/createos-cli/internal/api" +) + +// CategorizeError returns a stable category label and (when known) the API +// HTTP status code. apiStatusCode is 0 for non-API errors. +func CategorizeError(err error) (category string, apiStatusCode int) { + if err == nil { + return "unknown", 0 + } + + // 1. API errors — bucket by HTTP status. + var apiErr *api.APIError + if errors.As(err, &apiErr) { + switch { + case apiErr.StatusCode == 401, apiErr.StatusCode == 403: + return "auth", apiErr.StatusCode + case apiErr.StatusCode == 404: + return "not_found", apiErr.StatusCode + case apiErr.StatusCode == 400, apiErr.StatusCode == 422: + return "validation", apiErr.StatusCode + case apiErr.StatusCode == 429: + return "rate_limit", apiErr.StatusCode + case apiErr.StatusCode >= 500: + return "api_5xx", apiErr.StatusCode + } + return "unknown", apiErr.StatusCode + } + + // 2. Network / deadline. + if errors.Is(err, context.DeadlineExceeded) { + return "network", 0 + } + var netErr net.Error + if errors.As(err, &netErr) { + return "network", 0 + } + + // 3. Locally-raised sentinels — string match on err.Error(). + // user_input first: some validation messages embed "sign in" wording + // (e.g. "non-interactive mode: use --token flag to sign in"). 
+ msg := err.Error() + for _, needle := range userInputNeedles { + if strings.Contains(msg, needle) { + return "user_input", 0 + } + } + for _, needle := range authNeedles { + if strings.Contains(msg, needle) { + return "auth", 0 + } + } + + return "unknown", 0 +} + +// authNeedles match locally-raised auth errors (signed-in checks, login +// prompts). Update these in lockstep with cmd/auth/login.go and cmd/root. +var authNeedles = []string{ + "sign in", + "signed in", + "login", +} + +// userInputNeedles match validation errors raised by us OR by urfave/cli's +// framework. Update when we change a "please provide ..." or "--api-url +// must ..." string in the CLI. +var userInputNeedles = []string{ + "--api-url must", + "must use HTTPS", + "non-interactive mode", + "could not save", + "please provide", + "missing argument", + "required flag", + "flag provided but not defined", +} diff --git a/internal/telemetry/os_release_other.go b/internal/telemetry/os_release_other.go new file mode 100644 index 0000000..a395ab9 --- /dev/null +++ b/internal/telemetry/os_release_other.go @@ -0,0 +1,6 @@ +//go:build !linux && !darwin && !windows + +package telemetry + +// osRelease returns "" on platforms we don't have a kernel-version probe for. +func osRelease() string { return "" } diff --git a/internal/telemetry/os_release_unix.go b/internal/telemetry/os_release_unix.go new file mode 100644 index 0000000..ae55a97 --- /dev/null +++ b/internal/telemetry/os_release_unix.go @@ -0,0 +1,25 @@ +//go:build linux || darwin + +package telemetry + +import "golang.org/x/sys/unix" + +// osRelease returns a kernel release string like "23.6.0" (darwin) or +// "5.15.0-87-generic" (linux). Empty string on any error. +func osRelease() string { + var u unix.Utsname + if err := unix.Uname(&u); err != nil { + return "" + } + return cstr(u.Release[:]) +} + +// cstr converts a C-string byte slice (NUL-terminated, fixed-width) to Go. 
+func cstr(b []byte) string { + for i, c := range b { + if c == 0 { + return string(b[:i]) + } + } + return string(b) +} diff --git a/internal/telemetry/os_release_windows.go b/internal/telemetry/os_release_windows.go new file mode 100644 index 0000000..6180dc0 --- /dev/null +++ b/internal/telemetry/os_release_windows.go @@ -0,0 +1,21 @@ +//go:build windows + +package telemetry + +import ( + "strconv" + + "golang.org/x/sys/windows" +) + +// osRelease returns a "MAJOR.MINOR.BUILD" string from RtlGetVersion. +// Empty string on any error. +func osRelease() string { + v := windows.RtlGetVersion() + if v == nil { + return "" + } + return strconv.FormatUint(uint64(v.MajorVersion), 10) + "." + + strconv.FormatUint(uint64(v.MinorVersion), 10) + "." + + strconv.FormatUint(uint64(v.BuildNumber), 10) +} diff --git a/internal/telemetry/properties.go b/internal/telemetry/properties.go new file mode 100644 index 0000000..d5641e5 --- /dev/null +++ b/internal/telemetry/properties.go @@ -0,0 +1,67 @@ +package telemetry + +import ( + "context" + "os" + "runtime" + + "github.com/posthog/posthog-go" + + "github.com/NodeOps-app/createos-cli/internal/pkg/version" + "github.com/NodeOps-app/createos-cli/internal/terminal" +) + +// telSchemaVersion is bumped when the property shape changes in an +// incompatible way for downstream dashboards. +const telSchemaVersion = 1 + +// GlobalProperties returns the always-on properties attached to every event. +// The shape matches spec §5.1. +func GlobalProperties(_ context.Context) posthog.Properties { + props := posthog.NewProperties(). + Set("tel_schema_version", telSchemaVersion). + Set("version", version.Version). + Set("channel", version.Channel). + Set("commit_sha", shortCommit(version.Commit)). + Set("goos", runtime.GOOS). + Set("goarch", runtime.GOARCH). + Set("os_release", osRelease()). + Set("go_version", runtime.Version()). 
+ Set("is_interactive", terminal.IsInteractive()) + + isCI, provider := detectCI() + props = props.Set("is_ci", isCI).Set("ci_provider", provider) + return props +} + +// shortCommit truncates a git SHA to the first 7 chars (bounds-checked). +func shortCommit(c string) string { + if len(c) >= 7 { + return c[:7] + } + return c +} + +// detectCI returns whether the process is running in CI and a best-effort +// provider name. Order matters — most specific markers checked first so that, +// e.g., a GitHub Actions runner is reported as "github" rather than the +// generic "ci". +func detectCI() (bool, string) { + switch { + case os.Getenv("GITHUB_ACTIONS") == "true": + return true, "github" + case os.Getenv("GITLAB_CI") == "true": + return true, "gitlab" + case os.Getenv("CIRCLECI") == "true": + return true, "circle" + case os.Getenv("BUILDKITE") == "true": + return true, "buildkite" + case os.Getenv("JENKINS_URL") != "": + return true, "jenkins" + case os.Getenv("TF_BUILD") == "True": + return true, "azure" + case os.Getenv("CI") != "": + return true, "generic" + } + return false, "" +} diff --git a/internal/telemetry/redact.go b/internal/telemetry/redact.go new file mode 100644 index 0000000..c3d804a --- /dev/null +++ b/internal/telemetry/redact.go @@ -0,0 +1,125 @@ +package telemetry + +import ( + "net/url" + "strings" + + "github.com/urfave/cli/v2" +) + +// denyKeywords are case-insensitive substrings that mark a flag value as +// secret. Match → redact. +var denyKeywords = []string{ + "token", "password", "passwd", "secret", "key", "credential", "bearer", "auth", +} + +// redactedSentinel is the placeholder substituted for secret-flag values. +const redactedSentinel = "[REDACTED]" + +// RedactFlagValue returns the original value or the sentinel when the flag +// name matches any deny keyword (case-insensitive substring match). 
+func RedactFlagValue(name string, value any) any {
+	if isSensitiveName(name) {
+		return redactedSentinel
+	}
+	return value
+}
+
+// isSensitiveName reports whether a flag name (canonical or alias) matches
+// any deny keyword via case-insensitive substring match.
+func isSensitiveName(name string) bool {
+	// denyKeywords entries are all lowercase, so lowering the name alone is
+	// enough to make the substring match case-insensitive.
+	lower := strings.ToLower(name)
+	for _, kw := range denyKeywords {
+		if strings.Contains(lower, kw) {
+			return true
+		}
+	}
+	return false
+}
+
+// findFlagByName walks c.Lineage() (child→root) and returns the cli.Flag whose
+// Names() include name (canonical or alias). Returns nil when not found.
+//
+// At each lineage level the command's flags are searched before the app's
+// flags, and the first exact name match wins.
+func findFlagByName(c *cli.Context, name string) cli.Flag {
+	if c == nil {
+		return nil
+	}
+	for _, ctx := range c.Lineage() {
+		if ctx.Command != nil {
+			for _, f := range ctx.Command.Flags {
+				for _, n := range f.Names() {
+					if n == name {
+						return f
+					}
+				}
+			}
+		}
+		if ctx.App != nil {
+			for _, f := range ctx.App.Flags {
+				for _, n := range f.Names() {
+					if n == name {
+						return f
+					}
+				}
+			}
+		}
+	}
+	return nil
+}
+
+// anyAliasSensitive returns true when any of the flag's Names()
+// (canonical + aliases) matches the denylist. A nil flag is never sensitive.
+func anyAliasSensitive(f cli.Flag) bool {
+	if f == nil {
+		return false
+	}
+	for _, n := range f.Names() {
+		if isSensitiveName(n) {
+			return true
+		}
+	}
+	return false
+}
+
+// NormalizeAPIURL strips path/query/fragment, returning "scheme://host" only.
+// Returns "" when the input cannot be parsed or has no host.
+//
+// The host part is url.URL.Host, so a port is kept when present; credentials
+// embedded in the URL live in url.URL.User and never reach the result. A
+// missing scheme defaults to "http".
+func NormalizeAPIURL(raw string) string {
+	if raw == "" {
+		return ""
+	}
+	u, err := url.Parse(raw)
+	if err != nil || u.Host == "" {
+		return ""
+	}
+	scheme := u.Scheme
+	if scheme == "" {
+		scheme = "http"
+	}
+	return scheme + "://" + u.Host
+}
+
+// FlagsFromContext extracts the locally-set flags on a cli.Context as a
+// redacted map suitable for telemetry.
+func FlagsFromContext(c *cli.Context) map[string]any { + if c == nil { + return nil + } + out := map[string]any{} + for _, name := range c.LocalFlagNames() { + v := c.Value(name) + if name == "api-url" { + if s, ok := v.(string); ok { + out[name] = NormalizeAPIURL(s) + continue + } + } + // Canonicalize: redact when ANY alias of this flag matches the + // denylist, not just the user-supplied alias. Example: `login -t ` + // reports name="t" via LocalFlagNames; the canonical "token" matches. + if anyAliasSensitive(findFlagByName(c, name)) { + out[name] = redactedSentinel + continue + } + out[name] = RedactFlagValue(name, v) + } + return out +} diff --git a/main.go b/main.go index 911f13e..8fa4da0 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,7 @@ package main import ( + "context" "errors" "os" @@ -9,12 +10,23 @@ import ( "github.com/NodeOps-app/createos-cli/cmd/root" "github.com/NodeOps-app/createos-cli/internal/api" + "github.com/NodeOps-app/createos-cli/internal/telemetry" ) func main() { + // Init telemetry BEFORE constructing the app. This is required so the + // HelpPrinter / VersionPrinter overrides (Phase 3) see telemetry.Default + // as non-nil even when global --help / --version short-circuit App.Before. + // Init is a no-op when CREATEOS_DO_NOT_TRACK=1 or PostHogAPIKey is empty. + _ = telemetry.Init(context.Background()) + app := root.NewApp() + err := app.Run(os.Args) + + // Telemetry finalize runs BEFORE the existing error display + os.Exit. + root.FinalizeTelemetry(app, err) - if err := app.Run(os.Args); err != nil { + if err != nil { var apiErr *api.APIError if errors.As(err, &apiErr) { pterm.Error.Println(apiErr.Message)