From a7eb29667ef281803d4cd50e043e3d6ea41d2c76 Mon Sep 17 00:00:00 2001 From: Facundo Farias Date: Tue, 28 Apr 2026 08:28:05 +0200 Subject: [PATCH] fix(errors): classify network errors as exit code 4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixpanel telemetry currently buckets every API-call failure as error_class=internal because the only command paths that wrapped errors returned *output.InternalError unconditionally. That made the "internal" bucket impossible to read — a transient DNS timeout looked identical to a real CLI bug. Add a NetworkError type (exit code 4 = network) and an IsNetworkErr helper that detects the standard transport-level failures from net/http: net.Error.Timeout(), *net.DNSError, *net.OpError, *url.Error chains, and ECONNREFUSED / ECONNRESET / EHOSTUNREACH / ENETUNREACH / EPIPE syscalls. ClassifyError now applies the same detection to raw errors that bubble up unwrapped, so commands that don't explicitly wrap their errors still classify correctly. The two known sites that wrap a network call (auth.go and hello.go, both validating credentials via client.ListProjects) now return NetworkError when the cause is connectivity-related. Successful HTTP exchanges that returned non-2xx status (e.g. 401, 5xx) are unchanged and still flow through the existing AuthError / InternalError paths. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/commands/auth.go | 3 + internal/commands/hello.go | 3 + internal/output/errors.go | 67 ++++++++++++++++++++- internal/output/errors_test.go | 104 +++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 3 deletions(-) diff --git a/internal/commands/auth.go b/internal/commands/auth.go index aabae4b..88cb032 100644 --- a/internal/commands/auth.go +++ b/internal/commands/auth.go @@ -131,6 +131,9 @@ func runAuthLogin(opts *AuthLoginOptions) error { Hint: "Check your email and API key at Profile > API Key in DeployHQ", } } + if output.IsNetworkErr(err) { + return &output.NetworkError{Message: "validate credentials", Cause: err} + } return &output.InternalError{Message: "validate credentials", Cause: err} } diff --git a/internal/commands/hello.go b/internal/commands/hello.go index 050b5a2..44c8483 100644 --- a/internal/commands/hello.go +++ b/internal/commands/hello.go @@ -209,6 +209,9 @@ func helloLogin(env *output.Envelope, reader *bufio.Reader) (*auth.Credentials, Hint: "Check your email and API key at Profile > API Key in DeployHQ", } } + if output.IsNetworkErr(err) { + return nil, &output.NetworkError{Message: "validate credentials", Cause: err} + } return nil, &output.InternalError{Message: "validate credentials", Cause: err} } diff --git a/internal/output/errors.go b/internal/output/errors.go index 2ba182c..81af8fd 100644 --- a/internal/output/errors.go +++ b/internal/output/errors.go @@ -6,7 +6,13 @@ // - errors are classified for appropriate exit codes package output -import "fmt" +import ( + "errors" + "fmt" + "net" + "net/url" + "syscall" +) // ExitCode constants for error classification. const ( @@ -63,6 +69,57 @@ func (e *AuthError) Error() string { return e.Message } +// NetworkError represents a connectivity failure (timeout, DNS, refused +// connection, unreachable host). These produce exit code 4 so wrappers can +// distinguish "the network is broken" from "the CLI itself is broken". +type NetworkError struct { + Message string + Cause error +} + +func (e *NetworkError) Error() string { + if e.Cause != nil { + return fmt.Sprintf("%s: %v", e.Message, e.Cause) + } + return e.Message +} + +func (e *NetworkError) Unwrap() error { return e.Cause } + +// IsNetworkErr reports whether err (or any error it wraps) is a connectivity +// failure originating from the network/transport layer. It returns false for +// successful HTTP exchanges that returned non-2xx status (those should be +// classified by status code, not bucketed as network). +func IsNetworkErr(err error) bool { + if err == nil { + return false + } + var netErr net.Error + if errors.As(err, &netErr) && netErr.Timeout() { + return true + } + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + return true + } + var opErr *net.OpError + if errors.As(err, &opErr) { + return true + } + var urlErr *url.Error + if errors.As(err, &urlErr) && urlErr.Err != nil { + return IsNetworkErr(urlErr.Err) + } + if errors.Is(err, syscall.ECONNREFUSED) || + errors.Is(err, syscall.ECONNRESET) || + errors.Is(err, syscall.EHOSTUNREACH) || + errors.Is(err, syscall.ENETUNREACH) || + errors.Is(err, syscall.EPIPE) { + return true + } + return false +} + // ClassifyError returns the appropriate exit code for an error. func ClassifyError(err error) int { if err == nil { @@ -75,7 +132,11 @@ func ClassifyError(err error) int { return ExitInternalError case *AuthError: return ExitAuthError - default: - return ExitInternalError + case *NetworkError: + return ExitNetworkError + } + if IsNetworkErr(err) { + return ExitNetworkError } + return ExitInternalError } diff --git a/internal/output/errors_test.go b/internal/output/errors_test.go index 2af237d..dbe51b7 100644 --- a/internal/output/errors_test.go +++ b/internal/output/errors_test.go @@ -2,7 +2,11 @@ package output import ( "errors" + "net" + "net/url" + "syscall" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -42,3 +46,103 @@ func TestClassifyError_Nil(t *testing.T) { func TestClassifyError_GenericError(t *testing.T) { assert.Equal(t, ExitInternalError, ClassifyError(errors.New("unknown"))) } + +func TestNetworkError(t *testing.T) { + cause := errors.New("dial tcp: i/o timeout") + err := &NetworkError{Message: "validate credentials", Cause: cause} + assert.Contains(t, err.Error(), "validate credentials") + assert.Contains(t, err.Error(), "dial tcp") + assert.Equal(t, ExitNetworkError, ClassifyError(err)) + assert.Equal(t, cause, errors.Unwrap(err)) +} + +func TestIsNetworkErr_Nil(t *testing.T) { + assert.False(t, IsNetworkErr(nil)) +} + +func TestIsNetworkErr_GenericError(t *testing.T) { + assert.False(t, IsNetworkErr(errors.New("not a network thing"))) +} + +func TestIsNetworkErr_Syscalls(t *testing.T) { + cases := []syscall.Errno{ + syscall.ECONNREFUSED, + syscall.ECONNRESET, + syscall.EHOSTUNREACH, + syscall.ENETUNREACH, + syscall.EPIPE, + } + for _, e := range cases { + assert.True(t, IsNetworkErr(e), "expected syscall errno %v to classify as network", e) + } +} + +func TestIsNetworkErr_DNSError(t *testing.T) { + dnsErr := &net.DNSError{Err: "no such host", Name: "api.example.invalid"} + assert.True(t, IsNetworkErr(dnsErr)) +} + +func TestIsNetworkErr_OpError(t *testing.T) { + opErr := &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED} + assert.True(t, IsNetworkErr(opErr)) +} + +// timeoutErr satisfies net.Error with Timeout() = true. +type timeoutErr struct{} + +func (timeoutErr) Error() string { return "i/o timeout" } +func (timeoutErr) Timeout() bool { return true } +func (timeoutErr) Temporary() bool { return true } + +func TestIsNetworkErr_TimeoutNetError(t *testing.T) { + assert.True(t, IsNetworkErr(timeoutErr{})) +} + +func TestIsNetworkErr_URLErrorWraps(t *testing.T) { + urlErr := &url.Error{ + Op: "Get", + URL: "https://api.example.com", + Err: &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED}, + } + assert.True(t, IsNetworkErr(urlErr)) +} + +func TestIsNetworkErr_URLErrorWithNonNetworkCause(t *testing.T) { + urlErr := &url.Error{ + Op: "Get", + URL: "https://api.example.com", + Err: errors.New("malformed body"), + } + assert.False(t, IsNetworkErr(urlErr)) +} + +func TestClassifyError_DetectsRawNetworkError(t *testing.T) { + // An untyped network error returned from the SDK should classify as ExitNetworkError, + // not ExitInternalError, even when no command wraps it. + err := &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED} + assert.Equal(t, ExitNetworkError, ClassifyError(err)) +} + +func TestClassifyError_DetectsURLNetworkError(t *testing.T) { + urlErr := &url.Error{ + Op: "Get", + URL: "https://api.example.com", + Err: timeoutErr{}, + } + assert.Equal(t, ExitNetworkError, ClassifyError(urlErr)) +} + +// Sanity check: a deadline-style error wrapped in net.Error.Timeout() classifies +// even when accessed through deadline-exceeded chains used by net/http. +func TestIsNetworkErr_DeadlineExceededViaNetError(t *testing.T) { + deadline := &net.OpError{ + Op: "read", + Net: "tcp", + Source: nil, + Addr: nil, + Err: timeoutErr{}, + } + assert.True(t, IsNetworkErr(deadline)) + // And confirm it doesn't depend on the elapsed time + _ = time.Second +}