Resilient API calls with exponential backoff and source-aware retry
/** Creates a fresh APIUserAbortError instance on every call. */
const abortError = () => {
  return new APIUserAbortError()
}
// Retry tuning knobs. Their use sites are outside this section, so the notes
// below are hedged where the intent is inferred from the name alone.

// Presumably the retry budget applied when a caller does not specify one.
const DEFAULT_MAX_RETRIES = 10
// NOTE(review): usage not visible here; looks like a lower bound on output tokens — confirm.
const FLOOR_OUTPUT_TOKENS = 3_000
// Presumably the cap on retries triggered by 529 responses — confirm at the use site.
const MAX_529_RETRIES = 3
/** Base backoff delay in milliseconds (500 ms). */
export const BASE_DELAY_MS = 500
// Query sources whose caller is a user actively blocked on the response; only
// these are retried on a 529. Background work (summaries, titles, suggestions,
// classifiers) gives up right away: during a capacity cascade each retry is
// 3-10× gateway amplification, and the user never sees those failures anyway.
// A source that is not listed here is NOT retried — add a new one only when
// the user is waiting on its result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
  'repl_main_thread',
  'repl_main_thread:outputStyle:custom',
  'repl_main_thread:outputStyle:Explanatory',
  'repl_main_thread:outputStyle:Learning',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
  'compact',
  'hook_agent',
  'hook_prompt',
  'verification_agent',
  'side_question',
  // Security classifiers: auto-mode correctness depends on these completing.
  // yoloClassifier.ts tags its queries 'auto_mode' ('yolo_classifier' exists
  // only as a type). bash_classifier is ant-only, so it sits behind a feature
  // gate that lets the string tree-shake out of external builds
  // (excluded-strings.txt). Keep the gate expression in this exact shape.
  'auto_mode',
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])
/**
 * Decides whether a 529 response should be retried for the given query source.
 *
 * @param querySource - Tag identifying where the query came from, if any.
 * @returns `true` when the source is untagged or is listed in
 *   `FOREGROUND_529_RETRY_SOURCES`; `false` otherwise.
 */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  // Untagged call paths are retried: the conservative choice when the origin
  // of the query is unknown.
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}
// CLAUDE_CODE_UNATTENDED_RETRY (ant-only): unattended sessions retry 429/529
// without limit, using a higher backoff and emitting periodic keep-alive
// yields so the host environment does not flag the session as idle mid-wait.
// TODO(ANT-344): keep-alive through SystemAPIErrorMessage yields is a stopgap
// until a dedicated keep-alive channel exists.
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60_000 // 5 minutes
const PERSISTENT_RESET_CAP_MS = 6 * 3_600_000 // 6 hours
const HEARTBEAT_INTERVAL_MS = 30 * 1_000 // 30 seconds
/**
 * Reports whether persistent (unattended) retry is switched on.
 *
 * @returns The result of `isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)`
 *   when the `UNATTENDED_RETRY` feature gate is on; `false` when it is off.
 */
function isPersistentRetryEnabled(): boolean {
  // The environment variable is only consulted behind the feature gate.
  if (feature('UNATTENDED_RETRY')) {
    return isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
  }
  return false
}
/**
 * Classifies an error as a transient capacity problem.
 *
 * @param error - Any thrown value.
 * @returns `true` when `is529Error` accepts the value, or when it is an
 *   `APIError` whose `status` is 429; `false` otherwise.
 */
function isTransientCapacityError(error: unknown): boolean {
  if (is529Error(error)) {
    return true
  }
  return error instanceof APIError && error.status === 429
}
/**
 * Detects connection failures whose extracted error code is `ECONNRESET` or
 * `EPIPE`.
 *
 * @param error - Any thrown value.
 * @returns `true` only for an `APIConnectionError` whose details (from
 *   `extractConnectionErrorDetails`) carry one of those two codes.
 */
function isStaleConnectionError(error: unknown): boolean {
  if (error instanceof APIConnectionError) {
    const code = extractConnectionErrorDetails(error)?.code
    return code === 'ECONNRESET' || code === 'EPIPE'
  }
  // Anything that is not a connection-level error cannot be a stale connection.
  return false
}
export interface RetryContext {src/services/api/withRetry.ts wraps every Anthropic API call. It makes a critical architectural decision: **not all failures are equal**. A 429 (rate limit) deserves a retry. A 529 (capacity limit) only deserves a retry if the user is actively waiting.
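Source-aware retry: `FOREGROUND_529_RETRY_SOURCES` is a set of query sources where the user is blocking on the result. Background queries (title generation, inline suggestions, non-security classifier scoring) bail immediately on 529 — retrying them would amplify a capacity cascade. Two exceptions are worth noting: the security classifiers (`auto_mode`, plus the feature-gated `bash_classifier`) are in the retry set because they must complete for auto-mode correctness, and queries with no source tag at all are retried as the conservative default.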
Exponential backoff starts at 500ms (`BASE_DELAY_MS`) and doubles each retry. In unattended mode, the retry loop also yields periodic keep-alive messages (`HEARTBEAT_INTERVAL_MS` is 30 seconds), so the host environment does not mark the session idle in the middle of a long wait.
The `PERSISTENT_MAX_BACKOFF_MS = 5min` constant controls unattended mode retries. When running without a user watching (`persistent` mode), backoffs can go all the way to 5 minutes.
Ask anything about Rate Limiting & Retry Logic
Powered by Groq · Enter to send, Shift+Enter for newline