Chapter 10 System Design

Rate Limiting & Retry Logic

Resilient API calls with exponential backoff and source-aware retry

src/services/api/withRetry.ts · Lines 50–120
50
const abortError = () => new APIUserAbortError()
51
 
52
const DEFAULT_MAX_RETRIES = 10
53
const FLOOR_OUTPUT_TOKENS = 3000
54
const MAX_529_RETRIES = 3
55
export const BASE_DELAY_MS = 500
56
 
57
// Foreground query sources where the user IS blocking on the result — these
58
// retry on 529. Everything else (summaries, titles, suggestions, classifiers)
59
// bails immediately: during a capacity cascade each retry is 3-10× gateway
60
// amplification, and the user never sees those fail anyway. New sources
61
// default to no-retry — add here only if the user is waiting on the result.
62
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
63
  'repl_main_thread',
64
  'repl_main_thread:outputStyle:custom',
65
  'repl_main_thread:outputStyle:Explanatory',
66
  'repl_main_thread:outputStyle:Learning',
67
  'sdk',
68
  'agent:custom',
69
  'agent:default',
70
  'agent:builtin',
71
  'compact',
72
  'hook_agent',
73
  'hook_prompt',
74
  'verification_agent',
75
  'side_question',
76
  // Security classifiers — must complete for auto-mode correctness.
77
  // yoloClassifier.ts uses 'auto_mode' (not 'yolo_classifier' — that's
78
  // type-only). bash_classifier is ant-only; feature-gate so the string
79
  // tree-shakes out of external builds (excluded-strings.txt).
80
  'auto_mode',
81
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
82
])
83
 
84
function shouldRetry529(querySource: QuerySource | undefined): boolean {
85
  // undefined → retry (conservative for untagged call paths)
86
  return (
87
    querySource === undefined || FOREGROUND_529_RETRY_SOURCES.has(querySource)
88
  )
89
}
90
 
91
// CLAUDE_CODE_UNATTENDED_RETRY: for unattended sessions (ant-only). Retries 429/529
92
// indefinitely with higher backoff and periodic keep-alive yields so the host
93
// environment does not mark the session idle mid-wait.
94
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
95
// until there's a dedicated keep-alive channel.
96
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000
97
const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60 * 1000
98
const HEARTBEAT_INTERVAL_MS = 30_000
99
 
100
function isPersistentRetryEnabled(): boolean {
101
  return feature('UNATTENDED_RETRY')
102
    ? isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
103
    : false
104
}
105
 
106
function isTransientCapacityError(error: unknown): boolean {
107
  return (
108
    is529Error(error) || (error instanceof APIError && error.status === 429)
109
  )
110
}
111
 
112
function isStaleConnectionError(error: unknown): boolean {
113
  if (!(error instanceof APIConnectionError)) {
114
    return false
115
  }
116
  const details = extractConnectionErrorDetails(error)
117
  return details?.code === 'ECONNRESET' || details?.code === 'EPIPE'
118
}
119
 
120
export interface RetryContext {
Annotations (click the dots)

src/services/api/withRetry.ts wraps every Anthropic API call. It makes a critical architectural decision: **not all failures are equal**. A 429 (rate limit) deserves a retry. A 529 (capacity limit) only deserves a retry if the user is actively waiting.

🔑Key Insight

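Source-aware retry: `FOREGROUND_529_RETRY_SOURCES` is a set of query sources where the user is blocking on the result. Background queries (summaries, title generation, inline suggestions, most classifiers) bail immediately on 529 — during a capacity cascade each retry means 3–10× gateway amplification, and the user never sees those requests fail anyway. Two exceptions are visible in the listing: the security classifiers (`auto_mode`, plus `bash_classifier` when its feature flag is on) are in the set because they must complete for auto-mode correctness, and a call with no `querySource` at all is retried as the conservative default.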

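Exponential backoff starts at 500 ms (`BASE_DELAY_MS`) and doubles on each retry. The 30-second heartbeat (`HEARTBEAT_INTERVAL_MS`) is a separate mechanism that belongs to unattended retry mode: while waiting out a long backoff, periodic keep-alive yields are emitted so the host environment does not mark the session idle mid-wait. It is not an HTTP-level keep-alive, and the source comments describe it as a stopgap until a dedicated keep-alive channel exists.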

💡Tip

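The `PERSISTENT_MAX_BACKOFF_MS` constant (`5 * 60 * 1000` ms, i.e. 5 minutes) controls retries in unattended ("persistent") mode. That mode is switched on by the `CLAUDE_CODE_UNATTENDED_RETRY` environment variable and is only available when the `UNATTENDED_RETRY` feature flag is enabled. When running without a user watching, 429 and 529 errors are retried indefinitely and backoffs can go all the way to 5 minutes.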

KEY TAKEAWAYS
  • 429 (rate limit) always retries; 529 (capacity) only retries for foreground queries
  • Background queries (summaries, suggestions, titles, non-security classifiers) bail immediately on 529; security classifiers and untagged calls still retry
  • Exponential backoff starts at BASE_DELAY_MS=500; retry limits are DEFAULT_MAX_RETRIES=10 and MAX_529_RETRIES=3
  • Keep-alive heartbeats (30s) stop the host environment from marking an unattended session idle during long retry waits
  • A max_tokens floor (FLOOR_OUTPUT_TOKENS=3000) prevents the model from generating too-short responses
AI Assistant

Ask anything about Rate Limiting & Retry Logic

Powered by Groq · Enter to send, Shift+Enter for newline