mono/packages/kbot/ref/services/api/withRetry.ts
2026-04-01 01:05:48 +02:00

823 lines
28 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { feature } from 'bun:bundle'
import type Anthropic from '@anthropic-ai/sdk'
import {
APIConnectionError,
APIError,
APIUserAbortError,
} from '@anthropic-ai/sdk'
import type { QuerySource } from 'src/constants/querySource.js'
import type { SystemAPIErrorMessage } from 'src/types/message.js'
import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import {
clearApiKeyHelperCache,
clearAwsCredentialsCache,
clearGcpCredentialsCache,
getClaudeAIOAuthTokens,
handleOAuth401Error,
isClaudeAISubscriber,
isEnterpriseSubscriber,
} from '../../utils/auth.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
type CooldownReason,
handleFastModeOverageRejection,
handleFastModeRejectedByAPI,
isFastModeCooldown,
isFastModeEnabled,
triggerFastModeCooldown,
} from '../../utils/fastMode.js'
import { isNonCustomOpusModel } from '../../utils/model/model.js'
import { disableKeepAlive } from '../../utils/proxy.js'
import { sleep } from '../../utils/sleep.js'
import type { ThinkingConfig } from '../../utils/thinking.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import {
checkMockRateLimitError,
isMockRateLimitError,
} from '../rateLimitMocking.js'
import { REPEATED_529_ERROR_MESSAGE } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'
/** First backoff step in milliseconds; getRetryDelay doubles it for each later attempt. */
export const BASE_DELAY_MS = 500

/** Retry budget used when neither the caller nor CLAUDE_CODE_MAX_RETRIES supplies one. */
const DEFAULT_MAX_RETRIES = 10

/** Smallest output-token budget withRetry accepts when shrinking max_tokens after a context overflow. */
const FLOOR_OUTPUT_TOKENS = 3_000

/** Count of tracked 529 responses at which withRetry triggers model fallback or the repeated-529 error. */
const MAX_529_RETRIES = 3

/**
 * Factory passed to sleep() as its `abortError` option; builds the SDK's
 * user-abort error (presumably used when the wait is aborted — confirm in sleep()).
 */
const abortError = () => new APIUserAbortError()
// Allow-list of query sources whose 529 (overloaded) responses are retried;
// consulted by shouldRetry529().
//
// Foreground query sources where the user IS blocking on the result — these
// retry on 529. Everything else (summaries, titles, suggestions, classifiers)
// bails immediately: during a capacity cascade each retry is 3-10× gateway
// amplification, and the user never sees those fail anyway. New sources
// default to no-retry — add here only if the user is waiting on the result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
'repl_main_thread',
'repl_main_thread:outputStyle:custom',
'repl_main_thread:outputStyle:Explanatory',
'repl_main_thread:outputStyle:Learning',
'sdk',
'agent:custom',
'agent:default',
'agent:builtin',
'compact',
'hook_agent',
'hook_prompt',
'verification_agent',
'side_question',
// Security classifiers — must complete for auto-mode correctness.
// yoloClassifier.ts uses 'auto_mode' (not 'yolo_classifier' — that's
// type-only). bash_classifier is ant-only; feature-gate so the string
// tree-shakes out of external builds (excluded-strings.txt).
'auto_mode',
// NOTE(review): keep this exact ternary-spread form; the comment above implies
// the bundler relies on it to drop the string from external builds.
...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])
/**
 * Decides whether a 529 (overloaded) response should be retried for the
 * given query source.
 *
 * @param querySource Tag of the call path that issued the request, if any.
 * @returns true for untagged calls and for sources listed in
 *   FOREGROUND_529_RETRY_SOURCES; false for every other source.
 */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  // Untagged call paths are handled conservatively: keep retrying.
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}
// Settings for CLAUDE_CODE_UNATTENDED_RETRY (unattended sessions, ant-only).
// In that mode 429/529 responses are retried indefinitely with a higher
// backoff ceiling, and long waits are split into periodic keep-alive yields
// so the host environment does not mark the session idle mid-wait.
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
// until there's a dedicated keep-alive channel.

/** Length of one sleep chunk between keep-alive yields: 30 seconds. */
const HEARTBEAT_INTERVAL_MS = 30 * 1000

/** Exponential-backoff ceiling passed to getRetryDelay in persistent mode: 5 minutes. */
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60_000

/** Hard upper bound on any single persistent-mode wait: 6 hours. */
const PERSISTENT_RESET_CAP_MS = 6 * 3_600_000
/**
 * Reports whether persistent (unattended) retry mode is active.
 *
 * @returns the isEnvTruthy() verdict on CLAUDE_CODE_UNATTENDED_RETRY when the
 *   UNATTENDED_RETRY build feature is on; always false when the feature is off.
 */
function isPersistentRetryEnabled(): boolean {
// NOTE(review): the feature() call is kept as the ternary condition —
// presumably so the bundler can fold the branch at build time; confirm before restructuring.
return feature('UNATTENDED_RETRY')
? isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
: false
}
/**
 * Tells whether an error is a capacity signal: either a 529/overloaded
 * error (per is529Error) or an APIError with HTTP status 429.
 *
 * @param error Any thrown value.
 * @returns true for 529/overloaded and 429 errors, false otherwise.
 */
function isTransientCapacityError(error: unknown): boolean {
  if (is529Error(error)) {
    return true
  }
  return error instanceof APIError && error.status === 429
}
/**
 * Detects connection failures whose extracted error code is ECONNRESET or
 * EPIPE. withRetry treats these as a stale keep-alive socket and fetches a
 * fresh client before the next attempt.
 *
 * @param error Any thrown value.
 * @returns true only for an APIConnectionError carrying one of those codes.
 */
function isStaleConnectionError(error: unknown): boolean {
  if (error instanceof APIConnectionError) {
    const code = extractConnectionErrorDetails(error)?.code
    return code === 'ECONNRESET' || code === 'EPIPE'
  }
  return false
}
/**
 * Per-call state that withRetry hands to `operation` on every attempt and
 * attaches to CannotRetryError. withRetry mutates it between attempts.
 */
export interface RetryContext {
// Set by withRetry after a max_tokens context-overflow error so the next
// attempt requests fewer output tokens.
maxTokensOverride?: number
// Initialised from RetryOptions.model.
model: string
// Initialised from RetryOptions.thinkingConfig; its token budget feeds the
// overflow adjustment when thinking is enabled.
thinkingConfig: ThinkingConfig
// Only populated when the fast-mode feature is enabled; withRetry flips it
// to false when it falls back to standard speed.
fastMode?: boolean
}
/** Caller-supplied configuration for a single withRetry invocation. */
interface RetryOptions {
// Retry budget; when omitted, getDefaultMaxRetries() decides.
maxRetries?: number
// Primary model; seeds RetryContext.model and the fallback telemetry.
model: string
// When set, reaching MAX_529_RETRIES tracked 529s throws FallbackTriggeredError
// naming this model.
fallbackModel?: string
thinkingConfig: ThinkingConfig
// Requested fast-mode state; copied into the context only when the
// fast-mode feature is enabled.
fastMode?: boolean
// Checked before each attempt and during waits; an aborted signal makes
// withRetry throw APIUserAbortError.
signal?: AbortSignal
// Tags the call path; decides via shouldRetry529() whether 529s are retried.
querySource?: QuerySource
/**
 * Pre-seed the consecutive 529 counter. Used when this retry loop is a
 * non-streaming fallback after a streaming 529 — the streaming 529 should
 * count toward MAX_529_RETRIES so total 529s-before-fallback is consistent
 * regardless of which request mode hit the overload.
 */
initialConsecutive529Errors?: number
}
/**
 * Thrown by withRetry when an error will not (or can no longer) be retried.
 * Carries the triggering error and the retry context at the time of failure.
 *
 * The message is derived from the original error, and the original stack
 * trace is reused when one exists. Note that `name` is 'RetryError', not the
 * class name.
 */
export class CannotRetryError extends Error {
  public readonly originalError: unknown
  public readonly retryContext: RetryContext

  constructor(originalError: unknown, retryContext: RetryContext) {
    super(errorMessage(originalError))
    this.originalError = originalError
    this.retryContext = retryContext
    this.name = 'RetryError'

    // Keep pointing at where the underlying failure actually happened.
    const inheritedStack =
      originalError instanceof Error ? originalError.stack : undefined
    if (inheritedStack) {
      this.stack = inheritedStack
    }
  }
}
/**
 * Signals that withRetry gave up on the primary model after repeated 529s
 * and that the caller should switch to the fallback model.
 */
export class FallbackTriggeredError extends Error {
  public readonly originalModel: string
  public readonly fallbackModel: string

  constructor(originalModel: string, fallbackModel: string) {
    super(`Model fallback triggered: ${originalModel} -> ${fallbackModel}`)
    this.originalModel = originalModel
    this.fallbackModel = fallbackModel
    this.name = 'FallbackTriggeredError'
  }
}
/**
 * Runs `operation` with retry handling, yielding a SystemAPIErrorMessage
 * before each wait that follows an APIError and finally returning the
 * operation's result.
 *
 * @param getClient Produces a client. Called before the first attempt and
 *   again after a 401, a revoked-OAuth 403, a Bedrock/Vertex auth error, or a
 *   stale connection (ECONNRESET/EPIPE).
 * @param operation The request to run. Receives the client, the 1-based loop
 *   attempt number, and the mutable retry context.
 * @param options Model, retry budget, abort signal and related settings.
 * @returns The value resolved by `operation`.
 * @throws APIUserAbortError when `options.signal` is aborted.
 * @throws FallbackTriggeredError when tracked 529s reach MAX_529_RETRIES and
 *   `options.fallbackModel` is set.
 * @throws CannotRetryError when the error is not retryable or the retry
 *   budget is exhausted.
 * @throws The original APIError when a context overflow leaves fewer than
 *   FLOOR_OUTPUT_TOKENS available.
 */
export async function* withRetry<T>(
getClient: () => Promise<Anthropic>,
operation: (
client: Anthropic,
attempt: number,
context: RetryContext,
) => Promise<T>,
options: RetryOptions,
): AsyncGenerator<SystemAPIErrorMessage, T> {
const maxRetries = getMaxRetries(options)
const retryContext: RetryContext = {
model: options.model,
thinkingConfig: options.thinkingConfig,
// fastMode is only carried in the context when the fast-mode feature is enabled.
...(isFastModeEnabled() && { fastMode: options.fastMode }),
}
let client: Anthropic | null = null
let consecutive529Errors = options.initialConsecutive529Errors ?? 0
let lastError: unknown
// Counts waits taken on the persistent path; drives backoff and reporting there.
let persistentAttempt = 0
// One initial try plus up to maxRetries retries.
for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
if (options.signal?.aborted) {
throw new APIUserAbortError()
}
// Capture whether fast mode is active before this attempt
// (fallback may change the state mid-loop)
const wasFastModeActive = isFastModeEnabled()
? retryContext.fastMode && !isFastModeCooldown()
: false
try {
// Check for mock rate limits (used by /mock-limits command for Ant employees)
if (process.env.USER_TYPE === 'ant') {
const mockError = checkMockRateLimitError(
retryContext.model,
wasFastModeActive,
)
if (mockError) {
throw mockError
}
}
// Get a fresh client instance on first attempt or after authentication errors
// - 401 for first-party API authentication failures
// - 403 "OAuth token has been revoked" (another process refreshed the token)
// - Bedrock-specific auth errors (403 or CredentialsProviderError)
// - Vertex-specific auth errors (credential refresh failures, 401)
// - ECONNRESET/EPIPE: stale keep-alive socket; disable pooling and reconnect
const isStaleConnection = isStaleConnectionError(lastError)
if (
isStaleConnection &&
getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_disable_keepalive_on_econnreset',
false,
)
) {
logForDebugging(
'Stale connection (ECONNRESET/EPIPE) — disabling keep-alive for retry',
)
disableKeepAlive()
}
if (
client === null ||
(lastError instanceof APIError && lastError.status === 401) ||
isOAuthTokenRevokedError(lastError) ||
isBedrockAuthError(lastError) ||
isVertexAuthError(lastError) ||
isStaleConnection
) {
// On 401 "token expired" or 403 "token revoked", force a token refresh
if (
(lastError instanceof APIError && lastError.status === 401) ||
isOAuthTokenRevokedError(lastError)
) {
const failedAccessToken = getClaudeAIOAuthTokens()?.accessToken
if (failedAccessToken) {
await handleOAuth401Error(failedAccessToken)
}
}
client = await getClient()
}
// Success: the generator finishes with the operation's result.
return await operation(client, attempt, retryContext)
} catch (error) {
lastError = error
logForDebugging(
`API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
{ level: 'error' },
)
// Fast mode fallback: on 429/529, either wait and retry (short delays)
// or fall back to standard speed (long delays) to avoid cache thrashing.
// Skip in persistent mode: the short-retry path below loops with fast
// mode still active, so its `continue` never reaches the attempt clamp
// and the for-loop terminates. Persistent sessions want the chunked
// keep-alive path instead of fast-mode cache-preservation anyway.
if (
wasFastModeActive &&
!isPersistentRetryEnabled() &&
error instanceof APIError &&
(error.status === 429 || is529Error(error))
) {
// If the 429 is specifically because extra usage (overage) is not
// available, permanently disable fast mode with a specific message.
const overageReason = error.headers?.get(
'anthropic-ratelimit-unified-overage-disabled-reason',
)
if (overageReason !== null && overageReason !== undefined) {
handleFastModeOverageRejection(overageReason)
retryContext.fastMode = false
// Retry right away with fast mode off; no sleep on this path.
continue
}
const retryAfterMs = getRetryAfterMs(error)
if (retryAfterMs !== null && retryAfterMs < SHORT_RETRY_THRESHOLD_MS) {
// Short retry-after: wait and retry with fast mode still active
// to preserve prompt cache (same model name on retry).
await sleep(retryAfterMs, options.signal, { abortError })
continue
}
// Long or unknown retry-after: enter cooldown (switches to standard
// speed model), with a minimum floor to avoid flip-flopping.
const cooldownMs = Math.max(
retryAfterMs ?? DEFAULT_FAST_MODE_FALLBACK_HOLD_MS,
MIN_COOLDOWN_MS,
)
const cooldownReason: CooldownReason = is529Error(error)
? 'overloaded'
: 'rate_limit'
triggerFastModeCooldown(Date.now() + cooldownMs, cooldownReason)
if (isFastModeEnabled()) {
retryContext.fastMode = false
}
continue
}
// Fast mode fallback: if the API rejects the fast mode parameter
// (e.g., org doesn't have fast mode enabled), permanently disable fast
// mode and retry at standard speed.
if (wasFastModeActive && isFastModeNotEnabledError(error)) {
handleFastModeRejectedByAPI()
retryContext.fastMode = false
continue
}
// Non-foreground sources bail immediately on 529 — no retry amplification
// during capacity cascades. User never sees these fail.
if (is529Error(error) && !shouldRetry529(options.querySource)) {
logEvent('tengu_api_529_background_dropped', {
query_source:
options.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
throw new CannotRetryError(error, retryContext)
}
// Track consecutive 529 errors
if (
is529Error(error) &&
// If FALLBACK_FOR_ALL_PRIMARY_MODELS is not set, fall through only if the primary model is a non-custom Opus model.
// TODO: Revisit if the isNonCustomOpusModel check should still exist, or if isNonCustomOpusModel is a stale artifact of when Claude Code was hardcoded on Opus.
(process.env.FALLBACK_FOR_ALL_PRIMARY_MODELS ||
(!isClaudeAISubscriber() && isNonCustomOpusModel(options.model)))
) {
// NOTE(review): the counter is never reset when a non-529 error occurs in
// between, so "consecutive" is not strictly enforced — confirm this is intended.
consecutive529Errors++
if (consecutive529Errors >= MAX_529_RETRIES) {
// Check if fallback model is specified
if (options.fallbackModel) {
logEvent('tengu_api_opus_fallback_triggered', {
original_model:
options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
fallback_model:
options.fallbackModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
provider: getAPIProviderForStatsig(),
})
// Throw special error to indicate fallback was triggered
throw new FallbackTriggeredError(
options.model,
options.fallbackModel,
)
}
if (
process.env.USER_TYPE === 'external' &&
!process.env.IS_SANDBOX &&
!isPersistentRetryEnabled()
) {
logEvent('tengu_api_custom_529_overloaded_error', {})
throw new CannotRetryError(
new Error(REPEATED_529_ERROR_MESSAGE),
retryContext,
)
}
}
}
// Only retry if the error indicates we should
const persistent =
isPersistentRetryEnabled() && isTransientCapacityError(error)
if (attempt > maxRetries && !persistent) {
throw new CannotRetryError(error, retryContext)
}
// AWS/GCP errors aren't always APIError, but can be retried
const handledCloudAuthError =
handleAwsCredentialError(error) || handleGcpCredentialError(error)
if (
!handledCloudAuthError &&
(!(error instanceof APIError) || !shouldRetry(error))
) {
throw new CannotRetryError(error, retryContext)
}
// Handle max tokens context overflow errors by adjusting max_tokens for the next attempt
// NOTE: With extended-context-window beta, this 400 error should not occur.
// The API now returns 'model_context_window_exceeded' stop_reason instead.
// Keeping for backward compatibility.
if (error instanceof APIError) {
const overflowData = parseMaxTokensContextOverflowError(error)
if (overflowData) {
const { inputTokens, contextLimit } = overflowData
const safetyBuffer = 1000
const availableContext = Math.max(
0,
contextLimit - inputTokens - safetyBuffer,
)
if (availableContext < FLOOR_OUTPUT_TOKENS) {
logError(
new Error(
`availableContext ${availableContext} is less than FLOOR_OUTPUT_TOKENS ${FLOOR_OUTPUT_TOKENS}`,
),
)
// Surfaces the raw API error here rather than wrapping it in CannotRetryError.
throw error
}
// Ensure we have enough tokens for thinking + at least 1 output token
const minRequired =
(retryContext.thinkingConfig.type === 'enabled'
? retryContext.thinkingConfig.budgetTokens
: 0) + 1
const adjustedMaxTokens = Math.max(
FLOOR_OUTPUT_TOKENS,
availableContext,
minRequired,
)
retryContext.maxTokensOverride = adjustedMaxTokens
logEvent('tengu_max_tokens_context_overflow_adjustment', {
inputTokens,
contextLimit,
adjustedMaxTokens,
attempt,
})
// Retry immediately with the reduced max_tokens; no backoff on this path.
continue
}
}
// For other errors, proceed with normal retry logic
// Get retry-after header if available
const retryAfter = getRetryAfter(error)
let delayMs: number
if (persistent && error instanceof APIError && error.status === 429) {
persistentAttempt++
// Window-based limits (e.g. 5hr Max/Pro) include a reset timestamp.
// Wait until reset rather than polling every 5 min uselessly.
const resetDelay = getRateLimitResetDelayMs(error)
delayMs =
resetDelay ??
Math.min(
getRetryDelay(
persistentAttempt,
retryAfter,
PERSISTENT_MAX_BACKOFF_MS,
),
PERSISTENT_RESET_CAP_MS,
)
} else if (persistent) {
persistentAttempt++
// Retry-After is a server directive and bypasses maxDelayMs inside
// getRetryDelay (intentional — honoring it is correct). Cap at the
// 6hr reset-cap here so a pathological header can't wait unbounded.
delayMs = Math.min(
getRetryDelay(
persistentAttempt,
retryAfter,
PERSISTENT_MAX_BACKOFF_MS,
),
PERSISTENT_RESET_CAP_MS,
)
} else {
delayMs = getRetryDelay(attempt, retryAfter)
}
// In persistent mode the for-loop `attempt` is clamped at maxRetries+1;
// use persistentAttempt for telemetry/yields so they show the true count.
const reportedAttempt = persistent ? persistentAttempt : attempt
logEvent('tengu_api_retry', {
attempt: reportedAttempt,
delayMs: delayMs,
// `error` may be a non-APIError cloud-auth failure here, in which case
// `status` is simply undefined.
error: (error as APIError)
.message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
status: (error as APIError).status,
provider: getAPIProviderForStatsig(),
})
if (persistent) {
if (delayMs > 60_000) {
logEvent('tengu_api_persistent_retry_wait', {
status: (error as APIError).status,
delayMs,
attempt: reportedAttempt,
provider: getAPIProviderForStatsig(),
})
}
// Chunk long sleeps so the host sees periodic stdout activity and
// does not mark the session idle. Each yield surfaces as
// {type:'system', subtype:'api_retry'} on stdout via QueryEngine.
let remaining = delayMs
while (remaining > 0) {
if (options.signal?.aborted) throw new APIUserAbortError()
if (error instanceof APIError) {
yield createSystemAPIErrorMessage(
error,
remaining,
reportedAttempt,
maxRetries,
)
}
const chunk = Math.min(remaining, HEARTBEAT_INTERVAL_MS)
await sleep(chunk, options.signal, { abortError })
remaining -= chunk
}
// Clamp so the for-loop never terminates. Backoff uses the separate
// persistentAttempt counter which keeps growing to the 5-min cap.
if (attempt >= maxRetries) attempt = maxRetries
} else {
if (error instanceof APIError) {
yield createSystemAPIErrorMessage(error, delayMs, attempt, maxRetries)
}
await sleep(delayMs, options.signal, { abortError })
}
}
}
// Reached when the loop ends without returning or throwing: the final
// attempt took a `continue` path, or the loop body never ran at all.
throw new CannotRetryError(lastError, retryContext)
}
/**
 * Reads the `retry-after` header from an error object.
 *
 * Two header shapes are supported: a plain object indexed by header name and
 * an object exposing a `get(name)` method. The indexed value wins when it is
 * a non-empty string.
 *
 * @param error The thrown value (must be non-nullish).
 * @returns The header value, or null when it is absent.
 */
function getRetryAfter(error: unknown): string | null {
  const headers = (error as { headers?: unknown }).headers

  const indexedValue = (headers as { 'retry-after'?: string } | undefined)?.[
    'retry-after'
  ]
  if (indexedValue) {
    return indexedValue
  }

  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const lookedUpValue = (headers as Headers | undefined)?.get?.('retry-after')
  return lookedUpValue ?? null
}
/**
 * Computes how long to wait before the next retry, in milliseconds.
 *
 * A `retry-after` header that parses as an integer number of seconds takes
 * precedence and is returned as-is (it is not limited by `maxDelayMs`).
 * Otherwise the delay is exponential backoff starting at BASE_DELAY_MS,
 * capped at `maxDelayMs`, plus up to 25% random jitter.
 *
 * @param attempt 1-based attempt number driving the exponential term.
 * @param retryAfterHeader Raw `retry-after` header value, if any.
 * @param maxDelayMs Ceiling for the backoff term before jitter is added.
 * @returns Delay in milliseconds.
 */
export function getRetryDelay(
  attempt: number,
  retryAfterHeader?: string | null,
  maxDelayMs = 32000,
): number {
  const headerSeconds = retryAfterHeader
    ? parseInt(retryAfterHeader, 10)
    : Number.NaN
  if (!Number.isNaN(headerSeconds)) {
    return headerSeconds * 1000
  }

  const cappedBackoff = Math.min(BASE_DELAY_MS * 2 ** (attempt - 1), maxDelayMs)
  return cappedBackoff + Math.random() * 0.25 * cappedBackoff
}
/**
 * Extracts the token counts from a 400 "input length and `max_tokens` exceed
 * context limit" error message.
 *
 * Expected message shape:
 * "input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000"
 *
 * @param error The API error to inspect.
 * @returns The parsed input tokens, requested max tokens and context limit,
 *   or undefined when the error is not this kind of overflow or cannot be parsed.
 */
export function parseMaxTokensContextOverflowError(error: APIError):
  | {
      inputTokens: number
      maxTokens: number
      contextLimit: number
    }
  | undefined {
  const { status, message } = error
  if (status !== 400 || !message) {
    return undefined
  }
  // Cheap substring check before running the regex.
  if (!message.includes('input length and `max_tokens` exceed context limit')) {
    return undefined
  }

  const overflowPattern =
    /input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)/
  const captured = message.match(overflowPattern)
  if (!captured || captured.length !== 4) {
    return undefined
  }

  const [, rawInput, rawMax, rawLimit] = captured
  if (!rawInput || !rawMax || !rawLimit) {
    logError(
      new Error(
        'Unable to parse max_tokens from max_tokens exceed context limit error message',
      ),
    )
    return undefined
  }

  const inputTokens = parseInt(rawInput, 10)
  const maxTokens = parseInt(rawMax, 10)
  const contextLimit = parseInt(rawLimit, 10)
  const anyUnparsable = [inputTokens, maxTokens, contextLimit].some(value =>
    isNaN(value),
  )
  return anyUnparsable ? undefined : { inputTokens, maxTokens, contextLimit }
}
// TODO: Replace with a response header check once the API adds a dedicated
// header for fast-mode rejection (e.g., x-fast-mode-rejected). String-matching
// the error message is fragile and will break if the API wording changes.
/**
 * Detects the API's rejection of the fast-mode parameter: an APIError with
 * status 400 whose message contains "Fast mode is not enabled".
 *
 * @param error Any thrown value.
 * @returns true only for that specific rejection.
 */
function isFastModeNotEnabledError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 400) {
    return false
  }
  return error.message?.includes('Fast mode is not enabled') === true
}
/**
 * Detects an "overloaded" API error.
 *
 * Matches an APIError with status 529, and also one whose message contains
 * `"type":"overloaded_error"` — the SDK sometimes fails to pass the 529
 * status code through during streaming, so the message is checked as well.
 *
 * @param error Any thrown value.
 * @returns true when the error represents an overloaded response.
 */
export function is529Error(error: unknown): boolean {
  if (error instanceof APIError) {
    if (error.status === 529) {
      return true
    }
    return error.message?.includes('"type":"overloaded_error"') ?? false
  }
  return false
}
/**
 * Detects a revoked OAuth token: an APIError with status 403 whose message
 * contains "OAuth token has been revoked".
 *
 * @param error Any thrown value.
 * @returns true only for that specific 403.
 */
function isOAuthTokenRevokedError(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return false
  }
  if (error.status !== 403) {
    return false
  }
  return error.message?.includes('OAuth token has been revoked') ?? false
}
/**
 * Detects Bedrock authentication failures. Always false unless
 * CLAUDE_CODE_USE_BEDROCK is truthy.
 *
 * Two failure shapes are recognised:
 * - AWS libs reject without an API call if .aws holds a past Expiration value
 *   (a credentials-provider error);
 * - otherwise, API calls that receive expired tokens give a generic 403
 *   "The security token included in the request is invalid".
 *
 * @param error Any thrown value.
 * @returns true when Bedrock is in use and the error matches either shape.
 */
function isBedrockAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    return false
  }
  if (isAwsCredentialsProviderError(error)) {
    return true
  }
  return error instanceof APIError && error.status === 403
}
/**
 * Clears the AWS credentials cache when the error is a Bedrock auth failure.
 *
 * @param error Any thrown value.
 * @returns true if the cache was cleared, false if nothing was done.
 */
function handleAwsCredentialError(error: unknown): boolean {
  const isAuthFailure = isBedrockAuthError(error)
  if (isAuthFailure) {
    clearAwsCredentialsCache()
  }
  return isAuthFailure
}
/**
 * Detects credential failures raised by google-auth-library.
 *
 * google-auth-library throws plain Error (no typed name like AWS's
 * CredentialsProviderError), so this matches common SDK-level
 * credential-failure messages instead.
 *
 * @param error Any thrown value.
 * @returns true when the value is an Error whose message contains one of the
 *   known credential-failure phrases.
 */
function isGoogleAuthLibraryCredentialError(error: unknown): boolean {
  if (error instanceof Error) {
    const { message } = error
    const credentialFailureMarkers = [
      'Could not load the default credentials',
      'Could not refresh access token',
      'invalid_grant',
    ]
    return credentialFailureMarkers.some(marker => message.includes(marker))
  }
  return false
}
/**
 * Detects Vertex authentication failures. Always false unless
 * CLAUDE_CODE_USE_VERTEX is truthy.
 *
 * Two failure shapes are recognised:
 * - SDK-level: google-auth-library fails in prepareOptions() before the HTTP call;
 * - server-side: Vertex returns 401 for expired/invalid tokens.
 *
 * @param error Any thrown value.
 * @returns true when Vertex is in use and the error matches either shape.
 */
function isVertexAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    return false
  }
  if (isGoogleAuthLibraryCredentialError(error)) {
    return true
  }
  return error instanceof APIError && error.status === 401
}
/**
 * Clears the GCP credentials cache when the error is a Vertex auth failure.
 *
 * @param error Any thrown value.
 * @returns true if the cache was cleared, false if nothing was done.
 */
function handleGcpCredentialError(error: unknown): boolean {
  const isAuthFailure = isVertexAuthError(error)
  if (isAuthFailure) {
    clearGcpCredentialsCache()
  }
  return isAuthFailure
}
/**
 * Decides whether an API error is worth retrying.
 *
 * Checks run in a fixed order; the first one that matches wins:
 * mock errors, persistent-mode capacity errors, CCR auth blips, overloaded
 * messages, recoverable context overflows, the `x-should-retry` header,
 * connection errors, and finally the HTTP status code.
 *
 * Side effect: a 401 that reaches the status check clears the API key helper
 * cache before reporting "retry".
 *
 * @param error The API error raised by the failed attempt.
 * @returns true when the caller should retry, false otherwise.
 */
function shouldRetry(error: APIError): boolean {
  // Never retry mock errors - they're from /mock-limits command for testing
  if (isMockRateLimitError(error)) {
    return false
  }

  // Persistent mode: 429/529 always retryable, bypass subscriber gates and
  // x-should-retry header.
  if (isPersistentRetryEnabled() && isTransientCapacityError(error)) {
    return true
  }

  const { status } = error

  // CCR mode: auth is via infrastructure-provided JWTs, so a 401/403 is a
  // transient blip (auth service flap, network hiccup) rather than bad
  // credentials. Bypass x-should-retry:false — the server assumes we'd retry
  // the same bad key, but our key is fine.
  const isAuthStatus = status === 401 || status === 403
  if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) && isAuthStatus) {
    return true
  }

  // The SDK sometimes fails to properly pass the 529 status code during
  // streaming, so overloaded errors are recognised from the message content.
  if (error.message?.includes('"type":"overloaded_error"')) {
    return true
  }

  // Max tokens context overflow errors can be handled by the retry loop.
  if (parseMaxTokensContextOverflowError(error)) {
    return true
  }

  // Max and Pro subscribers are rate limited for hours, so retrying is
  // pointless for them; Enterprise subscribers typically use PAYG instead.
  const rateLimitRetryAllowed = (): boolean =>
    !isClaudeAISubscriber() || isEnterpriseSubscriber()

  // Note this is not a standard header. If the server explicitly says whether
  // or not to retry, obey (subject to the subscriber gate above).
  const serverHint = error.headers?.get('x-should-retry')
  if (serverHint === 'true' && rateLimitRetryAllowed()) {
    return true
  }
  if (serverHint === 'false') {
    // Ants can ignore x-should-retry: false for 5xx server errors only.
    // For other status codes (401, 403, 400, 429, etc.), respect the header.
    const antMayIgnoreHint =
      process.env.USER_TYPE === 'ant' && status !== undefined && status >= 500
    if (!antMayIgnoreHint) {
      return false
    }
  }

  if (error instanceof APIConnectionError) {
    return true
  }
  if (!status) {
    return false
  }

  switch (status) {
    case 408: // request timeout
    case 409: // lock timeout
      return true
    case 429: // rate limit
      return rateLimitRetryAllowed()
    case 401:
      // Clear API key cache on 401 and allow retry. OAuth token handling is
      // done in the main retry loop via handleOAuth401Error.
      clearApiKeyHelperCache()
      return true
  }

  // Retry on 403 "token revoked" (same refresh logic as 401).
  if (isOAuthTokenRevokedError(error)) {
    return true
  }

  // Retry internal errors.
  return status >= 500
}
/**
 * Resolves the default retry budget.
 *
 * Reads CLAUDE_CODE_MAX_RETRIES and uses it when it parses (base 10) to a
 * non-negative integer; otherwise falls back to DEFAULT_MAX_RETRIES.
 *
 * @returns The number of retries allowed after the initial attempt.
 */
export function getDefaultMaxRetries(): number {
  const rawOverride = process.env.CLAUDE_CODE_MAX_RETRIES
  if (rawOverride) {
    const parsedOverride = parseInt(rawOverride, 10)
    // A NaN or negative budget would make the retry loop's
    // `attempt <= maxRetries + 1` condition false from the start, so the
    // request would fail without ever being attempted. Ignore such values.
    if (!Number.isNaN(parsedOverride) && parsedOverride >= 0) {
      return parsedOverride
    }
  }
  return DEFAULT_MAX_RETRIES
}
/**
 * Picks the retry budget for one withRetry call.
 *
 * @param options The call's retry options.
 * @returns `options.maxRetries` when provided (including 0), otherwise the
 *   value from getDefaultMaxRetries().
 */
function getMaxRetries(options: RetryOptions): number {
  const { maxRetries } = options
  if (maxRetries === undefined || maxRetries === null) {
    return getDefaultMaxRetries()
  }
  return maxRetries
}
/** Retry-after values below this (20 seconds) are waited out with fast mode still active. */
const SHORT_RETRY_THRESHOLD_MS = 20_000

/** Minimum fast-mode cooldown (10 minutes), so the mode does not flip-flop. */
const MIN_COOLDOWN_MS = 10 * 60_000

/** Fast-mode cooldown (30 minutes) used when the response carries no usable retry-after. */
const DEFAULT_FAST_MODE_FALLBACK_HOLD_MS = 30 * 60_000
/**
 * Converts the error's `retry-after` header into milliseconds.
 *
 * @param error The API error whose headers are inspected.
 * @returns The header's integer seconds times 1000, or null when the header
 *   is missing, empty, or does not start with an integer.
 */
function getRetryAfterMs(error: APIError): number | null {
  const headerValue = getRetryAfter(error)
  if (!headerValue) {
    return null
  }
  const seconds = parseInt(headerValue, 10)
  return isNaN(seconds) ? null : seconds * 1000
}
/**
 * Computes how long to wait until the rate-limit window resets.
 *
 * Reads the `anthropic-ratelimit-unified-reset` header, interprets it as a
 * Unix timestamp in seconds, and measures the distance from now.
 *
 * @param error The API error whose headers are inspected.
 * @returns Milliseconds until the reset, capped at PERSISTENT_RESET_CAP_MS;
 *   null when the header is missing, not a finite number, or not in the future.
 */
function getRateLimitResetDelayMs(error: APIError): number | null {
  const rawReset = error.headers?.get?.('anthropic-ratelimit-unified-reset')
  if (!rawReset) {
    return null
  }

  const resetEpochSeconds = Number(rawReset)
  if (!Number.isFinite(resetEpochSeconds)) {
    return null
  }

  const msUntilReset = resetEpochSeconds * 1000 - Date.now()
  return msUntilReset > 0
    ? Math.min(msUntilReset, PERSISTENT_RESET_CAP_MS)
    : null
}