import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import { createPatch } from 'diff'
import { mkdir, writeFile } from 'fs/promises'
import { join } from 'path'
import type { AgentId } from 'src/types/ids.js'
import type { Message } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import { djb2Hash } from 'src/utils/hash.js'
import { logError } from 'src/utils/log.js'
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import type { QuerySource } from '../../constants/querySource.js'
import {
  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  logEvent,
} from '../analytics/index.js'

/**
 * Builds a path inside the Claude temp dir for a cache-break diff file.
 * The file name carries a random 4-character [a-z0-9] suffix so successive
 * diffs are unlikely to overwrite each other.
 */
function getCacheBreakDiffPath(): string {
  const chars = 'abcdefghijklmnopqrstuvwxyz0123456789'
  let suffix = ''
  for (let i = 0; i < 4; i++) {
    suffix += chars[Math.floor(Math.random() * chars.length)]
  }
  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}

/** Per-tracking-key record of the most recently observed prompt state. */
type PreviousState = {
  systemHash: number
  toolsHash: number
  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
   * (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   * when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  systemCharCount: number
  model: string
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   * discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default). Goes into output_config
   * or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   * anthropic_internal changes. */
  extraBodyHash: number
  /** Number of prompt states recorded for this key so far. */
  callCount: number
  /** Changes detected by the latest pre-call recording, or null if none. */
  pendingChanges: PendingChanges | null
  /** Cache-read token count of the previous response; null means no baseline. */
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   * will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  /** Lazily renders the current prompt state as text for diffing. */
  buildDiffableContent: () => string
}

/** What differed between two consecutive prompt states for one tracking key. */
type PendingChanges = {
  systemPromptChanged: boolean
  toolSchemasChanged: boolean
  modelChanged: boolean
  fastModeChanged: boolean
  cacheControlChanged: boolean
  globalCacheStrategyChanged: boolean
  betasChanged: boolean
  autoModeChanged: boolean
  overageChanged: boolean
  cachedMCChanged: boolean
  effortChanged: boolean
  extraBodyChanged: boolean
  addedToolCount: number
  removedToolCount: number
  systemCharDelta: number
  addedTools: string[]
  removedTools: string[]
  changedToolSchemas: string[]
  previousModel: string
  newModel: string
  prevGlobalCacheStrategy: string
  newGlobalCacheStrategy: string
  addedBetas: string[]
  removedBetas: string[]
  prevEffortValue: string
  newEffortValue: string
  /** Lazily renders the PREVIOUS prompt state as text for diffing. */
  buildPrevDiffableContent: () => string
}

/** Last recorded prompt state, keyed by tracking key. */
const previousStateBySource = new Map<string, PreviousState>()

// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
const MAX_TRACKED_SOURCES = 10

/** querySource prefixes for which cache break detection keeps state. */
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]

// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000

// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000

// Models to exclude from cache break detection (e.g., haiku has different caching behavior)
function isExcludedModel(model: string): boolean {
  return model.includes('haiku')
}

/**
 * Returns the tracking key for a querySource, or null if untracked.
 * Compact shares the same server-side cache as repl_main_thread
 * (same cacheSafeParams), so they share tracking state.
 *
 * For subagents with a tracked querySource, uses the unique agentId to
 * isolate tracking state. This prevents false positive cache break
 * notifications when multiple instances of the same agent type run
 * concurrently.
 *
 * Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
 * are short-lived forked agents where cache break detection provides no
 * value — they run 1-3 turns with a fresh agentId each time, so there's
 * nothing meaningful to compare against. Their cache metrics are still
 * logged via tengu_api_success for analytics.
 */
function getTrackingKey(
  querySource: QuerySource,
  agentId?: AgentId,
): string | null {
  if (querySource === 'compact') return 'repl_main_thread'
  for (const prefix of TRACKED_SOURCE_PREFIXES) {
    if (querySource.startsWith(prefix)) return agentId || querySource
  }
  return null
}

/**
 * Returns a copy of `items` where every object that has a `cache_control`
 * property is replaced by a shallow clone without it. Items lacking the
 * property are passed through by reference.
 */
function stripCacheControl(
  items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
  return items.map(item => {
    if (!('cache_control' in item)) return item
    const { cache_control: _, ...rest } = item
    return rest
  })
}

/** Hashes the JSON serialization of `data` to a number. */
function computeHash(data: unknown): number {
  const str = jsonStringify(data)
  if (typeof Bun !== 'undefined') {
    const hash = Bun.hash(str)
    // Bun.hash can return bigint for large inputs; convert to number safely
    return typeof hash === 'bigint' ? Number(hash & 0xffffffffn) : hash
  }
  // Fallback for non-Bun runtimes (e.g. Node.js via npm global install)
  return djb2Hash(str)
}

/** MCP tool names are user-controlled (server config) and may leak filepaths.
 * Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
function sanitizeToolName(name: string): string {
  return name.startsWith('mcp__') ? 'mcp' : name
}

/**
 * Hashes each tool individually, keyed by the name at the same index in
 * `names` (falling back to `__idx_<i>` when no name exists at that index).
 */
function computePerToolHashes(
  strippedTools: ReadonlyArray<unknown>,
  names: string[],
): Record<string, number> {
  const hashes: Record<string, number> = {}
  for (let i = 0; i < strippedTools.length; i++) {
    hashes[names[i] ?? `__idx_${i}`] = computeHash(strippedTools[i])
  }
  return hashes
}

/** Sums the text length of all system blocks. */
function getSystemCharCount(system: TextBlockParam[]): number {
  let total = 0
  for (const block of system) {
    total += block.text.length
  }
  return total
}

/**
 * Renders model, system prompt and tool definitions as plain text suitable
 * for a textual diff. Tool entries are sorted so ordering changes alone do
 * not show up in the diff.
 */
function buildDiffableContent(
  system: TextBlockParam[],
  tools: BetaToolUnion[],
  model: string,
): string {
  const systemText = system.map(b => b.text).join('\n\n')
  const toolDetails = tools
    .map(t => {
      if (!('name' in t)) return 'unknown'
      const desc = 'description' in t ? t.description : ''
      const schema = 'input_schema' in t ? jsonStringify(t.input_schema) : ''
      return `${t.name}\n description: ${desc}\n input_schema: ${schema}`
    })
    .sort()
    .join('\n\n')
  return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
}

/** Extended tracking snapshot — everything that could affect the server-side
 * cache key that we can observe from the client. Beyond the four required
 * core fields, every field is optional so the call site can add
 * incrementally; undefined fields compare as stable. */
export type PromptStateSnapshot = {
  system: TextBlockParam[]
  toolSchemas: BetaToolUnion[]
  querySource: QuerySource
  model: string
  agentId?: AgentId
  fastMode?: boolean
  globalCacheStrategy?: string
  betas?: readonly string[]
  autoModeActive?: boolean
  isUsingOverage?: boolean
  cachedMCEnabled?: boolean
  effortValue?: string | number
  extraBodyParams?: unknown
}

/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
 * Does NOT fire events — just stores pending changes for phase 2 to use.
 *
 * Errors are caught and passed to logError; this function never throws.
 *
 * @param snapshot Observable prompt state for the request about to be sent.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )

    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash only the per-block cache_control markers — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Only compute per-tool hashes when the aggregate changed — common case
    // (tools unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)

    const prev = previousStateBySource.get(key)

    if (!prev) {
      // Evict the entries at the front of the map if it is at capacity.
      // Hits are re-inserted below, so the front holds the least recently
      // used keys.
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }

      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }

    // Refresh recency. A Map iterates in insertion order, so re-inserting
    // moves this key to the end; capacity eviction then drops the least
    // recently used source instead of the earliest-created one, which would
    // otherwise evict a long-lived entry that is still in active use.
    previousStateBySource.delete(key)
    previousStateBySource.set(key, prev)

    prev.callCount++

    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash

    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          // Newly added tools are reported via addedTools, not as changed.
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        // Captured BEFORE prev.buildDiffableContent is overwritten below, so
        // it still renders the previous state.
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      prev.pendingChanges = null
    }

    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}

/** Turns the recorded client-side changes into human-readable reason fragments. */
function describePendingChanges(changes: PendingChanges): string[] {
  const parts: string[] = []
  if (changes.modelChanged) {
    parts.push(
      `model changed (${changes.previousModel} → ${changes.newModel})`,
    )
  }
  if (changes.systemPromptChanged) {
    const charDelta = changes.systemCharDelta
    const charInfo =
      charDelta === 0
        ? ''
        : charDelta > 0
          ? ` (+${charDelta} chars)`
          : ` (${charDelta} chars)`
    parts.push(`system prompt changed${charInfo}`)
  }
  if (changes.toolSchemasChanged) {
    const toolDiff =
      changes.addedToolCount > 0 || changes.removedToolCount > 0
        ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
        : ' (tool prompt/schema changed, same tool set)'
    parts.push(`tools changed${toolDiff}`)
  }
  if (changes.fastModeChanged) {
    parts.push('fast mode toggled')
  }
  if (changes.globalCacheStrategyChanged) {
    parts.push(
      `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} → ${changes.newGlobalCacheStrategy || 'none'})`,
    )
  }
  if (
    changes.cacheControlChanged &&
    !changes.globalCacheStrategyChanged &&
    !changes.systemPromptChanged
  ) {
    // Only report as standalone cause if nothing else explains it —
    // otherwise the scope/TTL flip is a consequence, not the root cause.
    parts.push('cache_control changed (scope or TTL)')
  }
  if (changes.betasChanged) {
    const added = changes.addedBetas.length
      ? `+${changes.addedBetas.join(',')}`
      : ''
    const removed = changes.removedBetas.length
      ? `-${changes.removedBetas.join(',')}`
      : ''
    const diff = [added, removed].filter(Boolean).join(' ')
    parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
  }
  if (changes.autoModeChanged) {
    parts.push('auto mode toggled')
  }
  if (changes.overageChanged) {
    parts.push('overage state changed (TTL latched, no flip)')
  }
  if (changes.cachedMCChanged) {
    parts.push('cached microcompact toggled')
  }
  if (changes.effortChanged) {
    parts.push(
      `effort changed (${changes.prevEffortValue || 'default'} → ${changes.newEffortValue || 'default'})`,
    )
  }
  if (changes.extraBodyChanged) {
    parts.push('extra body params changed')
  }
  return parts
}

/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why.
 *
 * Errors are caught and passed to logError; the returned promise does not
 * reject.
 *
 * @param querySource Source of the query; untracked sources are ignored.
 * @param cacheReadTokens Cache-read token count reported by this response.
 * @param cacheCreationTokens Cache-creation token count reported by this response.
 * @param messages Conversation messages; the latest assistant message's
 *   timestamp is used to estimate the gap since the previous call.
 * @param agentId Optional agent id used to isolate subagent tracking state.
 * @param requestId Optional request id, forwarded to analytics.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const state = previousStateBySource.get(key)
    if (!state) return

    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return

    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens

    // Calculate time since last call for TTL detection by finding the most recent
    // assistant message timestamp in the messages array (before the current response)
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const lastAssistantTime = lastAssistantMessage
      ? new Date(lastAssistantMessage.timestamp).getTime()
      : Number.NaN
    // An unparseable timestamp yields NaN. Treat it as "unknown" instead of
    // letting NaN fail both TTL comparisons and be labelled as a <5min gap.
    const timeSinceLastAssistantMsg = Number.isFinite(lastAssistantTime)
      ? Date.now() - lastAssistantTime
      : null

    // Skip the first call — no previous value to compare against
    if (prevCacheRead === null) {
      // This response just became the baseline, so any deletion announced
      // for it is already reflected. Clear the flag so it cannot mask a real
      // break on a later call.
      state.cacheDeletionsPending = false
      return
    }

    const changes = state.pendingChanges

    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — reset the baseline
    // so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} → ${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }

    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      state.pendingChanges = null
      return
    }

    // Build explanation from pending changes (if any)
    const parts = changes ? describePendingChanges(changes) : []

    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS

    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }

    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }

    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} → ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`

    logForDebugging(summary, { level: 'warn' })

    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}

/**
 * Call when cached microcompact sends cache_edits deletions.
 * The next API response will have lower cache read tokens — that's
 * expected, not a cache break.
 */
export function notifyCacheDeletion(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  const state = key ? previousStateBySource.get(key) : undefined
  if (state) {
    state.cacheDeletionsPending = true
  }
}

/**
 * Call after compaction to reset the cache read baseline.
 * Compaction legitimately reduces message count, so cache read tokens
 * will naturally drop on the next call — that's not a break.
 */
export function notifyCompaction(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  const state = key ? previousStateBySource.get(key) : undefined
  if (state) {
    state.prevCacheReadTokens = null
  }
}

/** Removes the tracking state stored under the given agentId, if any. */
export function cleanupAgentTracking(agentId: AgentId): void {
  previousStateBySource.delete(agentId)
}

/** Clears all tracked prompt state for every source. */
export function resetPromptCacheBreakDetection(): void {
  previousStateBySource.clear()
}

/**
 * Writes a unified diff between the previous and current prompt state to a
 * file in the Claude temp dir.
 *
 * @returns The path of the written diff, or undefined if anything failed.
 */
async function writeCacheBreakDiff(
  prevContent: string,
  newContent: string,
): Promise<string | undefined> {
  try {
    const diffPath = getCacheBreakDiffPath()
    await mkdir(getClaudeTempDir(), { recursive: true })
    const patch = createPatch(
      'prompt-state',
      prevContent,
      newContent,
      'before',
      'after',
    )
    await writeFile(diffPath, patch)
    return diffPath
  } catch {
    // Best-effort debugging aid: a failed write must not affect the caller.
    return undefined
  }
}