728 lines
26 KiB
TypeScript
728 lines
26 KiB
TypeScript
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
|
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
|
|
import { createPatch } from 'diff'
|
|
import { mkdir, writeFile } from 'fs/promises'
|
|
import { join } from 'path'
|
|
import type { AgentId } from 'src/types/ids.js'
|
|
import type { Message } from 'src/types/message.js'
|
|
import { logForDebugging } from 'src/utils/debug.js'
|
|
import { djb2Hash } from 'src/utils/hash.js'
|
|
import { logError } from 'src/utils/log.js'
|
|
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
|
|
import { jsonStringify } from 'src/utils/slowOperations.js'
|
|
import type { QuerySource } from '../../constants/querySource.js'
|
|
import {
|
|
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
logEvent,
|
|
} from '../analytics/index.js'
|
|
|
|
/**
 * Builds the path of a new cache-break diff file inside the Claude temp
 * directory. The file name is `cache-break-<suffix>.diff`, where the suffix is
 * four random characters drawn from [a-z0-9].
 */
function getCacheBreakDiffPath(): string {
  const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
  const suffix = Array.from(
    { length: 4 },
    () => alphabet[Math.floor(Math.random() * alphabet.length)],
  ).join('')
  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}
|
|
|
|
/**
 * Per-source tracking record. recordPromptState fills it in before each API
 * call and diffs the next call against it; checkResponseForCacheBreak uses the
 * cache-read baseline and the pending change summary stored here.
 */
type PreviousState = {
  /** Hash of the system blocks after cache_control has been stripped. */
  systemHash: number
  /** Hash of the tool schemas after cache_control has been stripped. */
  toolsHash: number
  /** Hash of the cache_control values of the system blocks. Catches scope/TTL
   * flips (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  /** Tool names in request order ('unknown' for tools without a name). */
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   * when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  /** Total character count of the system block texts. */
  systemCharCount: number
  /** Model name used for the most recent call. */
  model: string
  /** Whether fast mode was on for the most recent call. */
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   * discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default), stored as a string.
   * Goes into output_config or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   * anthropic_internal changes. */
  extraBodyHash: number
  /** Number of calls recorded for this source (starts at 1). */
  callCount: number
  /** Differences detected by the latest recordPromptState call, or null when
   * nothing changed. */
  pendingChanges: PendingChanges | null
  /** Cache read tokens of the previous response; null means "no baseline yet". */
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   * will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  /** Lazily renders the recorded prompt state as text for diffing. */
  buildDiffableContent: () => string
}
|
|
|
|
/**
 * Summary of what differed between two consecutive recordPromptState calls
 * for the same source. Produced in phase 1 and consumed by
 * checkResponseForCacheBreak to explain a detected cache break.
 */
type PendingChanges = {
  // --- What changed (one flag per tracked dimension) ---
  /** Stripped system hash differs. */
  systemPromptChanged: boolean
  /** Stripped tools hash differs. */
  toolSchemasChanged: boolean
  /** Model name differs. */
  modelChanged: boolean
  /** Fast mode flag differs. */
  fastModeChanged: boolean
  /** Hash of the system blocks' cache_control values differs. */
  cacheControlChanged: boolean
  /** Global cache strategy string differs. */
  globalCacheStrategyChanged: boolean
  /** Sorted beta header list differs. */
  betasChanged: boolean
  /** autoModeActive flag differs. */
  autoModeChanged: boolean
  /** isUsingOverage flag differs. */
  overageChanged: boolean
  /** cachedMCEnabled flag differs. */
  cachedMCChanged: boolean
  /** Stringified effort value differs. */
  effortChanged: boolean
  /** Extra body params hash differs. */
  extraBodyChanged: boolean

  // --- Details used to build the explanation and the analytics event ---
  /** Number of tool names present now but not before. */
  addedToolCount: number
  /** Number of tool names present before but not now. */
  removedToolCount: number
  /** New system char count minus the previous one. */
  systemCharDelta: number
  /** Tool names present now but not before. */
  addedTools: string[]
  /** Tool names present before but not now. */
  removedTools: string[]
  /** Names of tools kept across both calls whose per-tool hash changed. */
  changedToolSchemas: string[]
  /** Model recorded on the previous call. */
  previousModel: string
  /** Model of the current call. */
  newModel: string
  /** Global cache strategy recorded on the previous call. */
  prevGlobalCacheStrategy: string
  /** Global cache strategy of the current call. */
  newGlobalCacheStrategy: string
  /** Beta headers present now but not before. */
  addedBetas: string[]
  /** Beta headers present before but not now. */
  removedBetas: string[]
  /** Effort value recorded on the previous call ('' when unset). */
  prevEffortValue: string
  /** Effort value of the current call ('' when unset). */
  newEffortValue: string
  /** Lazily renders the PREVIOUS prompt state as text, for the diff file. */
  buildPrevDiffableContent: () => string
}
|
|
|
|
/** Most recent recorded state per tracking key (see getTrackingKey). */
const previousStateBySource = new Map<string, PreviousState>()

// Upper bound on the number of tracked sources, to keep memory bounded.
// Each entry can hold on to a ~300KB+ diffable rendering of the system prompt
// and tool schemas. Without a cap, spawning many subagents (each with a unique
// agentId key) would make the map grow indefinitely.
const MAX_TRACKED_SOURCES = 10

/** querySource prefixes that take part in cache break detection. */
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]
|
|
|
|
// Smallest absolute drop in cache read tokens that counts as a cache break.
// Smaller drops (e.g., a few thousand tokens) can happen due to normal
// variation and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000

// Anthropic's server-side prompt cache TTL thresholds to test against.
// A cache break after a gap longer than one of these is likely due to TTL
// expiration rather than a client-side change.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1_000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1_000
|
|
|
|
/**
 * Reports whether `model` is excluded from cache break detection. Any model
 * name containing the (case-sensitive) substring "haiku" is excluded, since
 * haiku has different caching behavior.
 */
function isExcludedModel(model: string): boolean {
  return /haiku/.test(model)
}
|
|
|
|
/**
 * Maps a querySource (plus optional agentId) to the key under which its
 * state is tracked. Returns null for sources that are not tracked.
 *
 * - 'compact' shares the same server-side cache as repl_main_thread (same
 *   cacheSafeParams), so both use the 'repl_main_thread' key.
 * - A source starting with one of TRACKED_SOURCE_PREFIXES is keyed by the
 *   agentId when one is given, otherwise by the querySource itself. Keying
 *   subagents by their unique agentId isolates their state and prevents false
 *   positive cache break notifications when several instances of the same
 *   agent type run concurrently.
 * - Everything else (speculation, session_memory, prompt_suggestion, etc.) is
 *   a short-lived forked agent that runs 1-3 turns with a fresh agentId each
 *   time, so there is nothing meaningful to compare against. Their cache
 *   metrics are still logged via tengu_api_success for analytics.
 */
function getTrackingKey(
  querySource: QuerySource,
  agentId?: AgentId,
): string | null {
  if (querySource === 'compact') return 'repl_main_thread'

  const isTracked = TRACKED_SOURCE_PREFIXES.some(prefix =>
    querySource.startsWith(prefix),
  )
  if (!isTracked) return null

  return agentId || querySource
}
|
|
|
|
/**
 * Returns a new array in which every item that has a `cache_control` key is
 * replaced by a shallow copy without that key. Items without the key are
 * passed through as the same object reference; inputs are never mutated.
 */
function stripCacheControl(
  items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
  return items.map(item => {
    if ('cache_control' in item) {
      const { cache_control: _dropped, ...remaining } = item
      return remaining
    }
    return item
  })
}
|
|
|
|
/**
 * Hashes the JSON serialization of `data` to a number.
 *
 * Uses Bun.hash when running under Bun; otherwise falls back to djb2Hash
 * (non-Bun runtimes, e.g. Node.js via npm global install).
 */
function computeHash(data: unknown): number {
  const serialized = jsonStringify(data)

  if (typeof Bun === 'undefined') {
    return djb2Hash(serialized)
  }

  const digest = Bun.hash(serialized)
  // Bun.hash may hand back a bigint; keep only the low 32 bits so the value
  // converts to a number safely.
  return typeof digest === 'bigint' ? Number(digest & 0xffffffffn) : digest
}
|
|
|
|
/**
 * Makes a tool name safe for analytics. MCP tool names (prefix `mcp__`) are
 * user-controlled via server config and may leak filepaths, so they collapse
 * to the literal 'mcp'. Built-in names are a fixed vocabulary and are returned
 * unchanged.
 */
function sanitizeToolName(name: string): string {
  if (name.startsWith('mcp__')) return 'mcp'
  return name
}
|
|
|
|
/**
 * Hashes each tool individually and returns a name → hash record.
 *
 * `names[i]` is the key for `strippedTools[i]`; when no name exists at that
 * index the key falls back to `__idx_<i>`. If the same key occurs more than
 * once, the later tool's hash overwrites the earlier one.
 */
function computePerToolHashes(
  strippedTools: ReadonlyArray<unknown>,
  names: string[],
): Record<string, number> {
  const hashes: Record<string, number> = {}
  for (const [index, tool] of strippedTools.entries()) {
    const key = names[index] ?? `__idx_${index}`
    hashes[key] = computeHash(tool)
  }
  return hashes
}
|
|
|
|
/** Sums the `text` lengths of all system blocks (0 for an empty list). */
function getSystemCharCount(system: TextBlockParam[]): number {
  return system.reduce((total, block) => total + block.text.length, 0)
}
|
|
|
|
/**
 * Renders the prompt state as plain text suitable for a textual diff.
 *
 * Layout: a `Model:` line, the system block texts joined by blank lines, then
 * one entry per tool (name, description, JSON input schema). Tool entries are
 * sorted as strings, so their order does not depend on the order of `tools`.
 * Tools without a `name` are rendered as 'unknown'.
 */
function buildDiffableContent(
  system: TextBlockParam[],
  tools: BetaToolUnion[],
  model: string,
): string {
  const describeTool = (tool: BetaToolUnion): string => {
    if (!('name' in tool)) return 'unknown'
    const description = 'description' in tool ? tool.description : ''
    const schema =
      'input_schema' in tool ? jsonStringify(tool.input_schema) : ''
    return `${tool.name}\n description: ${description}\n input_schema: ${schema}`
  }

  const systemText = system.map(block => block.text).join('\n\n')
  const toolDetails = tools
    .map(tool => describeTool(tool))
    .sort()
    .join('\n\n')

  return (
    `Model: ${model}\n\n` +
    `=== System Prompt ===\n\n${systemText}\n\n` +
    `=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
  )
}
|
|
|
|
/**
 * Extended tracking snapshot — everything that could affect the server-side
 * cache key that we can observe from the client.
 *
 * `system`, `toolSchemas`, `querySource` and `model` are required. Every other
 * field is optional so call sites can add them incrementally; a field that is
 * left undefined on every call compares as stable.
 */
export type PromptStateSnapshot = {
  /** System prompt blocks as sent in the request. */
  system: TextBlockParam[]
  /** Tool definitions as sent in the request. */
  toolSchemas: BetaToolUnion[]
  /** Origin of the query; decides whether and under which key it is tracked. */
  querySource: QuerySource
  /** Model name for the request. */
  model: string
  /** Agent id; when present it is used as the tracking key for tracked sources. */
  agentId?: AgentId
  /** Fast mode flag (treated as false when undefined). */
  fastMode?: boolean
  /** Global cache strategy (treated as '' when undefined). */
  globalCacheStrategy?: string
  /** Beta headers (treated as empty when undefined; order does not matter). */
  betas?: readonly string[]
  /** Auto mode flag (treated as false when undefined). */
  autoModeActive?: boolean
  /** Overage flag (treated as false when undefined). */
  isUsingOverage?: boolean
  /** Cached microcompact flag (treated as false when undefined). */
  cachedMCEnabled?: boolean
  /** Effort value; compared by its string form ('' when undefined). */
  effortValue?: string | number
  /** Extra body params; compared by hash (0 when undefined). */
  extraBodyParams?: unknown
}
|
|
|
|
/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what
 * changed since the previous call from the same source.
 *
 * Does NOT fire events — it only stores the detected differences on the
 * source's state as `pendingChanges` (or null when nothing changed) for
 * phase 2 (checkResponseForCacheBreak) to use.
 *
 * Untracked sources (getTrackingKey returns null) are ignored. The first call
 * for a key only stores a baseline. Any exception is logged via logError and
 * never propagates to the caller.
 *
 * @param snapshot Observable request state for the upcoming API call.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )

    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash only the cache_control values of the system blocks — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Per-tool hashes are computed lazily: after the first call they are only
    // needed when the aggregate tools hash changed, so the common case (tools
    // unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    // Rendering the diffable text is deferred until a diff is actually written.
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)

    const prev = previousStateBySource.get(key)

    if (!prev) {
      // Evict from the front of the map (least recently recorded source, see
      // the recency refresh below) until there is room for the new entry.
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest === undefined) break
        previousStateBySource.delete(oldest)
      }

      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }

    // Refresh recency: a Map iterates in insertion order, so re-inserting
    // moves this key to the back. Without this, eviction is FIFO by first
    // insertion and a long-lived source (e.g. the main thread) is the first
    // one dropped once enough subagents have been tracked.
    previousStateBySource.delete(key)
    previousStateBySource.set(key, prev)

    prev.callCount++

    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash

    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        // Name the tools that exist on both sides but whose schema hash moved.
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        // Captured before it is overwritten below, so the diff can show the
        // state as it was on the previous call.
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      prev.pendingChanges = null
    }

    // Roll the stored state forward to the current call.
    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}
|
|
|
|
/**
 * Turns the change flags recorded in phase 1 into human-readable reason
 * fragments, in a fixed order. Returns an empty array when no flag is set.
 */
function describePendingChanges(changes: PendingChanges): string[] {
  const parts: string[] = []

  if (changes.modelChanged) {
    parts.push(
      `model changed (${changes.previousModel} → ${changes.newModel})`,
    )
  }
  if (changes.systemPromptChanged) {
    const charDelta = changes.systemCharDelta
    const charInfo =
      charDelta === 0
        ? ''
        : charDelta > 0
          ? ` (+${charDelta} chars)`
          : ` (${charDelta} chars)`
    parts.push(`system prompt changed${charInfo}`)
  }
  if (changes.toolSchemasChanged) {
    const toolDiff =
      changes.addedToolCount > 0 || changes.removedToolCount > 0
        ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
        : ' (tool prompt/schema changed, same tool set)'
    parts.push(`tools changed${toolDiff}`)
  }
  if (changes.fastModeChanged) {
    parts.push('fast mode toggled')
  }
  if (changes.globalCacheStrategyChanged) {
    parts.push(
      `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} → ${changes.newGlobalCacheStrategy || 'none'})`,
    )
  }
  if (
    changes.cacheControlChanged &&
    !changes.globalCacheStrategyChanged &&
    !changes.systemPromptChanged
  ) {
    // Only report as standalone cause if nothing else explains it —
    // otherwise the scope/TTL flip is a consequence, not the root cause.
    parts.push('cache_control changed (scope or TTL)')
  }
  if (changes.betasChanged) {
    const added = changes.addedBetas.length
      ? `+${changes.addedBetas.join(',')}`
      : ''
    const removed = changes.removedBetas.length
      ? `-${changes.removedBetas.join(',')}`
      : ''
    const diff = [added, removed].filter(Boolean).join(' ')
    parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
  }
  if (changes.autoModeChanged) {
    parts.push('auto mode toggled')
  }
  if (changes.overageChanged) {
    parts.push('overage state changed (TTL latched, no flip)')
  }
  if (changes.cachedMCChanged) {
    parts.push('cached microcompact toggled')
  }
  if (changes.effortChanged) {
    parts.push(
      `effort changed (${changes.prevEffortValue || 'default'} → ${changes.newEffortValue || 'default'})`,
    )
  }
  if (changes.extraBodyChanged) {
    parts.push('extra body params changed')
  }

  return parts
}

/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why, emit a `tengu_prompt_cache_break` event, write
 * a diff file when pending changes exist, and log a summary.
 *
 * A break is reported only when cache read tokens dropped by more than 5%
 * relative to the previous response AND by at least MIN_CACHE_MISS_TOKENS.
 * Nothing is reported for untracked sources, excluded models, the first
 * response after a baseline reset, or a response that follows an announced
 * cache deletion. Any exception is logged via logError and never propagates.
 *
 * @param querySource Origin of the query (determines the tracking key).
 * @param cacheReadTokens Cache read tokens reported by this response.
 * @param cacheCreationTokens Cache creation tokens reported by this response.
 * @param messages Conversation messages; the last assistant message's
 *   timestamp is used to estimate the gap since the previous call.
 * @param agentId Optional agent id (tracking key for subagents).
 * @param requestId Optional request id, forwarded to the analytics event.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const state = previousStateBySource.get(key)
    if (!state) return

    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return

    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens

    // No previous value to compare against (first call, or the baseline was
    // reset by notifyCompaction). This response becomes the new baseline, so
    // an announced cache deletion is already reflected in it — consume the
    // flag here, otherwise it would wrongly suppress detection on the NEXT
    // response.
    if (prevCacheRead === null) {
      state.cacheDeletionsPending = false
      return
    }

    const changes = state.pendingChanges

    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — the baseline was
    // already reset above, so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} → ${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }

    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      state.pendingChanges = null
      return
    }

    // Time since the last call, for TTL detection: based on the most recent
    // assistant message in the messages array (before the current response).
    // An unparseable timestamp yields NaN, which is treated as "unknown"
    // instead of being logged and mislabelled as a short gap.
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const elapsedMs = lastAssistantMessage
      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
      : null
    const timeSinceLastAssistantMsg =
      elapsedMs !== null && Number.isFinite(elapsedMs) ? elapsedMs : null

    // Build explanation from pending changes (if any)
    const parts = changes ? describePendingChanges(changes) : []

    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS

    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }

    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }

    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} → ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`

    logForDebugging(summary, { level: 'warn' })

    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}
|
|
|
|
/**
 * Call when cached microcompact sends cache_edits deletions.
 *
 * Marks the source's state so that the drop in cache read tokens on the next
 * API response is treated as expected rather than as a cache break. Does
 * nothing when the source is untracked or has no recorded state yet.
 */
export function notifyCacheDeletion(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  if (!key) return

  const state = previousStateBySource.get(key)
  if (!state) return

  state.cacheDeletionsPending = true
}
|
|
|
|
/**
 * Call after compaction to reset the cache read baseline.
 *
 * Compaction legitimately reduces message count, so cache read tokens will
 * naturally drop on the next call — that's not a break. Clearing the baseline
 * makes the next response start a fresh comparison. Does nothing when the
 * source is untracked or has no recorded state yet.
 */
export function notifyCompaction(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  if (!key) return

  const state = previousStateBySource.get(key)
  if (!state) return

  state.prevCacheReadTokens = null
}
|
|
|
|
/**
 * Drops the tracking state stored under `agentId`, if any. Tracked subagents
 * are keyed by their agentId (see getTrackingKey), so this releases the state
 * held for that agent.
 */
export function cleanupAgentTracking(agentId: AgentId): void {
  const trackingKey: string = agentId
  previousStateBySource.delete(trackingKey)
}
|
|
|
|
/** Forgets the tracking state of every source. */
export function resetPromptCacheBreakDetection(): void {
  previousStateBySource.clear()
}
|
|
|
|
/**
 * Writes a unified diff between the previous and the current prompt state to
 * a new file in the Claude temp directory (created if missing).
 *
 * This is best-effort debugging output: a failure never throws. It is
 * reported through logForDebugging so a missing diff can be explained, and
 * the function resolves to undefined.
 *
 * @param prevContent Diffable rendering of the previous prompt state.
 * @param newContent Diffable rendering of the current prompt state.
 * @returns The path of the written diff file, or undefined on failure.
 */
async function writeCacheBreakDiff(
  prevContent: string,
  newContent: string,
): Promise<string | undefined> {
  try {
    const diffPath = getCacheBreakDiffPath()
    await mkdir(getClaudeTempDir(), { recursive: true })
    const patch = createPatch(
      'prompt-state',
      prevContent,
      newContent,
      'before',
      'after',
    )
    await writeFile(diffPath, patch)
    return diffPath
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e)
    logForDebugging(
      `[PROMPT CACHE] failed to write cache break diff: ${detail}`,
    )
    return undefined
  }
}
|