import { feature } from 'bun:bundle' import type { UUID } from 'crypto' import uniqBy from 'lodash-es/uniqBy.js' /* eslint-disable @typescript-eslint/no-require-imports */ const sessionTranscriptModule = feature('KAIROS') ? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js')) : null import { APIUserAbortError } from '@anthropic-ai/sdk' import { markPostCompaction } from 'src/bootstrap/state.js' import { getInvokedSkillsForAgent } from '../../bootstrap/state.js' import type { QuerySource } from '../../constants/querySource.js' import type { CanUseToolFn } from '../../hooks/useCanUseTool.js' import type { Tool, ToolUseContext } from '../../Tool.js' import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js' import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js' import { FILE_READ_TOOL_NAME, FILE_UNCHANGED_STUB, } from '../../tools/FileReadTool/prompt.js' import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js' import type { AgentId } from '../../types/ids.js' import type { AssistantMessage, AttachmentMessage, HookResultMessage, Message, PartialCompactDirection, SystemCompactBoundaryMessage, SystemMessage, UserMessage, } from '../../types/message.js' import { createAttachmentMessage, generateFileAttachment, getAgentListingDeltaAttachment, getDeferredToolsDeltaAttachment, getMcpInstructionsDeltaAttachment, } from '../../utils/attachments.js' import { getMemoryPath } from '../../utils/config.js' import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js' import { analyzeContext, tokenStatsToStatsigMetrics, } from '../../utils/contextAnalysis.js' import { logForDebugging } from '../../utils/debug.js' import { hasExactErrorMessage } from '../../utils/errors.js' import { cacheToObject } from '../../utils/fileStateCache.js' import { type CacheSafeParams, runForkedAgent, } from '../../utils/forkedAgent.js' import { executePostCompactHooks, 
executePreCompactHooks, } from '../../utils/hooks.js' import { logError } from '../../utils/log.js' import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js' import { createCompactBoundaryMessage, createUserMessage, getAssistantMessageText, getLastAssistantMessage, getMessagesAfterCompactBoundary, isCompactBoundaryMessage, normalizeMessagesForAPI, } from '../../utils/messages.js' import { expandPath } from '../../utils/path.js' import { getPlan, getPlanFilePath } from '../../utils/plans.js' import { isSessionActivityTrackingActive, sendSessionActivitySignal, } from '../../utils/sessionActivity.js' import { processSessionStartHooks } from '../../utils/sessionStart.js' import { getTranscriptPath, reAppendSessionMetadata, } from '../../utils/sessionStorage.js' import { sleep } from '../../utils/sleep.js' import { jsonStringify } from '../../utils/slowOperations.js' /* eslint-enable @typescript-eslint/no-require-imports */ import { asSystemPrompt } from '../../utils/systemPromptType.js' import { getTaskOutputPath } from '../../utils/task/diskOutput.js' import { getTokenUsage, tokenCountFromLastAPIResponse, tokenCountWithEstimation, } from '../../utils/tokens.js' import { extractDiscoveredToolNames, isToolSearchEnabled, } from '../../utils/toolSearch.js' import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js' import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, logEvent, } from '../analytics/index.js' import { getMaxOutputTokensForModel, queryModelWithStreaming, } from '../api/claude.js' import { getPromptTooLongTokenGap, PROMPT_TOO_LONG_ERROR_MESSAGE, startsWithApiErrorPrefix, } from '../api/errors.js' import { notifyCompaction } from '../api/promptCacheBreakDetection.js' import { getRetryDelay } from '../api/withRetry.js' import { logPermissionContextForAnts } from '../internalLogging.js' import { roughTokenCountEstimation, roughTokenCountEstimationForMessages, } from '../tokenEstimation.js' import { 
groupMessagesByApiRound } from './grouping.js'
import {
  getCompactPrompt,
  getCompactUserSummaryMessage,
  getPartialCompactPrompt,
} from './prompt.js'

// Cap on how many previously-read files are re-attached after a compaction;
// both compaction paths pass it to createPostCompactFileAttachments.
export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5
// NOTE(review): the two limits below are not referenced in the visible part of
// this file — presumably enforced inside createPostCompactFileAttachments; confirm.
export const POST_COMPACT_TOKEN_BUDGET = 50_000
export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000
// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected
// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats
// dropping — instructions at the top of a skill file are usually the critical
// part. Budget sized to hold ~5 skills at the per-skill cap.
export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000
export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000
// NOTE(review): presumably the retry cap used by streamCompactSummary, whose
// body is outside this view — confirm.
const MAX_COMPACT_STREAMING_RETRIES = 2

/**
 * Strip image blocks from user messages before sending for compaction.
 * Images are not needed for generating a conversation summary and can
 * cause the compaction API call itself to hit the prompt-too-long limit,
 * especially in CCD sessions where users frequently attach images.
 * Replaces image blocks with a text marker so the summary still notes
 * that an image was shared.
 *
 * Note: Only user messages contain images (either directly attached or within
 * tool_result content from tools). Assistant messages contain text, tool_use,
 * and thinking blocks but not images.
/**
 * Swaps media content in user messages for short text placeholders before the
 * conversation is handed to the summarizer.
 *
 * Top-level `image` and `document` blocks become `[image]` / `[document]`
 * text blocks, and the same substitution is applied to items nested inside a
 * `tool_result` block whose content is an array. Everything else passes
 * through untouched: non-user messages, user messages whose content is not an
 * array, and user messages without any media are returned as the same object.
 *
 * @param messages Conversation messages to sanitize; the input is not mutated.
 * @returns A new array in which only media-carrying messages are copied.
 */
export function stripImagesFromMessages(messages: Message[]): Message[] {
  return messages.map(message => {
    if (message.type !== 'user') {
      return message
    }
    const blocks = message.message.content
    if (!Array.isArray(blocks)) {
      return message
    }

    let changed = false
    const sanitized = blocks.map(block => {
      const topLevel = mediaPlaceholderFor(block.type)
      if (topLevel !== null) {
        changed = true
        return topLevel
      }
      // Only tool results with array content can carry nested media.
      if (block.type !== 'tool_result' || !Array.isArray(block.content)) {
        return block
      }
      let nestedChanged = false
      const nested = block.content.map(item => {
        const replacement = mediaPlaceholderFor(item.type)
        if (replacement === null) {
          return item
        }
        nestedChanged = true
        return replacement
      })
      if (!nestedChanged) {
        return block
      }
      changed = true
      return { ...block, content: nested }
    })

    // Preserve object identity when nothing was replaced.
    if (!changed) {
      return message
    }
    return {
      ...message,
      message: { ...message.message, content: sanitized },
    } as typeof message
  })
}

/**
 * Returns the text block that stands in for a stripped media block, or null
 * when `blockType` is neither `image` nor `document`.
 */
function mediaPlaceholderFor(
  blockType: string,
): { type: 'text'; text: string } | null {
  switch (blockType) {
    case 'image':
      return { type: 'text', text: '[image]' }
    case 'document':
      return { type: 'text', text: '[document]' }
    default:
      return null
  }
}

/**
 * Removes `skill_discovery` and `skill_listing` attachment messages from the
 * set sent to the summarizer, so stale skill suggestions neither cost tokens
 * nor leak into the summary.
 *
 * Only active when the EXPERIMENTAL_SKILL_SEARCH feature is on; otherwise the
 * input array itself is returned (those attachment types don't exist on
 * external builds).
 *
 * NOTE(review): earlier documentation said these attachments get re-surfaced
 * via resetSentSkillNames(), but compactConversation() states it intentionally
 * does NOT reset sentSkillNames — confirm which description is current.
 */
export function stripReinjectedAttachments(messages: Message[]): Message[] {
  if (feature('EXPERIMENTAL_SKILL_SEARCH')) {
    return messages.filter(m => {
      if (m.type !== 'attachment') {
        return true
      }
      const kind = m.attachment.type
      return kind !== 'skill_discovery' && kind !== 'skill_listing'
    })
  }
  return messages
}

export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES =
  'Not enough messages to compact.'
// Maximum number of head-truncation retries after a prompt-too-long response.
const MAX_PTL_RETRIES = 3
// Content of the synthetic user message prepended when truncation would
// otherwise leave an assistant-first sequence.
const PTL_RETRY_MARKER =
  '[earlier conversation truncated for compaction retry]'

/**
 * Drops the oldest API-round groups from messages until tokenGap is covered.
 * Falls back to dropping 20% of groups when the gap is unparseable (some
 * Vertex/Bedrock error formats). Returns null when nothing can be dropped
 * without leaving an empty summarize set.
 *
 * This is the last-resort escape hatch for CC-1180 — when the compact request
 * itself hits prompt-too-long, the user is otherwise stuck. Dropping the
 * oldest context is lossy but unblocks them. The reactive-compact path
 * (compactMessages.ts) has the proper retry loop that peels from the tail;
 * this helper is the dumb-but-safe fallback for the proactive/manual path
 * that wasn't migrated in bfdb472f's unification.
 *
 * @param messages The message set that was just rejected as too long.
 * @param ptlResponse The prompt-too-long response; the token gap is parsed
 *   from it via getPromptTooLongTokenGap.
 * @returns The shortened message list (possibly prefixed with a synthetic
 *   user marker), or null when fewer than two groups exist or nothing can
 *   be dropped.
 */
export function truncateHeadForPTLRetry(
  messages: Message[],
  ptlResponse: AssistantMessage,
): Message[] | null {
  // Strip our own synthetic marker from a previous retry before grouping.
  // Otherwise it becomes its own group 0 and the 20% fallback stalls
  // (drops only the marker, re-adds it, zero progress on retry 2+).
  const input =
    messages[0]?.type === 'user' &&
    messages[0].isMeta &&
    messages[0].message.content === PTL_RETRY_MARKER
      ? messages.slice(1)
      : messages

  const groups = groupMessagesByApiRound(input)
  if (groups.length < 2) return null

  const tokenGap = getPromptTooLongTokenGap(ptlResponse)
  let dropCount: number
  if (tokenGap !== undefined) {
    // Drop whole groups from the head until their estimated size covers the gap.
    let acc = 0
    dropCount = 0
    for (const g of groups) {
      acc += roughTokenCountEstimationForMessages(g)
      dropCount++
      if (acc >= tokenGap) break
    }
  } else {
    dropCount = Math.max(1, Math.floor(groups.length * 0.2))
  }

  // Keep at least one group so there's something to summarize.
  dropCount = Math.min(dropCount, groups.length - 1)
  if (dropCount < 1) return null

  const sliced = groups.slice(dropCount).flat()
  // groupMessagesByApiRound puts the preamble in group 0 and starts every
  // subsequent group with an assistant message. Dropping group 0 leaves an
  // assistant-first sequence which the API rejects (first message must be
  // role=user). Prepend a synthetic user marker — ensureToolResultPairing
  // already handles any orphaned tool_results this creates.
  if (sliced[0]?.type === 'assistant') {
    return [
      createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }),
      ...sliced,
    ]
  }
  return sliced
}

export const ERROR_MESSAGE_PROMPT_TOO_LONG =
  'Conversation too long. Press esc twice to go up a few messages and try again.'
export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.'
export const ERROR_MESSAGE_INCOMPLETE_RESPONSE =
  'Compaction interrupted · This may be due to network issues — please try again.'

/**
 * Everything a compaction run produces; buildPostCompactMessages() turns it
 * into the post-compact message list.
 */
export interface CompactionResult {
  /** Compact boundary system message marking where compaction happened. */
  boundaryMarker: SystemMessage
  /** User message(s) carrying the generated summary. */
  summaryMessages: UserMessage[]
  /** Attachments re-created after compaction (files, plan, skills, deltas…). */
  attachments: AttachmentMessage[]
  /** Messages produced by the SessionStart hooks run after compaction. */
  hookResults: HookResultMessage[]
  /** Messages preserved verbatim (partial compaction only). */
  messagesToKeep?: Message[]
  /** Optional hook-provided text to show to the user. */
  userDisplayMessage?: string
  preCompactTokenCount?: number
  /** Total usage of the compact API call — not the size of the new context. */
  postCompactTokenCount?: number
  /** Rough estimate of the resulting context's message payload. */
  truePostCompactTokenCount?: number
  /** Token usage of the summary response, as returned by getTokenUsage. */
  compactionUsage?: ReturnType<typeof getTokenUsage>
}

/**
 * Diagnosis context passed from autoCompactIfNeeded into compactConversation.
 * Lets the tengu_compact event disambiguate same-chain loops (H2) from
 * cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.
 */
export type RecompactionInfo = {
  isRecompactionInChain: boolean
  turnsSincePreviousCompact: number
  previousCompactTurnId?: string
  autoCompactThreshold: number
  querySource?: QuerySource
}

/**
 * Build the base post-compact messages array from a CompactionResult.
 * This ensures consistent ordering across all compaction paths.
 * Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults
 *
 * @param result The compaction output; a missing messagesToKeep is treated
 *   as empty.
 * @returns A new flat array in the order listed above.
 */
export function buildPostCompactMessages(result: CompactionResult): Message[] {
  return [
    result.boundaryMarker,
    ...result.summaryMessages,
    ...(result.messagesToKeep ?? []),
    ...result.attachments,
    ...result.hookResults,
  ]
}

/**
 * Annotate a compact boundary with relink metadata for messagesToKeep.
/**
 * Attaches relink metadata for the preserved messages to a compact boundary.
 *
 * Preserved messages keep their original parentUuids on disk (dedup-skipped);
 * the loader uses this segment to patch head→anchor and
 * anchor's-other-children→tail.
 *
 * `anchorUuid` is whatever sits immediately before keep[0] in the desired chain:
 *   - suffix-preserving (reactive/session-memory): last summary message
 *   - prefix-preserving (partial compact): the boundary itself
 *
 * @returns The same boundary object when there is nothing to keep; otherwise
 *   a copy whose compactMetadata gains a `preservedSegment` entry.
 */
export function annotateBoundaryWithPreservedSegment(
  boundary: SystemCompactBoundaryMessage,
  anchorUuid: UUID,
  messagesToKeep: readonly Message[] | undefined,
): SystemCompactBoundaryMessage {
  if (!messagesToKeep || messagesToKeep.length === 0) {
    return boundary
  }
  const first = messagesToKeep[0]!
  const last = messagesToKeep[messagesToKeep.length - 1]!
  const preservedSegment = {
    headUuid: first.uuid,
    anchorUuid,
    tailUuid: last.uuid,
  }
  return {
    ...boundary,
    compactMetadata: { ...boundary.compactMetadata, preservedSegment },
  }
}

/**
 * Combines the user's custom instructions with those supplied by a hook.
 * User text goes first, hook text second, separated by a blank line.
 * Empty or missing inputs are ignored; if nothing remains the result is
 * undefined.
 */
export function mergeHookInstructions(
  userInstructions: string | undefined,
  hookInstructions: string | undefined,
): string | undefined {
  const present = [userInstructions, hookInstructions].filter(Boolean)
  return present.length > 0 ? present.join('\n\n') : undefined
}

/**
 * Creates a compact version of a conversation by summarizing older messages
 * and preserving recent conversation history.
/**
 * Summarizes the whole conversation and returns the pieces needed to rebuild
 * a compacted context.
 *
 * Steps, in order: run PreCompact hooks (their instructions are merged into
 * `customInstructions`), request the summary (retrying with head truncation
 * when the request itself is too long), clear the read-file and nested-memory
 * caches, rebuild post-compact attachments, run SessionStart hooks, log the
 * tengu_compact event, and run PostCompact hooks.
 *
 * @param messages Conversation to summarize; must not be empty.
 * @param context Tool-use context supplying app state, options, caches and
 *   progress/status callbacks.
 * @param cacheSafeParams Parameters forwarded to streamCompactSummary; on a
 *   retry, `forkContextMessages` is replaced with the truncated message set.
 * @param suppressFollowUpQuestions Forwarded to getCompactUserSummaryMessage.
 * @param customInstructions Optional user instructions for the summary prompt.
 * @param isAutoCompact True for automatic compaction; selects the 'auto'
 *   trigger and suppresses the error notification on failure.
 * @param recompactionInfo Optional diagnosis context used only for telemetry.
 * @returns The compaction result (boundary, summary, attachments, hook output
 *   and token statistics).
 * @throws Error with ERROR_MESSAGE_NOT_ENOUGH_MESSAGES when `messages` is
 *   empty, with ERROR_MESSAGE_PROMPT_TOO_LONG when truncation retries are
 *   exhausted or impossible, or with the response text when the summary is
 *   missing or is an API error. Every error is rethrown after cleanup.
 */
export async function compactConversation(
  messages: Message[],
  context: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  suppressFollowUpQuestions: boolean,
  customInstructions?: string,
  isAutoCompact: boolean = false,
  recompactionInfo?: RecompactionInfo,
): Promise<CompactionResult> {
  try {
    if (messages.length === 0) {
      throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
    }

    const preCompactTokenCount = tokenCountWithEstimation(messages)

    const appState = context.getAppState()
    void logPermissionContextForAnts(appState.toolPermissionContext, 'summary')

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'pre_compact',
    })

    // Execute PreCompact hooks
    context.setSDKStatus?.('compacting')
    const hookResult = await executePreCompactHooks(
      {
        trigger: isAutoCompact ? 'auto' : 'manual',
        customInstructions: customInstructions ?? null,
      },
      context.abortController.signal,
    )
    customInstructions = mergeHookInstructions(
      customInstructions,
      hookResult.newCustomInstructions,
    )
    const userDisplayMessage = hookResult.userDisplayMessage

    // Show requesting mode with up arrow and custom message
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_start' })

    // 3P default: true — forked-agent path reuses main conversation's prompt cache.
    // Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of
    // fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK)
    // with cold GB cache and 3P providers where GB is disabled. GB gate kept as kill-switch.
    const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_cache_prefix',
      true,
    )

    const compactPrompt = getCompactPrompt(customInstructions)
    const summaryRequest = createUserMessage({
      content: compactPrompt,
    })

    let messagesToSummarize = messages
    let retryCacheSafeParams = cacheSafeParams
    let summaryResponse: AssistantMessage
    let summary: string | null
    let ptlAttempts = 0
    // Loop exits via `break` on any non-prompt-too-long response, or via throw.
    for (;;) {
      summaryResponse = await streamCompactSummary({
        messages: messagesToSummarize,
        summaryRequest,
        appState,
        context,
        preCompactTokenCount,
        cacheSafeParams: retryCacheSafeParams,
      })
      summary = getAssistantMessageText(summaryResponse)
      if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break

      // CC-1180: compact request itself hit prompt-too-long. Truncate the
      // oldest API-round groups and retry rather than leaving the user stuck.
      ptlAttempts++
      const truncated =
        ptlAttempts <= MAX_PTL_RETRIES
          ? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse)
          : null
      if (!truncated) {
        logEvent('tengu_compact_failed', {
          reason:
            'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
          promptCacheSharingEnabled,
          ptlAttempts,
        })
        throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)
      }
      logEvent('tengu_compact_ptl_retry', {
        attempt: ptlAttempts,
        droppedMessages: messagesToSummarize.length - truncated.length,
        remainingMessages: truncated.length,
      })
      messagesToSummarize = truncated
      // The forked-agent path reads from cacheSafeParams.forkContextMessages,
      // not the messages param — thread the truncated set through both paths.
      retryCacheSafeParams = {
        ...retryCacheSafeParams,
        forkContextMessages: truncated,
      }
    }

    if (!summary) {
      logForDebugging(
        `Compact failed: no summary text in response. Response: ${jsonStringify(summaryResponse)}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        promptCacheSharingEnabled,
      })
      throw new Error(
        `Failed to generate conversation summary - response did not contain valid text content`,
      )
    } else if (startsWithApiErrorPrefix(summary)) {
      logEvent('tengu_compact_failed', {
        reason:
          'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        promptCacheSharingEnabled,
      })
      throw new Error(summary)
    }

    // Store the current file state before clearing
    const preCompactReadFileState = cacheToObject(context.readFileState)

    // Clear the cache
    context.readFileState.clear()
    context.loadedNestedMemoryPaths?.clear()

    // Intentionally NOT resetting sentSkillNames: re-injecting the full
    // skill_listing (~4K tokens) post-compact is pure cache_creation with
    // marginal benefit. The model still has SkillTool in its schema and
    // invoked_skills attachment (below) preserves used-skill content. Ants
    // with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the
    // early-return in getSkillListingAttachments.

    // Run async attachment generation in parallel
    const [fileAttachments, asyncAgentAttachments] = await Promise.all([
      createPostCompactFileAttachments(
        preCompactReadFileState,
        context,
        POST_COMPACT_MAX_FILES_TO_RESTORE,
      ),
      createAsyncAgentAttachmentsIfNeeded(context),
    ])

    const postCompactFileAttachments: AttachmentMessage[] = [
      ...fileAttachments,
      ...asyncAgentAttachments,
    ]
    const planAttachment = createPlanAttachmentIfNeeded(context.agentId)
    if (planAttachment) {
      postCompactFileAttachments.push(planAttachment)
    }

    // Add plan mode instructions if currently in plan mode, so the model
    // continues operating in plan mode after compaction
    const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)
    if (planModeAttachment) {
      postCompactFileAttachments.push(planModeAttachment)
    }

    // Add skill attachment if skills were invoked in this session
    const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)
    if (skillAttachment) {
      postCompactFileAttachments.push(skillAttachment)
    }

    // Compaction ate prior delta attachments. Re-announce from the current
    // state so the model has tool/instruction context on the first
    // post-compact turn. Empty message history → diff against nothing →
    // announces the full set.
    for (const att of getDeferredToolsDeltaAttachment(
      context.options.tools,
      context.options.mainLoopModel,
      [],
      { callSite: 'compact_full' },
    )) {
      postCompactFileAttachments.push(createAttachmentMessage(att))
    }
    for (const att of getAgentListingDeltaAttachment(context, [])) {
      postCompactFileAttachments.push(createAttachmentMessage(att))
    }
    for (const att of getMcpInstructionsDeltaAttachment(
      context.options.mcpClients,
      context.options.tools,
      context.options.mainLoopModel,
      [],
    )) {
      postCompactFileAttachments.push(createAttachmentMessage(att))
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'session_start',
    })
    // Execute SessionStart hooks after successful compaction
    const hookMessages = await processSessionStartHooks('compact', {
      model: context.options.mainLoopModel,
    })

    // Create the compact boundary marker and summary messages before the
    // event so we can compute the true resulting-context size.
    const boundaryMarker = createCompactBoundaryMessage(
      isAutoCompact ? 'auto' : 'manual',
      preCompactTokenCount ?? 0,
      messages.at(-1)?.uuid,
    )
    // Carry loaded-tool state — the summary doesn't preserve tool_reference
    // blocks, so the post-compact schema filter needs this to keep sending
    // already-loaded deferred tool schemas to the API.
    const preCompactDiscovered = extractDiscoveredToolNames(messages)
    if (preCompactDiscovered.size > 0) {
      boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
        ...preCompactDiscovered,
      ].sort()
    }

    const transcriptPath = getTranscriptPath()
    const summaryMessages: UserMessage[] = [
      createUserMessage({
        content: getCompactUserSummaryMessage(
          summary,
          suppressFollowUpQuestions,
          transcriptPath,
        ),
        isCompactSummary: true,
        isVisibleInTranscriptOnly: true,
      }),
    ]

    // Previously "postCompactTokenCount" — renamed because this is the
    // compact API call's total usage (input_tokens ≈ preCompactTokenCount),
    // NOT the size of the resulting context. Kept for event-field continuity.
    const compactionCallTotalTokens = tokenCountFromLastAPIResponse([
      summaryResponse,
    ])

    // Message-payload estimate of the resulting context. The next iteration's
    // shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools +
    // userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true`
    // is a strong signal; `false` may still retrigger when this is close to threshold.
    const truePostCompactTokenCount = roughTokenCountEstimationForMessages([
      boundaryMarker,
      ...summaryMessages,
      ...postCompactFileAttachments,
      ...hookMessages,
    ])

    // Extract compaction API usage metrics
    const compactionUsage = getTokenUsage(summaryResponse)

    const querySourceForEvent =
      recompactionInfo?.querySource ?? context.options.querySource ?? 'unknown'

    logEvent('tengu_compact', {
      preCompactTokenCount,
      // Kept for continuity — semantically the compact API call's total usage
      postCompactTokenCount: compactionCallTotalTokens,
      truePostCompactTokenCount,
      autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? -1,
      willRetriggerNextTurn:
        recompactionInfo !== undefined &&
        truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold,
      isAutoCompact,
      querySource:
        querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      queryChainId: (context.queryTracking?.chainId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      queryDepth: context.queryTracking?.depth ?? -1,
      isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false,
      turnsSincePreviousCompact:
        recompactionInfo?.turnsSincePreviousCompact ?? -1,
      previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      compactionInputTokens: compactionUsage?.input_tokens,
      compactionOutputTokens: compactionUsage?.output_tokens,
      compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,
      compactionCacheCreationTokens:
        compactionUsage?.cache_creation_input_tokens ?? 0,
      compactionTotalTokens: compactionUsage
        ? compactionUsage.input_tokens +
          (compactionUsage.cache_creation_input_tokens ?? 0) +
          (compactionUsage.cache_read_input_tokens ?? 0) +
          compactionUsage.output_tokens
        : 0,
      promptCacheSharingEnabled,
      // analyzeContext walks every content block (~11ms on a 4.5K-message
      // session) purely for this telemetry breakdown. Computed here, past
      // the compaction-API await, so the sync walk doesn't starve the
      // render loop before compaction even starts. Same deferral pattern
      // as reactiveCompact.ts.
      ...(() => {
        try {
          return tokenStatsToStatsigMetrics(analyzeContext(messages))
        } catch (error) {
          logError(error as Error)
          return {}
        }
      })(),
    })

    // Reset cache read baseline so the post-compact drop isn't flagged as a break
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(
        context.options.querySource ?? 'compact',
        context.agentId,
      )
    }
    markPostCompaction()

    // Re-append session metadata (custom title, tag) so it stays within
    // the 16KB tail window that readLiteMetadata reads for --resume display.
    // Without this, enough post-compaction messages push the metadata entry
    // out of the window, causing --resume to show the auto-generated title
    // instead of the user-set session name.
    reAppendSessionMetadata()

    // Write a reduced transcript segment for the pre-compaction messages
    // (assistant mode only). Fire-and-forget — errors are logged internally.
    if (feature('KAIROS')) {
      void sessionTranscriptModule?.writeSessionTranscriptSegment(messages)
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'post_compact',
    })
    const postCompactHookResult = await executePostCompactHooks(
      {
        trigger: isAutoCompact ? 'auto' : 'manual',
        compactSummary: summary,
      },
      context.abortController.signal,
    )

    // Pre- and post-compact hook messages are shown together, one per line.
    const combinedUserDisplayMessage = [
      userDisplayMessage,
      postCompactHookResult.userDisplayMessage,
    ]
      .filter(Boolean)
      .join('\n')

    return {
      boundaryMarker,
      summaryMessages,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: combinedUserDisplayMessage || undefined,
      preCompactTokenCount,
      postCompactTokenCount: compactionCallTotalTokens,
      truePostCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    // Only show the error notification for manual /compact.
    // Auto-compact failures are retried on the next turn and the
    // notification is confusing when compaction eventually succeeds.
    if (!isAutoCompact) {
      addErrorNotificationIfNeeded(error, context)
    }
    throw error
  } finally {
    // Always restore UI/SDK status, whether compaction succeeded or failed.
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}

/**
 * Performs a partial compaction around the selected message index.
 * Direction 'from': summarizes messages after the index, keeps earlier ones.
 *   Prompt cache for kept (earlier) messages is preserved.
 * Direction 'up_to': summarizes messages before the index, keeps later ones.
 *   Prompt cache is invalidated since the summary precedes the kept messages.
 *
 * @param allMessages The full conversation.
 * @param pivotIndex Index that splits the summarized part from the kept part.
 * @param context Tool-use context supplying options, caches and callbacks.
 * @param cacheSafeParams Parameters forwarded to streamCompactSummary.
 * @param userFeedback Optional user text added to the summary instructions.
 * @param direction Which side of the pivot is summarized; defaults to 'from'.
 * @returns The compaction result, including the preserved `messagesToKeep`.
 * @throws Error when the selected side is empty, when the summary cannot be
 *   produced, or with ERROR_MESSAGE_PROMPT_TOO_LONG when retries run out.
 */
export async function partialCompactConversation(
  allMessages: Message[],
  pivotIndex: number,
  context: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  userFeedback?: string,
  direction: PartialCompactDirection = 'from',
): Promise<CompactionResult> {
  try {
    const messagesToSummarize =
      direction === 'up_to'
        ? allMessages.slice(0, pivotIndex)
        : allMessages.slice(pivotIndex)
    // 'up_to' must strip old compact boundaries/summaries: for 'up_to',
    // summary_B sits BEFORE kept, so a stale boundary_A in kept wins
    // findLastCompactBoundaryIndex's backward scan and drops summary_B.
// 'from' keeps them: summary_B sits AFTER kept (backward scan still // works), and removing an old summary would lose its covered history. const messagesToKeep = direction === 'up_to' ? allMessages .slice(pivotIndex) .filter( m => m.type !== 'progress' && !isCompactBoundaryMessage(m) && !(m.type === 'user' && m.isCompactSummary), ) : allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress') if (messagesToSummarize.length === 0) { throw new Error( direction === 'up_to' ? 'Nothing to summarize before the selected message.' : 'Nothing to summarize after the selected message.', ) } const preCompactTokenCount = tokenCountWithEstimation(allMessages) context.onCompactProgress?.({ type: 'hooks_start', hookType: 'pre_compact', }) context.setSDKStatus?.('compacting') const hookResult = await executePreCompactHooks( { trigger: 'manual', customInstructions: null, }, context.abortController.signal, ) // Merge hook instructions with user feedback let customInstructions: string | undefined if (hookResult.newCustomInstructions && userFeedback) { customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}` } else if (hookResult.newCustomInstructions) { customInstructions = hookResult.newCustomInstructions } else if (userFeedback) { customInstructions = `User context: ${userFeedback}` } context.setStreamMode?.('requesting') context.setResponseLength?.(() => 0) context.onCompactProgress?.({ type: 'compact_start' }) const compactPrompt = getPartialCompactPrompt(customInstructions, direction) const summaryRequest = createUserMessage({ content: compactPrompt, }) const failureMetadata = { preCompactTokenCount, direction: direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, messagesSummarized: messagesToSummarize.length, } // 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache). // PTL retry breaks the cache prefix but unblocks the user (CC-1180). let apiMessages = direction === 'up_to' ? 
messagesToSummarize : allMessages let retryCacheSafeParams = direction === 'up_to' ? { ...cacheSafeParams, forkContextMessages: messagesToSummarize } : cacheSafeParams let summaryResponse: AssistantMessage let summary: string | null let ptlAttempts = 0 for (;;) { summaryResponse = await streamCompactSummary({ messages: apiMessages, summaryRequest, appState: context.getAppState(), context, preCompactTokenCount, cacheSafeParams: retryCacheSafeParams, }) summary = getAssistantMessageText(summaryResponse) if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break ptlAttempts++ const truncated = ptlAttempts <= MAX_PTL_RETRIES ? truncateHeadForPTLRetry(apiMessages, summaryResponse) : null if (!truncated) { logEvent('tengu_partial_compact_failed', { reason: 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, ...failureMetadata, ptlAttempts, }) throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG) } logEvent('tengu_compact_ptl_retry', { attempt: ptlAttempts, droppedMessages: apiMessages.length - truncated.length, remainingMessages: truncated.length, path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, }) apiMessages = truncated retryCacheSafeParams = { ...retryCacheSafeParams, forkContextMessages: truncated, } } if (!summary) { logEvent('tengu_partial_compact_failed', { reason: 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, ...failureMetadata, }) throw new Error( 'Failed to generate conversation summary - response did not contain valid text content', ) } else if (startsWithApiErrorPrefix(summary)) { logEvent('tengu_partial_compact_failed', { reason: 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, ...failureMetadata, }) throw new Error(summary) } // Store the current file state before clearing const preCompactReadFileState = cacheToObject(context.readFileState) context.readFileState.clear() context.loadedNestedMemoryPaths?.clear() // Intentionally NOT resetting 
sentSkillNames — see compactConversation() // for rationale (~4K tokens saved per compact event). const [fileAttachments, asyncAgentAttachments] = await Promise.all([ createPostCompactFileAttachments( preCompactReadFileState, context, POST_COMPACT_MAX_FILES_TO_RESTORE, messagesToKeep, ), createAsyncAgentAttachmentsIfNeeded(context), ]) const postCompactFileAttachments: AttachmentMessage[] = [ ...fileAttachments, ...asyncAgentAttachments, ] const planAttachment = createPlanAttachmentIfNeeded(context.agentId) if (planAttachment) { postCompactFileAttachments.push(planAttachment) } // Add plan mode instructions if currently in plan mode const planModeAttachment = await createPlanModeAttachmentIfNeeded(context) if (planModeAttachment) { postCompactFileAttachments.push(planModeAttachment) } const skillAttachment = createSkillAttachmentIfNeeded(context.agentId) if (skillAttachment) { postCompactFileAttachments.push(skillAttachment) } // Re-announce only what was in the summarized portion — messagesToKeep // is scanned, so anything already announced there is skipped. 
for (const att of getDeferredToolsDeltaAttachment( context.options.tools, context.options.mainLoopModel, messagesToKeep, { callSite: 'compact_partial' }, )) { postCompactFileAttachments.push(createAttachmentMessage(att)) } for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) { postCompactFileAttachments.push(createAttachmentMessage(att)) } for (const att of getMcpInstructionsDeltaAttachment( context.options.mcpClients, context.options.tools, context.options.mainLoopModel, messagesToKeep, )) { postCompactFileAttachments.push(createAttachmentMessage(att)) } context.onCompactProgress?.({ type: 'hooks_start', hookType: 'session_start', }) const hookMessages = await processSessionStartHooks('compact', { model: context.options.mainLoopModel, }) const postCompactTokenCount = tokenCountFromLastAPIResponse([ summaryResponse, ]) const compactionUsage = getTokenUsage(summaryResponse) logEvent('tengu_partial_compact', { preCompactTokenCount, postCompactTokenCount, messagesKept: messagesToKeep.length, messagesSummarized: messagesToSummarize.length, direction: direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, hasUserFeedback: !!userFeedback, trigger: 'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, compactionInputTokens: compactionUsage?.input_tokens, compactionOutputTokens: compactionUsage?.output_tokens, compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0, compactionCacheCreationTokens: compactionUsage?.cache_creation_input_tokens ?? 0, }) // Progress messages aren't loggable, so forkSessionImpl would null out // a logicalParentUuid pointing at one. Both directions skip them. const lastPreCompactUuid = direction === 'up_to' ? allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress') ?.uuid : messagesToKeep.at(-1)?.uuid const boundaryMarker = createCompactBoundaryMessage( 'manual', preCompactTokenCount ?? 
0, lastPreCompactUuid, userFeedback, messagesToSummarize.length, ) // allMessages not just messagesToSummarize — set union is idempotent, // simpler than tracking which half each tool lived in. const preCompactDiscovered = extractDiscoveredToolNames(allMessages) if (preCompactDiscovered.size > 0) { boundaryMarker.compactMetadata.preCompactDiscoveredTools = [ ...preCompactDiscovered, ].sort() } const transcriptPath = getTranscriptPath() const summaryMessages: UserMessage[] = [ createUserMessage({ content: getCompactUserSummaryMessage(summary, false, transcriptPath), isCompactSummary: true, ...(messagesToKeep.length > 0 ? { summarizeMetadata: { messagesSummarized: messagesToSummarize.length, userContext: userFeedback, direction, }, } : { isVisibleInTranscriptOnly: true as const }), }), ] if (feature('PROMPT_CACHE_BREAK_DETECTION')) { notifyCompaction( context.options.querySource ?? 'compact', context.agentId, ) } markPostCompaction() // Re-append session metadata (custom title, tag) so it stays within // the 16KB tail window that readLiteMetadata reads for --resume display. reAppendSessionMetadata() if (feature('KAIROS')) { void sessionTranscriptModule?.writeSessionTranscriptSegment( messagesToSummarize, ) } context.onCompactProgress?.({ type: 'hooks_start', hookType: 'post_compact', }) const postCompactHookResult = await executePostCompactHooks( { trigger: 'manual', compactSummary: summary, }, context.abortController.signal, ) // 'from': prefix-preserving → boundary; 'up_to': suffix → last summary const anchorUuid = direction === 'up_to' ? (summaryMessages.at(-1)?.uuid ?? 
boundaryMarker.uuid)
        : boundaryMarker.uuid
    return {
      boundaryMarker: annotateBoundaryWithPreservedSegment(
        boundaryMarker,
        anchorUuid,
        messagesToKeep,
      ),
      summaryMessages,
      messagesToKeep,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: postCompactHookResult.userDisplayMessage,
      preCompactTokenCount,
      postCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    addErrorNotificationIfNeeded(error, context)
    throw error
  } finally {
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}

/**
 * Adds an "Error compacting conversation" notification unless the error is
 * one of the two expected outcomes: a user abort or the "not enough
 * messages" condition.
 *
 * @param error The value caught by the caller's `catch`.
 * @param context Only `addNotification` is read; it is optional and skipped
 *   when absent.
 */
function addErrorNotificationIfNeeded(
  error: unknown,
  // NOTE(review): generic arguments were missing in the source text; restored
  // from usage (only `addNotification` is accessed, and the caller passes its
  // ToolUseContext) — confirm against the original declaration.
  context: Pick<ToolUseContext, 'addNotification'>,
) {
  if (
    !hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
    !hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
  ) {
    context.addNotification?.({
      key: 'error-compacting-conversation',
      text: 'Error compacting conversation',
      priority: 'immediate',
      color: 'error',
    })
  }
}

/**
 * Builds the permission callback used while compacting: every tool request
 * is denied, with a reason stating that the compaction agent should only
 * produce a text summary.
 */
export function createCompactCanUseTool(): CanUseToolFn {
  return async () => ({
    behavior: 'deny' as const,
    message: 'Tool use is not allowed during compaction',
    decisionReason: {
      type: 'other' as const,
      reason: 'compaction agent should only produce text summary',
    },
  })
}

/**
 * Requests the compaction summary from the model and returns the resulting
 * assistant message.
 *
 * Two paths, tried in order:
 *  1. When the `tengu_compact_cache_prefix` flag is on, a single-turn forked
 *     agent is run with `cacheSafeParams`. A non-error assistant message with
 *     text is returned as-is; anything else (or a thrown error) is logged and
 *     falls through to path 2.
 *  2. Direct streaming via `queryModelWithStreaming`, retried up to
 *     MAX_COMPACT_STREAMING_RETRIES times when `tengu_compact_streaming_retry`
 *     is on (otherwise a single attempt).
 *
 * While running, a 30s interval sends session-activity signals if session
 * activity tracking is active; the interval is always cleared in `finally`.
 *
 * @throws Error with ERROR_MESSAGE_INCOMPLETE_RESPONSE when no attempt yields
 *   an assistant message.
 * @throws APIUserAbortError when the abort signal fires during a retry delay.
 */
async function streamCompactSummary({
  messages,
  summaryRequest,
  appState,
  context,
  preCompactTokenCount,
  cacheSafeParams,
}: {
  messages: Message[]
  summaryRequest: UserMessage
  // NOTE(review): the source text read `Awaited>`; the inner type arguments
  // were lost. Restored as the result type of `context.getAppState` — confirm.
  appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
  context: ToolUseContext
  preCompactTokenCount: number
  cacheSafeParams: CacheSafeParams
}): Promise<AssistantMessage> {
  // When prompt cache sharing is enabled, use forked agent to reuse the
  // main conversation's cached prefix (system prompt, tools, context messages).
  // Falls back to regular streaming path on failure.
  // 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
  const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_cache_prefix',
    true,
  )

  // Send keep-alive signals during compaction to prevent remote session
  // WebSocket idle timeouts from dropping bridge connections. Compaction
  // API calls can take 5-10+ seconds, during which no other messages
  // flow through the transport — without keep-alives, the server may
  // close the WebSocket for inactivity.
  // Two signals: (1) PUT /worker heartbeat via sessionActivity, and
  // (2) re-emit 'compacting' status so the SDK event stream stays active
  // and the server doesn't consider the session stale.
  const activityInterval = isSessionActivityTrackingActive()
    ? setInterval(
        (statusSetter?: (status: 'compacting' | null) => void) => {
          sendSessionActivitySignal()
          statusSetter?.('compacting')
        },
        30_000,
        // Extra setInterval argument: forwarded to the callback as statusSetter.
        context.setSDKStatus,
      )
    : undefined

  try {
    if (promptCacheSharingEnabled) {
      try {
        // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
        // prompt cache by sending identical cache-key params (system, tools, model,
        // messages prefix, thinking config). Setting maxOutputTokens would clamp
        // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
        // creating a thinking config mismatch that invalidates the cache.
        // The streaming fallback path (below) can safely set maxOutputTokensOverride
        // since it doesn't share cache with the main thread.
        const result = await runForkedAgent({
          promptMessages: [summaryRequest],
          cacheSafeParams,
          canUseTool: createCompactCanUseTool(),
          querySource: 'compact',
          forkLabel: 'compact',
          maxTurns: 1,
          skipCacheWrite: true,
          // Pass the compact context's abortController so user Esc aborts the
          // fork — same signal the streaming fallback uses at
          // `signal: context.abortController.signal` below.
          overrides: { abortController: context.abortController },
        })
        const assistantMsg = getLastAssistantMessage(result.messages)
        const assistantText = assistantMsg
          ? getAssistantMessageText(assistantMsg)
          : null
        // Guard isApiErrorMessage: query() catches API errors (including
        // APIUserAbortError on ESC) and yields them as synthetic assistant
        // messages. Without this check, an aborted compact "succeeds" with
        // "Request was aborted." as the summary — the text doesn't start with
        // "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
        if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
          // Skip success logging for PTL error text — it's returned so the
          // caller's retry loop catches it, but it's not a successful summary.
          if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
            logEvent('tengu_compact_cache_sharing_success', {
              preCompactTokenCount,
              outputTokens: result.totalUsage.output_tokens,
              cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
              cacheCreationInputTokens:
                result.totalUsage.cache_creation_input_tokens,
              // Share of input tokens served from cache; 0 when nothing was read
              // from cache (which also avoids dividing by a zero total).
              cacheHitRate:
                result.totalUsage.cache_read_input_tokens > 0
                  ? result.totalUsage.cache_read_input_tokens /
                    (result.totalUsage.cache_read_input_tokens +
                      result.totalUsage.cache_creation_input_tokens +
                      result.totalUsage.input_tokens)
                  : 0,
            })
          }
          return assistantMsg
        }
        logForDebugging(
          `Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
          { level: 'warn' },
        )
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      } catch (error) {
        // Best-effort path: record the failure and continue to the streaming fallback.
        logError(error)
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      }
    }

    // Regular streaming path (fallback when cache sharing fails or is disabled)
    const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_streaming_retry',
      false,
    )
    const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Reset state for retry
      let hasStartedStreaming = false
      let response: AssistantMessage | undefined
      context.setResponseLength?.(() => 0)

      // Check if tool search is enabled using the main loop's tools list.
      // context.options.tools includes MCP tools merged via useMergedTools.
      const useToolSearch = await isToolSearchEnabled(
        context.options.mainLoopModel,
        context.options.tools,
        async () => appState.toolPermissionContext,
        context.options.agentDefinitions.activeAgents,
        'compact',
      )

      // When tool search is enabled, include ToolSearchTool and MCP tools. They get
      // defer_loading: true and don't count against context - the API filters them out
      // of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
      // and api/public_api/messages/handler.py:324).
      // Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
      // get the permission-filtered set from useMergedTools — same source used for
      // isToolSearchEnabled above and normalizeMessagesForAPI below.
      // Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
      const tools: Tool[] = useToolSearch
        ? uniqBy(
            [
              FileReadTool,
              ToolSearchTool,
              ...context.options.tools.filter(t => t.isMcp),
            ],
            'name',
          )
        : [FileReadTool]

      const streamingGen = queryModelWithStreaming({
        messages: normalizeMessagesForAPI(
          stripImagesFromMessages(
            stripReinjectedAttachments([
              ...getMessagesAfterCompactBoundary(messages),
              summaryRequest,
            ]),
          ),
          context.options.tools,
        ),
        systemPrompt: asSystemPrompt([
          'You are a helpful AI assistant tasked with summarizing conversations.',
        ]),
        thinkingConfig: { type: 'disabled' as const },
        tools,
        signal: context.abortController.signal,
        options: {
          async getToolPermissionContext() {
            // Reads the app state at call time; this local intentionally
            // shadows the `appState` parameter of the enclosing function.
            const appState = context.getAppState()
            return appState.toolPermissionContext
          },
          model: context.options.mainLoopModel,
          toolChoice: undefined,
          isNonInteractiveSession: context.options.isNonInteractiveSession,
          hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
          maxOutputTokensOverride: Math.min(
            COMPACT_MAX_OUTPUT_TOKENS,
            getMaxOutputTokensForModel(context.options.mainLoopModel),
          ),
          querySource: 'compact',
          agents: context.options.agentDefinitions.activeAgents,
          mcpTools: [],
          effortValue: appState.effortValue,
        },
      })

      const streamIter = streamingGen[Symbol.asyncIterator]()
      let next = await streamIter.next()

      while (!next.done) {
        const event = next.value

        // First text block of this attempt: switch the UI to 'responding'.
        if (
          !hasStartedStreaming &&
          event.type === 'stream_event' &&
          event.event.type === 'content_block_start' &&
          event.event.content_block.type === 'text'
        ) {
          hasStartedStreaming = true
          context.setStreamMode?.('responding')
        }

        // Accumulate the number of streamed characters for progress display.
        if (
          event.type === 'stream_event' &&
          event.event.type === 'content_block_delta' &&
          event.event.delta.type === 'text_delta'
        ) {
          const charactersStreamed = event.event.delta.text.length
          context.setResponseLength?.(length => length + charactersStreamed)
        }

        // Keep the latest assistant message; it is the result of this attempt.
        if (event.type === 'assistant') {
          response = event
        }

        next = await streamIter.next()
      }

      if (response) {
        return response
      }

      if (attempt < maxAttempts) {
        logEvent('tengu_compact_streaming_retry', {
          attempt,
          preCompactTokenCount,
          hasStartedStreaming,
        })
        // Abort-aware back-off: rejects with APIUserAbortError if the signal fires.
        await sleep(getRetryDelay(attempt), context.abortController.signal, {
          abortError: () => new APIUserAbortError(),
        })
        continue
      }

      logForDebugging(
        `Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        hasStartedStreaming,
        retryEnabled,
        attempts: attempt,
        promptCacheSharingEnabled,
      })
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    }

    // This should never be reached due to the throw above, but TypeScript needs it
    throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
  } finally {
    // clearInterval tolerates `undefined`, so no guard is needed when the
    // keep-alive interval was never started.
    clearInterval(activityInterval)
  }
}

/**
 * Creates attachment messages for recently accessed files to restore them after compaction.
 * This prevents the model from having to re-read files that were recently accessed.
 * Re-reads files using FileReadTool to get fresh content with proper validation.
 * Files are selected based on recency, but constrained by both file count and token budget limits.
 *
 * Files already present as Read tool results in preservedMessages are skipped —
 * re-injecting identical content the model can already see in the preserved tail
 * is pure waste (up to 25K tok/compact). Mirrors the diff-against-preserved
 * pattern that getDeferredToolsDeltaAttachment uses at the same call sites.
*
 * @param readFileState The current file state tracking recently read files
 * @param toolUseContext The tool use context for calling FileReadTool
 * @param maxFiles Maximum number of files to restore (required; no default is
 *   declared in the signature)
 * @param preservedMessages Messages kept post-compact; Read results here are skipped
 * @returns Array of attachment messages for the most recently accessed files that fit within token budget
 */
export async function createPostCompactFileAttachments(
  // NOTE(review): the generic arguments were missing in the source text
  // (`Record,`). The value type below lists only the field this function
  // reads (`timestamp`); confirm against the real file-state type.
  readFileState: Record<string, { timestamp: number }>,
  toolUseContext: ToolUseContext,
  maxFiles: number,
  preservedMessages: Message[] = [],
): Promise<AttachmentMessage[]> {
  // Paths whose full Read result is still visible in the preserved messages.
  const preservedReadPaths = collectReadToolFilePaths(preservedMessages)

  // Candidates: not excluded, not already visible, newest first, capped at maxFiles.
  const recentFiles = Object.entries(readFileState)
    .map(([filename, state]) => ({ filename, ...state }))
    .filter(
      file =>
        !shouldExcludeFromPostCompactRestore(
          file.filename,
          toolUseContext.agentId,
        ) && !preservedReadPaths.has(expandPath(file.filename)),
    )
    .sort((a, b) => b.timestamp - a.timestamp)
    .slice(0, maxFiles)

  // Re-read all candidates concurrently with a per-file token cap; a falsy
  // attachment from generateFileAttachment becomes null and is dropped below.
  const results = await Promise.all(
    recentFiles.map(async file => {
      const attachment = await generateFileAttachment(
        file.filename,
        {
          ...toolUseContext,
          fileReadingLimits: {
            maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,
          },
        },
        'tengu_post_compact_file_restore_success',
        'tengu_post_compact_file_restore_error',
        'compact',
      )
      return attachment ? createAttachmentMessage(attachment) : null
    }),
  )

  // Greedy budget pass in recency order. An attachment that does not fit is
  // skipped, but later (smaller) ones may still be admitted.
  let usedTokens = 0
  return results.filter((result): result is AttachmentMessage => {
    if (result === null) {
      return false
    }
    const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))
    if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {
      usedTokens += attachmentTokens
      return true
    }
    return false
  })
}

/**
 * Creates a plan file attachment if a plan file exists for the current session.
 * This ensures the plan is preserved after compaction.
*/
export function createPlanAttachmentIfNeeded(
  agentId?: AgentId,
): AttachmentMessage | null {
  const planContent = getPlan(agentId)
  // The plan path is only resolved when there is plan content to attach.
  return planContent
    ? createAttachmentMessage({
        type: 'plan_file_reference',
        planFilePath: getPlanFilePath(agentId),
        planContent,
      })
    : null
}

/**
 * Builds an `invoked_skills` attachment so skill content survives compaction.
 * Skills are looked up for the given agent via getInvokedSkillsForAgent
 * (main session when agentId is null/undefined), so skills from other agent
 * contexts are not included.
 *
 * @param agentId Agent whose invoked skills should be carried over.
 * @returns The attachment message, or null when no skill was invoked or none
 *   fits within POST_COMPACT_SKILLS_TOKEN_BUDGET.
 */
export function createSkillAttachmentIfNeeded(
  agentId?: string,
): AttachmentMessage | null {
  const invokedSkills = getInvokedSkillsForAgent(agentId)
  if (invokedSkills.size === 0) {
    return null
  }

  // Most recently invoked first, so that when the budget runs out it is the
  // older skills that get dropped.
  const newestFirst = [...invokedSkills.values()].sort(
    (left, right) => right.invokedAt - left.invokedAt,
  )

  // Each skill is truncated to its own cap (keeping the head of the file)
  // and then admitted only if it still fits in the remaining shared budget.
  // A skill that does not fit is skipped; later ones are still considered.
  let tokensSpent = 0
  const skills = newestFirst.flatMap(entry => {
    const content = truncateToTokens(
      entry.content,
      POST_COMPACT_MAX_TOKENS_PER_SKILL,
    )
    const cost = roughTokenCountEstimation(content)
    if (tokensSpent + cost > POST_COMPACT_SKILLS_TOKEN_BUDGET) {
      return []
    }
    tokensSpent += cost
    return [{ name: entry.skillName, path: entry.skillPath, content }]
  })

  return skills.length === 0
    ? null
    : createAttachmentMessage({
        type: 'invoked_skills',
        skills,
      })
}

/**
 * Creates a plan_mode attachment if the user is currently in plan mode.
 * This ensures the model continues to operate in plan mode after compaction
 * (otherwise it would lose the plan mode instructions since those are
 * normally only injected on tool-use turns via getAttachmentMessages).
*/
export async function createPlanModeAttachmentIfNeeded(
  context: ToolUseContext,
  // Return type argument restored: the source text had a bare `Promise`.
): Promise<AttachmentMessage | null> {
  const appState = context.getAppState()
  if (appState.toolPermissionContext.mode !== 'plan') {
    return null
  }

  const planFilePath = getPlanFilePath(context.agentId)
  // NOTE(review): this treats anything other than `null` as "plan exists",
  // while createPlanAttachmentIfNeeded uses a truthiness check on getPlan().
  // Confirm getPlan never returns undefined or an empty string.
  const planExists = getPlan(context.agentId) !== null

  return createAttachmentMessage({
    type: 'plan_mode',
    reminderType: 'full',
    // Any agentId on the context marks this as a sub-agent.
    isSubAgent: !!context.agentId,
    planFilePath,
    planExists,
  })
}

/**
 * Creates attachments for async agents so the model knows about them after
 * compaction. Covers both agents still running in the background (so the model
 * doesn't spawn a duplicate) and agents that have finished but whose results
 * haven't been retrieved yet.
 *
 * Skipped: agents already retrieved, agents still 'pending', and the agent
 * that owns this context.
 *
 * @param context Supplies the app state (task table) and the current agentId.
 * @returns One `task_status` attachment message per reported agent.
 */
export async function createAsyncAgentAttachmentsIfNeeded(
  context: ToolUseContext,
  // Return type argument restored: the source text had a bare `Promise`.
): Promise<AttachmentMessage[]> {
  const appState = context.getAppState()
  const asyncAgents = Object.values(appState.tasks).filter(
    (task): task is LocalAgentTaskState => task.type === 'local_agent',
  )

  return asyncAgents.flatMap(agent => {
    if (
      agent.retrieved ||
      agent.status === 'pending' ||
      agent.agentId === context.agentId
    ) {
      return []
    }
    return [
      createAttachmentMessage({
        type: 'task_status',
        taskId: agent.agentId,
        taskType: 'local_agent',
        description: agent.description,
        status: agent.status,
        // Running agents report their progress summary; every other status
        // reports the error, if any.
        deltaSummary:
          agent.status === 'running'
            ? (agent.progress?.summary ?? null)
            : (agent.error ?? null),
        outputFilePath: getTaskOutputPath(agent.agentId),
      }),
    ]
  })
}

/**
 * Scan messages for Read tool_use blocks and collect their file_path inputs
 * (normalized via expandPath). Used to dedup post-compact file restoration
 * against what's already visible in the preserved tail.
 *
 * Skips Reads whose tool_result is a dedup stub — the stub points at an
 * earlier full Read that may have been compacted away, so we want
 * createPostCompactFileAttachments to re-inject the real content.
*/
function collectReadToolFilePaths(messages: Message[]): Set<string> {
  // Pass 1: ids of tool_use calls whose tool_result is the "file unchanged"
  // stub. Element type restored to string (the source text had a bare `Set`).
  const stubIds = new Set<string>()
  for (const message of messages) {
    if (message.type !== 'user' || !Array.isArray(message.message.content)) {
      continue
    }
    for (const block of message.message.content) {
      if (
        block.type === 'tool_result' &&
        typeof block.content === 'string' &&
        block.content.startsWith(FILE_UNCHANGED_STUB)
      ) {
        stubIds.add(block.tool_use_id)
      }
    }
  }

  // Pass 2: normalized file_path of every Read tool_use that was not
  // answered with a stub.
  const paths = new Set<string>()
  for (const message of messages) {
    if (
      message.type !== 'assistant' ||
      !Array.isArray(message.message.content)
    ) {
      continue
    }
    for (const block of message.message.content) {
      if (
        block.type !== 'tool_use' ||
        block.name !== FILE_READ_TOOL_NAME ||
        stubIds.has(block.id)
      ) {
        continue
      }
      // The tool input is validated structurally before file_path is read.
      const input = block.input
      if (
        input &&
        typeof input === 'object' &&
        'file_path' in input &&
        typeof input.file_path === 'string'
      ) {
        paths.add(expandPath(input.file_path))
      }
    }
  }
  return paths
}

// Appended by truncateToTokens whenever it cuts skill content short.
const SKILL_TRUNCATION_MARKER =
  '\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'

/**
 * Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation
 * uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 4
 * minus the marker so the result stays within budget. Marker tells the model it
 * can Read the full file if needed.
/* */
function truncateToTokens(content: string, maxTokens: number): string {
  if (roughTokenCountEstimation(content) <= maxTokens) {
    return content
  }
  // Clamp at zero. When maxTokens * 4 is smaller than the marker, the raw
  // budget is negative, and String.prototype.slice treats a negative end as
  // an offset from the END of the string — which would keep almost all of the
  // content instead of none of it.
  const charBudget = Math.max(
    0,
    maxTokens * 4 - SKILL_TRUNCATION_MARKER.length,
  )
  return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER
}

/**
 * Decides whether a file must NOT be re-attached after compaction.
 *
 * A file is excluded when its expanded path equals the plan file path for
 * the given agent, or equals one of the memory file paths obtained from
 * getMemoryPath for each MEMORY_TYPE_VALUES entry.
 *
 * @param filename Path of the candidate file (normalized via expandPath).
 * @param agentId Agent whose plan file path is compared against.
 * @returns true when the file should be skipped, false otherwise.
 */
function shouldExcludeFromPostCompactRestore(
  filename: string,
  agentId?: AgentId,
): boolean {
  const normalizedFilename = expandPath(filename)

  // Exclude plan files
  try {
    const planFilePath = expandPath(getPlanFilePath(agentId))
    if (normalizedFilename === planFilePath) {
      return true
    }
  } catch {
    // If we can't get plan file path, continue with other checks
  }

  // Exclude all types of claude.md files
  // TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency
  // and to also match child directory memory files (.claude/rules/*.md, etc.)
  try {
    const normalizedMemoryPaths = new Set(
      MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),
    )
    if (normalizedMemoryPaths.has(normalizedFilename)) {
      return true
    }
  } catch {
    // If we can't get memory paths, continue
  }

  return false
}