1706 lines
59 KiB
TypeScript
1706 lines
59 KiB
TypeScript
import { feature } from 'bun:bundle'
|
|
import type { UUID } from 'crypto'
|
|
import uniqBy from 'lodash-es/uniqBy.js'
|
|
|
|
/* eslint-disable @typescript-eslint/no-require-imports */
|
|
const sessionTranscriptModule = feature('KAIROS')
|
|
? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js'))
|
|
: null
|
|
|
|
import { APIUserAbortError } from '@anthropic-ai/sdk'
|
|
import { markPostCompaction } from 'src/bootstrap/state.js'
|
|
import { getInvokedSkillsForAgent } from '../../bootstrap/state.js'
|
|
import type { QuerySource } from '../../constants/querySource.js'
|
|
import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
|
|
import type { Tool, ToolUseContext } from '../../Tool.js'
|
|
import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js'
|
|
import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js'
|
|
import {
|
|
FILE_READ_TOOL_NAME,
|
|
FILE_UNCHANGED_STUB,
|
|
} from '../../tools/FileReadTool/prompt.js'
|
|
import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js'
|
|
import type { AgentId } from '../../types/ids.js'
|
|
import type {
|
|
AssistantMessage,
|
|
AttachmentMessage,
|
|
HookResultMessage,
|
|
Message,
|
|
PartialCompactDirection,
|
|
SystemCompactBoundaryMessage,
|
|
SystemMessage,
|
|
UserMessage,
|
|
} from '../../types/message.js'
|
|
import {
|
|
createAttachmentMessage,
|
|
generateFileAttachment,
|
|
getAgentListingDeltaAttachment,
|
|
getDeferredToolsDeltaAttachment,
|
|
getMcpInstructionsDeltaAttachment,
|
|
} from '../../utils/attachments.js'
|
|
import { getMemoryPath } from '../../utils/config.js'
|
|
import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js'
|
|
import {
|
|
analyzeContext,
|
|
tokenStatsToStatsigMetrics,
|
|
} from '../../utils/contextAnalysis.js'
|
|
import { logForDebugging } from '../../utils/debug.js'
|
|
import { hasExactErrorMessage } from '../../utils/errors.js'
|
|
import { cacheToObject } from '../../utils/fileStateCache.js'
|
|
import {
|
|
type CacheSafeParams,
|
|
runForkedAgent,
|
|
} from '../../utils/forkedAgent.js'
|
|
import {
|
|
executePostCompactHooks,
|
|
executePreCompactHooks,
|
|
} from '../../utils/hooks.js'
|
|
import { logError } from '../../utils/log.js'
|
|
import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js'
|
|
import {
|
|
createCompactBoundaryMessage,
|
|
createUserMessage,
|
|
getAssistantMessageText,
|
|
getLastAssistantMessage,
|
|
getMessagesAfterCompactBoundary,
|
|
isCompactBoundaryMessage,
|
|
normalizeMessagesForAPI,
|
|
} from '../../utils/messages.js'
|
|
import { expandPath } from '../../utils/path.js'
|
|
import { getPlan, getPlanFilePath } from '../../utils/plans.js'
|
|
import {
|
|
isSessionActivityTrackingActive,
|
|
sendSessionActivitySignal,
|
|
} from '../../utils/sessionActivity.js'
|
|
import { processSessionStartHooks } from '../../utils/sessionStart.js'
|
|
import {
|
|
getTranscriptPath,
|
|
reAppendSessionMetadata,
|
|
} from '../../utils/sessionStorage.js'
|
|
import { sleep } from '../../utils/sleep.js'
|
|
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
/* eslint-enable @typescript-eslint/no-require-imports */
|
|
import { asSystemPrompt } from '../../utils/systemPromptType.js'
|
|
import { getTaskOutputPath } from '../../utils/task/diskOutput.js'
|
|
import {
|
|
getTokenUsage,
|
|
tokenCountFromLastAPIResponse,
|
|
tokenCountWithEstimation,
|
|
} from '../../utils/tokens.js'
|
|
import {
|
|
extractDiscoveredToolNames,
|
|
isToolSearchEnabled,
|
|
} from '../../utils/toolSearch.js'
|
|
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
|
|
import {
|
|
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
logEvent,
|
|
} from '../analytics/index.js'
|
|
import {
|
|
getMaxOutputTokensForModel,
|
|
queryModelWithStreaming,
|
|
} from '../api/claude.js'
|
|
import {
|
|
getPromptTooLongTokenGap,
|
|
PROMPT_TOO_LONG_ERROR_MESSAGE,
|
|
startsWithApiErrorPrefix,
|
|
} from '../api/errors.js'
|
|
import { notifyCompaction } from '../api/promptCacheBreakDetection.js'
|
|
import { getRetryDelay } from '../api/withRetry.js'
|
|
import { logPermissionContextForAnts } from '../internalLogging.js'
|
|
import {
|
|
roughTokenCountEstimation,
|
|
roughTokenCountEstimationForMessages,
|
|
} from '../tokenEstimation.js'
|
|
import { groupMessagesByApiRound } from './grouping.js'
|
|
import {
|
|
getCompactPrompt,
|
|
getCompactUserSummaryMessage,
|
|
getPartialCompactPrompt,
|
|
} from './prompt.js'
|
|
|
|
export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5
|
|
export const POST_COMPACT_TOKEN_BUDGET = 50_000
|
|
export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000
|
|
// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected
|
|
// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats
|
|
// dropping — instructions at the top of a skill file are usually the critical
|
|
// part. Budget sized to hold ~5 skills at the per-skill cap.
|
|
export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000
|
|
export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000
|
|
const MAX_COMPACT_STREAMING_RETRIES = 2
|
|
|
|
/**
|
|
* Strip image blocks from user messages before sending for compaction.
|
|
* Images are not needed for generating a conversation summary and can
|
|
* cause the compaction API call itself to hit the prompt-too-long limit,
|
|
* especially in CCD sessions where users frequently attach images.
|
|
* Replaces image blocks with a text marker so the summary still notes
|
|
* that an image was shared.
|
|
*
|
|
* Note: Only user messages contain images (either directly attached or within
|
|
* tool_result content from tools). Assistant messages contain text, tool_use,
|
|
* and thinking blocks but not images.
|
|
*/
|
|
export function stripImagesFromMessages(messages: Message[]): Message[] {
|
|
return messages.map(message => {
|
|
if (message.type !== 'user') {
|
|
return message
|
|
}
|
|
|
|
const content = message.message.content
|
|
if (!Array.isArray(content)) {
|
|
return message
|
|
}
|
|
|
|
let hasMediaBlock = false
|
|
const newContent = content.flatMap(block => {
|
|
if (block.type === 'image') {
|
|
hasMediaBlock = true
|
|
return [{ type: 'text' as const, text: '[image]' }]
|
|
}
|
|
if (block.type === 'document') {
|
|
hasMediaBlock = true
|
|
return [{ type: 'text' as const, text: '[document]' }]
|
|
}
|
|
// Also strip images/documents nested inside tool_result content arrays
|
|
if (block.type === 'tool_result' && Array.isArray(block.content)) {
|
|
let toolHasMedia = false
|
|
const newToolContent = block.content.map(item => {
|
|
if (item.type === 'image') {
|
|
toolHasMedia = true
|
|
return { type: 'text' as const, text: '[image]' }
|
|
}
|
|
if (item.type === 'document') {
|
|
toolHasMedia = true
|
|
return { type: 'text' as const, text: '[document]' }
|
|
}
|
|
return item
|
|
})
|
|
if (toolHasMedia) {
|
|
hasMediaBlock = true
|
|
return [{ ...block, content: newToolContent }]
|
|
}
|
|
}
|
|
return [block]
|
|
})
|
|
|
|
if (!hasMediaBlock) {
|
|
return message
|
|
}
|
|
|
|
return {
|
|
...message,
|
|
message: {
|
|
...message.message,
|
|
content: newContent,
|
|
},
|
|
} as typeof message
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Strip attachment types that are re-injected post-compaction anyway.
|
|
* skill_discovery/skill_listing are re-surfaced by resetSentSkillNames()
|
|
* + the next turn's discovery signal, so feeding them to the summarizer
|
|
* wastes tokens and pollutes the summary with stale skill suggestions.
|
|
*
|
|
* No-op when EXPERIMENTAL_SKILL_SEARCH is off (the attachment types
|
|
* don't exist on external builds).
|
|
*/
|
|
export function stripReinjectedAttachments(messages: Message[]): Message[] {
|
|
if (feature('EXPERIMENTAL_SKILL_SEARCH')) {
|
|
return messages.filter(
|
|
m =>
|
|
!(
|
|
m.type === 'attachment' &&
|
|
(m.attachment.type === 'skill_discovery' ||
|
|
m.attachment.type === 'skill_listing')
|
|
),
|
|
)
|
|
}
|
|
return messages
|
|
}
|
|
|
|
export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES =
|
|
'Not enough messages to compact.'
|
|
const MAX_PTL_RETRIES = 3
|
|
const PTL_RETRY_MARKER = '[earlier conversation truncated for compaction retry]'
|
|
|
|
/**
|
|
* Drops the oldest API-round groups from messages until tokenGap is covered.
|
|
* Falls back to dropping 20% of groups when the gap is unparseable (some
|
|
* Vertex/Bedrock error formats). Returns null when nothing can be dropped
|
|
* without leaving an empty summarize set.
|
|
*
|
|
* This is the last-resort escape hatch for CC-1180 — when the compact request
|
|
* itself hits prompt-too-long, the user is otherwise stuck. Dropping the
|
|
* oldest context is lossy but unblocks them. The reactive-compact path
|
|
* (compactMessages.ts) has the proper retry loop that peels from the tail;
|
|
* this helper is the dumb-but-safe fallback for the proactive/manual path
|
|
* that wasn't migrated in bfdb472f's unification.
|
|
*/
|
|
export function truncateHeadForPTLRetry(
|
|
messages: Message[],
|
|
ptlResponse: AssistantMessage,
|
|
): Message[] | null {
|
|
// Strip our own synthetic marker from a previous retry before grouping.
|
|
// Otherwise it becomes its own group 0 and the 20% fallback stalls
|
|
// (drops only the marker, re-adds it, zero progress on retry 2+).
|
|
const input =
|
|
messages[0]?.type === 'user' &&
|
|
messages[0].isMeta &&
|
|
messages[0].message.content === PTL_RETRY_MARKER
|
|
? messages.slice(1)
|
|
: messages
|
|
|
|
const groups = groupMessagesByApiRound(input)
|
|
if (groups.length < 2) return null
|
|
|
|
const tokenGap = getPromptTooLongTokenGap(ptlResponse)
|
|
let dropCount: number
|
|
if (tokenGap !== undefined) {
|
|
let acc = 0
|
|
dropCount = 0
|
|
for (const g of groups) {
|
|
acc += roughTokenCountEstimationForMessages(g)
|
|
dropCount++
|
|
if (acc >= tokenGap) break
|
|
}
|
|
} else {
|
|
dropCount = Math.max(1, Math.floor(groups.length * 0.2))
|
|
}
|
|
|
|
// Keep at least one group so there's something to summarize.
|
|
dropCount = Math.min(dropCount, groups.length - 1)
|
|
if (dropCount < 1) return null
|
|
|
|
const sliced = groups.slice(dropCount).flat()
|
|
// groupMessagesByApiRound puts the preamble in group 0 and starts every
|
|
// subsequent group with an assistant message. Dropping group 0 leaves an
|
|
// assistant-first sequence which the API rejects (first message must be
|
|
// role=user). Prepend a synthetic user marker — ensureToolResultPairing
|
|
// already handles any orphaned tool_results this creates.
|
|
if (sliced[0]?.type === 'assistant') {
|
|
return [
|
|
createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }),
|
|
...sliced,
|
|
]
|
|
}
|
|
return sliced
|
|
}
|
|
|
|
export const ERROR_MESSAGE_PROMPT_TOO_LONG =
|
|
'Conversation too long. Press esc twice to go up a few messages and try again.'
|
|
export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.'
|
|
export const ERROR_MESSAGE_INCOMPLETE_RESPONSE =
|
|
'Compaction interrupted · This may be due to network issues — please try again.'
|
|
|
|
export interface CompactionResult {
|
|
boundaryMarker: SystemMessage
|
|
summaryMessages: UserMessage[]
|
|
attachments: AttachmentMessage[]
|
|
hookResults: HookResultMessage[]
|
|
messagesToKeep?: Message[]
|
|
userDisplayMessage?: string
|
|
preCompactTokenCount?: number
|
|
postCompactTokenCount?: number
|
|
truePostCompactTokenCount?: number
|
|
compactionUsage?: ReturnType<typeof getTokenUsage>
|
|
}
|
|
|
|
/**
|
|
* Diagnosis context passed from autoCompactIfNeeded into compactConversation.
|
|
* Lets the tengu_compact event disambiguate same-chain loops (H2) from
|
|
* cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.
|
|
*/
|
|
export type RecompactionInfo = {
|
|
isRecompactionInChain: boolean
|
|
turnsSincePreviousCompact: number
|
|
previousCompactTurnId?: string
|
|
autoCompactThreshold: number
|
|
querySource?: QuerySource
|
|
}
|
|
|
|
/**
|
|
* Build the base post-compact messages array from a CompactionResult.
|
|
* This ensures consistent ordering across all compaction paths.
|
|
* Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults
|
|
*/
|
|
export function buildPostCompactMessages(result: CompactionResult): Message[] {
|
|
return [
|
|
result.boundaryMarker,
|
|
...result.summaryMessages,
|
|
...(result.messagesToKeep ?? []),
|
|
...result.attachments,
|
|
...result.hookResults,
|
|
]
|
|
}
|
|
|
|
/**
|
|
* Annotate a compact boundary with relink metadata for messagesToKeep.
|
|
* Preserved messages keep their original parentUuids on disk (dedup-skipped);
|
|
* the loader uses this to patch head→anchor and anchor's-other-children→tail.
|
|
*
|
|
* `anchorUuid` = what sits immediately before keep[0] in the desired chain:
|
|
* - suffix-preserving (reactive/session-memory): last summary message
|
|
* - prefix-preserving (partial compact): the boundary itself
|
|
*/
|
|
export function annotateBoundaryWithPreservedSegment(
|
|
boundary: SystemCompactBoundaryMessage,
|
|
anchorUuid: UUID,
|
|
messagesToKeep: readonly Message[] | undefined,
|
|
): SystemCompactBoundaryMessage {
|
|
const keep = messagesToKeep ?? []
|
|
if (keep.length === 0) return boundary
|
|
return {
|
|
...boundary,
|
|
compactMetadata: {
|
|
...boundary.compactMetadata,
|
|
preservedSegment: {
|
|
headUuid: keep[0]!.uuid,
|
|
anchorUuid,
|
|
tailUuid: keep.at(-1)!.uuid,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Merges user-supplied custom instructions with hook-provided instructions.
|
|
* User instructions come first; hook instructions are appended.
|
|
* Empty strings normalize to undefined.
|
|
*/
|
|
export function mergeHookInstructions(
|
|
userInstructions: string | undefined,
|
|
hookInstructions: string | undefined,
|
|
): string | undefined {
|
|
if (!hookInstructions) return userInstructions || undefined
|
|
if (!userInstructions) return hookInstructions
|
|
return `${userInstructions}\n\n${hookInstructions}`
|
|
}
|
|
|
|
/**
|
|
* Creates a compact version of a conversation by summarizing older messages
|
|
* and preserving recent conversation history.
|
|
*/
|
|
export async function compactConversation(
|
|
messages: Message[],
|
|
context: ToolUseContext,
|
|
cacheSafeParams: CacheSafeParams,
|
|
suppressFollowUpQuestions: boolean,
|
|
customInstructions?: string,
|
|
isAutoCompact: boolean = false,
|
|
recompactionInfo?: RecompactionInfo,
|
|
): Promise<CompactionResult> {
|
|
try {
|
|
if (messages.length === 0) {
|
|
throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
|
|
}
|
|
|
|
const preCompactTokenCount = tokenCountWithEstimation(messages)
|
|
|
|
const appState = context.getAppState()
|
|
void logPermissionContextForAnts(appState.toolPermissionContext, 'summary')
|
|
|
|
context.onCompactProgress?.({
|
|
type: 'hooks_start',
|
|
hookType: 'pre_compact',
|
|
})
|
|
|
|
// Execute PreCompact hooks
|
|
context.setSDKStatus?.('compacting')
|
|
const hookResult = await executePreCompactHooks(
|
|
{
|
|
trigger: isAutoCompact ? 'auto' : 'manual',
|
|
customInstructions: customInstructions ?? null,
|
|
},
|
|
context.abortController.signal,
|
|
)
|
|
customInstructions = mergeHookInstructions(
|
|
customInstructions,
|
|
hookResult.newCustomInstructions,
|
|
)
|
|
const userDisplayMessage = hookResult.userDisplayMessage
|
|
|
|
// Show requesting mode with up arrow and custom message
|
|
context.setStreamMode?.('requesting')
|
|
context.setResponseLength?.(() => 0)
|
|
context.onCompactProgress?.({ type: 'compact_start' })
|
|
|
|
// 3P default: true — forked-agent path reuses main conversation's prompt cache.
|
|
// Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of
|
|
// fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK)
|
|
// with cold GB cache and 3P providers where GB is disabled. GB gate kept as kill-switch.
|
|
const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
|
|
'tengu_compact_cache_prefix',
|
|
true,
|
|
)
|
|
|
|
const compactPrompt = getCompactPrompt(customInstructions)
|
|
const summaryRequest = createUserMessage({
|
|
content: compactPrompt,
|
|
})
|
|
|
|
let messagesToSummarize = messages
|
|
let retryCacheSafeParams = cacheSafeParams
|
|
let summaryResponse: AssistantMessage
|
|
let summary: string | null
|
|
let ptlAttempts = 0
|
|
for (;;) {
|
|
summaryResponse = await streamCompactSummary({
|
|
messages: messagesToSummarize,
|
|
summaryRequest,
|
|
appState,
|
|
context,
|
|
preCompactTokenCount,
|
|
cacheSafeParams: retryCacheSafeParams,
|
|
})
|
|
summary = getAssistantMessageText(summaryResponse)
|
|
if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break
|
|
|
|
// CC-1180: compact request itself hit prompt-too-long. Truncate the
|
|
// oldest API-round groups and retry rather than leaving the user stuck.
|
|
ptlAttempts++
|
|
const truncated =
|
|
ptlAttempts <= MAX_PTL_RETRIES
|
|
? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse)
|
|
: null
|
|
if (!truncated) {
|
|
logEvent('tengu_compact_failed', {
|
|
reason:
|
|
'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
preCompactTokenCount,
|
|
promptCacheSharingEnabled,
|
|
ptlAttempts,
|
|
})
|
|
throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)
|
|
}
|
|
logEvent('tengu_compact_ptl_retry', {
|
|
attempt: ptlAttempts,
|
|
droppedMessages: messagesToSummarize.length - truncated.length,
|
|
remainingMessages: truncated.length,
|
|
})
|
|
messagesToSummarize = truncated
|
|
// The forked-agent path reads from cacheSafeParams.forkContextMessages,
|
|
// not the messages param — thread the truncated set through both paths.
|
|
retryCacheSafeParams = {
|
|
...retryCacheSafeParams,
|
|
forkContextMessages: truncated,
|
|
}
|
|
}
|
|
|
|
if (!summary) {
|
|
logForDebugging(
|
|
`Compact failed: no summary text in response. Response: ${jsonStringify(summaryResponse)}`,
|
|
{ level: 'error' },
|
|
)
|
|
logEvent('tengu_compact_failed', {
|
|
reason:
|
|
'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
preCompactTokenCount,
|
|
promptCacheSharingEnabled,
|
|
})
|
|
throw new Error(
|
|
`Failed to generate conversation summary - response did not contain valid text content`,
|
|
)
|
|
} else if (startsWithApiErrorPrefix(summary)) {
|
|
logEvent('tengu_compact_failed', {
|
|
reason:
|
|
'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
preCompactTokenCount,
|
|
promptCacheSharingEnabled,
|
|
})
|
|
throw new Error(summary)
|
|
}
|
|
|
|
// Store the current file state before clearing
|
|
const preCompactReadFileState = cacheToObject(context.readFileState)
|
|
|
|
// Clear the cache
|
|
context.readFileState.clear()
|
|
context.loadedNestedMemoryPaths?.clear()
|
|
|
|
// Intentionally NOT resetting sentSkillNames: re-injecting the full
|
|
// skill_listing (~4K tokens) post-compact is pure cache_creation with
|
|
// marginal benefit. The model still has SkillTool in its schema and
|
|
// invoked_skills attachment (below) preserves used-skill content. Ants
|
|
// with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the
|
|
// early-return in getSkillListingAttachments.
|
|
|
|
// Run async attachment generation in parallel
|
|
const [fileAttachments, asyncAgentAttachments] = await Promise.all([
|
|
createPostCompactFileAttachments(
|
|
preCompactReadFileState,
|
|
context,
|
|
POST_COMPACT_MAX_FILES_TO_RESTORE,
|
|
),
|
|
createAsyncAgentAttachmentsIfNeeded(context),
|
|
])
|
|
|
|
const postCompactFileAttachments: AttachmentMessage[] = [
|
|
...fileAttachments,
|
|
...asyncAgentAttachments,
|
|
]
|
|
const planAttachment = createPlanAttachmentIfNeeded(context.agentId)
|
|
if (planAttachment) {
|
|
postCompactFileAttachments.push(planAttachment)
|
|
}
|
|
|
|
// Add plan mode instructions if currently in plan mode, so the model
|
|
// continues operating in plan mode after compaction
|
|
const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)
|
|
if (planModeAttachment) {
|
|
postCompactFileAttachments.push(planModeAttachment)
|
|
}
|
|
|
|
// Add skill attachment if skills were invoked in this session
|
|
const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)
|
|
if (skillAttachment) {
|
|
postCompactFileAttachments.push(skillAttachment)
|
|
}
|
|
|
|
// Compaction ate prior delta attachments. Re-announce from the current
|
|
// state so the model has tool/instruction context on the first
|
|
// post-compact turn. Empty message history → diff against nothing →
|
|
// announces the full set.
|
|
for (const att of getDeferredToolsDeltaAttachment(
|
|
context.options.tools,
|
|
context.options.mainLoopModel,
|
|
[],
|
|
{ callSite: 'compact_full' },
|
|
)) {
|
|
postCompactFileAttachments.push(createAttachmentMessage(att))
|
|
}
|
|
for (const att of getAgentListingDeltaAttachment(context, [])) {
|
|
postCompactFileAttachments.push(createAttachmentMessage(att))
|
|
}
|
|
for (const att of getMcpInstructionsDeltaAttachment(
|
|
context.options.mcpClients,
|
|
context.options.tools,
|
|
context.options.mainLoopModel,
|
|
[],
|
|
)) {
|
|
postCompactFileAttachments.push(createAttachmentMessage(att))
|
|
}
|
|
|
|
context.onCompactProgress?.({
|
|
type: 'hooks_start',
|
|
hookType: 'session_start',
|
|
})
|
|
// Execute SessionStart hooks after successful compaction
|
|
const hookMessages = await processSessionStartHooks('compact', {
|
|
model: context.options.mainLoopModel,
|
|
})
|
|
|
|
// Create the compact boundary marker and summary messages before the
|
|
// event so we can compute the true resulting-context size.
|
|
const boundaryMarker = createCompactBoundaryMessage(
|
|
isAutoCompact ? 'auto' : 'manual',
|
|
preCompactTokenCount ?? 0,
|
|
messages.at(-1)?.uuid,
|
|
)
|
|
// Carry loaded-tool state — the summary doesn't preserve tool_reference
|
|
// blocks, so the post-compact schema filter needs this to keep sending
|
|
// already-loaded deferred tool schemas to the API.
|
|
const preCompactDiscovered = extractDiscoveredToolNames(messages)
|
|
if (preCompactDiscovered.size > 0) {
|
|
boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
|
|
...preCompactDiscovered,
|
|
].sort()
|
|
}
|
|
|
|
const transcriptPath = getTranscriptPath()
|
|
const summaryMessages: UserMessage[] = [
|
|
createUserMessage({
|
|
content: getCompactUserSummaryMessage(
|
|
summary,
|
|
suppressFollowUpQuestions,
|
|
transcriptPath,
|
|
),
|
|
isCompactSummary: true,
|
|
isVisibleInTranscriptOnly: true,
|
|
}),
|
|
]
|
|
|
|
// Previously "postCompactTokenCount" — renamed because this is the
|
|
// compact API call's total usage (input_tokens ≈ preCompactTokenCount),
|
|
// NOT the size of the resulting context. Kept for event-field continuity.
|
|
const compactionCallTotalTokens = tokenCountFromLastAPIResponse([
|
|
summaryResponse,
|
|
])
|
|
|
|
// Message-payload estimate of the resulting context. The next iteration's
|
|
// shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools +
|
|
// userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true`
|
|
// is a strong signal; `false` may still retrigger when this is close to threshold.
|
|
const truePostCompactTokenCount = roughTokenCountEstimationForMessages([
|
|
boundaryMarker,
|
|
...summaryMessages,
|
|
...postCompactFileAttachments,
|
|
...hookMessages,
|
|
])
|
|
|
|
// Extract compaction API usage metrics
|
|
const compactionUsage = getTokenUsage(summaryResponse)
|
|
|
|
const querySourceForEvent =
|
|
recompactionInfo?.querySource ?? context.options.querySource ?? 'unknown'
|
|
|
|
logEvent('tengu_compact', {
|
|
preCompactTokenCount,
|
|
// Kept for continuity — semantically the compact API call's total usage
|
|
postCompactTokenCount: compactionCallTotalTokens,
|
|
truePostCompactTokenCount,
|
|
autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? -1,
|
|
willRetriggerNextTurn:
|
|
recompactionInfo !== undefined &&
|
|
truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold,
|
|
isAutoCompact,
|
|
querySource:
|
|
querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
queryChainId: (context.queryTracking?.chainId ??
|
|
'') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
queryDepth: context.queryTracking?.depth ?? -1,
|
|
isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false,
|
|
turnsSincePreviousCompact:
|
|
recompactionInfo?.turnsSincePreviousCompact ?? -1,
|
|
previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ??
|
|
'') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
compactionInputTokens: compactionUsage?.input_tokens,
|
|
compactionOutputTokens: compactionUsage?.output_tokens,
|
|
compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,
|
|
compactionCacheCreationTokens:
|
|
compactionUsage?.cache_creation_input_tokens ?? 0,
|
|
compactionTotalTokens: compactionUsage
|
|
? compactionUsage.input_tokens +
|
|
(compactionUsage.cache_creation_input_tokens ?? 0) +
|
|
(compactionUsage.cache_read_input_tokens ?? 0) +
|
|
compactionUsage.output_tokens
|
|
: 0,
|
|
promptCacheSharingEnabled,
|
|
// analyzeContext walks every content block (~11ms on a 4.5K-message
|
|
// session) purely for this telemetry breakdown. Computed here, past
|
|
// the compaction-API await, so the sync walk doesn't starve the
|
|
// render loop before compaction even starts. Same deferral pattern
|
|
// as reactiveCompact.ts.
|
|
...(() => {
|
|
try {
|
|
return tokenStatsToStatsigMetrics(analyzeContext(messages))
|
|
} catch (error) {
|
|
logError(error as Error)
|
|
return {}
|
|
}
|
|
})(),
|
|
})
|
|
|
|
// Reset cache read baseline so the post-compact drop isn't flagged as a break
|
|
if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
|
|
notifyCompaction(
|
|
context.options.querySource ?? 'compact',
|
|
context.agentId,
|
|
)
|
|
}
|
|
markPostCompaction()
|
|
|
|
// Re-append session metadata (custom title, tag) so it stays within
|
|
// the 16KB tail window that readLiteMetadata reads for --resume display.
|
|
// Without this, enough post-compaction messages push the metadata entry
|
|
// out of the window, causing --resume to show the auto-generated title
|
|
// instead of the user-set session name.
|
|
reAppendSessionMetadata()
|
|
|
|
// Write a reduced transcript segment for the pre-compaction messages
|
|
// (assistant mode only). Fire-and-forget — errors are logged internally.
|
|
if (feature('KAIROS')) {
|
|
void sessionTranscriptModule?.writeSessionTranscriptSegment(messages)
|
|
}
|
|
|
|
context.onCompactProgress?.({
|
|
type: 'hooks_start',
|
|
hookType: 'post_compact',
|
|
})
|
|
const postCompactHookResult = await executePostCompactHooks(
|
|
{
|
|
trigger: isAutoCompact ? 'auto' : 'manual',
|
|
compactSummary: summary,
|
|
},
|
|
context.abortController.signal,
|
|
)
|
|
|
|
const combinedUserDisplayMessage = [
|
|
userDisplayMessage,
|
|
postCompactHookResult.userDisplayMessage,
|
|
]
|
|
.filter(Boolean)
|
|
.join('\n')
|
|
|
|
return {
|
|
boundaryMarker,
|
|
summaryMessages,
|
|
attachments: postCompactFileAttachments,
|
|
hookResults: hookMessages,
|
|
userDisplayMessage: combinedUserDisplayMessage || undefined,
|
|
preCompactTokenCount,
|
|
postCompactTokenCount: compactionCallTotalTokens,
|
|
truePostCompactTokenCount,
|
|
compactionUsage,
|
|
}
|
|
} catch (error) {
|
|
// Only show the error notification for manual /compact.
|
|
// Auto-compact failures are retried on the next turn and the
|
|
// notification is confusing when compaction eventually succeeds.
|
|
if (!isAutoCompact) {
|
|
addErrorNotificationIfNeeded(error, context)
|
|
}
|
|
throw error
|
|
} finally {
|
|
context.setStreamMode?.('requesting')
|
|
context.setResponseLength?.(() => 0)
|
|
context.onCompactProgress?.({ type: 'compact_end' })
|
|
context.setSDKStatus?.(null)
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Performs a partial compaction around the selected message index.
|
|
* Direction 'from': summarizes messages after the index, keeps earlier ones.
|
|
* Prompt cache for kept (earlier) messages is preserved.
|
|
* Direction 'up_to': summarizes messages before the index, keeps later ones.
|
|
* Prompt cache is invalidated since the summary precedes the kept messages.
|
|
*/
|
|
export async function partialCompactConversation(
|
|
allMessages: Message[],
|
|
pivotIndex: number,
|
|
context: ToolUseContext,
|
|
cacheSafeParams: CacheSafeParams,
|
|
userFeedback?: string,
|
|
direction: PartialCompactDirection = 'from',
|
|
): Promise<CompactionResult> {
|
|
try {
|
|
const messagesToSummarize =
|
|
direction === 'up_to'
|
|
? allMessages.slice(0, pivotIndex)
|
|
: allMessages.slice(pivotIndex)
|
|
// 'up_to' must strip old compact boundaries/summaries: for 'up_to',
|
|
// summary_B sits BEFORE kept, so a stale boundary_A in kept wins
|
|
// findLastCompactBoundaryIndex's backward scan and drops summary_B.
|
|
// 'from' keeps them: summary_B sits AFTER kept (backward scan still
|
|
// works), and removing an old summary would lose its covered history.
|
|
const messagesToKeep =
|
|
direction === 'up_to'
|
|
? allMessages
|
|
.slice(pivotIndex)
|
|
.filter(
|
|
m =>
|
|
m.type !== 'progress' &&
|
|
!isCompactBoundaryMessage(m) &&
|
|
!(m.type === 'user' && m.isCompactSummary),
|
|
)
|
|
: allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress')
|
|
|
|
if (messagesToSummarize.length === 0) {
|
|
throw new Error(
|
|
direction === 'up_to'
|
|
? 'Nothing to summarize before the selected message.'
|
|
: 'Nothing to summarize after the selected message.',
|
|
)
|
|
}
|
|
|
|
const preCompactTokenCount = tokenCountWithEstimation(allMessages)
|
|
|
|
context.onCompactProgress?.({
|
|
type: 'hooks_start',
|
|
hookType: 'pre_compact',
|
|
})
|
|
|
|
context.setSDKStatus?.('compacting')
|
|
const hookResult = await executePreCompactHooks(
|
|
{
|
|
trigger: 'manual',
|
|
customInstructions: null,
|
|
},
|
|
context.abortController.signal,
|
|
)
|
|
|
|
// Merge hook instructions with user feedback
|
|
let customInstructions: string | undefined
|
|
if (hookResult.newCustomInstructions && userFeedback) {
|
|
customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}`
|
|
} else if (hookResult.newCustomInstructions) {
|
|
customInstructions = hookResult.newCustomInstructions
|
|
} else if (userFeedback) {
|
|
customInstructions = `User context: ${userFeedback}`
|
|
}
|
|
|
|
context.setStreamMode?.('requesting')
|
|
context.setResponseLength?.(() => 0)
|
|
context.onCompactProgress?.({ type: 'compact_start' })
|
|
|
|
const compactPrompt = getPartialCompactPrompt(customInstructions, direction)
|
|
const summaryRequest = createUserMessage({
|
|
content: compactPrompt,
|
|
})
|
|
|
|
const failureMetadata = {
|
|
preCompactTokenCount,
|
|
direction:
|
|
direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
messagesSummarized: messagesToSummarize.length,
|
|
}
|
|
|
|
// 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache).
|
|
// PTL retry breaks the cache prefix but unblocks the user (CC-1180).
|
|
let apiMessages = direction === 'up_to' ? messagesToSummarize : allMessages
|
|
let retryCacheSafeParams =
|
|
direction === 'up_to'
|
|
? { ...cacheSafeParams, forkContextMessages: messagesToSummarize }
|
|
: cacheSafeParams
|
|
let summaryResponse: AssistantMessage
|
|
let summary: string | null
|
|
let ptlAttempts = 0
|
|
for (;;) {
|
|
summaryResponse = await streamCompactSummary({
|
|
messages: apiMessages,
|
|
summaryRequest,
|
|
appState: context.getAppState(),
|
|
context,
|
|
preCompactTokenCount,
|
|
cacheSafeParams: retryCacheSafeParams,
|
|
})
|
|
summary = getAssistantMessageText(summaryResponse)
|
|
if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break
|
|
|
|
ptlAttempts++
|
|
const truncated =
|
|
ptlAttempts <= MAX_PTL_RETRIES
|
|
? truncateHeadForPTLRetry(apiMessages, summaryResponse)
|
|
: null
|
|
if (!truncated) {
|
|
logEvent('tengu_partial_compact_failed', {
|
|
reason:
|
|
'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
...failureMetadata,
|
|
ptlAttempts,
|
|
})
|
|
throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)
|
|
}
|
|
logEvent('tengu_compact_ptl_retry', {
|
|
attempt: ptlAttempts,
|
|
droppedMessages: apiMessages.length - truncated.length,
|
|
remainingMessages: truncated.length,
|
|
path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
apiMessages = truncated
|
|
retryCacheSafeParams = {
|
|
...retryCacheSafeParams,
|
|
forkContextMessages: truncated,
|
|
}
|
|
}
|
|
if (!summary) {
|
|
logEvent('tengu_partial_compact_failed', {
|
|
reason:
|
|
'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
...failureMetadata,
|
|
})
|
|
throw new Error(
|
|
'Failed to generate conversation summary - response did not contain valid text content',
|
|
)
|
|
} else if (startsWithApiErrorPrefix(summary)) {
|
|
logEvent('tengu_partial_compact_failed', {
|
|
reason:
|
|
'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
...failureMetadata,
|
|
})
|
|
throw new Error(summary)
|
|
}
|
|
|
|
// Store the current file state before clearing
|
|
const preCompactReadFileState = cacheToObject(context.readFileState)
|
|
context.readFileState.clear()
|
|
context.loadedNestedMemoryPaths?.clear()
|
|
// Intentionally NOT resetting sentSkillNames — see compactConversation()
|
|
// for rationale (~4K tokens saved per compact event).
|
|
|
|
const [fileAttachments, asyncAgentAttachments] = await Promise.all([
|
|
createPostCompactFileAttachments(
|
|
preCompactReadFileState,
|
|
context,
|
|
POST_COMPACT_MAX_FILES_TO_RESTORE,
|
|
messagesToKeep,
|
|
),
|
|
createAsyncAgentAttachmentsIfNeeded(context),
|
|
])
|
|
|
|
const postCompactFileAttachments: AttachmentMessage[] = [
|
|
...fileAttachments,
|
|
...asyncAgentAttachments,
|
|
]
|
|
const planAttachment = createPlanAttachmentIfNeeded(context.agentId)
|
|
if (planAttachment) {
|
|
postCompactFileAttachments.push(planAttachment)
|
|
}
|
|
|
|
// Add plan mode instructions if currently in plan mode
|
|
const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)
|
|
if (planModeAttachment) {
|
|
postCompactFileAttachments.push(planModeAttachment)
|
|
}
|
|
|
|
const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)
|
|
if (skillAttachment) {
|
|
postCompactFileAttachments.push(skillAttachment)
|
|
}
|
|
|
|
// Re-announce only what was in the summarized portion — messagesToKeep
|
|
// is scanned, so anything already announced there is skipped.
|
|
for (const att of getDeferredToolsDeltaAttachment(
|
|
context.options.tools,
|
|
context.options.mainLoopModel,
|
|
messagesToKeep,
|
|
{ callSite: 'compact_partial' },
|
|
)) {
|
|
postCompactFileAttachments.push(createAttachmentMessage(att))
|
|
}
|
|
for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) {
|
|
postCompactFileAttachments.push(createAttachmentMessage(att))
|
|
}
|
|
for (const att of getMcpInstructionsDeltaAttachment(
|
|
context.options.mcpClients,
|
|
context.options.tools,
|
|
context.options.mainLoopModel,
|
|
messagesToKeep,
|
|
)) {
|
|
postCompactFileAttachments.push(createAttachmentMessage(att))
|
|
}
|
|
|
|
context.onCompactProgress?.({
|
|
type: 'hooks_start',
|
|
hookType: 'session_start',
|
|
})
|
|
const hookMessages = await processSessionStartHooks('compact', {
|
|
model: context.options.mainLoopModel,
|
|
})
|
|
|
|
const postCompactTokenCount = tokenCountFromLastAPIResponse([
|
|
summaryResponse,
|
|
])
|
|
const compactionUsage = getTokenUsage(summaryResponse)
|
|
|
|
logEvent('tengu_partial_compact', {
|
|
preCompactTokenCount,
|
|
postCompactTokenCount,
|
|
messagesKept: messagesToKeep.length,
|
|
messagesSummarized: messagesToSummarize.length,
|
|
direction:
|
|
direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
hasUserFeedback: !!userFeedback,
|
|
trigger:
|
|
'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
compactionInputTokens: compactionUsage?.input_tokens,
|
|
compactionOutputTokens: compactionUsage?.output_tokens,
|
|
compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,
|
|
compactionCacheCreationTokens:
|
|
compactionUsage?.cache_creation_input_tokens ?? 0,
|
|
})
|
|
|
|
// Progress messages aren't loggable, so forkSessionImpl would null out
|
|
// a logicalParentUuid pointing at one. Both directions skip them.
|
|
const lastPreCompactUuid =
|
|
direction === 'up_to'
|
|
? allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress')
|
|
?.uuid
|
|
: messagesToKeep.at(-1)?.uuid
|
|
const boundaryMarker = createCompactBoundaryMessage(
|
|
'manual',
|
|
preCompactTokenCount ?? 0,
|
|
lastPreCompactUuid,
|
|
userFeedback,
|
|
messagesToSummarize.length,
|
|
)
|
|
// allMessages not just messagesToSummarize — set union is idempotent,
|
|
// simpler than tracking which half each tool lived in.
|
|
const preCompactDiscovered = extractDiscoveredToolNames(allMessages)
|
|
if (preCompactDiscovered.size > 0) {
|
|
boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
|
|
...preCompactDiscovered,
|
|
].sort()
|
|
}
|
|
|
|
const transcriptPath = getTranscriptPath()
|
|
const summaryMessages: UserMessage[] = [
|
|
createUserMessage({
|
|
content: getCompactUserSummaryMessage(summary, false, transcriptPath),
|
|
isCompactSummary: true,
|
|
...(messagesToKeep.length > 0
|
|
? {
|
|
summarizeMetadata: {
|
|
messagesSummarized: messagesToSummarize.length,
|
|
userContext: userFeedback,
|
|
direction,
|
|
},
|
|
}
|
|
: { isVisibleInTranscriptOnly: true as const }),
|
|
}),
|
|
]
|
|
|
|
if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
|
|
notifyCompaction(
|
|
context.options.querySource ?? 'compact',
|
|
context.agentId,
|
|
)
|
|
}
|
|
markPostCompaction()
|
|
|
|
// Re-append session metadata (custom title, tag) so it stays within
|
|
// the 16KB tail window that readLiteMetadata reads for --resume display.
|
|
reAppendSessionMetadata()
|
|
|
|
if (feature('KAIROS')) {
|
|
void sessionTranscriptModule?.writeSessionTranscriptSegment(
|
|
messagesToSummarize,
|
|
)
|
|
}
|
|
|
|
context.onCompactProgress?.({
|
|
type: 'hooks_start',
|
|
hookType: 'post_compact',
|
|
})
|
|
const postCompactHookResult = await executePostCompactHooks(
|
|
{
|
|
trigger: 'manual',
|
|
compactSummary: summary,
|
|
},
|
|
context.abortController.signal,
|
|
)
|
|
|
|
// 'from': prefix-preserving → boundary; 'up_to': suffix → last summary
|
|
const anchorUuid =
|
|
direction === 'up_to'
|
|
? (summaryMessages.at(-1)?.uuid ?? boundaryMarker.uuid)
|
|
: boundaryMarker.uuid
|
|
return {
|
|
boundaryMarker: annotateBoundaryWithPreservedSegment(
|
|
boundaryMarker,
|
|
anchorUuid,
|
|
messagesToKeep,
|
|
),
|
|
summaryMessages,
|
|
messagesToKeep,
|
|
attachments: postCompactFileAttachments,
|
|
hookResults: hookMessages,
|
|
userDisplayMessage: postCompactHookResult.userDisplayMessage,
|
|
preCompactTokenCount,
|
|
postCompactTokenCount,
|
|
compactionUsage,
|
|
}
|
|
} catch (error) {
|
|
addErrorNotificationIfNeeded(error, context)
|
|
throw error
|
|
} finally {
|
|
context.setStreamMode?.('requesting')
|
|
context.setResponseLength?.(() => 0)
|
|
context.onCompactProgress?.({ type: 'compact_end' })
|
|
context.setSDKStatus?.(null)
|
|
}
|
|
}
|
|
|
|
function addErrorNotificationIfNeeded(
|
|
error: unknown,
|
|
context: Pick<ToolUseContext, 'addNotification'>,
|
|
) {
|
|
if (
|
|
!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
|
|
!hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
|
|
) {
|
|
context.addNotification?.({
|
|
key: 'error-compacting-conversation',
|
|
text: 'Error compacting conversation',
|
|
priority: 'immediate',
|
|
color: 'error',
|
|
})
|
|
}
|
|
}
|
|
|
|
export function createCompactCanUseTool(): CanUseToolFn {
|
|
return async () => ({
|
|
behavior: 'deny' as const,
|
|
message: 'Tool use is not allowed during compaction',
|
|
decisionReason: {
|
|
type: 'other' as const,
|
|
reason: 'compaction agent should only produce text summary',
|
|
},
|
|
})
|
|
}
|
|
|
|
async function streamCompactSummary({
|
|
messages,
|
|
summaryRequest,
|
|
appState,
|
|
context,
|
|
preCompactTokenCount,
|
|
cacheSafeParams,
|
|
}: {
|
|
messages: Message[]
|
|
summaryRequest: UserMessage
|
|
appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
|
|
context: ToolUseContext
|
|
preCompactTokenCount: number
|
|
cacheSafeParams: CacheSafeParams
|
|
}): Promise<AssistantMessage> {
|
|
// When prompt cache sharing is enabled, use forked agent to reuse the
|
|
// main conversation's cached prefix (system prompt, tools, context messages).
|
|
// Falls back to regular streaming path on failure.
|
|
// 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
|
|
const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
|
|
'tengu_compact_cache_prefix',
|
|
true,
|
|
)
|
|
// Send keep-alive signals during compaction to prevent remote session
|
|
// WebSocket idle timeouts from dropping bridge connections. Compaction
|
|
// API calls can take 5-10+ seconds, during which no other messages
|
|
// flow through the transport — without keep-alives, the server may
|
|
// close the WebSocket for inactivity.
|
|
// Two signals: (1) PUT /worker heartbeat via sessionActivity, and
|
|
// (2) re-emit 'compacting' status so the SDK event stream stays active
|
|
// and the server doesn't consider the session stale.
|
|
const activityInterval = isSessionActivityTrackingActive()
|
|
? setInterval(
|
|
(statusSetter?: (status: 'compacting' | null) => void) => {
|
|
sendSessionActivitySignal()
|
|
statusSetter?.('compacting')
|
|
},
|
|
30_000,
|
|
context.setSDKStatus,
|
|
)
|
|
: undefined
|
|
|
|
try {
|
|
if (promptCacheSharingEnabled) {
|
|
try {
|
|
// DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
|
|
// prompt cache by sending identical cache-key params (system, tools, model,
|
|
// messages prefix, thinking config). Setting maxOutputTokens would clamp
|
|
// budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
|
|
// creating a thinking config mismatch that invalidates the cache.
|
|
// The streaming fallback path (below) can safely set maxOutputTokensOverride
|
|
// since it doesn't share cache with the main thread.
|
|
const result = await runForkedAgent({
|
|
promptMessages: [summaryRequest],
|
|
cacheSafeParams,
|
|
canUseTool: createCompactCanUseTool(),
|
|
querySource: 'compact',
|
|
forkLabel: 'compact',
|
|
maxTurns: 1,
|
|
skipCacheWrite: true,
|
|
// Pass the compact context's abortController so user Esc aborts the
|
|
// fork — same signal the streaming fallback uses at
|
|
// `signal: context.abortController.signal` below.
|
|
overrides: { abortController: context.abortController },
|
|
})
|
|
const assistantMsg = getLastAssistantMessage(result.messages)
|
|
const assistantText = assistantMsg
|
|
? getAssistantMessageText(assistantMsg)
|
|
: null
|
|
// Guard isApiErrorMessage: query() catches API errors (including
|
|
// APIUserAbortError on ESC) and yields them as synthetic assistant
|
|
// messages. Without this check, an aborted compact "succeeds" with
|
|
// "Request was aborted." as the summary — the text doesn't start with
|
|
// "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
|
|
if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
|
|
// Skip success logging for PTL error text — it's returned so the
|
|
// caller's retry loop catches it, but it's not a successful summary.
|
|
if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
|
|
logEvent('tengu_compact_cache_sharing_success', {
|
|
preCompactTokenCount,
|
|
outputTokens: result.totalUsage.output_tokens,
|
|
cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
|
|
cacheCreationInputTokens:
|
|
result.totalUsage.cache_creation_input_tokens,
|
|
cacheHitRate:
|
|
result.totalUsage.cache_read_input_tokens > 0
|
|
? result.totalUsage.cache_read_input_tokens /
|
|
(result.totalUsage.cache_read_input_tokens +
|
|
result.totalUsage.cache_creation_input_tokens +
|
|
result.totalUsage.input_tokens)
|
|
: 0,
|
|
})
|
|
}
|
|
return assistantMsg
|
|
}
|
|
logForDebugging(
|
|
`Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
|
|
{ level: 'warn' },
|
|
)
|
|
logEvent('tengu_compact_cache_sharing_fallback', {
|
|
reason:
|
|
'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
preCompactTokenCount,
|
|
})
|
|
} catch (error) {
|
|
logError(error)
|
|
logEvent('tengu_compact_cache_sharing_fallback', {
|
|
reason:
|
|
'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
preCompactTokenCount,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Regular streaming path (fallback when cache sharing fails or is disabled)
|
|
const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
|
|
'tengu_compact_streaming_retry',
|
|
false,
|
|
)
|
|
const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1
|
|
|
|
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
// Reset state for retry
|
|
let hasStartedStreaming = false
|
|
let response: AssistantMessage | undefined
|
|
context.setResponseLength?.(() => 0)
|
|
|
|
// Check if tool search is enabled using the main loop's tools list.
|
|
// context.options.tools includes MCP tools merged via useMergedTools.
|
|
const useToolSearch = await isToolSearchEnabled(
|
|
context.options.mainLoopModel,
|
|
context.options.tools,
|
|
async () => appState.toolPermissionContext,
|
|
context.options.agentDefinitions.activeAgents,
|
|
'compact',
|
|
)
|
|
|
|
// When tool search is enabled, include ToolSearchTool and MCP tools. They get
|
|
// defer_loading: true and don't count against context - the API filters them out
|
|
// of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
|
|
// and api/public_api/messages/handler.py:324).
|
|
// Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
|
|
// get the permission-filtered set from useMergedTools — same source used for
|
|
// isToolSearchEnabled above and normalizeMessagesForAPI below.
|
|
// Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
|
|
const tools: Tool[] = useToolSearch
|
|
? uniqBy(
|
|
[
|
|
FileReadTool,
|
|
ToolSearchTool,
|
|
...context.options.tools.filter(t => t.isMcp),
|
|
],
|
|
'name',
|
|
)
|
|
: [FileReadTool]
|
|
|
|
const streamingGen = queryModelWithStreaming({
|
|
messages: normalizeMessagesForAPI(
|
|
stripImagesFromMessages(
|
|
stripReinjectedAttachments([
|
|
...getMessagesAfterCompactBoundary(messages),
|
|
summaryRequest,
|
|
]),
|
|
),
|
|
context.options.tools,
|
|
),
|
|
systemPrompt: asSystemPrompt([
|
|
'You are a helpful AI assistant tasked with summarizing conversations.',
|
|
]),
|
|
thinkingConfig: { type: 'disabled' as const },
|
|
tools,
|
|
signal: context.abortController.signal,
|
|
options: {
|
|
async getToolPermissionContext() {
|
|
const appState = context.getAppState()
|
|
return appState.toolPermissionContext
|
|
},
|
|
model: context.options.mainLoopModel,
|
|
toolChoice: undefined,
|
|
isNonInteractiveSession: context.options.isNonInteractiveSession,
|
|
hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
|
|
maxOutputTokensOverride: Math.min(
|
|
COMPACT_MAX_OUTPUT_TOKENS,
|
|
getMaxOutputTokensForModel(context.options.mainLoopModel),
|
|
),
|
|
querySource: 'compact',
|
|
agents: context.options.agentDefinitions.activeAgents,
|
|
mcpTools: [],
|
|
effortValue: appState.effortValue,
|
|
},
|
|
})
|
|
const streamIter = streamingGen[Symbol.asyncIterator]()
|
|
let next = await streamIter.next()
|
|
|
|
while (!next.done) {
|
|
const event = next.value
|
|
|
|
if (
|
|
!hasStartedStreaming &&
|
|
event.type === 'stream_event' &&
|
|
event.event.type === 'content_block_start' &&
|
|
event.event.content_block.type === 'text'
|
|
) {
|
|
hasStartedStreaming = true
|
|
context.setStreamMode?.('responding')
|
|
}
|
|
|
|
if (
|
|
event.type === 'stream_event' &&
|
|
event.event.type === 'content_block_delta' &&
|
|
event.event.delta.type === 'text_delta'
|
|
) {
|
|
const charactersStreamed = event.event.delta.text.length
|
|
context.setResponseLength?.(length => length + charactersStreamed)
|
|
}
|
|
|
|
if (event.type === 'assistant') {
|
|
response = event
|
|
}
|
|
|
|
next = await streamIter.next()
|
|
}
|
|
|
|
if (response) {
|
|
return response
|
|
}
|
|
|
|
if (attempt < maxAttempts) {
|
|
logEvent('tengu_compact_streaming_retry', {
|
|
attempt,
|
|
preCompactTokenCount,
|
|
hasStartedStreaming,
|
|
})
|
|
await sleep(getRetryDelay(attempt), context.abortController.signal, {
|
|
abortError: () => new APIUserAbortError(),
|
|
})
|
|
continue
|
|
}
|
|
|
|
logForDebugging(
|
|
`Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
|
|
{ level: 'error' },
|
|
)
|
|
logEvent('tengu_compact_failed', {
|
|
reason:
|
|
'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
preCompactTokenCount,
|
|
hasStartedStreaming,
|
|
retryEnabled,
|
|
attempts: attempt,
|
|
promptCacheSharingEnabled,
|
|
})
|
|
throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
|
|
}
|
|
|
|
// This should never be reached due to the throw above, but TypeScript needs it
|
|
throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
|
|
} finally {
|
|
clearInterval(activityInterval)
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Creates attachment messages for recently accessed files to restore them after compaction.
|
|
* This prevents the model from having to re-read files that were recently accessed.
|
|
* Re-reads files using FileReadTool to get fresh content with proper validation.
|
|
* Files are selected based on recency, but constrained by both file count and token budget limits.
|
|
*
|
|
* Files already present as Read tool results in preservedMessages are skipped —
|
|
* re-injecting identical content the model can already see in the preserved tail
|
|
* is pure waste (up to 25K tok/compact). Mirrors the diff-against-preserved
|
|
* pattern that getDeferredToolsDeltaAttachment uses at the same call sites.
|
|
*
|
|
* @param readFileState The current file state tracking recently read files
|
|
* @param toolUseContext The tool use context for calling FileReadTool
|
|
* @param maxFiles Maximum number of files to restore (default: 5)
|
|
* @param preservedMessages Messages kept post-compact; Read results here are skipped
|
|
* @returns Array of attachment messages for the most recently accessed files that fit within token budget
|
|
*/
|
|
export async function createPostCompactFileAttachments(
|
|
readFileState: Record<string, { content: string; timestamp: number }>,
|
|
toolUseContext: ToolUseContext,
|
|
maxFiles: number,
|
|
preservedMessages: Message[] = [],
|
|
): Promise<AttachmentMessage[]> {
|
|
const preservedReadPaths = collectReadToolFilePaths(preservedMessages)
|
|
const recentFiles = Object.entries(readFileState)
|
|
.map(([filename, state]) => ({ filename, ...state }))
|
|
.filter(
|
|
file =>
|
|
!shouldExcludeFromPostCompactRestore(
|
|
file.filename,
|
|
toolUseContext.agentId,
|
|
) && !preservedReadPaths.has(expandPath(file.filename)),
|
|
)
|
|
.sort((a, b) => b.timestamp - a.timestamp)
|
|
.slice(0, maxFiles)
|
|
|
|
const results = await Promise.all(
|
|
recentFiles.map(async file => {
|
|
const attachment = await generateFileAttachment(
|
|
file.filename,
|
|
{
|
|
...toolUseContext,
|
|
fileReadingLimits: {
|
|
maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,
|
|
},
|
|
},
|
|
'tengu_post_compact_file_restore_success',
|
|
'tengu_post_compact_file_restore_error',
|
|
'compact',
|
|
)
|
|
return attachment ? createAttachmentMessage(attachment) : null
|
|
}),
|
|
)
|
|
|
|
let usedTokens = 0
|
|
return results.filter((result): result is AttachmentMessage => {
|
|
if (result === null) {
|
|
return false
|
|
}
|
|
const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))
|
|
if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {
|
|
usedTokens += attachmentTokens
|
|
return true
|
|
}
|
|
return false
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Creates a plan file attachment if a plan file exists for the current session.
|
|
* This ensures the plan is preserved after compaction.
|
|
*/
|
|
export function createPlanAttachmentIfNeeded(
|
|
agentId?: AgentId,
|
|
): AttachmentMessage | null {
|
|
const planContent = getPlan(agentId)
|
|
|
|
if (!planContent) {
|
|
return null
|
|
}
|
|
|
|
const planFilePath = getPlanFilePath(agentId)
|
|
|
|
return createAttachmentMessage({
|
|
type: 'plan_file_reference',
|
|
planFilePath,
|
|
planContent,
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Creates an attachment for invoked skills to preserve their content across compaction.
|
|
* Only includes skills scoped to the given agent (or main session when agentId is null/undefined).
|
|
* This ensures skill guidelines remain available after the conversation is summarized
|
|
* without leaking skills from other agent contexts.
|
|
*/
|
|
export function createSkillAttachmentIfNeeded(
|
|
agentId?: string,
|
|
): AttachmentMessage | null {
|
|
const invokedSkills = getInvokedSkillsForAgent(agentId)
|
|
|
|
if (invokedSkills.size === 0) {
|
|
return null
|
|
}
|
|
|
|
// Sorted most-recent-first so budget pressure drops the least-relevant skills.
|
|
// Per-skill truncation keeps the head of each file (where setup/usage
|
|
// instructions typically live) rather than dropping whole skills.
|
|
let usedTokens = 0
|
|
const skills = Array.from(invokedSkills.values())
|
|
.sort((a, b) => b.invokedAt - a.invokedAt)
|
|
.map(skill => ({
|
|
name: skill.skillName,
|
|
path: skill.skillPath,
|
|
content: truncateToTokens(
|
|
skill.content,
|
|
POST_COMPACT_MAX_TOKENS_PER_SKILL,
|
|
),
|
|
}))
|
|
.filter(skill => {
|
|
const tokens = roughTokenCountEstimation(skill.content)
|
|
if (usedTokens + tokens > POST_COMPACT_SKILLS_TOKEN_BUDGET) {
|
|
return false
|
|
}
|
|
usedTokens += tokens
|
|
return true
|
|
})
|
|
|
|
if (skills.length === 0) {
|
|
return null
|
|
}
|
|
|
|
return createAttachmentMessage({
|
|
type: 'invoked_skills',
|
|
skills,
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Creates a plan_mode attachment if the user is currently in plan mode.
|
|
* This ensures the model continues to operate in plan mode after compaction
|
|
* (otherwise it would lose the plan mode instructions since those are
|
|
* normally only injected on tool-use turns via getAttachmentMessages).
|
|
*/
|
|
export async function createPlanModeAttachmentIfNeeded(
|
|
context: ToolUseContext,
|
|
): Promise<AttachmentMessage | null> {
|
|
const appState = context.getAppState()
|
|
if (appState.toolPermissionContext.mode !== 'plan') {
|
|
return null
|
|
}
|
|
|
|
const planFilePath = getPlanFilePath(context.agentId)
|
|
const planExists = getPlan(context.agentId) !== null
|
|
|
|
return createAttachmentMessage({
|
|
type: 'plan_mode',
|
|
reminderType: 'full',
|
|
isSubAgent: !!context.agentId,
|
|
planFilePath,
|
|
planExists,
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Creates attachments for async agents so the model knows about them after
|
|
* compaction. Covers both agents still running in the background (so the model
|
|
* doesn't spawn a duplicate) and agents that have finished but whose results
|
|
* haven't been retrieved yet.
|
|
*/
|
|
export async function createAsyncAgentAttachmentsIfNeeded(
|
|
context: ToolUseContext,
|
|
): Promise<AttachmentMessage[]> {
|
|
const appState = context.getAppState()
|
|
const asyncAgents = Object.values(appState.tasks).filter(
|
|
(task): task is LocalAgentTaskState => task.type === 'local_agent',
|
|
)
|
|
|
|
return asyncAgents.flatMap(agent => {
|
|
if (
|
|
agent.retrieved ||
|
|
agent.status === 'pending' ||
|
|
agent.agentId === context.agentId
|
|
) {
|
|
return []
|
|
}
|
|
return [
|
|
createAttachmentMessage({
|
|
type: 'task_status',
|
|
taskId: agent.agentId,
|
|
taskType: 'local_agent',
|
|
description: agent.description,
|
|
status: agent.status,
|
|
deltaSummary:
|
|
agent.status === 'running'
|
|
? (agent.progress?.summary ?? null)
|
|
: (agent.error ?? null),
|
|
outputFilePath: getTaskOutputPath(agent.agentId),
|
|
}),
|
|
]
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Scan messages for Read tool_use blocks and collect their file_path inputs
|
|
* (normalized via expandPath). Used to dedup post-compact file restoration
|
|
* against what's already visible in the preserved tail.
|
|
*
|
|
* Skips Reads whose tool_result is a dedup stub — the stub points at an
|
|
* earlier full Read that may have been compacted away, so we want
|
|
* createPostCompactFileAttachments to re-inject the real content.
|
|
*/
|
|
function collectReadToolFilePaths(messages: Message[]): Set<string> {
|
|
const stubIds = new Set<string>()
|
|
for (const message of messages) {
|
|
if (message.type !== 'user' || !Array.isArray(message.message.content)) {
|
|
continue
|
|
}
|
|
for (const block of message.message.content) {
|
|
if (
|
|
block.type === 'tool_result' &&
|
|
typeof block.content === 'string' &&
|
|
block.content.startsWith(FILE_UNCHANGED_STUB)
|
|
) {
|
|
stubIds.add(block.tool_use_id)
|
|
}
|
|
}
|
|
}
|
|
|
|
const paths = new Set<string>()
|
|
for (const message of messages) {
|
|
if (
|
|
message.type !== 'assistant' ||
|
|
!Array.isArray(message.message.content)
|
|
) {
|
|
continue
|
|
}
|
|
for (const block of message.message.content) {
|
|
if (
|
|
block.type !== 'tool_use' ||
|
|
block.name !== FILE_READ_TOOL_NAME ||
|
|
stubIds.has(block.id)
|
|
) {
|
|
continue
|
|
}
|
|
const input = block.input
|
|
if (
|
|
input &&
|
|
typeof input === 'object' &&
|
|
'file_path' in input &&
|
|
typeof input.file_path === 'string'
|
|
) {
|
|
paths.add(expandPath(input.file_path))
|
|
}
|
|
}
|
|
}
|
|
return paths
|
|
}
|
|
|
|
const SKILL_TRUNCATION_MARKER =
|
|
'\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'
|
|
|
|
/**
|
|
* Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation
|
|
* uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 4
|
|
* minus the marker so the result stays within budget. Marker tells the model it
|
|
* can Read the full file if needed.
|
|
*/
|
|
function truncateToTokens(content: string, maxTokens: number): string {
|
|
if (roughTokenCountEstimation(content) <= maxTokens) {
|
|
return content
|
|
}
|
|
const charBudget = maxTokens * 4 - SKILL_TRUNCATION_MARKER.length
|
|
return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER
|
|
}
|
|
|
|
function shouldExcludeFromPostCompactRestore(
|
|
filename: string,
|
|
agentId?: AgentId,
|
|
): boolean {
|
|
const normalizedFilename = expandPath(filename)
|
|
// Exclude plan files
|
|
try {
|
|
const planFilePath = expandPath(getPlanFilePath(agentId))
|
|
if (normalizedFilename === planFilePath) {
|
|
return true
|
|
}
|
|
} catch {
|
|
// If we can't get plan file path, continue with other checks
|
|
}
|
|
|
|
// Exclude all types of claude.md files
|
|
// TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency
|
|
// and to also match child directory memory files (.claude/rules/*.md, etc.)
|
|
try {
|
|
const normalizedMemoryPaths = new Set(
|
|
MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),
|
|
)
|
|
|
|
if (normalizedMemoryPaths.has(normalizedFilename)) {
|
|
return true
|
|
}
|
|
} catch {
|
|
// If we can't get memory paths, continue
|
|
}
|
|
|
|
return false
|
|
}
|