// 531 lines · 19 KiB · TypeScript
import { feature } from 'bun:bundle'
|
|
import type { ToolResultBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
|
|
import type { QuerySource } from '../../constants/querySource.js'
|
|
import type { ToolUseContext } from '../../Tool.js'
|
|
import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js'
|
|
import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js'
|
|
import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js'
|
|
import { GLOB_TOOL_NAME } from '../../tools/GlobTool/prompt.js'
|
|
import { GREP_TOOL_NAME } from '../../tools/GrepTool/prompt.js'
|
|
import { WEB_FETCH_TOOL_NAME } from '../../tools/WebFetchTool/prompt.js'
|
|
import { WEB_SEARCH_TOOL_NAME } from '../../tools/WebSearchTool/prompt.js'
|
|
import type { Message } from '../../types/message.js'
|
|
import { logForDebugging } from '../../utils/debug.js'
|
|
import { getMainLoopModel } from '../../utils/model/model.js'
|
|
import { SHELL_TOOL_NAMES } from '../../utils/shell/shellToolUtils.js'
|
|
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
import {
|
|
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
logEvent,
|
|
} from '../analytics/index.js'
|
|
import { notifyCacheDeletion } from '../api/promptCacheBreakDetection.js'
|
|
import { roughTokenCountEstimation } from '../tokenEstimation.js'
|
|
import {
|
|
clearCompactWarningSuppression,
|
|
suppressCompactWarning,
|
|
} from './compactWarningState.js'
|
|
import {
|
|
getTimeBasedMCConfig,
|
|
type TimeBasedMCConfig,
|
|
} from './timeBasedMCConfig.js'
|
|
|
|
// Inline from utils/toolResultStorage.ts — importing that file pulls in
|
|
// sessionStorage → utils/messages → services/api/errors, completing a
|
|
// circular-deps loop back through this file via promptCacheBreakDetection.
|
|
// Drift is caught by a test asserting equality with the source-of-truth.
|
|
/**
 * Placeholder text written in place of a tool result's content when the
 * time-based microcompact clears it. Also used to recognise results that
 * have already been cleared so they are not processed again.
 */
export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'
|
|
|
|
// Flat per-block token estimate charged for every image or document block
// when sizing tool results and messages in this file.
const IMAGE_MAX_TOKEN_SIZE = 2000
|
|
|
|
// Allow-list of tool names whose results are eligible for compaction.
// collectCompactableToolIds() only picks up tool_use blocks issued by one of
// these tools.
const COMPACTABLE_TOOLS = new Set<string>([
  FILE_READ_TOOL_NAME,
  ...SHELL_TOOL_NAMES,
  GREP_TOOL_NAME,
  GLOB_TOOL_NAME,
  WEB_SEARCH_TOOL_NAME,
  WEB_FETCH_TOOL_NAME,
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
])
|
|
|
|
// --- Cached microcompact state (ant-only, gated by feature('CACHED_MICROCOMPACT')) ---
//
// The cached-MC module is loaded on demand (see getCachedMCModule) instead of
// being imported statically, so external builds never import it; only its
// types are referenced here. The runtime import and all state access sit
// behind feature() checks so they can be removed by dead code elimination.

// Memoized module namespace; null until getCachedMCModule() has resolved once.
let cachedMCModule: typeof import('./cachedMicrocompact.js') | null = null

// Lazily created by ensureCachedMCState(); null until then.
let cachedMCState: import('./cachedMicrocompact.js').CachedMCState | null = null

// cache_edits block queued by cachedMicrocompactPath() and handed out (then
// cleared) by consumePendingCacheEdits().
let pendingCacheEdits: import('./cachedMicrocompact.js').CacheEditsBlock | null =
  null
|
|
|
|
/**
 * Load `./cachedMicrocompact.js` on first use and memoize the module
 * namespace in `cachedMCModule`; later calls reuse the memoized value.
 *
 * @returns The cached-microcompact module namespace.
 */
async function getCachedMCModule(): Promise<
  typeof import('./cachedMicrocompact.js')
> {
  // The right-hand side (and therefore the dynamic import) is only evaluated
  // while nothing has been memoized yet.
  return (cachedMCModule ??= await import('./cachedMicrocompact.js'))
}
|
|
|
|
/**
 * Return the module-level cached-MC state, creating it on first use via the
 * loaded module's `createCachedMCState()`.
 *
 * @returns The (possibly just created) cached-MC state.
 * @throws Error when no state exists and the module has not been loaded yet,
 *   i.e. getCachedMCModule() was never awaited.
 */
function ensureCachedMCState(): import('./cachedMicrocompact.js').CachedMCState {
  // Only builds a new state when none exists AND the module is available.
  cachedMCState ??= cachedMCModule?.createCachedMCState() ?? null

  if (cachedMCState) {
    return cachedMCState
  }

  throw new Error(
    'cachedMCState not initialized — getCachedMCModule() must be called first',
  )
}
|
|
|
|
/**
 * Take the cache edits queued for the next API request.
 *
 * The queue slot is emptied as part of the call, so a second call returns
 * null until new edits are queued. The caller is responsible for pinning the
 * returned block after inserting it (see pinCacheEdits).
 *
 * @returns The queued cache_edits block, or null when nothing is pending.
 */
export function consumePendingCacheEdits():
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null {
  const queued = pendingCacheEdits
  pendingCacheEdits = null
  return queued
}
|
|
|
|
/**
 * List the cache edits pinned so far; these must be re-sent at their original
 * positions on later requests for cache hits.
 *
 * @returns The state's live `pinnedEdits` array (not a copy), or a fresh
 *   empty array when the cached-MC state has not been created.
 */
export function getPinnedCacheEdits(): import('./cachedMicrocompact.js').PinnedCacheEdits[] {
  return cachedMCState ? cachedMCState.pinnedEdits : []
}
|
|
|
|
/**
 * Record a cache_edits block against the user message it was inserted at, so
 * that it is returned by getPinnedCacheEdits() on subsequent calls.
 *
 * Does nothing when the cached-MC state has not been created.
 *
 * @param userMessageIndex - Position of the user message the block belongs to.
 * @param block - The cache_edits block that was just inserted.
 */
export function pinCacheEdits(
  userMessageIndex: number,
  block: import('./cachedMicrocompact.js').CacheEditsBlock,
): void {
  cachedMCState?.pinnedEdits.push({ userMessageIndex, block })
}
|
|
|
|
/**
 * Flag every registered tool as sent to the API by delegating to the
 * cached-MC module's `markToolsSentToAPI`.
 *
 * Meant to be called after a successful API response. It is a no-op unless
 * both the module has been loaded and the state has been created.
 */
export function markToolsSentToAPIState(): void {
  if (!cachedMCState || !cachedMCModule) {
    return
  }
  cachedMCModule.markToolsSentToAPI(cachedMCState)
}
|
|
|
|
/**
 * Reset microcompact bookkeeping held at module level.
 *
 * When the cached-MC module is loaded and a state exists, the state is reset
 * through the module's `resetCachedMCState`. Any queued (not yet consumed)
 * cache edits are always discarded.
 */
export function resetMicrocompactState(): void {
  if (cachedMCModule && cachedMCState) {
    cachedMCModule.resetCachedMCState(cachedMCState)
  }
  // Dropped unconditionally, even when there was no state to reset.
  pendingCacheEdits = null
}
|
|
|
|
/**
 * Estimate the token size of a single tool_result block.
 *
 * - Missing or empty content counts as 0.
 * - String content is sized with roughTokenCountEstimation.
 * - Array content is summed item by item: text items by their text, image
 *   and document items at a flat IMAGE_MAX_TOKEN_SIZE each, anything else 0.
 *
 * @param block - The tool_result block to size.
 * @returns Estimated token count for the block's content.
 */
function calculateToolResultTokens(block: ToolResultBlockParam): number {
  const { content } = block

  if (!content) {
    return 0
  }
  if (typeof content === 'string') {
    return roughTokenCountEstimation(content)
  }

  // Array of TextBlockParam | ImageBlockParam | DocumentBlockParam
  let total = 0
  for (const item of content) {
    switch (item.type) {
      case 'text':
        total += roughTokenCountEstimation(item.text)
        break
      case 'image':
      case 'document':
        // Images/documents are approximately 2000 tokens regardless of format
        total += IMAGE_MAX_TOKEN_SIZE
        break
      default:
        // Other item kinds do not contribute to the estimate.
        break
    }
  }
  return total
}
|
|
|
|
/**
 * Roughly estimate how many tokens a list of messages occupies.
 *
 * Used when accurate API token counts are not available. Only `user` and
 * `assistant` messages contribute; every other message type is ignored.
 *
 * Message content is sized as follows:
 * - plain string content: the string itself
 * - `text` block: its text
 * - `tool_result` block: see calculateToolResultTokens
 * - `image` / `document` block: flat IMAGE_MAX_TOKEN_SIZE
 * - `thinking` block: the thinking text only (no JSON wrapper or signature)
 * - `redacted_thinking` block: its data
 * - `tool_use` block: tool name plus JSON-serialised input (no wrapper or id)
 * - anything else (server_tool_use, web_search_tool_result, ...): the whole
 *   block JSON-serialised
 *
 * The raw sum is padded by 4/3 and rounded up, to stay conservative since
 * all of the above are approximations.
 *
 * @param messages - Messages to size.
 * @returns Padded token estimate (0 for an empty list).
 */
export function estimateMessageTokens(messages: Message[]): number {
  let totalTokens = 0

  for (const message of messages) {
    if (message.type !== 'user' && message.type !== 'assistant') {
      continue
    }

    const { content } = message.message

    // Message content may be a bare string rather than an array of blocks
    // (e.g. a plain text prompt). Count it instead of silently skipping the
    // message, otherwise the "conservative" estimate under-counts.
    if (typeof content === 'string') {
      totalTokens += roughTokenCountEstimation(content)
      continue
    }

    if (!Array.isArray(content)) {
      continue
    }

    for (const block of content) {
      switch (block.type) {
        case 'text':
          totalTokens += roughTokenCountEstimation(block.text)
          break
        case 'tool_result':
          totalTokens += calculateToolResultTokens(block)
          break
        case 'image':
        case 'document':
          totalTokens += IMAGE_MAX_TOKEN_SIZE
          break
        case 'thinking':
          // Match roughTokenCountEstimationForBlock: count only the thinking
          // text, not the JSON wrapper or signature (signature is metadata,
          // not model-tokenized content).
          totalTokens += roughTokenCountEstimation(block.thinking)
          break
        case 'redacted_thinking':
          totalTokens += roughTokenCountEstimation(block.data)
          break
        case 'tool_use':
          // Match roughTokenCountEstimationForBlock: count name + input,
          // not the JSON wrapper or id field.
          totalTokens += roughTokenCountEstimation(
            block.name + jsonStringify(block.input ?? {}),
          )
          break
        default:
          // server_tool_use, web_search_tool_result, etc.
          totalTokens += roughTokenCountEstimation(jsonStringify(block))
          break
      }
    }
  }

  // Pad estimate by 4/3 to be conservative since we're approximating
  return Math.ceil(totalTokens * (4 / 3))
}
|
|
|
|
/**
 * Describes a cached-microcompact deletion that has been queued but whose
 * effect is only known after the next API response.
 */
export type PendingCacheEdits = {
  /** How the compaction was triggered; only automatic triggering exists. */
  trigger: 'auto'
  /** tool_use ids whose results are being deleted from the cache. */
  deletedToolIds: string[]
  /**
   * Cumulative cache_deleted_input_tokens from the previous API response.
   * The API value is sticky/cumulative, so this baseline is what the
   * per-operation delta is computed against.
   */
  baselineCacheDeletedTokens: number
}
|
|
|
|
/** Outcome of a microcompact attempt. */
export type MicrocompactResult = {
  /** Messages to use for the request (the input list when nothing changed). */
  messages: Message[]
  /** Present only when the cached-microcompact path queued deletions. */
  compactionInfo?: {
    pendingCacheEdits?: PendingCacheEdits
  }
}
|
|
|
|
/**
 * Gather the ids of all tool_use blocks issued by a tool listed in
 * COMPACTABLE_TOOLS, in the order they appear in the conversation.
 * Only assistant messages with array content are inspected. Shared by both
 * microcompact paths.
 *
 * @param messages - Conversation messages to scan.
 * @returns tool_use ids in encounter order.
 */
function collectCompactableToolIds(messages: Message[]): string[] {
  const collected: string[] = []

  for (const message of messages) {
    if (message.type !== 'assistant') {
      continue
    }
    const { content } = message.message
    if (!Array.isArray(content)) {
      continue
    }

    for (const block of content) {
      if (block.type !== 'tool_use') {
        continue
      }
      if (COMPACTABLE_TOOLS.has(block.name)) {
        collected.push(block.id)
      }
    }
  }

  return collected
}
|
|
|
|
// Decide whether a query originates from the main REPL thread.
//
// A missing source counts as main-thread. Otherwise the source is matched by
// prefix rather than equality: promptCategory.ts sets the querySource to
// 'repl_main_thread:outputStyle:<style>' when a non-default output style is
// active, and the bare 'repl_main_thread' is only used for the default style.
// query.ts:350/1451 use the same startsWith pattern. The earlier cached-MC
// `=== 'repl_main_thread'` check was a latent bug that silently excluded
// users with a non-default output style from cached MC.
function isMainThreadSource(querySource: QuerySource | undefined): boolean {
  return querySource ? querySource.startsWith('repl_main_thread') : true
}
|
|
|
|
/**
 * Entry point for microcompaction of a conversation before a request.
 *
 * Order of attempts:
 * 1. Time-based microcompact (maybeTimeBasedMicrocompact); if it fires, its
 *    result is returned and nothing else runs.
 * 2. Cached microcompact, only when the CACHED_MICROCOMPACT feature is on,
 *    the module reports it enabled, the model supports cache editing, and
 *    the query comes from the main thread (an undefined source counts as
 *    main thread here).
 * 3. Otherwise the messages are returned unchanged.
 *
 * @param messages - Conversation messages.
 * @param toolUseContext - Optional context; its `options.mainLoopModel` is
 *   preferred over getMainLoopModel() when choosing the model.
 * @param querySource - Optional origin of the query.
 * @returns The messages to send, plus compaction info when cached MC queued
 *   deletions.
 */
export async function microcompactMessages(
  messages: Message[],
  toolUseContext?: ToolUseContext,
  querySource?: QuerySource,
): Promise<MicrocompactResult> {
  // Every attempt starts with the compact-warning suppression flag cleared.
  clearCompactWarningSuppression()

  // The time-based trigger goes first and short-circuits. A gap since the
  // last assistant message beyond the threshold means the server cache has
  // expired and the whole prefix is rewritten anyway, so old tool results are
  // content-cleared up front to shrink what gets rewritten. Cache editing is
  // skipped in that case: it assumes a warm cache, and the cache is cold.
  const coldCacheResult = maybeTimeBasedMicrocompact(messages, querySource)
  if (coldCacheResult !== null) {
    return coldCacheResult
  }

  // Cached MC is restricted to the main thread so that forked agents
  // (session_memory, prompt_suggestion, etc.) do not register their
  // tool_results in the global cachedMCState; otherwise the main thread would
  // try to delete tools that are not part of its own conversation.
  if (feature('CACHED_MICROCOMPACT')) {
    const cachedMC = await getCachedMCModule()
    const model = toolUseContext?.options.mainLoopModel ?? getMainLoopModel()
    const canEditCache =
      cachedMC.isCachedMicrocompactEnabled() &&
      cachedMC.isModelSupportedForCacheEditing(model) &&
      isMainThreadSource(querySource)
    if (canEditCache) {
      return await cachedMicrocompactPath(messages, querySource)
    }
  }

  // Legacy microcompact path removed — tengu_cache_plum_violet is always true.
  // Where cached microcompact is unavailable (external builds, non-ant users,
  // unsupported models, sub-agents) nothing is compacted here; autocompact
  // handles context pressure instead.
  return { messages }
}
|
|
|
|
/**
 * Cached microcompact: removes tool results through the cache editing API so
 * the cached prefix is not invalidated.
 *
 * How it differs from regular microcompact:
 * - Local message content is NOT modified; cache_reference and cache_edits
 *   are added at the API layer.
 * - Which results to delete is decided by the cached-MC module from the
 *   registered tool results; the config's trigger/keep thresholds are
 *   reported in analytics here.
 * - It takes precedence over regular microcompact (no disk persistence).
 * - Tool results are tracked in module-level state and the resulting cache
 *   edits are queued for the API layer.
 *
 * @param messages - Conversation messages (returned as-is).
 * @param querySource - Origin of the query, forwarded to cache-break
 *   detection; defaults to 'repl_main_thread' there when undefined.
 * @returns The unchanged messages, with `compactionInfo.pendingCacheEdits`
 *   set when at least one tool result was selected for deletion.
 */
async function cachedMicrocompactPath(
  messages: Message[],
  querySource: QuerySource | undefined,
): Promise<MicrocompactResult> {
  const mod = await getCachedMCModule()
  const state = ensureCachedMCState()
  const config = mod.getCachedMCConfig()

  // First pass: ids of all tool_use blocks issued by compactable tools.
  const compactableToolIds = new Set(collectCompactableToolIds(messages))

  // Second pass: register tool results grouped by user message.
  for (const message of messages) {
    if (message.type !== 'user') {
      continue
    }
    const { content } = message.message
    if (!Array.isArray(content)) {
      continue
    }

    const newlyRegistered: string[] = []
    for (const block of content) {
      if (block.type !== 'tool_result') {
        continue
      }
      const toolUseId = block.tool_use_id
      const alreadyKnown = state.registeredTools.has(toolUseId)
      if (!compactableToolIds.has(toolUseId) || alreadyKnown) {
        continue
      }
      mod.registerToolResult(state, toolUseId)
      newlyRegistered.push(toolUseId)
    }
    // Invoked for every array-content user message, even with an empty group.
    mod.registerToolMessage(state, newlyRegistered)
  }

  const toolsToDelete = mod.getToolResultsToDelete(state)

  if (toolsToDelete.length === 0) {
    // No compaction needed, return messages unchanged
    return { messages }
  }

  // Create and queue the cache_edits block for the API layer
  const editsBlock = mod.createCacheEditsBlock(state, toolsToDelete)
  if (editsBlock) {
    pendingCacheEdits = editsBlock
  }

  logForDebugging(
    `Cached MC deleting ${toolsToDelete.length} tool(s): ${toolsToDelete.join(', ')}`,
  )

  logEvent('tengu_cached_microcompact', {
    toolsDeleted: toolsToDelete.length,
    deletedToolIds: toolsToDelete.join(
      ',',
    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    activeToolCount: state.toolOrder.length - state.deletedRefs.size,
    triggerType:
      'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    threshold: config.triggerThreshold,
    keepRecent: config.keepRecent,
  })

  // Suppress warning after successful compaction
  suppressCompactWarning()

  // Notify cache break detection that cache reads will legitimately drop
  if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
    // Pass the actual querySource — isMainThreadSource prefix-matches, so
    // output-style variants reach this point, and getTrackingKey keys on the
    // full source string, not the 'repl_main_thread' prefix.
    notifyCacheDeletion(querySource ?? 'repl_main_thread')
  }

  // Messages are returned unchanged: cache_reference and cache_edits are
  // added at the API layer. The boundary message is deferred until after the
  // API response so the actual cache_deleted_input_tokens can be used instead
  // of client-side estimates. The cumulative value on the last assistant
  // message is captured as the baseline for computing the per-operation delta
  // after the API call.
  let baselineCacheDeletedTokens = 0
  const lastAssistant = messages.findLast(m => m.type === 'assistant')
  if (lastAssistant?.type === 'assistant') {
    const usage = lastAssistant.message.usage as unknown as Record<
      string,
      number | undefined
    >
    baselineCacheDeletedTokens = usage?.cache_deleted_input_tokens ?? 0
  }

  return {
    messages,
    compactionInfo: {
      pendingCacheEdits: {
        trigger: 'auto',
        deletedToolIds: toolsToDelete,
        baselineCacheDeletedTokens,
      },
    },
  }
}
|
|
|
|
// NOTE: this comment describes maybeTimeBasedMicrocompact(), which is defined
// after evaluateTimeBasedTrigger() below.
//
// Time-based microcompact: when the gap since the last main-loop assistant
// message exceeds the configured threshold, content-clear all but the most
// recent N compactable tool results.
//
// Returns null when the trigger doesn't fire (disabled, wrong source, gap
// under threshold, nothing to clear) — the caller then falls through to the
// other paths.
//
// Unlike cached MC, this rewrites tool-result content directly in the
// returned messages (as copies; the input message objects are not modified).
// The cache is cold, so there's no cached prefix to preserve via cache_edits.
|
|
/**
 * Decide whether the time-based trigger fires for this request.
 *
 * The trigger fires when time-based MC is enabled, the request carries an
 * explicit main-thread querySource, a prior assistant message exists, and
 * the time elapsed since that message's timestamp is a finite number of
 * minutes that is not below the configured threshold.
 *
 * Kept separate from the clearing logic so that other pre-request paths
 * (e.g. snip force-apply) can consult the same predicate without being
 * coupled to the tool-result clearing action.
 *
 * @param messages - Conversation messages; the last assistant message
 *   supplies the reference timestamp.
 * @param querySource - Origin of the query; undefined never triggers.
 * @returns The measured gap in minutes together with the config that was
 *   consulted, or null when the trigger does not fire.
 */
export function evaluateTimeBasedTrigger(
  messages: Message[],
  querySource: QuerySource | undefined,
): { gapMinutes: number; config: TimeBasedMCConfig } | null {
  const config = getTimeBasedMCConfig()
  if (!config.enabled) {
    return null
  }

  // An explicit main-thread querySource is required. isMainThreadSource
  // treats undefined as main-thread (for cached-MC backward-compat), but
  // several callers (/context, /compact, analyzeContext) invoke
  // microcompactMessages without a source for analysis-only purposes, and
  // those must not trigger.
  if (!querySource || !isMainThreadSource(querySource)) {
    return null
  }

  const lastAssistant = messages.findLast(m => m.type === 'assistant')
  if (!lastAssistant) {
    return null
  }

  const lastAssistantMs = new Date(lastAssistant.timestamp).getTime()
  const gapMinutes = (Date.now() - lastAssistantMs) / 60_000

  // A non-finite gap means the timestamp could not be parsed.
  const belowThreshold = gapMinutes < config.gapThresholdMinutes
  if (!Number.isFinite(gapMinutes) || belowThreshold) {
    return null
  }

  return { gapMinutes, config }
}
|
|
|
|
/**
 * Time-based microcompact: when evaluateTimeBasedTrigger fires, replace the
 * content of all but the most recent compactable tool results with
 * TIME_BASED_MC_CLEARED_MESSAGE.
 *
 * The input messages are not modified; affected messages and blocks are
 * copied, untouched ones are returned by reference. On success it also logs
 * the event, suppresses the compact warning, resets the cached-MC state and,
 * when cache-break detection is enabled, tells it to expect a drop.
 *
 * @param messages - Conversation messages.
 * @param querySource - Origin of the query.
 * @returns The rewritten messages, or null when the trigger does not fire,
 *   there is nothing to clear, or clearing would save zero tokens.
 */
function maybeTimeBasedMicrocompact(
  messages: Message[],
  querySource: QuerySource | undefined,
): MicrocompactResult | null {
  const trigger = evaluateTimeBasedTrigger(messages, querySource)
  if (trigger === null) {
    return null
  }
  const { gapMinutes, config } = trigger

  const compactableIds = collectCompactableToolIds(messages)

  // Floor at 1: slice(-0) returns the full array (paradoxically keeping
  // everything), and clearing ALL results would leave the model with zero
  // working context. Neither is sensible, so the last one is always kept.
  const keepRecent = Math.max(1, config.keepRecent)
  const keepSet = new Set(compactableIds.slice(-keepRecent))
  const clearSet = new Set(compactableIds.filter(id => !keepSet.has(id)))

  if (clearSet.size === 0) {
    return null
  }

  let tokensSaved = 0
  const result: Message[] = messages.map(message => {
    if (message.type !== 'user') {
      return message
    }
    const { content } = message.message
    if (!Array.isArray(content)) {
      return message
    }

    let touched = false
    const newContent = content.map(block => {
      if (block.type !== 'tool_result') {
        return block
      }
      if (!clearSet.has(block.tool_use_id)) {
        return block
      }
      // Already cleared on an earlier run; leave it alone.
      if (block.content === TIME_BASED_MC_CLEARED_MESSAGE) {
        return block
      }
      tokensSaved += calculateToolResultTokens(block)
      touched = true
      return { ...block, content: TIME_BASED_MC_CLEARED_MESSAGE }
    })

    if (!touched) {
      return message
    }
    return {
      ...message,
      message: { ...message.message, content: newContent },
    }
  })

  if (tokensSaved === 0) {
    return null
  }

  logEvent('tengu_time_based_microcompact', {
    gapMinutes: Math.round(gapMinutes),
    gapThresholdMinutes: config.gapThresholdMinutes,
    toolsCleared: clearSet.size,
    toolsKept: keepSet.size,
    keepRecent: config.keepRecent,
    tokensSaved,
  })

  logForDebugging(
    `[TIME-BASED MC] gap ${Math.round(gapMinutes)}min > ${config.gapThresholdMinutes}min, cleared ${clearSet.size} tool results (~${tokensSaved} tokens), kept last ${keepSet.size}`,
  )

  suppressCompactWarning()

  // The module-level cached-MC state holds tool ids registered on prior
  // turns. Some of those tools were just content-cleared AND the server cache
  // was invalidated by changing prompt content. If cached MC ran next turn
  // with the stale state it would try to cache_edit tools whose server-side
  // entries no longer exist, so the state is reset.
  resetMicrocompactState()

  // The prompt content just changed, so the next response's cache read will
  // be low — caused by this code, not by a cache break. Tell the detector to
  // expect a drop. notifyCacheDeletion (not notifyCompaction) is used because
  // it is already imported here and gives the same false-positive
  // suppression; adding the second symbol to the import was flagged by the
  // circular-deps check. The actual querySource is passed because
  // getTrackingKey returns the full source string
  // (e.g. 'repl_main_thread:outputStyle:custom'), not just the prefix.
  if (feature('PROMPT_CACHE_BREAK_DETECTION') && querySource) {
    notifyCacheDeletion(querySource)
  }

  return { messages: result }
}
|