mono/packages/kbot/ref/utils/analyzeContext.ts
2026-04-01 01:05:48 +02:00

1383 lines
42 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { feature } from 'bun:bundle'
import type { Anthropic } from '@anthropic-ai/sdk'
import {
getSystemPrompt,
SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
} from 'src/constants/prompts.js'
import { microcompactMessages } from 'src/services/compact/microCompact.js'
import { getSdkBetas } from '../bootstrap/state.js'
import { getCommandName } from '../commands.js'
import { getSystemContext } from '../context.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
import {
AUTOCOMPACT_BUFFER_TOKENS,
getEffectiveContextWindowSize,
isAutoCompactEnabled,
MANUAL_COMPACT_BUFFER_TOKENS,
} from '../services/compact/autoCompact.js'
import {
countMessagesTokensWithAPI,
countTokensViaHaikuFallback,
roughTokenCountEstimation,
} from '../services/tokenEstimation.js'
import { estimateSkillFrontmatterTokens } from '../skills/loadSkillsDir.js'
import {
findToolByName,
type Tool,
type ToolPermissionContext,
type Tools,
type ToolUseContext,
toolMatchesName,
} from '../Tool.js'
import type {
AgentDefinition,
AgentDefinitionsResult,
} from '../tools/AgentTool/loadAgentsDir.js'
import { SKILL_TOOL_NAME } from '../tools/SkillTool/constants.js'
import {
getLimitedSkillToolCommands,
getSkillToolInfo as getSlashCommandInfo,
} from '../tools/SkillTool/prompt.js'
import type {
AssistantMessage,
AttachmentMessage,
Message,
NormalizedAssistantMessage,
NormalizedUserMessage,
UserMessage,
} from '../types/message.js'
import { toolToAPISchema } from './api.js'
import { filterInjectedMemoryFiles, getMemoryFiles } from './claudemd.js'
import { getContextWindowForModel } from './context.js'
import { getCwd } from './cwd.js'
import { logForDebugging } from './debug.js'
import { isEnvTruthy } from './envUtils.js'
import { errorMessage, toError } from './errors.js'
import { logError } from './log.js'
import { normalizeMessagesForAPI } from './messages.js'
import { getRuntimeMainLoopModel } from './model/model.js'
import type { SettingSource } from './settings/constants.js'
import { jsonStringify } from './slowOperations.js'
import { buildEffectiveSystemPrompt } from './systemPrompt.js'
import type { Theme } from './theme.js'
import { getCurrentUsage } from './tokens.js'
const RESERVED_CATEGORY_NAME = 'Autocompact buffer'
const MANUAL_COMPACT_BUFFER_NAME = 'Compact buffer'
/**
* Fixed token overhead added by the API when tools are present.
* The API adds a tool prompt preamble (~500 tokens) once per API call when tools are present.
* When we count tools individually via the token counting API, each call includes this overhead,
* leading to N × overhead instead of 1 × overhead for N tools.
* We subtract this overhead from per-tool counts to show accurate tool content sizes.
*/
export const TOOL_TOKEN_COUNT_OVERHEAD = 500
async function countTokensWithFallback(
messages: Anthropic.Beta.Messages.BetaMessageParam[],
tools: Anthropic.Beta.Messages.BetaToolUnion[],
): Promise<number | null> {
try {
const result = await countMessagesTokensWithAPI(messages, tools)
if (result !== null) {
return result
}
logForDebugging(
`countTokensWithFallback: API returned null, trying haiku fallback (${tools.length} tools)`,
)
} catch (err) {
logForDebugging(`countTokensWithFallback: API failed: ${errorMessage(err)}`)
logError(err)
}
try {
const fallbackResult = await countTokensViaHaikuFallback(messages, tools)
if (fallbackResult === null) {
logForDebugging(
`countTokensWithFallback: haiku fallback also returned null (${tools.length} tools)`,
)
}
return fallbackResult
} catch (err) {
logForDebugging(
`countTokensWithFallback: haiku fallback failed: ${errorMessage(err)}`,
)
logError(err)
return null
}
}
interface ContextCategory {
name: string
tokens: number
color: keyof Theme
/** When true, these tokens are deferred and don't count toward context usage */
isDeferred?: boolean
}
interface GridSquare {
color: keyof Theme
isFilled: boolean
categoryName: string
tokens: number
percentage: number
squareFullness: number // 0-1 representing how full this individual square is
}
interface MemoryFile {
path: string
type: string
tokens: number
}
interface McpTool {
name: string
serverName: string
tokens: number
isLoaded?: boolean
}
export interface DeferredBuiltinTool {
name: string
tokens: number
isLoaded: boolean
}
export interface SystemToolDetail {
name: string
tokens: number
}
export interface SystemPromptSectionDetail {
name: string
tokens: number
}
interface Agent {
agentType: string
source: SettingSource | 'built-in' | 'plugin'
tokens: number
}
interface SlashCommandInfo {
readonly totalCommands: number
readonly includedCommands: number
readonly tokens: number
}
/** Individual skill detail for context display */
interface SkillFrontmatter {
name: string
source: SettingSource | 'plugin'
tokens: number
}
/**
* Information about skills included in the context window.
*/
interface SkillInfo {
/** Total number of available skills */
readonly totalSkills: number
/** Number of skills included within token budget */
readonly includedSkills: number
/** Total tokens consumed by skills */
readonly tokens: number
/** Individual skill details */
readonly skillFrontmatter: SkillFrontmatter[]
}
export interface ContextData {
readonly categories: ContextCategory[]
readonly totalTokens: number
readonly maxTokens: number
readonly rawMaxTokens: number
readonly percentage: number
readonly gridRows: GridSquare[][]
readonly model: string
readonly memoryFiles: MemoryFile[]
readonly mcpTools: McpTool[]
/** Ant-only: per-tool breakdown of deferred built-in tools */
readonly deferredBuiltinTools?: DeferredBuiltinTool[]
/** Ant-only: per-tool breakdown of always-loaded built-in tools */
readonly systemTools?: SystemToolDetail[]
/** Ant-only: per-section breakdown of system prompt */
readonly systemPromptSections?: SystemPromptSectionDetail[]
readonly agents: Agent[]
readonly slashCommands?: SlashCommandInfo
/** Skill statistics */
readonly skills?: SkillInfo
readonly autoCompactThreshold?: number
readonly isAutoCompactEnabled: boolean
messageBreakdown?: {
toolCallTokens: number
toolResultTokens: number
attachmentTokens: number
assistantMessageTokens: number
userMessageTokens: number
toolCallsByType: Array<{
name: string
callTokens: number
resultTokens: number
}>
attachmentsByType: Array<{ name: string; tokens: number }>
}
/** Actual token usage from last API response (if available) */
readonly apiUsage: {
input_tokens: number
output_tokens: number
cache_creation_input_tokens: number
cache_read_input_tokens: number
} | null
}
export async function countToolDefinitionTokens(
tools: Tools,
getToolPermissionContext: () => Promise<ToolPermissionContext>,
agentInfo: AgentDefinitionsResult | null,
model?: string,
): Promise<number> {
const toolSchemas = await Promise.all(
tools.map(tool =>
toolToAPISchema(tool, {
getToolPermissionContext,
tools,
agents: agentInfo?.activeAgents ?? [],
model,
}),
),
)
const result = await countTokensWithFallback([], toolSchemas)
if (result === null || result === 0) {
const toolNames = tools.map(t => t.name).join(', ')
logForDebugging(
`countToolDefinitionTokens returned ${result} for ${tools.length} tools: ${toolNames.slice(0, 100)}${toolNames.length > 100 ? '...' : ''}`,
)
}
return result ?? 0
}
/** Extract a human-readable name from a system prompt section's content */
function extractSectionName(content: string): string {
// Try to find first markdown heading
const headingMatch = content.match(/^#+\s+(.+)$/m)
if (headingMatch) {
return headingMatch[1]!.trim()
}
// Fall back to a truncated preview of the first non-empty line
const firstLine = content.split('\n').find(l => l.trim().length > 0) ?? ''
return firstLine.length > 40 ? firstLine.slice(0, 40) + '…' : firstLine
}
async function countSystemTokens(
effectiveSystemPrompt: readonly string[],
): Promise<{
systemPromptTokens: number
systemPromptSections: SystemPromptSectionDetail[]
}> {
// Get system context (gitStatus, etc.) which is always included
const systemContext = await getSystemContext()
// Build named entries: system prompt parts + system context values
// Skip empty strings and the global-cache boundary marker
const namedEntries: Array<{ name: string; content: string }> = [
...effectiveSystemPrompt
.filter(
content =>
content.length > 0 && content !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
)
.map(content => ({ name: extractSectionName(content), content })),
...Object.entries(systemContext)
.filter(([, content]) => content.length > 0)
.map(([name, content]) => ({ name, content })),
]
if (namedEntries.length < 1) {
return { systemPromptTokens: 0, systemPromptSections: [] }
}
const systemTokenCounts = await Promise.all(
namedEntries.map(({ content }) =>
countTokensWithFallback([{ role: 'user', content }], []),
),
)
const systemPromptSections: SystemPromptSectionDetail[] = namedEntries.map(
(entry, i) => ({
name: entry.name,
tokens: systemTokenCounts[i] || 0,
}),
)
const systemPromptTokens = systemTokenCounts.reduce(
(sum: number, tokens) => sum + (tokens || 0),
0,
)
return { systemPromptTokens, systemPromptSections }
}
async function countMemoryFileTokens(): Promise<{
memoryFileDetails: MemoryFile[]
claudeMdTokens: number
}> {
// Simple mode disables CLAUDE.md loading, so don't report tokens for them
if (isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) {
return { memoryFileDetails: [], claudeMdTokens: 0 }
}
const memoryFilesData = filterInjectedMemoryFiles(await getMemoryFiles())
const memoryFileDetails: MemoryFile[] = []
let claudeMdTokens = 0
if (memoryFilesData.length < 1) {
return {
memoryFileDetails: [],
claudeMdTokens: 0,
}
}
const claudeMdTokenCounts = await Promise.all(
memoryFilesData.map(async file => {
const tokens = await countTokensWithFallback(
[{ role: 'user', content: file.content }],
[],
)
return { file, tokens: tokens || 0 }
}),
)
for (const { file, tokens } of claudeMdTokenCounts) {
claudeMdTokens += tokens
memoryFileDetails.push({
path: file.path,
type: file.type,
tokens,
})
}
return { claudeMdTokens, memoryFileDetails }
}
async function countBuiltInToolTokens(
tools: Tools,
getToolPermissionContext: () => Promise<ToolPermissionContext>,
agentInfo: AgentDefinitionsResult | null,
model?: string,
messages?: Message[],
): Promise<{
builtInToolTokens: number
deferredBuiltinDetails: DeferredBuiltinTool[]
deferredBuiltinTokens: number
systemToolDetails: SystemToolDetail[]
}> {
const builtInTools = tools.filter(tool => !tool.isMcp)
if (builtInTools.length < 1) {
return {
builtInToolTokens: 0,
deferredBuiltinDetails: [],
deferredBuiltinTokens: 0,
systemToolDetails: [],
}
}
// Check if tool search is enabled
const { isToolSearchEnabled } = await import('./toolSearch.js')
const { isDeferredTool } = await import('../tools/ToolSearchTool/prompt.js')
const isDeferred = await isToolSearchEnabled(
model ?? '',
tools,
getToolPermissionContext,
agentInfo?.activeAgents ?? [],
'analyzeBuiltIn',
)
// Separate always-loaded and deferred builtin tools using dynamic isDeferredTool check
const alwaysLoadedTools = builtInTools.filter(t => !isDeferredTool(t))
const deferredBuiltinTools = builtInTools.filter(t => isDeferredTool(t))
// Count always-loaded tools
const alwaysLoadedTokens =
alwaysLoadedTools.length > 0
? await countToolDefinitionTokens(
alwaysLoadedTools,
getToolPermissionContext,
agentInfo,
model,
)
: 0
// Build per-tool breakdown for always-loaded tools (ant-only, proportional
// split of the bulk count based on rough schema size estimation). Excludes
// SkillTool since its tokens are shown in the separate Skills category.
let systemToolDetails: SystemToolDetail[] = []
if (process.env.USER_TYPE === 'ant') {
const toolsForBreakdown = alwaysLoadedTools.filter(
t => !toolMatchesName(t, SKILL_TOOL_NAME),
)
if (toolsForBreakdown.length > 0) {
const estimates = toolsForBreakdown.map(t =>
roughTokenCountEstimation(jsonStringify(t.inputSchema ?? {})),
)
const estimateTotal = estimates.reduce((s, e) => s + e, 0) || 1
const distributable = Math.max(
0,
alwaysLoadedTokens - TOOL_TOKEN_COUNT_OVERHEAD,
)
systemToolDetails = toolsForBreakdown
.map((t, i) => ({
name: t.name,
tokens: Math.round((estimates[i]! / estimateTotal) * distributable),
}))
.sort((a, b) => b.tokens - a.tokens)
}
}
// Count deferred builtin tools individually for details
const deferredBuiltinDetails: DeferredBuiltinTool[] = []
let loadedDeferredTokens = 0
let totalDeferredTokens = 0
if (deferredBuiltinTools.length > 0 && isDeferred) {
// Find which deferred tools have been used in messages
const loadedToolNames = new Set<string>()
if (messages) {
const deferredToolNameSet = new Set(deferredBuiltinTools.map(t => t.name))
for (const msg of messages) {
if (msg.type === 'assistant') {
for (const block of msg.message.content) {
if (
'type' in block &&
block.type === 'tool_use' &&
'name' in block &&
typeof block.name === 'string' &&
deferredToolNameSet.has(block.name)
) {
loadedToolNames.add(block.name)
}
}
}
}
}
// Count each deferred tool
const tokensByTool = await Promise.all(
deferredBuiltinTools.map(t =>
countToolDefinitionTokens(
[t],
getToolPermissionContext,
agentInfo,
model,
),
),
)
for (const [i, tool] of deferredBuiltinTools.entries()) {
const tokens = Math.max(
0,
(tokensByTool[i] || 0) - TOOL_TOKEN_COUNT_OVERHEAD,
)
const isLoaded = loadedToolNames.has(tool.name)
deferredBuiltinDetails.push({
name: tool.name,
tokens,
isLoaded,
})
totalDeferredTokens += tokens
if (isLoaded) {
loadedDeferredTokens += tokens
}
}
} else if (deferredBuiltinTools.length > 0) {
// Tool search not enabled - count deferred tools as regular
const deferredTokens = await countToolDefinitionTokens(
deferredBuiltinTools,
getToolPermissionContext,
agentInfo,
model,
)
return {
builtInToolTokens: alwaysLoadedTokens + deferredTokens,
deferredBuiltinDetails: [],
deferredBuiltinTokens: 0,
systemToolDetails,
}
}
return {
// When deferred, only count always-loaded tools + any loaded deferred tools
builtInToolTokens: alwaysLoadedTokens + loadedDeferredTokens,
deferredBuiltinDetails,
deferredBuiltinTokens: totalDeferredTokens - loadedDeferredTokens,
systemToolDetails,
}
}
function findSkillTool(tools: Tools): Tool | undefined {
return findToolByName(tools, SKILL_TOOL_NAME)
}
async function countSlashCommandTokens(
tools: Tools,
getToolPermissionContext: () => Promise<ToolPermissionContext>,
agentInfo: AgentDefinitionsResult | null,
): Promise<{
slashCommandTokens: number
commandInfo: { totalCommands: number; includedCommands: number }
}> {
const info = await getSlashCommandInfo(getCwd())
const slashCommandTool = findSkillTool(tools)
if (!slashCommandTool) {
return {
slashCommandTokens: 0,
commandInfo: { totalCommands: 0, includedCommands: 0 },
}
}
const slashCommandTokens = await countToolDefinitionTokens(
[slashCommandTool],
getToolPermissionContext,
agentInfo,
)
return {
slashCommandTokens,
commandInfo: {
totalCommands: info.totalCommands,
includedCommands: info.includedCommands,
},
}
}
async function countSkillTokens(
tools: Tools,
getToolPermissionContext: () => Promise<ToolPermissionContext>,
agentInfo: AgentDefinitionsResult | null,
): Promise<{
skillTokens: number
skillInfo: {
totalSkills: number
includedSkills: number
skillFrontmatter: SkillFrontmatter[]
}
}> {
try {
const skills = await getLimitedSkillToolCommands(getCwd())
const slashCommandTool = findSkillTool(tools)
if (!slashCommandTool) {
return {
skillTokens: 0,
skillInfo: { totalSkills: 0, includedSkills: 0, skillFrontmatter: [] },
}
}
// NOTE: This counts the entire SlashCommandTool (which includes both commands AND skills).
// This is the same tool counted by countSlashCommandTokens(), but we track it separately
// here for display purposes. These tokens should NOT be added to context categories
// to avoid double-counting.
const skillTokens = await countToolDefinitionTokens(
[slashCommandTool],
getToolPermissionContext,
agentInfo,
)
// Calculate per-skill token estimates based on frontmatter only
// (name, description, whenToUse) since full content is only loaded on invocation
const skillFrontmatter: SkillFrontmatter[] = skills.map(skill => ({
name: getCommandName(skill),
source: (skill.type === 'prompt' ? skill.source : 'plugin') as
| SettingSource
| 'plugin',
tokens: estimateSkillFrontmatterTokens(skill),
}))
return {
skillTokens,
skillInfo: {
totalSkills: skills.length,
includedSkills: skills.length,
skillFrontmatter,
},
}
} catch (error) {
logError(toError(error))
// Return zero values rather than failing the entire context analysis
return {
skillTokens: 0,
skillInfo: { totalSkills: 0, includedSkills: 0, skillFrontmatter: [] },
}
}
}
export async function countMcpToolTokens(
tools: Tools,
getToolPermissionContext: () => Promise<ToolPermissionContext>,
agentInfo: AgentDefinitionsResult | null,
model: string,
messages?: Message[],
): Promise<{
mcpToolTokens: number
mcpToolDetails: McpTool[]
deferredToolTokens: number
loadedMcpToolNames: Set<string>
}> {
const mcpTools = tools.filter(tool => tool.isMcp)
const mcpToolDetails: McpTool[] = []
// Single bulk API call for all MCP tools (instead of N individual calls)
const totalTokensRaw = await countToolDefinitionTokens(
mcpTools,
getToolPermissionContext,
agentInfo,
model,
)
// Subtract the single overhead since we made one bulk call
const totalTokens = Math.max(
0,
(totalTokensRaw || 0) - TOOL_TOKEN_COUNT_OVERHEAD,
)
// Estimate per-tool proportions for display using local estimation.
// Include name + description + input schema to match what toolToAPISchema
// sends — otherwise tools with similar schemas but different descriptions
// get identical counts (MCP tools share the same base Zod inputSchema).
const estimates = await Promise.all(
mcpTools.map(async t =>
roughTokenCountEstimation(
jsonStringify({
name: t.name,
description: await t.prompt({
getToolPermissionContext,
tools,
agents: agentInfo?.activeAgents ?? [],
}),
input_schema: t.inputJSONSchema ?? {},
}),
),
),
)
const estimateTotal = estimates.reduce((s, e) => s + e, 0) || 1
const mcpToolTokensByTool = estimates.map(e =>
Math.round((e / estimateTotal) * totalTokens),
)
// Check if tool search is enabled - if so, MCP tools are deferred
// isToolSearchEnabled handles threshold calculation internally for TstAuto mode
const { isToolSearchEnabled } = await import('./toolSearch.js')
const { isDeferredTool } = await import('../tools/ToolSearchTool/prompt.js')
const isDeferred = await isToolSearchEnabled(
model,
tools,
getToolPermissionContext,
agentInfo?.activeAgents ?? [],
'analyzeMcp',
)
// Find MCP tools that have been used in messages (loaded via ToolSearchTool)
const loadedMcpToolNames = new Set<string>()
if (isDeferred && messages) {
const mcpToolNameSet = new Set(mcpTools.map(t => t.name))
for (const msg of messages) {
if (msg.type === 'assistant') {
for (const block of msg.message.content) {
if (
'type' in block &&
block.type === 'tool_use' &&
'name' in block &&
typeof block.name === 'string' &&
mcpToolNameSet.has(block.name)
) {
loadedMcpToolNames.add(block.name)
}
}
}
}
}
// Build tool details with isLoaded flag
for (const [i, tool] of mcpTools.entries()) {
mcpToolDetails.push({
name: tool.name,
serverName: tool.name.split('__')[1] || 'unknown',
tokens: mcpToolTokensByTool[i]!,
isLoaded: loadedMcpToolNames.has(tool.name) || !isDeferredTool(tool),
})
}
// Calculate loaded vs deferred tokens
let loadedTokens = 0
let deferredTokens = 0
for (const detail of mcpToolDetails) {
if (detail.isLoaded) {
loadedTokens += detail.tokens
} else if (isDeferred) {
deferredTokens += detail.tokens
}
}
return {
// When deferred but some tools are loaded, count loaded tokens
mcpToolTokens: isDeferred ? loadedTokens : totalTokens,
mcpToolDetails,
// Track deferred tokens separately for display
deferredToolTokens: deferredTokens,
loadedMcpToolNames,
}
}
async function countCustomAgentTokens(agentDefinitions: {
activeAgents: AgentDefinition[]
}): Promise<{
agentTokens: number
agentDetails: Agent[]
}> {
const customAgents = agentDefinitions.activeAgents.filter(
a => a.source !== 'built-in',
)
const agentDetails: Agent[] = []
let agentTokens = 0
const tokenCounts = await Promise.all(
customAgents.map(agent =>
countTokensWithFallback(
[
{
role: 'user',
content: [agent.agentType, agent.whenToUse].join(' '),
},
],
[],
),
),
)
for (const [i, agent] of customAgents.entries()) {
const tokens = tokenCounts[i] || 0
agentTokens += tokens || 0
agentDetails.push({
agentType: agent.agentType,
source: agent.source,
tokens: tokens || 0,
})
}
return { agentTokens, agentDetails }
}
type MessageBreakdown = {
totalTokens: number
toolCallTokens: number
toolResultTokens: number
attachmentTokens: number
assistantMessageTokens: number
userMessageTokens: number
toolCallsByType: Map<string, number>
toolResultsByType: Map<string, number>
attachmentsByType: Map<string, number>
}
function processAssistantMessage(
msg: AssistantMessage | NormalizedAssistantMessage,
breakdown: MessageBreakdown,
): void {
// Process each content block individually
for (const block of msg.message.content) {
const blockStr = jsonStringify(block)
const blockTokens = roughTokenCountEstimation(blockStr)
if ('type' in block && block.type === 'tool_use') {
breakdown.toolCallTokens += blockTokens
const toolName = ('name' in block ? block.name : undefined) || 'unknown'
breakdown.toolCallsByType.set(
toolName,
(breakdown.toolCallsByType.get(toolName) || 0) + blockTokens,
)
} else {
// Text blocks or other non-tool content
breakdown.assistantMessageTokens += blockTokens
}
}
}
function processUserMessage(
msg: UserMessage | NormalizedUserMessage,
breakdown: MessageBreakdown,
toolUseIdToName: Map<string, string>,
): void {
// Handle both string and array content
if (typeof msg.message.content === 'string') {
// Simple string content
const tokens = roughTokenCountEstimation(msg.message.content)
breakdown.userMessageTokens += tokens
return
}
// Process each content block individually
for (const block of msg.message.content) {
const blockStr = jsonStringify(block)
const blockTokens = roughTokenCountEstimation(blockStr)
if ('type' in block && block.type === 'tool_result') {
breakdown.toolResultTokens += blockTokens
const toolUseId = 'tool_use_id' in block ? block.tool_use_id : undefined
const toolName =
(toolUseId ? toolUseIdToName.get(toolUseId) : undefined) || 'unknown'
breakdown.toolResultsByType.set(
toolName,
(breakdown.toolResultsByType.get(toolName) || 0) + blockTokens,
)
} else {
// Text blocks or other non-tool content
breakdown.userMessageTokens += blockTokens
}
}
}
function processAttachment(
msg: AttachmentMessage,
breakdown: MessageBreakdown,
): void {
const contentStr = jsonStringify(msg.attachment)
const tokens = roughTokenCountEstimation(contentStr)
breakdown.attachmentTokens += tokens
const attachType = msg.attachment.type || 'unknown'
breakdown.attachmentsByType.set(
attachType,
(breakdown.attachmentsByType.get(attachType) || 0) + tokens,
)
}
async function approximateMessageTokens(
messages: Message[],
): Promise<MessageBreakdown> {
const microcompactResult = await microcompactMessages(messages)
// Initialize tracking
const breakdown: MessageBreakdown = {
totalTokens: 0,
toolCallTokens: 0,
toolResultTokens: 0,
attachmentTokens: 0,
assistantMessageTokens: 0,
userMessageTokens: 0,
toolCallsByType: new Map<string, number>(),
toolResultsByType: new Map<string, number>(),
attachmentsByType: new Map<string, number>(),
}
// Build a map of tool_use_id to tool_name for easier lookup
const toolUseIdToName = new Map<string, string>()
for (const msg of microcompactResult.messages) {
if (msg.type === 'assistant') {
for (const block of msg.message.content) {
if ('type' in block && block.type === 'tool_use') {
const toolUseId = 'id' in block ? block.id : undefined
const toolName =
('name' in block ? block.name : undefined) || 'unknown'
if (toolUseId) {
toolUseIdToName.set(toolUseId, toolName)
}
}
}
}
}
// Process each message for detailed breakdown
for (const msg of microcompactResult.messages) {
if (msg.type === 'assistant') {
processAssistantMessage(msg, breakdown)
} else if (msg.type === 'user') {
processUserMessage(msg, breakdown, toolUseIdToName)
} else if (msg.type === 'attachment') {
processAttachment(msg, breakdown)
}
}
// Calculate total tokens using the API for accuracy
const approximateMessageTokens = await countTokensWithFallback(
normalizeMessagesForAPI(microcompactResult.messages).map(_ => {
if (_.type === 'assistant') {
return {
// Important: strip out fields like id, etc. -- the counting API errors if they're present
role: 'assistant',
content: _.message.content,
}
}
return _.message
}),
[],
)
breakdown.totalTokens = approximateMessageTokens ?? 0
return breakdown
}
export async function analyzeContextUsage(
messages: Message[],
model: string,
getToolPermissionContext: () => Promise<ToolPermissionContext>,
tools: Tools,
agentDefinitions: AgentDefinitionsResult,
terminalWidth?: number,
toolUseContext?: Pick<ToolUseContext, 'options'>,
mainThreadAgentDefinition?: AgentDefinition,
/** Original messages before microcompact, used to extract API usage */
originalMessages?: Message[],
): Promise<ContextData> {
const runtimeModel = getRuntimeMainLoopModel({
permissionMode: (await getToolPermissionContext()).mode,
mainLoopModel: model,
})
// Get context window size
const contextWindow = getContextWindowForModel(runtimeModel, getSdkBetas())
// Build the effective system prompt using the shared utility
const defaultSystemPrompt = await getSystemPrompt(tools, runtimeModel)
const effectiveSystemPrompt = buildEffectiveSystemPrompt({
mainThreadAgentDefinition,
toolUseContext: toolUseContext ?? {
options: {} as ToolUseContext['options'],
},
customSystemPrompt: toolUseContext?.options.customSystemPrompt,
defaultSystemPrompt,
appendSystemPrompt: toolUseContext?.options.appendSystemPrompt,
})
// Critical operations that should not fail due to skills
const [
{ systemPromptTokens, systemPromptSections },
{ claudeMdTokens, memoryFileDetails },
{
builtInToolTokens,
deferredBuiltinDetails,
deferredBuiltinTokens,
systemToolDetails,
},
{ mcpToolTokens, mcpToolDetails, deferredToolTokens },
{ agentTokens, agentDetails },
{ slashCommandTokens, commandInfo },
messageBreakdown,
] = await Promise.all([
countSystemTokens(effectiveSystemPrompt),
countMemoryFileTokens(),
countBuiltInToolTokens(
tools,
getToolPermissionContext,
agentDefinitions,
runtimeModel,
messages,
),
countMcpToolTokens(
tools,
getToolPermissionContext,
agentDefinitions,
runtimeModel,
messages,
),
countCustomAgentTokens(agentDefinitions),
countSlashCommandTokens(tools, getToolPermissionContext, agentDefinitions),
approximateMessageTokens(messages),
])
// Count skills separately with error isolation
const skillResult = await countSkillTokens(
tools,
getToolPermissionContext,
agentDefinitions,
)
const skillInfo = skillResult.skillInfo
// Use sum of individual skill token estimates (matches what's shown in details)
// rather than skillResult.skillTokens which includes tool schema overhead
const skillFrontmatterTokens = skillInfo.skillFrontmatter.reduce(
(sum, skill) => sum + skill.tokens,
0,
)
const messageTokens = messageBreakdown.totalTokens
// Check if autocompact is enabled and calculate threshold
const isAutoCompact = isAutoCompactEnabled()
const autoCompactThreshold = isAutoCompact
? getEffectiveContextWindowSize(model) - AUTOCOMPACT_BUFFER_TOKENS
: undefined
// Create categories
const cats: ContextCategory[] = []
// System prompt is always shown first (fixed overhead)
if (systemPromptTokens > 0) {
cats.push({
name: 'System prompt',
tokens: systemPromptTokens,
color: 'promptBorder',
})
}
// Built-in tools right after system prompt (skills shown separately below)
// Ant users get a per-tool breakdown via systemToolDetails
const systemToolsTokens = builtInToolTokens - skillFrontmatterTokens
if (systemToolsTokens > 0) {
cats.push({
name:
process.env.USER_TYPE === 'ant'
? '[ANT-ONLY] System tools'
: 'System tools',
tokens: systemToolsTokens,
color: 'inactive',
})
}
// MCP tools after system tools
if (mcpToolTokens > 0) {
cats.push({
name: 'MCP tools',
tokens: mcpToolTokens,
color: 'cyan_FOR_SUBAGENTS_ONLY',
})
}
// Show deferred MCP tools (when tool search is enabled)
// These don't count toward context usage but we show them for visibility
if (deferredToolTokens > 0) {
cats.push({
name: 'MCP tools (deferred)',
tokens: deferredToolTokens,
color: 'inactive',
isDeferred: true,
})
}
// Show deferred builtin tools (when tool search is enabled)
if (deferredBuiltinTokens > 0) {
cats.push({
name: 'System tools (deferred)',
tokens: deferredBuiltinTokens,
color: 'inactive',
isDeferred: true,
})
}
// Custom agents after MCP tools
if (agentTokens > 0) {
cats.push({
name: 'Custom agents',
tokens: agentTokens,
color: 'permission',
})
}
// Memory files after custom agents
if (claudeMdTokens > 0) {
cats.push({
name: 'Memory files',
tokens: claudeMdTokens,
color: 'claude',
})
}
// Skills after memory files
if (skillFrontmatterTokens > 0) {
cats.push({
name: 'Skills',
tokens: skillFrontmatterTokens,
color: 'warning',
})
}
if (messageTokens !== null && messageTokens > 0) {
cats.push({
name: 'Messages',
tokens: messageTokens,
color: 'purple_FOR_SUBAGENTS_ONLY',
})
}
// Calculate actual content usage (before adding reserved buffers)
// Exclude deferred categories from the usage calculation
const actualUsage = cats.reduce(
(sum, cat) => sum + (cat.isDeferred ? 0 : cat.tokens),
0,
)
// Reserved space after messages (not counted in actualUsage shown to user).
// Under reactive-only mode (cobalt_raccoon), proactive autocompact never
// fires and the reserved buffer is a lie — skip it entirely and let Free
// space fill the grid. feature() guard keeps the flag string out of
// external builds. Same for context-collapse (marble_origami) — collapse
// owns the threshold ladder and autocompact is suppressed in
// shouldAutoCompact, so the 33k buffer shown here would be a lie too.
let reservedTokens = 0
let skipReservedBuffer = false
if (feature('REACTIVE_COMPACT')) {
if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
skipReservedBuffer = true
}
}
if (feature('CONTEXT_COLLAPSE')) {
/* eslint-disable @typescript-eslint/no-require-imports */
const { isContextCollapseEnabled } =
require('../services/contextCollapse/index.js') as typeof import('../services/contextCollapse/index.js')
/* eslint-enable @typescript-eslint/no-require-imports */
if (isContextCollapseEnabled()) {
skipReservedBuffer = true
}
}
if (skipReservedBuffer) {
// No buffer category pushed — reactive compaction is transparent and
// doesn't need a visible reservation in the grid.
} else if (isAutoCompact && autoCompactThreshold !== undefined) {
// Autocompact buffer (from effective context)
reservedTokens = contextWindow - autoCompactThreshold
cats.push({
name: RESERVED_CATEGORY_NAME,
tokens: reservedTokens,
color: 'inactive',
})
} else if (!isAutoCompact) {
// Compact buffer reserve (3k from actual context limit)
reservedTokens = MANUAL_COMPACT_BUFFER_TOKENS
cats.push({
name: MANUAL_COMPACT_BUFFER_NAME,
tokens: reservedTokens,
color: 'inactive',
})
}
// Calculate free space (subtract both actual usage and reserved buffer)
const freeTokens = Math.max(0, contextWindow - actualUsage - reservedTokens)
cats.push({
name: 'Free space',
tokens: freeTokens,
color: 'promptBorder',
})
// Total for display (everything except free space)
const totalIncludingReserved = actualUsage
// Extract API usage from original messages (if provided) to match status line
// This uses the same source of truth as the status line for consistency
const apiUsage = getCurrentUsage(originalMessages ?? messages)
// When API usage is available, use it for total to match status line calculation
// Status line uses: input_tokens + cache_creation_input_tokens + cache_read_input_tokens
const totalFromAPI = apiUsage
? apiUsage.input_tokens +
apiUsage.cache_creation_input_tokens +
apiUsage.cache_read_input_tokens
: null
// Use API total if available, otherwise fall back to estimated total
const finalTotalTokens = totalFromAPI ?? totalIncludingReserved
// Pre-calculate grid based on model context window and terminal width
// For narrow screens (< 80 cols), use 5x5 for 200k models, 5x10 for 1M+ models
// For normal screens, use 10x10 for 200k models, 20x10 for 1M+ models
const isNarrowScreen = terminalWidth && terminalWidth < 80
const GRID_WIDTH =
contextWindow >= 1000000
? isNarrowScreen
? 5
: 20
: isNarrowScreen
? 5
: 10
const GRID_HEIGHT = contextWindow >= 1000000 ? 10 : isNarrowScreen ? 5 : 10
const TOTAL_SQUARES = GRID_WIDTH * GRID_HEIGHT
// Filter out deferred categories - they don't take up actual context space
// (e.g., MCP tools when tool search is enabled)
const nonDeferredCats = cats.filter(cat => !cat.isDeferred)
// Calculate squares per category (use rawEffectiveMax for visualization to show full context)
const categorySquares = nonDeferredCats.map(cat => ({
...cat,
squares:
cat.name === 'Free space'
? Math.round((cat.tokens / contextWindow) * TOTAL_SQUARES)
: Math.max(1, Math.round((cat.tokens / contextWindow) * TOTAL_SQUARES)),
percentageOfTotal: Math.round((cat.tokens / contextWindow) * 100),
}))
// Helper function to create grid squares for a category
function createCategorySquares(
category: (typeof categorySquares)[0],
): GridSquare[] {
const squares: GridSquare[] = []
const exactSquares = (category.tokens / contextWindow) * TOTAL_SQUARES
const wholeSquares = Math.floor(exactSquares)
const fractionalPart = exactSquares - wholeSquares
for (let i = 0; i < category.squares; i++) {
// Determine fullness: full squares get 1.0, partial square gets fractional amount
let squareFullness = 1.0
if (i === wholeSquares && fractionalPart > 0) {
// This is the partial square
squareFullness = fractionalPart
}
squares.push({
color: category.color,
isFilled: true,
categoryName: category.name,
tokens: category.tokens,
percentage: category.percentageOfTotal,
squareFullness,
})
}
return squares
}
// Build the grid as an array of squares with full metadata
const gridSquares: GridSquare[] = []
// Separate reserved category for end placement (either autocompact or manual compact buffer)
const reservedCategory = categorySquares.find(
cat =>
cat.name === RESERVED_CATEGORY_NAME ||
cat.name === MANUAL_COMPACT_BUFFER_NAME,
)
const nonReservedCategories = categorySquares.filter(
cat =>
cat.name !== RESERVED_CATEGORY_NAME &&
cat.name !== MANUAL_COMPACT_BUFFER_NAME &&
cat.name !== 'Free space',
)
// Add all non-reserved, non-free-space squares first
for (const cat of nonReservedCategories) {
const squares = createCategorySquares(cat)
for (const square of squares) {
if (gridSquares.length < TOTAL_SQUARES) {
gridSquares.push(square)
}
}
}
// Calculate how many squares are needed for reserved
const reservedSquareCount = reservedCategory ? reservedCategory.squares : 0
// Fill with free space, leaving room for reserved at the end
const freeSpaceCat = cats.find(c => c.name === 'Free space')
const freeSpaceTarget = TOTAL_SQUARES - reservedSquareCount
while (gridSquares.length < freeSpaceTarget) {
gridSquares.push({
color: 'promptBorder',
isFilled: true,
categoryName: 'Free space',
tokens: freeSpaceCat?.tokens || 0,
percentage: freeSpaceCat
? Math.round((freeSpaceCat.tokens / contextWindow) * 100)
: 0,
squareFullness: 1.0, // Free space is always "full"
})
}
// Add reserved squares at the end
if (reservedCategory) {
const squares = createCategorySquares(reservedCategory)
for (const square of squares) {
if (gridSquares.length < TOTAL_SQUARES) {
gridSquares.push(square)
}
}
}
// Convert to rows for rendering
const gridRows: GridSquare[][] = []
for (let i = 0; i < GRID_HEIGHT; i++) {
gridRows.push(gridSquares.slice(i * GRID_WIDTH, (i + 1) * GRID_WIDTH))
}
// Format message breakdown (used by context suggestions for all users)
// Combine tool calls and results, then get top 5
const toolsMap = new Map<
string,
{ callTokens: number; resultTokens: number }
>()
// Add call tokens
for (const [name, tokens] of messageBreakdown.toolCallsByType.entries()) {
const existing = toolsMap.get(name) || { callTokens: 0, resultTokens: 0 }
toolsMap.set(name, { ...existing, callTokens: tokens })
}
// Add result tokens
for (const [name, tokens] of messageBreakdown.toolResultsByType.entries()) {
const existing = toolsMap.get(name) || { callTokens: 0, resultTokens: 0 }
toolsMap.set(name, { ...existing, resultTokens: tokens })
}
// Convert to array and sort by total tokens (calls + results)
const toolsByTypeArray = Array.from(toolsMap.entries())
.map(([name, { callTokens, resultTokens }]) => ({
name,
callTokens,
resultTokens,
}))
.sort(
(a, b) => b.callTokens + b.resultTokens - (a.callTokens + a.resultTokens),
)
const attachmentsByTypeArray = Array.from(
messageBreakdown.attachmentsByType.entries(),
)
.map(([name, tokens]) => ({ name, tokens }))
.sort((a, b) => b.tokens - a.tokens)
const formattedMessageBreakdown = {
toolCallTokens: messageBreakdown.toolCallTokens,
toolResultTokens: messageBreakdown.toolResultTokens,
attachmentTokens: messageBreakdown.attachmentTokens,
assistantMessageTokens: messageBreakdown.assistantMessageTokens,
userMessageTokens: messageBreakdown.userMessageTokens,
toolCallsByType: toolsByTypeArray,
attachmentsByType: attachmentsByTypeArray,
}
return {
categories: cats,
totalTokens: finalTotalTokens,
maxTokens: contextWindow,
rawMaxTokens: contextWindow,
percentage: Math.round((finalTotalTokens / contextWindow) * 100),
gridRows,
model: runtimeModel,
memoryFiles: memoryFileDetails,
mcpTools: mcpToolDetails,
deferredBuiltinTools:
process.env.USER_TYPE === 'ant' ? deferredBuiltinDetails : undefined,
systemTools:
process.env.USER_TYPE === 'ant' ? systemToolDetails : undefined,
systemPromptSections:
process.env.USER_TYPE === 'ant' ? systemPromptSections : undefined,
agents: agentDetails,
slashCommands:
slashCommandTokens > 0
? {
totalCommands: commandInfo.totalCommands,
includedCommands: commandInfo.includedCommands,
tokens: slashCommandTokens,
}
: undefined,
skills:
skillFrontmatterTokens > 0
? {
totalSkills: skillInfo.totalSkills,
includedSkills: skillInfo.includedSkills,
tokens: skillFrontmatterTokens,
skillFrontmatter: skillInfo.skillFrontmatter,
}
: undefined,
autoCompactThreshold,
isAutoCompactEnabled: isAutoCompact,
messageBreakdown: formattedMessageBreakdown,
apiUsage,
}
}