/** * Query profiling utility for measuring and reporting time spent in the query * pipeline from user input to first token arrival. Enable by setting CLAUDE_CODE_PROFILE_QUERY=1 * * Uses Node.js built-in performance hooks API for standard timing measurement. * Tracks each query session with detailed checkpoints for identifying bottlenecks. * * Checkpoints tracked (in order): * - query_user_input_received: Start of profiling * - query_context_loading_start/end: Loading system prompts and contexts * - query_query_start: Entry to query call from REPL * - query_fn_entry: Entry to query() function * - query_microcompact_start/end: Microcompaction of messages * - query_autocompact_start/end: Autocompaction check * - query_setup_start/end: StreamingToolExecutor and model setup * - query_api_loop_start: Start of API retry loop * - query_api_streaming_start: Start of streaming API call * - query_tool_schema_build_start/end: Building tool schemas * - query_message_normalization_start/end: Normalizing messages * - query_client_creation_start/end: Creating Anthropic client * - query_api_request_sent: HTTP request dispatched (before await, inside retry body) * - query_response_headers_received: .withResponse() resolved (headers arrived) * - query_first_chunk_received: First streaming chunk received (TTFT) * - query_api_streaming_end: Streaming complete * - query_tool_execution_start/end: Tool execution * - query_recursive_call: Before recursive query call * - query_end: End of query */ import { logForDebugging } from './debug.js' import { isEnvTruthy } from './envUtils.js' import { formatMs, formatTimelineLine, getPerformance } from './profilerBase.js' // Module-level state - initialized once when the module loads // eslint-disable-next-line custom-rules/no-process-env-top-level const ENABLED = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_QUERY) // Track memory snapshots separately (perf_hooks doesn't track memory) const memorySnapshots = new Map() // Track query count for reporting let queryCount = 0 // Track first token received time separately for summary let firstTokenTime: number | null = null /** * Start profiling a new query session */ export function startQueryProfile(): void { if (!ENABLED) return const perf = getPerformance() // Clear previous marks and memory snapshots perf.clearMarks() memorySnapshots.clear() firstTokenTime = null queryCount++ // Record the start checkpoint queryCheckpoint('query_user_input_received') } /** * Record a checkpoint with the given name */ export function queryCheckpoint(name: string): void { if (!ENABLED) return const perf = getPerformance() perf.mark(name) memorySnapshots.set(name, process.memoryUsage()) // Track first token specially if (name === 'query_first_chunk_received' && firstTokenTime === null) { const marks = perf.getEntriesByType('mark') if (marks.length > 0) { const lastMark = marks[marks.length - 1] firstTokenTime = lastMark?.startTime ?? 0 } } } /** * End the current query profiling session */ export function endQueryProfile(): void { if (!ENABLED) return queryCheckpoint('query_profile_end') } /** * Identify slow operations (> 100ms delta) */ function getSlowWarning(deltaMs: number, name: string): string { // Don't flag the first checkpoint as slow - it measures time from process start, // not actual processing overhead if (name === 'query_user_input_received') { return '' } if (deltaMs > 1000) { return ` ⚠️ VERY SLOW` } if (deltaMs > 100) { return ` ⚠️ SLOW` } // Specific warnings for known bottlenecks if (name.includes('git_status') && deltaMs > 50) { return ' ⚠️ git status' } if (name.includes('tool_schema') && deltaMs > 50) { return ' ⚠️ tool schemas' } if (name.includes('client_creation') && deltaMs > 50) { return ' ⚠️ client creation' } return '' } /** * Get a formatted report of all checkpoints for the current/last query */ function getQueryProfileReport(): string { if (!ENABLED) { return 'Query profiling not enabled (set CLAUDE_CODE_PROFILE_QUERY=1)' } const perf = getPerformance() const marks = perf.getEntriesByType('mark') if (marks.length === 0) { return 'No query profiling checkpoints recorded' } const lines: string[] = [] lines.push('='.repeat(80)) lines.push(`QUERY PROFILING REPORT - Query #${queryCount}`) lines.push('='.repeat(80)) lines.push('') // Use first mark as baseline (query start time) to show relative times const baselineTime = marks[0]?.startTime ?? 0 let prevTime = baselineTime let apiRequestSentTime = 0 let firstChunkTime = 0 for (const mark of marks) { const relativeTime = mark.startTime - baselineTime const deltaMs = mark.startTime - prevTime lines.push( formatTimelineLine( relativeTime, deltaMs, mark.name, memorySnapshots.get(mark.name), 10, 9, getSlowWarning(deltaMs, mark.name), ), ) // Track key milestones for summary (use relative times) if (mark.name === 'query_api_request_sent') { apiRequestSentTime = relativeTime } if (mark.name === 'query_first_chunk_received') { firstChunkTime = relativeTime } prevTime = mark.startTime } // Calculate summary statistics (relative to baseline) const lastMark = marks[marks.length - 1] const totalTime = lastMark ? lastMark.startTime - baselineTime : 0 lines.push('') lines.push('-'.repeat(80)) if (firstChunkTime > 0) { const preRequestOverhead = apiRequestSentTime const networkLatency = firstChunkTime - apiRequestSentTime const preRequestPercent = ( (preRequestOverhead / firstChunkTime) * 100 ).toFixed(1) const networkPercent = ((networkLatency / firstChunkTime) * 100).toFixed(1) lines.push(`Total TTFT: ${formatMs(firstChunkTime)}ms`) lines.push( ` - Pre-request overhead: ${formatMs(preRequestOverhead)}ms (${preRequestPercent}%)`, ) lines.push( ` - Network latency: ${formatMs(networkLatency)}ms (${networkPercent}%)`, ) } else { lines.push(`Total time: ${formatMs(totalTime)}ms`) } // Add phase summary lines.push(getPhaseSummary(marks, baselineTime)) lines.push('='.repeat(80)) return lines.join('\n') } /** * Get phase-based summary showing time spent in each major phase */ function getPhaseSummary( marks: Array<{ name: string; startTime: number }>, baselineTime: number, ): string { const phases: Array<{ name: string; start: string; end: string }> = [ { name: 'Context loading', start: 'query_context_loading_start', end: 'query_context_loading_end', }, { name: 'Microcompact', start: 'query_microcompact_start', end: 'query_microcompact_end', }, { name: 'Autocompact', start: 'query_autocompact_start', end: 'query_autocompact_end', }, { name: 'Query setup', start: 'query_setup_start', end: 'query_setup_end' }, { name: 'Tool schemas', start: 'query_tool_schema_build_start', end: 'query_tool_schema_build_end', }, { name: 'Message normalization', start: 'query_message_normalization_start', end: 'query_message_normalization_end', }, { name: 'Client creation', start: 'query_client_creation_start', end: 'query_client_creation_end', }, { name: 'Network TTFB', start: 'query_api_request_sent', end: 'query_first_chunk_received', }, { name: 'Tool execution', start: 'query_tool_execution_start', end: 'query_tool_execution_end', }, ] const markMap = new Map(marks.map(m => [m.name, m.startTime - baselineTime])) const lines: string[] = [] lines.push('') lines.push('PHASE BREAKDOWN:') for (const phase of phases) { const startTime = markMap.get(phase.start) const endTime = markMap.get(phase.end) if (startTime !== undefined && endTime !== undefined) { const duration = endTime - startTime const bar = '█'.repeat(Math.min(Math.ceil(duration / 10), 50)) // 1 block per 10ms, max 50 lines.push( ` ${phase.name.padEnd(22)} ${formatMs(duration).padStart(10)}ms ${bar}`, ) } } // Calculate pre-API overhead (everything before api_request_sent) const apiRequestSent = markMap.get('query_api_request_sent') if (apiRequestSent !== undefined) { lines.push('') lines.push( ` ${'Total pre-API overhead'.padEnd(22)} ${formatMs(apiRequestSent).padStart(10)}ms`, ) } return lines.join('\n') } /** * Log the query profile report to debug output */ export function logQueryProfileReport(): void { if (!ENABLED) return logForDebugging(getQueryProfileReport()) }