302 lines
8.7 KiB
TypeScript
302 lines
8.7 KiB
TypeScript
/**
|
|
* Query profiling utility for measuring and reporting time spent in the query
|
|
* pipeline from user input to first token arrival. Enable by setting CLAUDE_CODE_PROFILE_QUERY=1
|
|
*
|
|
* Uses Node.js built-in performance hooks API for standard timing measurement.
|
|
* Tracks each query session with detailed checkpoints for identifying bottlenecks.
|
|
*
|
|
* Checkpoints tracked (in order):
|
|
* - query_user_input_received: Start of profiling
|
|
* - query_context_loading_start/end: Loading system prompts and contexts
|
|
* - query_query_start: Entry to query call from REPL
|
|
* - query_fn_entry: Entry to query() function
|
|
* - query_microcompact_start/end: Microcompaction of messages
|
|
* - query_autocompact_start/end: Autocompaction check
|
|
* - query_setup_start/end: StreamingToolExecutor and model setup
|
|
* - query_api_loop_start: Start of API retry loop
|
|
* - query_api_streaming_start: Start of streaming API call
|
|
* - query_tool_schema_build_start/end: Building tool schemas
|
|
* - query_message_normalization_start/end: Normalizing messages
|
|
* - query_client_creation_start/end: Creating Anthropic client
|
|
* - query_api_request_sent: HTTP request dispatched (before await, inside retry body)
|
|
* - query_response_headers_received: .withResponse() resolved (headers arrived)
|
|
* - query_first_chunk_received: First streaming chunk received (TTFT)
|
|
* - query_api_streaming_end: Streaming complete
|
|
* - query_tool_execution_start/end: Tool execution
|
|
* - query_recursive_call: Before recursive query call
|
|
* - query_end: End of query
|
|
*/
|
|
|
|
import { logForDebugging } from './debug.js'
|
|
import { isEnvTruthy } from './envUtils.js'
|
|
import { formatMs, formatTimelineLine, getPerformance } from './profilerBase.js'
|
|
|
|
// Module-level state - initialized once when the module loads
|
|
// eslint-disable-next-line custom-rules/no-process-env-top-level
|
|
const ENABLED = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_QUERY)
|
|
|
|
// Track memory snapshots separately (perf_hooks doesn't track memory)
|
|
const memorySnapshots = new Map<string, NodeJS.MemoryUsage>()
|
|
|
|
// Track query count for reporting
|
|
let queryCount = 0
|
|
|
|
// Track first token received time separately for summary
|
|
let firstTokenTime: number | null = null
|
|
|
|
/**
|
|
* Start profiling a new query session
|
|
*/
|
|
export function startQueryProfile(): void {
|
|
if (!ENABLED) return
|
|
|
|
const perf = getPerformance()
|
|
|
|
// Clear previous marks and memory snapshots
|
|
perf.clearMarks()
|
|
memorySnapshots.clear()
|
|
firstTokenTime = null
|
|
|
|
queryCount++
|
|
|
|
// Record the start checkpoint
|
|
queryCheckpoint('query_user_input_received')
|
|
}
|
|
|
|
/**
|
|
* Record a checkpoint with the given name
|
|
*/
|
|
export function queryCheckpoint(name: string): void {
|
|
if (!ENABLED) return
|
|
|
|
const perf = getPerformance()
|
|
perf.mark(name)
|
|
memorySnapshots.set(name, process.memoryUsage())
|
|
|
|
// Track first token specially
|
|
if (name === 'query_first_chunk_received' && firstTokenTime === null) {
|
|
const marks = perf.getEntriesByType('mark')
|
|
if (marks.length > 0) {
|
|
const lastMark = marks[marks.length - 1]
|
|
firstTokenTime = lastMark?.startTime ?? 0
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* End the current query profiling session
|
|
*/
|
|
export function endQueryProfile(): void {
|
|
if (!ENABLED) return
|
|
|
|
queryCheckpoint('query_profile_end')
|
|
}
|
|
|
|
/**
|
|
* Identify slow operations (> 100ms delta)
|
|
*/
|
|
function getSlowWarning(deltaMs: number, name: string): string {
|
|
// Don't flag the first checkpoint as slow - it measures time from process start,
|
|
// not actual processing overhead
|
|
if (name === 'query_user_input_received') {
|
|
return ''
|
|
}
|
|
|
|
if (deltaMs > 1000) {
|
|
return ` ⚠️ VERY SLOW`
|
|
}
|
|
if (deltaMs > 100) {
|
|
return ` ⚠️ SLOW`
|
|
}
|
|
|
|
// Specific warnings for known bottlenecks
|
|
if (name.includes('git_status') && deltaMs > 50) {
|
|
return ' ⚠️ git status'
|
|
}
|
|
if (name.includes('tool_schema') && deltaMs > 50) {
|
|
return ' ⚠️ tool schemas'
|
|
}
|
|
if (name.includes('client_creation') && deltaMs > 50) {
|
|
return ' ⚠️ client creation'
|
|
}
|
|
|
|
return ''
|
|
}
|
|
|
|
/**
|
|
* Get a formatted report of all checkpoints for the current/last query
|
|
*/
|
|
function getQueryProfileReport(): string {
|
|
if (!ENABLED) {
|
|
return 'Query profiling not enabled (set CLAUDE_CODE_PROFILE_QUERY=1)'
|
|
}
|
|
|
|
const perf = getPerformance()
|
|
const marks = perf.getEntriesByType('mark')
|
|
if (marks.length === 0) {
|
|
return 'No query profiling checkpoints recorded'
|
|
}
|
|
|
|
const lines: string[] = []
|
|
lines.push('='.repeat(80))
|
|
lines.push(`QUERY PROFILING REPORT - Query #${queryCount}`)
|
|
lines.push('='.repeat(80))
|
|
lines.push('')
|
|
|
|
// Use first mark as baseline (query start time) to show relative times
|
|
const baselineTime = marks[0]?.startTime ?? 0
|
|
let prevTime = baselineTime
|
|
let apiRequestSentTime = 0
|
|
let firstChunkTime = 0
|
|
|
|
for (const mark of marks) {
|
|
const relativeTime = mark.startTime - baselineTime
|
|
const deltaMs = mark.startTime - prevTime
|
|
lines.push(
|
|
formatTimelineLine(
|
|
relativeTime,
|
|
deltaMs,
|
|
mark.name,
|
|
memorySnapshots.get(mark.name),
|
|
10,
|
|
9,
|
|
getSlowWarning(deltaMs, mark.name),
|
|
),
|
|
)
|
|
|
|
// Track key milestones for summary (use relative times)
|
|
if (mark.name === 'query_api_request_sent') {
|
|
apiRequestSentTime = relativeTime
|
|
}
|
|
if (mark.name === 'query_first_chunk_received') {
|
|
firstChunkTime = relativeTime
|
|
}
|
|
|
|
prevTime = mark.startTime
|
|
}
|
|
|
|
// Calculate summary statistics (relative to baseline)
|
|
const lastMark = marks[marks.length - 1]
|
|
const totalTime = lastMark ? lastMark.startTime - baselineTime : 0
|
|
|
|
lines.push('')
|
|
lines.push('-'.repeat(80))
|
|
|
|
if (firstChunkTime > 0) {
|
|
const preRequestOverhead = apiRequestSentTime
|
|
const networkLatency = firstChunkTime - apiRequestSentTime
|
|
const preRequestPercent = (
|
|
(preRequestOverhead / firstChunkTime) *
|
|
100
|
|
).toFixed(1)
|
|
const networkPercent = ((networkLatency / firstChunkTime) * 100).toFixed(1)
|
|
|
|
lines.push(`Total TTFT: ${formatMs(firstChunkTime)}ms`)
|
|
lines.push(
|
|
` - Pre-request overhead: ${formatMs(preRequestOverhead)}ms (${preRequestPercent}%)`,
|
|
)
|
|
lines.push(
|
|
` - Network latency: ${formatMs(networkLatency)}ms (${networkPercent}%)`,
|
|
)
|
|
} else {
|
|
lines.push(`Total time: ${formatMs(totalTime)}ms`)
|
|
}
|
|
|
|
// Add phase summary
|
|
lines.push(getPhaseSummary(marks, baselineTime))
|
|
|
|
lines.push('='.repeat(80))
|
|
|
|
return lines.join('\n')
|
|
}
|
|
|
|
/**
|
|
* Get phase-based summary showing time spent in each major phase
|
|
*/
|
|
function getPhaseSummary(
|
|
marks: Array<{ name: string; startTime: number }>,
|
|
baselineTime: number,
|
|
): string {
|
|
const phases: Array<{ name: string; start: string; end: string }> = [
|
|
{
|
|
name: 'Context loading',
|
|
start: 'query_context_loading_start',
|
|
end: 'query_context_loading_end',
|
|
},
|
|
{
|
|
name: 'Microcompact',
|
|
start: 'query_microcompact_start',
|
|
end: 'query_microcompact_end',
|
|
},
|
|
{
|
|
name: 'Autocompact',
|
|
start: 'query_autocompact_start',
|
|
end: 'query_autocompact_end',
|
|
},
|
|
{ name: 'Query setup', start: 'query_setup_start', end: 'query_setup_end' },
|
|
{
|
|
name: 'Tool schemas',
|
|
start: 'query_tool_schema_build_start',
|
|
end: 'query_tool_schema_build_end',
|
|
},
|
|
{
|
|
name: 'Message normalization',
|
|
start: 'query_message_normalization_start',
|
|
end: 'query_message_normalization_end',
|
|
},
|
|
{
|
|
name: 'Client creation',
|
|
start: 'query_client_creation_start',
|
|
end: 'query_client_creation_end',
|
|
},
|
|
{
|
|
name: 'Network TTFB',
|
|
start: 'query_api_request_sent',
|
|
end: 'query_first_chunk_received',
|
|
},
|
|
{
|
|
name: 'Tool execution',
|
|
start: 'query_tool_execution_start',
|
|
end: 'query_tool_execution_end',
|
|
},
|
|
]
|
|
|
|
const markMap = new Map(marks.map(m => [m.name, m.startTime - baselineTime]))
|
|
|
|
const lines: string[] = []
|
|
lines.push('')
|
|
lines.push('PHASE BREAKDOWN:')
|
|
|
|
for (const phase of phases) {
|
|
const startTime = markMap.get(phase.start)
|
|
const endTime = markMap.get(phase.end)
|
|
|
|
if (startTime !== undefined && endTime !== undefined) {
|
|
const duration = endTime - startTime
|
|
const bar = '█'.repeat(Math.min(Math.ceil(duration / 10), 50)) // 1 block per 10ms, max 50
|
|
lines.push(
|
|
` ${phase.name.padEnd(22)} ${formatMs(duration).padStart(10)}ms ${bar}`,
|
|
)
|
|
}
|
|
}
|
|
|
|
// Calculate pre-API overhead (everything before api_request_sent)
|
|
const apiRequestSent = markMap.get('query_api_request_sent')
|
|
if (apiRequestSent !== undefined) {
|
|
lines.push('')
|
|
lines.push(
|
|
` ${'Total pre-API overhead'.padEnd(22)} ${formatMs(apiRequestSent).padStart(10)}ms`,
|
|
)
|
|
}
|
|
|
|
return lines.join('\n')
|
|
}
|
|
|
|
/**
|
|
* Log the query profile report to debug output
|
|
*/
|
|
export function logQueryProfileReport(): void {
|
|
if (!ENABLED) return
|
|
logForDebugging(getQueryProfileReport())
|
|
}
|