/** * Side Question ("/btw") feature - allows asking quick questions without * interrupting the main agent context. * * Uses runForkedAgent to leverage prompt caching from the parent context * while keeping the side question response separate from main conversation. */ import { formatAPIError } from '../services/api/errorUtils.js' import type { NonNullableUsage } from '../services/api/logging.js' import type { Message, SystemAPIErrorMessage } from '../types/message.js' import { type CacheSafeParams, runForkedAgent } from './forkedAgent.js' import { createUserMessage, extractTextContent } from './messages.js' // Pattern to detect "/btw" at start of input (case-insensitive, word boundary) const BTW_PATTERN = /^\/btw\b/gi /** * Find positions of "/btw" keyword at the start of text for highlighting. * Similar to findThinkingTriggerPositions in thinking.ts. */ export function findBtwTriggerPositions(text: string): Array<{ word: string start: number end: number }> { const positions: Array<{ word: string; start: number; end: number }> = [] const matches = text.matchAll(BTW_PATTERN) for (const match of matches) { if (match.index !== undefined) { positions.push({ word: match[0], start: match.index, end: match.index + match[0].length, }) } } return positions } export type SideQuestionResult = { response: string | null usage: NonNullableUsage } /** * Run a side question using a forked agent. * Shares the parent's prompt cache — no thinking override, no cache write. * All tools are blocked and we cap at 1 turn. */ export async function runSideQuestion({ question, cacheSafeParams, }: { question: string cacheSafeParams: CacheSafeParams }): Promise { // Wrap the question with instructions to answer without tools const wrappedQuestion = `This is a side question from the user. You must answer this question directly in a single response. IMPORTANT CONTEXT: - You are a separate, lightweight agent spawned to answer this one question - The main agent is NOT interrupted - it continues working independently in the background - You share the conversation context but are a completely separate instance - Do NOT reference being interrupted or what you were "previously doing" - that framing is incorrect CRITICAL CONSTRAINTS: - You have NO tools available - you cannot read files, run commands, search, or take any actions - This is a one-off response - there will be no follow-up turns - You can ONLY provide information based on what you already know from the conversation context - NEVER say things like "Let me try...", "I'll now...", "Let me check...", or promise to take any action - If you don't know the answer, say so - do not offer to look it up or investigate Simply answer the question with the information you have. ${question}` const agentResult = await runForkedAgent({ promptMessages: [createUserMessage({ content: wrappedQuestion })], // Do NOT override thinkingConfig — thinking is part of the API cache key, // and diverging from the main thread's config busts the prompt cache. // Adaptive thinking on a quick Q&A has negligible overhead. cacheSafeParams, canUseTool: async () => ({ behavior: 'deny' as const, message: 'Side questions cannot use tools', decisionReason: { type: 'other' as const, reason: 'side_question' }, }), querySource: 'side_question', forkLabel: 'side_question', maxTurns: 1, // Single turn only - no tool use loops // No future request shares this suffix; skip writing cache entries. skipCacheWrite: true, }) return { response: extractSideQuestionResponse(agentResult.messages), usage: agentResult.totalUsage, } } /** * Extract a display string from forked agent messages. * * IMPORTANT: claude.ts yields one AssistantMessage PER CONTENT BLOCK, not one * per API response. With adaptive thinking enabled (inherited from the main * thread to preserve the cache key), a thinking response arrives as: * messages[0] = assistant { content: [thinking_block] } * messages[1] = assistant { content: [text_block] } * * The old code used `.find(m => m.type === 'assistant')` which grabbed the * first (thinking-only) message, found no text block, and returned null → * "No response received". Repos with large context (many skills, big CLAUDE.md) * trigger thinking more often, which is why this reproduced in the monorepo * but not here. * * Secondary failure modes also surfaced as "No response received": * - Model attempts tool_use → content = [thinking, tool_use], no text. * Rare — the system-reminder usually prevents this, but handled here. * - API error exhausts retries → query yields system api_error + user * interruption, no assistant message at all. */ function extractSideQuestionResponse(messages: Message[]): string | null { // Flatten all assistant content blocks across the per-block messages. const assistantBlocks = messages.flatMap(m => m.type === 'assistant' ? m.message.content : [], ) if (assistantBlocks.length > 0) { // Concatenate all text blocks (there's normally at most one, but be safe). const text = extractTextContent(assistantBlocks, '\n\n').trim() if (text) return text // No text — check if the model tried to call a tool despite instructions. const toolUse = assistantBlocks.find(b => b.type === 'tool_use') if (toolUse) { const toolName = 'name' in toolUse ? toolUse.name : 'a tool' return `(The model tried to call ${toolName} instead of answering directly. Try rephrasing or ask in the main conversation.)` } } // No assistant content — likely API error exhausted retries. Surface the // first system api_error message so the user sees what happened. const apiErr = messages.find( (m): m is SystemAPIErrorMessage => m.type === 'system' && 'subtype' in m && m.subtype === 'api_error', ) if (apiErr) { return `(API error: ${formatAPIError(apiErr.error)})` } return null }