223 lines
8.0 KiB
TypeScript
223 lines
8.0 KiB
TypeScript
import type Anthropic from '@anthropic-ai/sdk'
|
|
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages.js'
|
|
import {
|
|
getLastApiCompletionTimestamp,
|
|
setLastApiCompletionTimestamp,
|
|
} from '../bootstrap/state.js'
|
|
import { STRUCTURED_OUTPUTS_BETA_HEADER } from '../constants/betas.js'
|
|
import type { QuerySource } from '../constants/querySource.js'
|
|
import {
|
|
getAttributionHeader,
|
|
getCLISyspromptPrefix,
|
|
} from '../constants/system.js'
|
|
import { logEvent } from '../services/analytics/index.js'
|
|
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
|
|
import { getAPIMetadata } from '../services/api/claude.js'
|
|
import { getAnthropicClient } from '../services/api/client.js'
|
|
import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
|
|
import { computeFingerprint } from './fingerprint.js'
|
|
import { normalizeModelStringForAPI } from './model/model.js'
|
|
|
|
type MessageParam = Anthropic.MessageParam
|
|
type TextBlockParam = Anthropic.TextBlockParam
|
|
type Tool = Anthropic.Tool
|
|
type ToolChoice = Anthropic.ToolChoice
|
|
type BetaMessage = Anthropic.Beta.Messages.BetaMessage
|
|
type BetaJSONOutputFormat = Anthropic.Beta.Messages.BetaJSONOutputFormat
|
|
type BetaThinkingConfigParam = Anthropic.Beta.Messages.BetaThinkingConfigParam
|
|
|
|
export type SideQueryOptions = {
|
|
/** Model to use for the query */
|
|
model: string
|
|
/**
|
|
* System prompt - string or array of text blocks (will be prefixed with CLI attribution).
|
|
*
|
|
* The attribution header is always placed in its own TextBlockParam block to ensure
|
|
* server-side parsing correctly extracts the cc_entrypoint value without including
|
|
* system prompt content.
|
|
*/
|
|
system?: string | TextBlockParam[]
|
|
/** Messages to send (supports cache_control on content blocks) */
|
|
messages: MessageParam[]
|
|
/** Optional tools (supports both standard Tool[] and BetaToolUnion[] for custom tool types) */
|
|
tools?: Tool[] | BetaToolUnion[]
|
|
/** Optional tool choice (use { type: 'tool', name: 'x' } for forced output) */
|
|
tool_choice?: ToolChoice
|
|
/** Optional JSON output format for structured responses */
|
|
output_format?: BetaJSONOutputFormat
|
|
/** Max tokens (default: 1024) */
|
|
max_tokens?: number
|
|
/** Max retries (default: 2) */
|
|
maxRetries?: number
|
|
/** Abort signal */
|
|
signal?: AbortSignal
|
|
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */
|
|
skipSystemPromptPrefix?: boolean
|
|
/** Temperature override */
|
|
temperature?: number
|
|
/** Thinking budget (enables thinking), or `false` to send `{ type: 'disabled' }`. */
|
|
thinking?: number | false
|
|
/** Stop sequences — generation stops when any of these strings is emitted */
|
|
stop_sequences?: string[]
|
|
/** Attributes this call in tengu_api_success for COGS joining against reporting.sampling_calls. */
|
|
querySource: QuerySource
|
|
}
|
|
|
|
/**
|
|
* Extract text from first user message for fingerprint computation.
|
|
*/
|
|
function extractFirstUserMessageText(messages: MessageParam[]): string {
|
|
const firstUserMessage = messages.find(m => m.role === 'user')
|
|
if (!firstUserMessage) return ''
|
|
|
|
const content = firstUserMessage.content
|
|
if (typeof content === 'string') return content
|
|
|
|
// Array of content blocks - find first text block
|
|
const textBlock = content.find(block => block.type === 'text')
|
|
return textBlock?.type === 'text' ? textBlock.text : ''
|
|
}
|
|
|
|
/**
|
|
* Lightweight API wrapper for "side queries" outside the main conversation loop.
|
|
*
|
|
* Use this instead of direct client.beta.messages.create() calls to ensure
|
|
* proper OAuth token validation with fingerprint attribution headers.
|
|
*
|
|
* This handles:
|
|
* - Fingerprint computation for OAuth validation
|
|
* - Attribution header injection
|
|
* - CLI system prompt prefix
|
|
* - Proper betas for the model
|
|
* - API metadata
|
|
* - Model string normalization (strips [1m] suffix for API)
|
|
*
|
|
* @example
|
|
* // Permission explainer
|
|
* await sideQuery({ querySource: 'permission_explainer', model, system: SYSTEM_PROMPT, messages, tools, tool_choice })
|
|
*
|
|
* @example
|
|
* // Session search
|
|
* await sideQuery({ querySource: 'session_search', model, system: SEARCH_PROMPT, messages })
|
|
*
|
|
* @example
|
|
* // Model validation
|
|
* await sideQuery({ querySource: 'model_validation', model, max_tokens: 1, messages: [{ role: 'user', content: 'Hi' }] })
|
|
*/
|
|
export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
|
|
const {
|
|
model,
|
|
system,
|
|
messages,
|
|
tools,
|
|
tool_choice,
|
|
output_format,
|
|
max_tokens = 1024,
|
|
maxRetries = 2,
|
|
signal,
|
|
skipSystemPromptPrefix,
|
|
temperature,
|
|
thinking,
|
|
stop_sequences,
|
|
} = opts
|
|
|
|
const client = await getAnthropicClient({
|
|
maxRetries,
|
|
model,
|
|
source: 'side_query',
|
|
})
|
|
const betas = [...getModelBetas(model)]
|
|
// Add structured-outputs beta if using output_format and provider supports it
|
|
if (
|
|
output_format &&
|
|
modelSupportsStructuredOutputs(model) &&
|
|
!betas.includes(STRUCTURED_OUTPUTS_BETA_HEADER)
|
|
) {
|
|
betas.push(STRUCTURED_OUTPUTS_BETA_HEADER)
|
|
}
|
|
|
|
// Extract first user message text for fingerprint
|
|
const messageText = extractFirstUserMessageText(messages)
|
|
|
|
// Compute fingerprint for OAuth attribution
|
|
const fingerprint = computeFingerprint(messageText, MACRO.VERSION)
|
|
const attributionHeader = getAttributionHeader(fingerprint)
|
|
|
|
// Build system as array to keep attribution header in its own block
|
|
// (prevents server-side parsing from including system content in cc_entrypoint)
|
|
const systemBlocks: TextBlockParam[] = [
|
|
attributionHeader ? { type: 'text', text: attributionHeader } : null,
|
|
// Skip CLI system prompt prefix for internal classifiers that provide their own prompt
|
|
...(skipSystemPromptPrefix
|
|
? []
|
|
: [
|
|
{
|
|
type: 'text' as const,
|
|
text: getCLISyspromptPrefix({
|
|
isNonInteractive: false,
|
|
hasAppendSystemPrompt: false,
|
|
}),
|
|
},
|
|
]),
|
|
...(Array.isArray(system)
|
|
? system
|
|
: system
|
|
? [{ type: 'text' as const, text: system }]
|
|
: []),
|
|
].filter((block): block is TextBlockParam => block !== null)
|
|
|
|
let thinkingConfig: BetaThinkingConfigParam | undefined
|
|
if (thinking === false) {
|
|
thinkingConfig = { type: 'disabled' }
|
|
} else if (thinking !== undefined) {
|
|
thinkingConfig = {
|
|
type: 'enabled',
|
|
budget_tokens: Math.min(thinking, max_tokens - 1),
|
|
}
|
|
}
|
|
|
|
const normalizedModel = normalizeModelStringForAPI(model)
|
|
const start = Date.now()
|
|
// biome-ignore lint/plugin: this IS the wrapper that handles OAuth attribution
|
|
const response = await client.beta.messages.create(
|
|
{
|
|
model: normalizedModel,
|
|
max_tokens,
|
|
system: systemBlocks,
|
|
messages,
|
|
...(tools && { tools }),
|
|
...(tool_choice && { tool_choice }),
|
|
...(output_format && { output_config: { format: output_format } }),
|
|
...(temperature !== undefined && { temperature }),
|
|
...(stop_sequences && { stop_sequences }),
|
|
...(thinkingConfig && { thinking: thinkingConfig }),
|
|
...(betas.length > 0 && { betas }),
|
|
metadata: getAPIMetadata(),
|
|
},
|
|
{ signal },
|
|
)
|
|
|
|
const requestId =
|
|
(response as { _request_id?: string | null })._request_id ?? undefined
|
|
const now = Date.now()
|
|
const lastCompletion = getLastApiCompletionTimestamp()
|
|
logEvent('tengu_api_success', {
|
|
requestId:
|
|
requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
querySource:
|
|
opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
model:
|
|
normalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
inputTokens: response.usage.input_tokens,
|
|
outputTokens: response.usage.output_tokens,
|
|
cachedInputTokens: response.usage.cache_read_input_tokens ?? 0,
|
|
uncachedInputTokens: response.usage.cache_creation_input_tokens ?? 0,
|
|
durationMsIncludingRetries: now - start,
|
|
timeSinceLastApiCallMs:
|
|
lastCompletion !== null ? now - lastCompletion : undefined,
|
|
})
|
|
setLastApiCompletionTimestamp(now)
|
|
|
|
return response
|
|
}
|