// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
/**
 * Shared event metadata enrichment for analytics systems
 *
 * This module provides a single source of truth for collecting and formatting
 * event metadata across all analytics systems (Datadog, 1P).
 */
|
|
|
|
import { extname } from 'path'
|
|
import memoize from 'lodash-es/memoize.js'
|
|
import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
|
|
import { envDynamic } from '../../utils/envDynamic.js'
|
|
import { getModelBetas } from '../../utils/betas.js'
|
|
import { getMainLoopModel } from '../../utils/model/model.js'
|
|
import {
|
|
getSessionId,
|
|
getIsInteractive,
|
|
getKairosActive,
|
|
getClientType,
|
|
getParentSessionId as getParentSessionIdFromState,
|
|
} from '../../bootstrap/state.js'
|
|
import { isEnvTruthy } from '../../utils/envUtils.js'
|
|
import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
|
|
import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
|
|
import { getRepoRemoteHash } from '../../utils/git.js'
|
|
import {
|
|
getWslVersion,
|
|
getLinuxDistroInfo,
|
|
detectVcs,
|
|
} from '../../utils/platform.js'
|
|
import type { CoreUserData } from 'src/utils/user.js'
|
|
import { getAgentContext } from '../../utils/agentContext.js'
|
|
import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
|
|
import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
|
|
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
import {
|
|
getAgentId,
|
|
getParentSessionId as getTeammateParentSessionId,
|
|
getTeamName,
|
|
isTeammate,
|
|
} from '../../utils/teammate.js'
|
|
import { feature } from 'bun:bundle'
|
|
|
|
/**
 * Marker type asserting that an analytics value has been checked for
 * sensitive content.
 *
 * Casting a string to this type is an explicit statement by the developer
 * that the value does not contain code snippets, file paths, or other
 * sensitive information. Metadata built from such values is expected to be
 * JSON-serializable.
 *
 * Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
 *
 * The alias resolves to `never`, so no value is assignable to it without a
 * cast. That is deliberate: the type exists only as a cast target that
 * documents developer intent.
 */
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
|
|
|
|
/**
 * Returns a tool name that is safe to attach to analytics events.
 *
 * MCP tool names have the shape `mcp__<server>__<tool>` and can reveal
 * user-specific server configurations, which is considered PII-medium. Any
 * name carrying the `mcp__` prefix is therefore collapsed to the constant
 * `'mcp_tool'`. Every other name (Bash, Read, Write, etc.) is passed through
 * unchanged.
 *
 * @param toolName - The tool name to sanitize
 * @returns `'mcp_tool'` for MCP tools, otherwise the original name
 */
export function sanitizeToolNameForAnalytics(
  toolName: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
  const isMcpTool = toolName.startsWith('mcp__')
  const safeName = isMcpTool ? 'mcp_tool' : toolName
  return safeName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}
|
|
|
|
/**
 * Reports whether detailed tool logging is switched on for OTLP events.
 *
 * When enabled, MCP server/tool names and Skill names are logged. The switch
 * is off by default to protect PII (user-specific server configurations).
 *
 * Enable with OTEL_LOG_TOOL_DETAILS=1
 *
 * @returns The result of `isEnvTruthy` applied to `OTEL_LOG_TOOL_DETAILS`
 */
export function isToolDetailsLoggingEnabled(): boolean {
  const flag = process.env.OTEL_LOG_TOOL_DETAILS
  return isEnvTruthy(flag)
}
|
|
|
|
/**
 * Decides whether MCP server/tool names may be attached to analytics events.
 *
 * Per go/taxonomy, MCP names are medium PII. They are logged only for:
 * - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
 * - claude.ai-proxied connectors — always official (from claude.ai's list)
 * - Servers whose URL matches the official MCP registry — directory
 *   connectors added via `claude mcp add`, not customer-specific config
 *
 * Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
 *
 * @param mcpServerType - Transport/type of the MCP server, if known
 * @param mcpServerBaseUrl - Base URL of the MCP server, if known
 * @returns true when one of the conditions above holds, false otherwise
 */
export function isAnalyticsToolDetailsLoggingEnabled(
  mcpServerType: string | undefined,
  mcpServerBaseUrl: string | undefined,
): boolean {
  const isCowork = process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent'
  const isClaudeAiProxied = mcpServerType === 'claudeai-proxy'
  if (isCowork || isClaudeAiProxied) {
    return true
  }

  // The registry lookup runs last, and only when a non-empty URL is available.
  if (mcpServerBaseUrl && isOfficialMcpUrl(mcpServerBaseUrl)) {
    return true
  }
  return false
}
|
|
|
|
/**
 * Names of built-in first-party MCP servers. These are fixed reserved
 * strings, not user configuration, so logging them is not PII. The set is
 * checked in addition to isAnalyticsToolDetailsLoggingEnabled's
 * transport/URL gates, which a stdio built-in would otherwise fail.
 *
 * Feature-gated so the set is empty when the feature is off: the name
 * reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so
 * a user-configured 'computer-use' is possible in builds without the feature.
 *
 * NOTE(review): the `feature(...) ? [...] : []` shape and the inline
 * `require` are kept exactly as written; presumably the bundler relies on
 * this form to drop the gated branch — confirm before restructuring.
 */
/* eslint-disable @typescript-eslint/no-require-imports */
const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set<string>(
  feature('CHICAGO_MCP')
    ? [
        (
          require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
        ).COMPUTER_USE_MCP_SERVER_NAME,
      ]
    : [],
)
/* eslint-enable @typescript-eslint/no-require-imports */
|
|
|
|
/**
 * Spreadable helper for logEvent payloads. Consolidates the identical IIFE
 * pattern at each tengu_tool_use_* call site.
 *
 * @param toolName - Full tool name; only `mcp__<server>__<tool>` names yield details
 * @param mcpServerType - Forwarded to isAnalyticsToolDetailsLoggingEnabled
 * @param mcpServerBaseUrl - Forwarded to isAnalyticsToolDetailsLoggingEnabled
 * @returns `{mcpServerName, mcpToolName}` when the name parses as an MCP tool
 *   and either the server is a built-in or the logging gate passes; an empty
 *   object otherwise
 */
export function mcpToolDetailsForAnalytics(
  toolName: string,
  mcpServerType: string | undefined,
  mcpServerBaseUrl: string | undefined,
): {
  mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
} {
  const parsed = extractMcpToolDetails(toolName)
  if (parsed === undefined) {
    return {}
  }

  // Built-in server names short-circuit the transport/URL gate.
  const mayLogNames =
    BUILTIN_MCP_SERVER_NAMES.has(parsed.serverName) ||
    isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)
  if (!mayLogNames) {
    return {}
  }

  return {
    mcpServerName: parsed.serverName,
    mcpToolName: parsed.mcpToolName,
  }
}
|
|
|
|
/**
 * Splits a full MCP tool name into its server and tool components.
 * MCP tool names follow the format: mcp__<server>__<tool>
 *
 * The server name is the text between the `mcp__` prefix and the next `__`.
 * Everything after that separator is the tool name, which may itself
 * contain `__`.
 *
 * @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
 * @returns Object with serverName and mcpToolName, or undefined when the
 *   name lacks the `mcp__` prefix, lacks a second `__` separator, or has an
 *   empty server or tool segment
 */
export function extractMcpToolDetails(toolName: string):
  | {
      serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
      mcpToolName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
    }
  | undefined {
  const prefix = 'mcp__'
  const separator = '__'
  if (!toolName.startsWith(prefix)) {
    return undefined
  }

  // Format: mcp__<server>__<tool>
  const remainder = toolName.slice(prefix.length)
  const separatorAt = remainder.indexOf(separator)
  if (separatorAt < 0) {
    return undefined
  }

  const serverName = remainder.slice(0, separatorAt)
  // Tool name may contain __ so keep everything after the first separator
  const mcpToolName = remainder.slice(separatorAt + separator.length)
  if (serverName === '' || mcpToolName === '') {
    return undefined
  }

  return {
    serverName:
      serverName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    mcpToolName:
      mcpToolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  }
}
|
|
|
|
/**
 * Reads the skill name out of a Skill tool invocation.
 *
 * @param toolName - The tool name; anything other than 'Skill' yields undefined
 * @param input - The tool input, expected to be an object with a string `skill` field
 * @returns The `skill` string when this is a Skill tool call with a
 *   well-formed input, undefined otherwise
 */
export function extractSkillName(
  toolName: string,
  input: unknown,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  if (toolName !== 'Skill') {
    return undefined
  }
  if (typeof input !== 'object' || input === null || !('skill' in input)) {
    return undefined
  }

  const candidate = (input as { skill: unknown }).skill
  return typeof candidate === 'string'
    ? (candidate as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    : undefined
}
|
|
|
|
// Strings longer than this many characters are shortened...
const TOOL_INPUT_STRING_TRUNCATE_AT = 512
// ...to this many leading characters plus a length marker.
const TOOL_INPUT_STRING_TRUNCATE_TO = 128
// Upper bound on the serialized JSON length of a whole tool input.
const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
// Maximum number of array items / object keys kept per collection.
const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
// Collections at or below this depth are replaced with '<nested>'.
const TOOL_INPUT_MAX_DEPTH = 2

/**
 * Produces a size-bounded copy of a tool input value.
 *
 * - Strings longer than TOOL_INPUT_STRING_TRUNCATE_AT keep their first
 *   TOOL_INPUT_STRING_TRUNCATE_TO characters followed by `…[<length> chars]`.
 * - Numbers, booleans, null and undefined are returned as-is.
 * - Any other value found at depth >= TOOL_INPUT_MAX_DEPTH becomes '<nested>'.
 * - Arrays and objects keep at most TOOL_INPUT_MAX_COLLECTION_ITEMS entries,
 *   each truncated recursively, plus a marker entry with the original count.
 * - Object keys starting with `_` are dropped.
 * - Anything else is converted with `String(value)`.
 *
 * @param value - The value to bound
 * @param depth - Current nesting depth; callers start at the default of 0
 * @returns The bounded copy
 */
function truncateToolInputValue(value: unknown, depth = 0): unknown {
  switch (typeof value) {
    case 'string':
      return value.length > TOOL_INPUT_STRING_TRUNCATE_AT
        ? `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
        : value
    case 'number':
    case 'boolean':
    case 'undefined':
      return value
    default:
      break
  }
  if (value === null) {
    return value
  }

  // Primitives handled above are exempt from the depth limit.
  if (depth >= TOOL_INPUT_MAX_DEPTH) {
    return '<nested>'
  }

  const childDepth = depth + 1

  if (Array.isArray(value)) {
    const kept: unknown[] = value
      .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
      .map(item => truncateToolInputValue(item, childDepth))
    if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
      kept.push(`…[${value.length} items]`)
    }
    return kept
  }

  if (typeof value === 'object') {
    // Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
    // SedEditPermissionRequest) so they don't leak into telemetry.
    const publicEntries = Object.entries(
      value as Record<string, unknown>,
    ).filter(([key]) => !key.startsWith('_'))

    const kept = publicEntries
      .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
      .map(([key, child]): [string, unknown] => [
        key,
        truncateToolInputValue(child, childDepth),
      ])
    if (publicEntries.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
      kept.push(['…', `${publicEntries.length} keys`])
    }
    return Object.fromEntries(kept)
  }

  return String(value)
}
|
|
|
|
/**
 * Serialize a tool's input arguments for the OTel tool_result event.
 *
 * The input is first bounded with truncateToolInputValue (long strings and
 * deep nesting are cut) so that forensically useful fields like file paths,
 * URLs, and MCP args survive while the output stays small. The serialized
 * text is then capped at TOOL_INPUT_MAX_JSON_CHARS characters; when the cap
 * applies, `…[truncated]` is appended, so the result is not guaranteed to be
 * parseable JSON.
 *
 * @param input - The raw tool input
 * @returns The bounded JSON text, or undefined when OTEL_LOG_TOOL_DETAILS is
 *   not enabled or the input does not serialize to a string
 */
export function extractToolInputForTelemetry(
  input: unknown,
): string | undefined {
  if (!isToolDetailsLoggingEnabled()) {
    return undefined
  }

  const serialized: unknown = jsonStringify(truncateToolInputValue(input))

  // NOTE(review): presumably jsonStringify mirrors JSON.stringify, which
  // yields undefined (not a string) for a top-level undefined value —
  // confirm. Without this guard, reading `.length` below would throw for
  // such an input.
  if (typeof serialized !== 'string') {
    return undefined
  }

  return serialized.length > TOOL_INPUT_MAX_JSON_CHARS
    ? serialized.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
    : serialized
}
|
|
|
|
/**
 * Maximum length for file extensions to be logged.
 * Extensions longer than this are considered potentially sensitive
 * (e.g., hash-based filenames like "key-hash-abcd-123-456") and
 * will be replaced with 'other'.
 */
const MAX_FILE_EXTENSION_LENGTH = 10

/**
 * Derives a loggable file extension from a path.
 *
 * The extension is taken with Node's path.extname, lower-cased, and returned
 * without its leading dot. Extensions longer than MAX_FILE_EXTENSION_LENGTH
 * are reported as 'other' to avoid logging potentially sensitive data (like
 * hash-based filenames).
 *
 * @param filePath - The file path to extract the extension from
 * @returns The sanitized extension, 'other' for long extensions, or
 *   undefined if the path has no extension (including a bare trailing dot)
 */
export function getFileExtensionForAnalytics(
  filePath: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  const dotted = extname(filePath).toLowerCase()

  // extname yields '' when there is no extension and '.' for a trailing dot;
  // neither carries an extension worth logging.
  if (dotted === '' || dotted === '.') {
    return undefined
  }

  const bare = dotted.slice(1) // remove leading dot
  const loggable = bare.length > MAX_FILE_EXTENSION_LENGTH ? 'other' : bare
  return loggable as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}
|
|
|
|
/** Allow list of commands we extract file extensions from. */
const FILE_COMMANDS = new Set([
  'rm',
  'mv',
  'cp',
  'touch',
  'mkdir',
  'chmod',
  'chown',
  'cat',
  'head',
  'tail',
  'sort',
  'stat',
  'diff',
  'wc',
  'grep',
  'rg',
  'sed',
])

/** Regex to split bash commands on compound operators (&&, ||, ;, |). */
const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/

/** Regex to split on whitespace. */
const WHITESPACE_REGEX = /\s+/

/**
 * Extracts file extensions from a bash command for analytics.
 *
 * Best-effort: the command is split on compound operators, each sub-command
 * is split on whitespace, and extensions are taken from the non-flag
 * arguments of commands in FILE_COMMANDS (matched on the basename of the
 * first token, so `/bin/cat` counts as `cat`). No heavy shell parsing is
 * done because grep patterns and sed scripts rarely resemble file extensions.
 *
 * Each sub-command is trimmed before tokenizing. Without that, a command
 * with leading whitespace produced an empty first token and was skipped
 * entirely.
 *
 * @param command - The raw bash command line
 * @param simulatedSedEditFilePath - Optional file path whose extension is
 *   reported first, ahead of anything found in `command`
 * @returns De-duplicated extensions joined with ',' in first-seen order, or
 *   undefined when none were found
 */
export function getFileExtensionsFromBashCommand(
  command: string,
  simulatedSedEditFilePath?: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  // Cheap exit: without a dot there is no extension to find in the command.
  if (!command.includes('.') && !simulatedSedEditFilePath) return undefined

  // A Set iterates in insertion order, which gives first-seen ordering.
  const seen = new Set<string>()

  if (simulatedSedEditFilePath) {
    const ext = getFileExtensionForAnalytics(simulatedSedEditFilePath)
    if (ext) seen.add(ext)
  }

  for (const rawSubcommand of command.split(COMPOUND_OPERATOR_REGEX)) {
    const subcommand = rawSubcommand.trim()
    if (!subcommand) continue

    const [firstToken, ...args] = subcommand.split(WHITESPACE_REGEX)
    if (firstToken === undefined || args.length === 0) continue

    // lastIndexOf returns -1 when there is no slash, so slice(0) keeps the
    // whole token in that case.
    const baseCmd = firstToken.slice(firstToken.lastIndexOf('/') + 1)
    if (!FILE_COMMANDS.has(baseCmd)) continue

    for (const arg of args) {
      if (arg.startsWith('-')) continue // flags are never file paths
      const ext = getFileExtensionForAnalytics(arg)
      if (ext) seen.add(ext)
    }
  }

  if (seen.size === 0) return undefined
  return [...seen].join(
    ',',
  ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}
|
|
|
|
/**
 * Environment context metadata.
 *
 * Describes the machine, runtime and launch environment of the current
 * process. Populated by buildEnvContext().
 */
export type EnvContext = {
  /** Host platform as reported by getHostPlatformForAnalytics(). */
  platform: string
  /** CLAUDE_CODE_HOST_PLATFORM when set, otherwise process.platform. */
  platformRaw: string
  arch: string
  nodeVersion: string
  terminal: string | null
  /** Detected package managers, comma-joined. */
  packageManagers: string
  /** Detected runtimes, comma-joined. */
  runtimes: string
  isRunningWithBun: boolean
  /** Truthiness of the CI environment variable. */
  isCi: boolean
  /** Truthiness of the CLAUBBIT environment variable. */
  isClaubbit: boolean
  /** Truthiness of the CLAUDE_CODE_REMOTE environment variable. */
  isClaudeCodeRemote: boolean
  /** True when CLAUDE_CODE_ENTRYPOINT is 'local-agent'. */
  isLocalAgentMode: boolean
  isConductor: boolean
  /** Value of CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE, when set. */
  remoteEnvironmentType?: string
  /** Value of CLAUDE_CODE_COWORKER_TYPE, when set and the feature gate is on. */
  coworkerType?: string
  /** Value of CLAUDE_CODE_CONTAINER_ID, when set. */
  claudeCodeContainerId?: string
  /** Value of CLAUDE_CODE_REMOTE_SESSION_ID, when set. */
  claudeCodeRemoteSessionId?: string
  /** Raw value of CLAUDE_CODE_TAGS, when set. */
  tags?: string
  /** Truthiness of the GITHUB_ACTIONS environment variable. */
  isGithubAction: boolean
  /** Truthiness of the CLAUDE_CODE_ACTION environment variable. */
  isClaudeCodeAction: boolean
  isClaudeAiAuth: boolean
  /** Full version string (MACRO.VERSION). */
  version: string
  /** Base version, e.g. "2.0.36-dev" for "2.0.36-dev.20251107.t174150.sha2709699". */
  versionBase?: string
  buildTime: string
  deploymentEnvironment: string
  // GitHub Actions details — only populated when GITHUB_ACTIONS is truthy.
  githubEventName?: string
  githubActionsRunnerEnvironment?: string
  githubActionsRunnerOs?: string
  /** Portion of GITHUB_ACTION_PATH after 'claude-code-action/'. */
  githubActionRef?: string
  wslVersion?: string
  linuxDistroId?: string
  linuxDistroVersion?: string
  linuxKernel?: string
  /** Detected version-control systems, comma-joined. */
  vcs?: string
}
|
|
|
|
/**
 * Process metrics included with all analytics events.
 *
 * Produced by buildProcessMetrics() from process.uptime(),
 * process.memoryUsage() and process.cpuUsage().
 */
export type ProcessMetrics = {
  /** Result of process.uptime(). */
  uptime: number
  // Fields copied from process.memoryUsage().
  rss: number
  heapTotal: number
  heapUsed: number
  external: number
  arrayBuffers: number
  /** Result of process.constrainedMemory(). */
  constrainedMemory: number | undefined
  /** Cumulative CPU usage as returned by process.cpuUsage(). */
  cpuUsage: NodeJS.CpuUsage
  /**
   * CPU usage (user + system) since the previous sample, as a percentage of
   * the elapsed wall time. Undefined when no previous sample exists.
   */
  cpuPercent: number | undefined
}
|
|
|
|
/**
 * Core event metadata shared across all analytics systems.
 */
export type EventMetadata = {
  model: string
  sessionId: string
  userType: string
  /** Comma-joined beta identifiers; omitted when empty. */
  betas?: string
  envContext: EnvContext
  entrypoint?: string
  agentSdkVersion?: string
  /** Stringified boolean ('true' / 'false'). */
  isInteractive: string
  clientType: string
  processMetrics?: ProcessMetrics
  sweBenchRunId: string
  sweBenchInstanceId: string
  sweBenchTaskId: string

  // Swarm/team agent identification for analytics attribution

  /** CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID */
  agentId?: string
  /** CLAUDE_CODE_PARENT_SESSION_ID (team lead's session) */
  parentSessionId?: string
  /** Distinguishes swarm teammates, Agent tool subagents, and standalone agents */
  agentType?: 'teammate' | 'subagent' | 'standalone'
  /** Team name for swarm agents (from env var or AsyncLocalStorage) */
  teamName?: string
  /** OAuth subscription tier (max, pro, enterprise, team) */
  subscriptionType?: string
  /** Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data */
  rh?: string
  /** KAIROS assistant mode active (ant-only; set in main.tsx after gate check) */
  kairosActive?: true
  /** Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation) */
  skillMode?: 'discovery' | 'coach' | 'discovery_and_coach'
  /** Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events) */
  observerMode?: 'backseat' | 'skillcoach' | 'both'
}
|
|
|
|
/**
 * Options for enriching event metadata.
 */
export type EnrichMetadataOptions = {
  /** Model to use, falls back to getMainLoopModel() if not provided */
  model?: unknown
  /** Explicit betas string (already joined) */
  betas?: unknown
  /** Additional metadata to include (optional) */
  additionalMetadata?: Record<string, unknown>
}
|
|
|
|
/**
 * Resolves which agent (if any) the current event should be attributed to.
 *
 * Sources are consulted in priority order:
 * 1. AsyncLocalStorage agent context (subagents running in this process)
 * 2. Swarm helpers (swarm teammates / standalone agents)
 * 3. Parent session ID held in bootstrap state
 *
 * @returns The identification fields of the first source that has data, or
 *   an empty object when none does
 */
function getAgentIdentification(): {
  agentId?: string
  parentSessionId?: string
  agentType?: 'teammate' | 'subagent' | 'standalone'
  teamName?: string
} {
  // 1. AsyncLocalStorage first (for subagents running in same process)
  const ctx = getAgentContext()
  if (ctx) {
    return {
      agentId: ctx.agentId,
      parentSessionId: ctx.parentSessionId,
      agentType: ctx.agentType,
      // Only teammate contexts carry a team name.
      ...(ctx.agentType === 'teammate' ? { teamName: ctx.teamName } : {}),
    }
  }

  // 2. Fall back to swarm helpers (for swarm agents)
  const agentId = getAgentId()
  const parentSessionId = getTeammateParentSessionId()
  const teamName = getTeamName()
  const isSwarmAgent = isTeammate()

  // For standalone agents (have agent ID but not a teammate), agentType is
  // 'standalone'.
  let agentType: 'teammate' | 'standalone' | undefined
  if (isSwarmAgent) {
    agentType = 'teammate'
  } else if (agentId) {
    agentType = 'standalone'
  }

  const fromSwarm: ReturnType<typeof getAgentIdentification> = {}
  if (agentId) fromSwarm.agentId = agentId
  if (agentType) fromSwarm.agentType = agentType
  if (parentSessionId) fromSwarm.parentSessionId = parentSessionId
  if (teamName) fromSwarm.teamName = teamName
  if (Object.keys(fromSwarm).length > 0) {
    return fromSwarm
  }

  // 3. Bootstrap state parent session ID (e.g., plan mode -> implementation)
  const stateParentSessionId = getParentSessionIdFromState()
  return stateParentSessionId ? { parentSessionId: stateParentSessionId } : {}
}
|
|
|
|
/**
 * Memoized extraction of the base version from MACRO.VERSION: the leading
 * `major.minor.patch` plus an optional lowercase pre-release tag.
 * "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
 *
 * @returns The base version, or undefined when MACRO.VERSION does not start
 *   with a `digits.digits.digits` triple
 */
const getVersionBase = memoize((): string | undefined => {
  const baseVersionPattern = /^\d+\.\d+\.\d+(?:-[a-z]+)?/
  return baseVersionPattern.exec(MACRO.VERSION)?.[0]
})
|
|
|
|
/**
 * Builds the environment context object.
 *
 * Memoized: the asynchronous detection (package managers, runtimes, Linux
 * distro info, VCS) is started once and the same promise is returned to every
 * later caller.
 *
 * NOTE(review): because the promise itself is what gets memoized, a
 * rejection would presumably be cached as well — confirm whether the
 * detection helpers can reject.
 *
 * @returns Promise resolving to the environment context
 */
const buildEnvContext = memoize(async (): Promise<EnvContext> => {
  const [detectedPackageManagers, detectedRuntimes, linuxDistroInfo, vcsList] =
    await Promise.all([
      env.getPackageManagers(),
      env.getRuntimes(),
      getLinuxDistroInfo(),
      detectVcs(),
    ])

  return {
    platform: getHostPlatformForAnalytics(),
    // Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
    // getHostPlatformForAnalytics() buckets those into 'linux'; here we want
    // the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
    platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
    arch: env.arch,
    nodeVersion: env.nodeVersion,
    terminal: envDynamic.terminal,
    packageManagers: detectedPackageManagers.join(','),
    runtimes: detectedRuntimes.join(','),
    isRunningWithBun: env.isRunningWithBun(),
    isCi: isEnvTruthy(process.env.CI),
    isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
    isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
    isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
    isConductor: env.isConductor(),
    ...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE
      ? {
          remoteEnvironmentType:
            process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
        }
      : {}),
    // Gated by feature flag to prevent leaking "coworkerType" string in external builds
    ...(feature('COWORKER_TYPE_TELEMETRY')
      ? process.env.CLAUDE_CODE_COWORKER_TYPE
        ? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
        : {}
      : {}),
    ...(process.env.CLAUDE_CODE_CONTAINER_ID
      ? { claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID }
      : {}),
    ...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID
      ? { claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID }
      : {}),
    ...(process.env.CLAUDE_CODE_TAGS
      ? { tags: process.env.CLAUDE_CODE_TAGS }
      : {}),
    isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
    isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
    isClaudeAiAuth: isClaudeAISubscriber(),
    version: MACRO.VERSION,
    versionBase: getVersionBase(),
    buildTime: MACRO.BUILD_TIME,
    deploymentEnvironment: env.detectDeploymentEnvironment(),
    // GitHub Actions details are only collected when running inside Actions.
    ...(isEnvTruthy(process.env.GITHUB_ACTIONS)
      ? {
          githubEventName: process.env.GITHUB_EVENT_NAME,
          githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
          githubActionsRunnerOs: process.env.RUNNER_OS,
          // Everything after 'claude-code-action/' in the action path.
          githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
            'claude-code-action/',
          )
            ? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
            : undefined,
        }
      : {}),
    ...(getWslVersion() ? { wslVersion: getWslVersion() } : {}),
    ...(linuxDistroInfo ?? {}),
    ...(vcsList.length > 0 ? { vcs: vcsList.join(',') } : {}),
  }
})
|
|
|
|
// --
// CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
// Both hold the sample taken by the previous buildProcessMetrics() call, or
// null before the first call.
let prevCpuUsage: NodeJS.CpuUsage | null = null
let prevWallTimeMs: number | null = null

/**
 * Builds process metrics object for all users.
 *
 * Samples memory and CPU usage of the current process. `cpuPercent` is the
 * CPU time (user + system) consumed since the previous call, expressed as a
 * percentage of the wall time elapsed since that call. It is undefined on the
 * first call and whenever no wall time has elapsed. Each call replaces the
 * stored previous sample.
 *
 * `process.constrainedMemory` is feature-detected so that a runtime without
 * it reports `constrainedMemory: undefined` instead of throwing and losing
 * every other metric to the catch below.
 *
 * @returns The sampled metrics, or undefined if sampling throws
 */
function buildProcessMetrics(): ProcessMetrics | undefined {
  try {
    const mem = process.memoryUsage()
    const cpu = process.cpuUsage()
    const now = Date.now()

    let cpuPercent: number | undefined
    if (prevCpuUsage !== null && prevWallTimeMs !== null) {
      const wallDeltaMs = now - prevWallTimeMs
      if (wallDeltaMs > 0) {
        const userDeltaUs = cpu.user - prevCpuUsage.user
        const systemDeltaUs = cpu.system - prevCpuUsage.system
        // cpuUsage() reports microseconds; wall time is in milliseconds.
        cpuPercent =
          ((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100
      }
    }
    prevCpuUsage = cpu
    prevWallTimeMs = now

    const constrainedMemory =
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      typeof process.constrainedMemory === 'function'
        ? // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
          process.constrainedMemory()
        : undefined

    return {
      uptime: process.uptime(),
      rss: mem.rss,
      heapTotal: mem.heapTotal,
      heapUsed: mem.heapUsed,
      external: mem.external,
      arrayBuffers: mem.arrayBuffers,
      constrainedMemory,
      cpuUsage: cpu,
      cpuPercent,
    }
  } catch {
    // Best-effort: metrics must never break event logging.
    return undefined
  }
}
|
|
|
|
/**
 * Get core event metadata shared across all analytics systems.
 *
 * This function collects environment, runtime, and context information
 * that should be included with all analytics events.
 *
 * The subscription type is read once and reused. The earlier form called
 * getSubscriptionType() twice, the second time behind a non-null assertion.
 *
 * NOTE(review): `options.additionalMetadata` is not read by this function.
 *
 * @param options - Configuration options; `model` falls back to
 *   getMainLoopModel() and `betas` to the joined getModelBetas(model) when
 *   not supplied
 * @returns Promise resolving to enriched metadata object
 */
export async function getEventMetadata(
  options: EnrichMetadataOptions = {},
): Promise<EventMetadata> {
  const model = options.model ? String(options.model) : getMainLoopModel()
  const betas =
    typeof options.betas === 'string'
      ? options.betas
      : getModelBetas(model).join(',')

  const [envContext, repoRemoteHash] = await Promise.all([
    buildEnvContext(),
    getRepoRemoteHash(),
  ])
  const processMetrics = buildProcessMetrics()
  const subscriptionType = getSubscriptionType()

  const metadata: EventMetadata = {
    model,
    sessionId: getSessionId(),
    userType: process.env.USER_TYPE || '',
    ...(betas.length > 0 ? { betas } : {}),
    envContext,
    ...(process.env.CLAUDE_CODE_ENTRYPOINT
      ? { entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT }
      : {}),
    ...(process.env.CLAUDE_AGENT_SDK_VERSION
      ? { agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION }
      : {}),
    isInteractive: String(getIsInteractive()),
    clientType: getClientType(),
    ...(processMetrics ? { processMetrics } : {}),
    sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
    sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
    sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
    // Swarm/team agent identification
    // Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
    ...getAgentIdentification(),
    // Subscription tier for DAU-by-tier analytics
    ...(subscriptionType ? { subscriptionType } : {}),
    // Assistant mode tag — lives outside memoized buildEnvContext() because
    // setKairosActive() runs at main.tsx:~1648, after the first event may
    // have already fired and memoized the env. Read fresh per-event instead.
    ...(feature('KAIROS') && getKairosActive()
      ? { kairosActive: true as const }
      : {}),
    // Repo remote hash for joining with server-side repo bundle data
    ...(repoRemoteHash ? { rh: repoRemoteHash } : {}),
  }

  return metadata
}
|
|
|
|
|
|
/**
 * Core event metadata for 1P event logging (snake_case format).
 *
 * snake_case counterpart of the core fields of EventMetadata, as produced by
 * to1PEventFormat().
 */
export type FirstPartyEventLoggingCoreMetadata = {
  session_id: string
  model: string
  user_type: string
  betas?: string
  entrypoint?: string
  agent_sdk_version?: string
  /** Real boolean here, unlike the stringified EventMetadata.isInteractive. */
  is_interactive: boolean
  client_type: string
  swe_bench_run_id?: string
  swe_bench_instance_id?: string
  swe_bench_task_id?: string

  // Swarm/team agent identification
  agent_id?: string
  parent_session_id?: string
  agent_type?: 'teammate' | 'subagent' | 'standalone'
  team_name?: string
}
|
|
|
|
/**
 * Complete event logging metadata format for 1P events.
 */
export type FirstPartyEventLoggingMetadata = {
  /** Environment metadata, typed as the proto-generated EnvironmentMetadata. */
  env: EnvironmentMetadata
  /** Base64 of the JSON-serialized ProcessMetrics, when metrics are available. */
  process?: string
  /**
   * auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
   * account_id is intentionally omitted — only UUID fields are populated client-side.
   */
  auth?: PublicApiAuth
  /**
   * core fields correspond to the top level of ClaudeCodeInternalEvent.
   * They get directly exported to their individual columns in the BigQuery tables
   */
  core: FirstPartyEventLoggingCoreMetadata
  /**
   * additional fields are populated in the additional_metadata field of the
   * ClaudeCodeInternalEvent proto. Includes but is not limited to information
   * that differs by event type.
   */
  additional: Record<string, unknown>
}
|
|
|
|
/**
 * Convert metadata to 1P event logging format (snake_case fields).
 *
 * The /api/event_logging/batch endpoint expects snake_case field names
 * for environment and core metadata.
 *
 * @param metadata - Core event metadata
 * @param userMetadata - User data; supplies githubActionsMetadata,
 *   accountUuid and organizationUuid
 * @param additionalMetadata - Additional metadata to include; spread last
 *   into `additional`, so its keys win over the built-in ones
 * @returns Metadata formatted for 1P event logging
 */
export function to1PEventFormat(
  metadata: EventMetadata,
  userMetadata: CoreUserData,
  additionalMetadata: Record<string, unknown> = {},
): FirstPartyEventLoggingMetadata {
  // Peel off everything that is not a core column; what remains in
  // `coreFields` maps onto FirstPartyEventLoggingCoreMetadata.
  const {
    envContext: ctx,
    processMetrics,
    rh,
    kairosActive,
    skillMode,
    observerMode,
    ...coreFields
  } = metadata

  // Convert envContext to snake_case.
  // IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that
  // adding a field here that the proto doesn't define is a compile error. The
  // generated toJSON() serializer silently drops unknown keys — a hand-written
  // parallel type previously let #11318, #13924, #19448, and coworker_type all
  // ship fields that never reached BQ.
  // Adding a field? Update the monorepo proto first (go/cc-logging):
  // event_schemas/.../claude_code/v1/claude_code_internal_event.proto
  // then run `bun run generate:proto` here.
  const env: EnvironmentMetadata = {
    platform: ctx.platform,
    platform_raw: ctx.platformRaw,
    arch: ctx.arch,
    node_version: ctx.nodeVersion,
    terminal: ctx.terminal || 'unknown',
    package_managers: ctx.packageManagers,
    runtimes: ctx.runtimes,
    is_running_with_bun: ctx.isRunningWithBun,
    is_ci: ctx.isCi,
    is_claubbit: ctx.isClaubbit,
    is_claude_code_remote: ctx.isClaudeCodeRemote,
    is_local_agent_mode: ctx.isLocalAgentMode,
    is_conductor: ctx.isConductor,
    is_github_action: ctx.isGithubAction,
    is_claude_code_action: ctx.isClaudeCodeAction,
    is_claude_ai_auth: ctx.isClaudeAiAuth,
    version: ctx.version,
    build_time: ctx.buildTime,
    deployment_environment: ctx.deploymentEnvironment,
  }

  // Optional env fields: copied only when they hold a truthy value.
  if (ctx.remoteEnvironmentType) {
    env.remote_environment_type = ctx.remoteEnvironmentType
  }
  if (feature('COWORKER_TYPE_TELEMETRY') && ctx.coworkerType) {
    env.coworker_type = ctx.coworkerType
  }
  if (ctx.claudeCodeContainerId) {
    env.claude_code_container_id = ctx.claudeCodeContainerId
  }
  if (ctx.claudeCodeRemoteSessionId) {
    env.claude_code_remote_session_id = ctx.claudeCodeRemoteSessionId
  }
  if (ctx.tags) {
    // Comma-separated string → trimmed, non-empty tag list.
    env.tags = ctx.tags
      .split(',')
      .map(tag => tag.trim())
      .filter(Boolean)
  }
  if (ctx.githubEventName) {
    env.github_event_name = ctx.githubEventName
  }
  if (ctx.githubActionsRunnerEnvironment) {
    env.github_actions_runner_environment = ctx.githubActionsRunnerEnvironment
  }
  if (ctx.githubActionsRunnerOs) {
    env.github_actions_runner_os = ctx.githubActionsRunnerOs
  }
  if (ctx.githubActionRef) {
    env.github_action_ref = ctx.githubActionRef
  }
  if (ctx.wslVersion) {
    env.wsl_version = ctx.wslVersion
  }
  if (ctx.linuxDistroId) {
    env.linux_distro_id = ctx.linuxDistroId
  }
  if (ctx.linuxDistroVersion) {
    env.linux_distro_version = ctx.linuxDistroVersion
  }
  if (ctx.linuxKernel) {
    env.linux_kernel = ctx.linuxKernel
  }
  if (ctx.vcs) {
    env.vcs = ctx.vcs
  }
  if (ctx.versionBase) {
    env.version_base = ctx.versionBase
  }

  // Convert core fields to snake_case
  const core: FirstPartyEventLoggingCoreMetadata = {
    session_id: coreFields.sessionId,
    model: coreFields.model,
    user_type: coreFields.userType,
    // EventMetadata carries this as the string 'true' / 'false'.
    is_interactive: coreFields.isInteractive === 'true',
    client_type: coreFields.clientType,
  }

  // Optional core fields
  if (coreFields.betas) {
    core.betas = coreFields.betas
  }
  if (coreFields.entrypoint) {
    core.entrypoint = coreFields.entrypoint
  }
  if (coreFields.agentSdkVersion) {
    core.agent_sdk_version = coreFields.agentSdkVersion
  }
  if (coreFields.sweBenchRunId) {
    core.swe_bench_run_id = coreFields.sweBenchRunId
  }
  if (coreFields.sweBenchInstanceId) {
    core.swe_bench_instance_id = coreFields.sweBenchInstanceId
  }
  if (coreFields.sweBenchTaskId) {
    core.swe_bench_task_id = coreFields.sweBenchTaskId
  }
  // Swarm/team agent identification
  if (coreFields.agentId) {
    core.agent_id = coreFields.agentId
  }
  if (coreFields.parentSessionId) {
    core.parent_session_id = coreFields.parentSessionId
  }
  if (coreFields.agentType) {
    core.agent_type = coreFields.agentType
  }
  if (coreFields.teamName) {
    core.team_name = coreFields.teamName
  }

  // Map userMetadata to output fields.
  // Based on src/utils/user.ts getUser(), but with fields present in other
  // parts of ClaudeCodeInternalEvent deduplicated.
  // Convert camelCase GitHubActionsMetadata to snake_case for 1P API
  // Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
  // rather than at the top level of ClaudeCodeInternalEvent
  const ghMeta = userMetadata.githubActionsMetadata
  if (ghMeta) {
    env.github_actions_metadata = {
      actor_id: ghMeta.actorId,
      repository_id: ghMeta.repositoryId,
      repository_owner_id: ghMeta.repositoryOwnerId,
    }
  }

  // auth is emitted only when at least one UUID is known.
  const auth: PublicApiAuth | undefined =
    userMetadata.accountUuid || userMetadata.organizationUuid
      ? {
          account_uuid: userMetadata.accountUuid,
          organization_uuid: userMetadata.organizationUuid,
        }
      : undefined

  return {
    env,
    ...(processMetrics
      ? {
          process: Buffer.from(jsonStringify(processMetrics)).toString(
            'base64',
          ),
        }
      : {}),
    ...(auth ? { auth } : {}),
    core,
    additional: {
      ...(rh ? { rh } : {}),
      ...(kairosActive ? { is_assistant_mode: true } : {}),
      ...(skillMode ? { skill_mode: skillMode } : {}),
      ...(observerMode ? { observer_mode: observerMode } : {}),
      ...additionalMetadata,
    },
  }
}
|