mono/packages/kbot/ref/services/analytics/index.ts
2026-04-01 01:05:48 +02:00

174 lines
5.4 KiB
TypeScript

/**
* Analytics service - public API for event logging
*
* This module serves as the main entry point for analytics events in Claude CLI.
*
* DESIGN: This module has NO dependencies to avoid import cycles.
* Events are queued until attachAnalyticsSink() is called during app initialization.
* The sink handles routing to Datadog and 1P event logging.
*/
/**
* Marker type for verifying analytics metadata doesn't contain sensitive data
*
* This type forces explicit verification that string values being logged
* don't contain code snippets, file paths, or other sensitive information.
*
* Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
*/
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
/**
* Marker type for values routed to PII-tagged proto columns via `_PROTO_*`
* payload keys. The destination BQ column has privileged access controls,
* so unredacted values are acceptable — unlike general-access backends.
*
* sink.ts strips `_PROTO_*` keys before Datadog fanout; only the 1P
* exporter (firstPartyEventLoggingExporter) sees them and hoists them to the
* top-level proto field. A single stripProtoFields call guards all non-1P
* sinks — no per-sink filtering to forget.
*
* Usage: `rawName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED`
*/
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED = never
/**
* Strip `_PROTO_*` keys from a payload destined for general-access storage.
* Used by:
* - sink.ts: before Datadog fanout (never sees PII-tagged values)
* - firstPartyEventLoggingExporter: defensive strip of additional_metadata
* after hoisting known _PROTO_* keys to proto fields — prevents a future
* unrecognized _PROTO_foo from silently landing in the BQ JSON blob.
*
* Returns the input unchanged (same reference) when no _PROTO_ keys present.
*/
export function stripProtoFields<V>(
metadata: Record<string, V>,
): Record<string, V> {
let result: Record<string, V> | undefined
for (const key in metadata) {
if (key.startsWith('_PROTO_')) {
if (result === undefined) {
result = { ...metadata }
}
delete result[key]
}
}
return result ?? metadata
}
// Internal type for logEvent metadata - different from the enriched EventMetadata in metadata.ts
type LogEventMetadata = { [key: string]: boolean | number | undefined }
type QueuedEvent = {
eventName: string
metadata: LogEventMetadata
async: boolean
}
/**
* Sink interface for the analytics backend
*/
export type AnalyticsSink = {
logEvent: (eventName: string, metadata: LogEventMetadata) => void
logEventAsync: (
eventName: string,
metadata: LogEventMetadata,
) => Promise<void>
}
// Event queue for events logged before sink is attached
const eventQueue: QueuedEvent[] = []
// Sink - initialized during app startup
let sink: AnalyticsSink | null = null
/**
* Attach the analytics sink that will receive all events.
* Queued events are drained asynchronously via queueMicrotask to avoid
* adding latency to the startup path.
*
* Idempotent: if a sink is already attached, this is a no-op. This allows
* calling from both the preAction hook (for subcommands) and setup() (for
* the default command) without coordination.
*/
export function attachAnalyticsSink(newSink: AnalyticsSink): void {
if (sink !== null) {
return
}
sink = newSink
// Drain the queue asynchronously to avoid blocking startup
if (eventQueue.length > 0) {
const queuedEvents = [...eventQueue]
eventQueue.length = 0
// Log queue size for ants to help debug analytics initialization timing
if (process.env.USER_TYPE === 'ant') {
sink.logEvent('analytics_sink_attached', {
queued_event_count: queuedEvents.length,
})
}
queueMicrotask(() => {
for (const event of queuedEvents) {
if (event.async) {
void sink!.logEventAsync(event.eventName, event.metadata)
} else {
sink!.logEvent(event.eventName, event.metadata)
}
}
})
}
}
/**
* Log an event to analytics backends (synchronous)
*
* Events may be sampled based on the 'tengu_event_sampling_config' dynamic config.
* When sampled, the sample_rate is added to the event metadata.
*
* If no sink is attached, events are queued and drained when the sink attaches.
*/
export function logEvent(
eventName: string,
// intentionally no strings unless AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
// to avoid accidentally logging code/filepaths
metadata: LogEventMetadata,
): void {
if (sink === null) {
eventQueue.push({ eventName, metadata, async: false })
return
}
sink.logEvent(eventName, metadata)
}
/**
* Log an event to analytics backends (asynchronous)
*
* Events may be sampled based on the 'tengu_event_sampling_config' dynamic config.
* When sampled, the sample_rate is added to the event metadata.
*
* If no sink is attached, events are queued and drained when the sink attaches.
*/
export async function logEventAsync(
eventName: string,
// intentionally no strings, to avoid accidentally logging code/filepaths
metadata: LogEventMetadata,
): Promise<void> {
if (sink === null) {
eventQueue.push({ eventName, metadata, async: true })
return
}
await sink.logEventAsync(eventName, metadata)
}
/**
* Reset analytics state for testing purposes only.
* @internal
*/
export function _resetForTesting(): void {
sink = null
eventQueue.length = 0
}