160 lines
5.2 KiB
TypeScript
160 lines
5.2 KiB
TypeScript
import axios from 'axios'
|
|
import { hasProfileScope, isClaudeAISubscriber } from '../../utils/auth.js'
|
|
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
|
import { logForDebugging } from '../../utils/debug.js'
|
|
import { errorMessage } from '../../utils/errors.js'
|
|
import { getAuthHeaders, withOAuth401Retry } from '../../utils/http.js'
|
|
import { logError } from '../../utils/log.js'
|
|
import { memoizeWithTTLAsync } from '../../utils/memoize.js'
|
|
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
|
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
|
|
|
/** Body returned by the organization `metrics_enabled` endpoint. */
type MetricsEnabledResponse = {
  /** Flag that this module surfaces to callers as `MetricsStatus.enabled`. */
  metrics_logging_enabled: boolean
}
|
|
|
|
/** Result handed to callers of the metrics opt-out check. */
type MetricsStatus = {
  /** Whether metrics are enabled according to the answer being returned. */
  enabled: boolean
  /** True when the API check failed; `enabled` is then reported as false. */
  hasError: boolean
}
|
|
|
|
// In-memory TTL (1 hour): repeated checks inside one process share a single
// memoized result instead of each hitting the API.
const CACHE_TTL_MS = 1000 * 60 * 60
|
|
|
|
// Disk TTL (24 hours): org settings rarely change, so a disk entry younger
// than this is served as-is with no network call and no background refresh.
// This is what collapses N `claude -p` invocations into ~1 API call/day.
const DISK_CACHE_TTL_MS = 1000 * 60 * 60 * 24
|
|
|
|
/**
 * Calls the organization `metrics_enabled` endpoint and returns its payload.
 *
 * This function does no caching itself; caching is layered on by the
 * `memoizeWithTTLAsync` wrapper further down in this file.
 *
 * @returns The response body, verified at runtime to carry a boolean
 *   `metrics_logging_enabled` field.
 * @throws Error when auth headers cannot be built, when the HTTP request
 *   rejects (the request is issued with a 5000 ms timeout), or when the
 *   response body does not have the expected shape.
 */
async function _fetchMetricsEnabled(): Promise<MetricsEnabledResponse> {
  const authResult = getAuthHeaders()
  if (authResult.error) {
    throw new Error(`Auth error: ${authResult.error}`)
  }

  const headers = {
    'Content-Type': 'application/json',
    'User-Agent': getClaudeCodeUserAgent(),
    // Spread last so the auth headers take precedence over the defaults above.
    ...authResult.headers,
  }

  const endpoint = `https://api.anthropic.com/api/claude_code/organizations/metrics_enabled`
  const response = await axios.get<MetricsEnabledResponse>(endpoint, {
    headers,
    timeout: 5000,
  })

  // The generic on axios.get is a compile-time claim only. Check the shape at
  // runtime so a malformed 2xx body (for example an HTML page served by a
  // proxy) surfaces as an error instead of flowing downstream as
  // `enabled: undefined` and being treated as a successful answer.
  const payload = response.data as
    | Partial<MetricsEnabledResponse>
    | null
    | undefined
  if (typeof payload?.metrics_logging_enabled !== 'boolean') {
    throw new Error(
      'Malformed metrics_enabled response: expected boolean metrics_logging_enabled',
    )
  }

  return response.data
}
|
|
|
|
/**
 * Asks the API whether metrics are enabled and folds every failure into the
 * returned status rather than throwing.
 *
 * @returns `{ enabled, hasError: false }` on a successful fetch (or when the
 *   essential-traffic-only switch short-circuits the call), and
 *   `{ enabled: false, hasError: true }` when the fetch throws.
 */
async function _checkMetricsEnabledAPI(): Promise<MetricsStatus> {
  // Incident kill switch: with nonessential traffic disabled we skip the
  // network call altogether. Reporting enabled:false sheds load at the
  // consumer (bigqueryExporter skips export) and has the same shape as the
  // non-subscriber early return in checkMetricsEnabled.
  if (isEssentialTrafficOnly()) {
    return { enabled: false, hasError: false }
  }

  try {
    const payload = await withOAuth401Retry(_fetchMetricsEnabled, {
      also403Revoked: true,
    })
    const metricsEnabled = payload.metrics_logging_enabled

    logForDebugging(`Metrics opt-out API response: enabled=${metricsEnabled}`)

    const status: MetricsStatus = { enabled: metricsEnabled, hasError: false }
    return status
  } catch (failure) {
    // Best effort: record the failure in both logs and report "disabled with
    // error" so the caller can tell this apart from a real negative answer.
    logForDebugging(
      `Failed to check metrics opt-out status: ${errorMessage(failure)}`,
    )
    logError(failure)
    return { enabled: false, hasError: true }
  }
}
|
|
|
|
// Memoized wrapper around the API check. Results are reused for CACHE_TTL_MS;
// the wrapped function reports failures through `hasError` instead of throwing.
const memoizedCheckMetrics = memoizeWithTTLAsync(_checkMetricsEnabledAPI, CACHE_TTL_MS)
|
|
|
|
/**
 * Runs the (in-memory memoized) API check and mirrors the answer to the
 * global config on disk when it changed or the disk entry has aged out.
 *
 * Error results are returned but never written, so a transient failure cannot
 * overwrite a known-good disk value.
 *
 * NOTE(review): every result with `hasError === false` is persisted, which
 * includes the essential-traffic-only short-circuit from
 * `_checkMetricsEnabledAPI` — confirm that persisting that answer is intended.
 *
 * @returns The status produced by the memoized check, unmodified.
 */
async function refreshMetricsStatus(): Promise<MetricsStatus> {
  const status = await memoizedCheckMetrics()
  if (status.hasError) {
    return status
  }

  const diskEntry = getGlobalConfig().metricsStatusCache

  // Same value and a timestamp still inside the disk TTL: nothing to write.
  // Skipping here avoids config churn when concurrent callers race past a
  // stale disk entry and would otherwise all write.
  const freshAndUnchanged =
    diskEntry !== undefined &&
    diskEntry.enabled === status.enabled &&
    Date.now() - diskEntry.timestamp < DISK_CACHE_TTL_MS

  if (!freshAndUnchanged) {
    saveGlobalConfig(config => ({
      ...config,
      metricsStatusCache: {
        enabled: status.enabled,
        timestamp: Date.now(),
      },
    }))
  }

  return status
}
|
|
|
|
/**
 * Reports whether metrics are enabled for the current organization.
 *
 * Two cache tiers sit in front of the API:
 * - Disk (24h TTL): survives process restarts; a fresh entry means zero
 *   network traffic.
 * - In-memory (1h TTL): dedupes the background refresh within one process.
 *
 * Stale reads are acceptable to the caller (bigqueryExporter): a missed
 * export, or one extra export, during the 24h window is tolerated.
 *
 * @returns The cached status when a disk entry exists (refreshing in the
 *   background if it is stale), otherwise the result of a blocking refresh.
 */
export async function checkMetricsEnabled(): Promise<MetricsStatus> {
  // Service key OAuth sessions lack the user:profile scope and would get a
  // 403; API key users (non-subscribers) fall through and use x-api-key auth.
  // This guard sits ahead of the disk read so that auth-state-derived answers
  // are never persisted — otherwise a service-key session would poison the
  // cache for a later full-OAuth session.
  const subscriberWithoutProfileScope =
    isClaudeAISubscriber() && !hasProfileScope()
  if (subscriberWithoutProfileScope) {
    return { enabled: false, hasError: false }
  }

  const diskEntry = getGlobalConfig().metricsStatusCache

  // No disk entry: first-ever run on this machine, so block on the network to
  // populate it.
  if (!diskEntry) {
    return refreshMetricsStatus()
  }

  const entryAgeMs = Date.now() - diskEntry.timestamp
  if (entryAgeMs > DISK_CACHE_TTL_MS) {
    // Fire-and-forget refresh. saveGlobalConfig's fallback path can throw when
    // both the locked and the fallback writes fail, so attach a catch to keep
    // that from becoming an unhandled rejection.
    void refreshMetricsStatus().catch(logError)
  }

  // Serve the disk value immediately, even when a refresh was just started.
  return {
    enabled: diskEntry.enabled,
    hasError: false,
  }
}
|
|
|
|
/**
 * Test-only hook: empties the in-memory memoization cache so the next check
 * re-runs the underlying API function. The disk cache is not touched.
 */
export function _clearMetricsEnabledCacheForTesting(): void {
  memoizedCheckMetrics.cache.clear()
}
|