/**
 * Telemetry for plugin/marketplace fetches that hit the network.
 *
 * Added for inc-5046 (GitHub complained about claude-plugins-official load).
 * Before this, fetch operations only had logForDebugging — no way to measure
 * actual network volume. This surfaces what's hitting GitHub vs GCS vs
 * user-hosted so we can see the GCS migration take effect and catch future
 * hot-path regressions before GitHub emails us again.
 *
 * Volume: these fire at startup (install-counts 24h-TTL)
 * and on explicit user action (install/update). NOT per-interaction. Similar
 * envelope to tengu_binary_download_*.
 */
|
|
|
|
import {
|
|
logEvent,
|
|
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS as SafeString,
|
|
} from '../../services/analytics/index.js'
|
|
import { OFFICIAL_MARKETPLACE_NAME } from './officialMarketplace.js'
|
|
|
|
/**
 * Bounded set of labels identifying which fetch path produced a
 * `tengu_plugin_remote_fetch` event. Reported verbatim as the event's
 * `source` field, so keep this a closed union of short literals.
 */
export type PluginFetchSource =
  | 'install_counts'
  | 'marketplace_clone'
  | 'marketplace_pull'
  | 'marketplace_url'
  | 'plugin_clone'
  | 'mcpb'
|
|
|
|
/**
 * Result bucket recorded with each fetch event. Reported verbatim as the
 * event's `outcome` field.
 */
export type PluginFetchOutcome =
  | 'success'
  | 'failure'
  | 'cache_hit'
|
|
|
|
// Allowlist of public hosts we report by name. Anything else (enterprise
// git, self-hosted, internal) is bucketed as 'other' — we don't want
// internal hostnames (git.mycorp.internal) landing in telemetry. Bounded
// cardinality also keeps the dashboard host-breakdown tractable.
//
// Entries must be lowercase: lookups are done on a lowercased hostname.
// Typed as ReadonlySet so the allowlist cannot be mutated at runtime.
const KNOWN_PUBLIC_HOSTS: ReadonlySet<string> = new Set([
  'github.com',
  'raw.githubusercontent.com',
  'objects.githubusercontent.com',
  'gist.githubusercontent.com',
  'gitlab.com',
  'bitbucket.org',
  'codeberg.org',
  'dev.azure.com',
  'ssh.dev.azure.com',
  'storage.googleapis.com', // GCS — where Dickson's migration points
])
|
|
|
|
/**
 * Extract hostname from a URL or git spec and bucket to the allowlist.
 * Handles `https://host/...`, `git@host:path`, `ssh://host/...`.
 *
 * @param urlOrSpec - URL or scp-style git spec to inspect.
 * @returns A known public host (lowercased), 'other' (parseable but not
 *   allowlisted — don't leak private hostnames), or 'unknown' (unparseable,
 *   local path, or a URL that carries no host at all).
 */
function extractHost(urlOrSpec: string): string {
  let host: string
  // scp-style spec (`user@host:path`). The user part may not contain '/',
  // which keeps `scheme://user@host:port/...` URLs out of this branch.
  const scpHost = /^[^@/]+@([^:/]+):/.exec(urlOrSpec)?.[1]
  if (scpHost !== undefined) {
    host = scpHost
  } else {
    try {
      host = new URL(urlOrSpec).hostname
    } catch {
      return 'unknown'
    }
  }
  // `file:///path` and Windows drive paths (`C:\repo` parses as scheme `c:`)
  // are accepted by the URL parser but have an empty hostname. They are
  // local paths, not private remote hosts, so report 'unknown' not 'other'.
  if (host === '') {
    return 'unknown'
  }
  const normalized = host.toLowerCase()
  return KNOWN_PUBLIC_HOSTS.has(normalized) ? normalized : 'other'
}
|
|
|
|
/**
 * True if the URL/spec points at anthropics/claude-plugins-official — the
 * repo GitHub complained about. Lets the dashboard separate "our problem"
 * traffic from user-configured marketplaces.
 *
 * Matching is case-insensitive (so `Anthropics/Claude-Plugins-Official`
 * counts) and requires the `anthropics/<name>` slug to stand alone: it must
 * not be preceded or followed by another name character, so look-alikes
 * such as `not-anthropics/<name>` or `anthropics/<name>-fork` are excluded.
 *
 * @param urlOrSpec - URL or git spec that was fetched.
 * @returns Whether the spec references the official marketplace repo slug.
 */
function isOfficialRepo(urlOrSpec: string): boolean {
  const slug = `anthropics/${OFFICIAL_MARKETPLACE_NAME}`.toLowerCase()
  const haystack = urlOrSpec.toLowerCase()
  // Characters that can extend an owner/repo name; '.' is deliberately NOT
  // included so a trailing `.git` still counts as a match.
  const isNameChar = (ch: string | undefined): boolean =>
    ch !== undefined && /[a-z0-9_-]/.test(ch)

  let at = haystack.indexOf(slug)
  while (at !== -1) {
    const before = at > 0 ? haystack[at - 1] : undefined
    const after = haystack[at + slug.length]
    if (!isNameChar(before) && !isNameChar(after)) {
      return true
    }
    at = haystack.indexOf(slug, at + 1)
  }
  return false
}
|
|
|
|
/**
 * Emit one `tengu_plugin_remote_fetch` analytics event for a plugin or
 * marketplace fetch.
 *
 * @param source - Which fetch path issued the request.
 * @param urlOrSpec - URL or git spec that was fetched. When missing/empty the
 *   event reports host 'unknown' and is_official false.
 * @param outcome - Result bucket for the fetch.
 * @param durationMs - Elapsed time; rounded to the nearest integer before
 *   being reported.
 * @param errorKind - Optional error bucket; the `error_kind` field is only
 *   attached when this is a non-empty string.
 */
export function logPluginFetch(
  source: PluginFetchSource,
  urlOrSpec: string | undefined,
  outcome: PluginFetchOutcome,
  durationMs: number,
  errorKind?: string,
): void {
  const host = urlOrSpec ? extractHost(urlOrSpec) : 'unknown'
  const isOfficial = urlOrSpec ? isOfficialRepo(urlOrSpec) : false

  // String values are bounded enums / hostname-only — no code, no paths,
  // no raw error messages. Same privacy envelope as tengu_web_fetch_host.
  logEvent('tengu_plugin_remote_fetch', {
    source: source as SafeString,
    host: host as SafeString,
    is_official: isOfficial,
    outcome: outcome as SafeString,
    duration_ms: Math.round(durationMs),
    // Explicit ternary rather than `errorKind && {...}` so an empty string
    // is never itself the spread operand.
    ...(errorKind ? { error_kind: errorKind as SafeString } : {}),
  })
}
|
|
|
|
/**
 * Ordered classification rules for {@link classifyFetchError}: the first
 * pattern that matches wins, so order is significant.
 *
 * DNS is checked BEFORE timeout because gitClone's error enhancement at
 * marketplaceManager.ts:~950 rewrites DNS failures to include the word
 * "timeout" — ordering the other way would misclassify git DNS as timeout.
 *
 * HTTP status codes are matched on word boundaries so digits embedded in a
 * longer token (commit SHAs, millisecond counts, ports) don't get
 * misread as 401/403/404.
 */
const FETCH_ERROR_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  [
    /ENOTFOUND|ECONNREFUSED|EAI_AGAIN|Could not resolve host|Connection refused/i,
    'dns_or_refused',
  ],
  [/ETIMEDOUT|timed out|timeout/i, 'timeout'],
  [
    /ECONNRESET|socket hang up|Connection reset by peer|remote end hung up/i,
    'conn_reset',
  ],
  [/\b(?:401|403)\b|authentication|permission denied/i, 'auth'],
  [/\b404\b|not found|repository not found/i, 'not_found'],
  [/certificate|SSL|TLS|unable to get local issuer/i, 'tls'],
  // Schema validation throws "Invalid response format" (install_counts) —
  // distinguish from true unknowns so the dashboard can
  // see "server sent garbage" separately.
  [/Invalid response format|Invalid marketplace schema/i, 'invalid_schema'],
]

/**
 * Classify an error into a stable bucket for the error_kind field. Keeps
 * cardinality bounded — raw error messages would explode dashboard grouping.
 *
 * Handles both axios Error objects (Node.js error codes like ENOTFOUND) and
 * git stderr strings (human phrases like "Could not resolve host"). When the
 * error carries a string `code` property it is classified together with the
 * message, so errors whose message is empty but whose code is set
 * (e.g. `ECONNREFUSED`) still land in the right bucket.
 *
 * @param error - Anything thrown or reported by a fetch: an Error-like
 *   object, a plain string, or any other value (stringified).
 * @returns One of 'dns_or_refused', 'timeout', 'conn_reset', 'auth',
 *   'not_found', 'tls', 'invalid_schema', or 'other' when nothing matches.
 */
export function classifyFetchError(error: unknown): string {
  const details = error as
    | { message?: unknown; code?: unknown }
    | null
    | undefined
  const message = String(details?.message ?? error)
  const code = details?.code
  // Only string codes are meaningful here; numeric codes (e.g. process exit
  // statuses) would collide with the HTTP status patterns.
  const text = typeof code === 'string' ? `${code} ${message}` : message

  for (const [pattern, kind] of FETCH_ERROR_RULES) {
    if (pattern.test(text)) {
      return kind
    }
  }
  return 'other'
}
|