107 lines
3.4 KiB
TypeScript
107 lines
3.4 KiB
TypeScript
// Voice keyterms for improving STT accuracy in the voice_stream endpoint.
|
|
//
|
|
// Provides domain-specific vocabulary hints (Deepgram "keywords") so the STT
|
|
// engine correctly recognises coding terminology, project names, and branch
|
|
// names that would otherwise be misheard.
|
|
|
|
import { basename } from 'path'
|
|
import { getProjectRoot } from '../bootstrap/state.js'
|
|
import { getBranch } from '../utils/git.js'
|
|
|
|
// ─── Global keyterms ────────────────────────────────────────────────
|
|
|
|
const GLOBAL_KEYTERMS: readonly string[] = [
|
|
// Terms Deepgram consistently mangles without keyword hints.
|
|
// Note: "Claude" and "Anthropic" are already server-side base keyterms.
|
|
// Avoid terms nobody speaks aloud as-spelled (stdout → "standard out").
|
|
'MCP',
|
|
'symlink',
|
|
'grep',
|
|
'regex',
|
|
'localhost',
|
|
'codebase',
|
|
'TypeScript',
|
|
'JSON',
|
|
'OAuth',
|
|
'webhook',
|
|
'gRPC',
|
|
'dotfiles',
|
|
'subagent',
|
|
'worktree',
|
|
]
|
|
|
|
// ─── Helpers ────────────────────────────────────────────────────────
|
|
|
|
/**
|
|
* Split an identifier (camelCase, PascalCase, kebab-case, snake_case, or
|
|
* path segments) into individual words. Fragments of 2 chars or fewer are
|
|
* discarded to avoid noise.
|
|
*/
|
|
export function splitIdentifier(name: string): string[] {
|
|
return name
|
|
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
|
.split(/[-_./\s]+/)
|
|
.map(w => w.trim())
|
|
.filter(w => w.length > 2 && w.length <= 20)
|
|
}
|
|
|
|
function fileNameWords(filePath: string): string[] {
|
|
const stem = basename(filePath).replace(/\.[^.]+$/, '')
|
|
return splitIdentifier(stem)
|
|
}
|
|
|
|
// ─── Public API ─────────────────────────────────────────────────────
|
|
|
|
const MAX_KEYTERMS = 50
|
|
|
|
/**
|
|
* Build a list of keyterms for the voice_stream STT endpoint.
|
|
*
|
|
* Combines hardcoded global coding terms with session context (project name,
|
|
* git branch, recent files) without any model calls.
|
|
*/
|
|
export async function getVoiceKeyterms(
|
|
recentFiles?: ReadonlySet<string>,
|
|
): Promise<string[]> {
|
|
const terms = new Set<string>(GLOBAL_KEYTERMS)
|
|
|
|
// Project root basename as a single term — users say "claude CLI internal"
|
|
// as a phrase, not isolated words. Keeping the whole basename lets the
|
|
// STT's keyterm boosting match the phrase regardless of separator.
|
|
try {
|
|
const projectRoot = getProjectRoot()
|
|
if (projectRoot) {
|
|
const name = basename(projectRoot)
|
|
if (name.length > 2 && name.length <= 50) {
|
|
terms.add(name)
|
|
}
|
|
}
|
|
} catch {
|
|
// getProjectRoot() may throw if not initialised yet — ignore
|
|
}
|
|
|
|
// Git branch words (e.g. "feat/voice-keyterms" → "feat", "voice", "keyterms")
|
|
try {
|
|
const branch = await getBranch()
|
|
if (branch) {
|
|
for (const word of splitIdentifier(branch)) {
|
|
terms.add(word)
|
|
}
|
|
}
|
|
} catch {
|
|
// getBranch() may fail if not in a git repo — ignore
|
|
}
|
|
|
|
// Recent file names — only scan enough to fill remaining slots
|
|
if (recentFiles) {
|
|
for (const filePath of recentFiles) {
|
|
if (terms.size >= MAX_KEYTERMS) break
|
|
for (const word of fileNameWords(filePath)) {
|
|
terms.add(word)
|
|
}
|
|
}
|
|
}
|
|
|
|
return [...terms].slice(0, MAX_KEYTERMS)
|
|
}
|