tts
This commit is contained in:
parent
d2ac7da6e4
commit
599b4ce836
Binary file not shown.
|
Before Width: | Height: | Size: 2.4 MiB |
2
packages/kbot/dist-in/commands/tts.d.ts
vendored
Normal file
2
packages/kbot/dist-in/commands/tts.d.ts
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
export declare const TTSOptionsSchema: () => any;
|
||||
export declare const ttsCommand: (argv: any) => Promise<void>;
|
||||
206
packages/kbot/dist-in/commands/tts.js
Normal file
206
packages/kbot/dist-in/commands/tts.js
Normal file
File diff suppressed because one or more lines are too long
31
packages/kbot/dist-in/lib/tts-elevenlabs.d.ts
vendored
Normal file
31
packages/kbot/dist-in/lib/tts-elevenlabs.d.ts
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
type OutputFormat = "mp3_22050_32" | "mp3_44100_32" | "mp3_44100_64" | "mp3_44100_96" | "mp3_44100_128" | "mp3_44100_192" | "pcm_16000" | "pcm_22050" | "pcm_24000" | "pcm_44100" | "ulaw_8000";
|
||||
export interface TTSOptions {
|
||||
text: string;
|
||||
voiceId?: string;
|
||||
outputFormat?: OutputFormat;
|
||||
modelId?: string;
|
||||
languageCode?: string | null;
|
||||
voiceSettings?: {
|
||||
stability?: number;
|
||||
similarityBoost?: number;
|
||||
style?: number;
|
||||
useSpeakerBoost?: boolean;
|
||||
} | null;
|
||||
pronunciationDictionaryLocators?: Array<{
|
||||
pronunciationDictionaryId: string;
|
||||
versionId: string;
|
||||
}> | null;
|
||||
seed?: number | null;
|
||||
previousText?: string | null;
|
||||
nextText?: string | null;
|
||||
previousRequestIds?: string[] | null;
|
||||
nextRequestIds?: string[] | null;
|
||||
applyTextNormalization?: 'auto' | 'on' | 'off';
|
||||
applyLanguageTextNormalization?: boolean;
|
||||
usePvcAsIvc?: boolean;
|
||||
config?: any;
|
||||
api_key?: string;
|
||||
logger?: any;
|
||||
}
|
||||
export declare const generateSpeech: (options: TTSOptions) => Promise<Buffer>;
|
||||
export {};
|
||||
71
packages/kbot/dist-in/lib/tts-elevenlabs.js
Normal file
71
packages/kbot/dist-in/lib/tts-elevenlabs.js
Normal file
@ -0,0 +1,71 @@
|
||||
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
|
||||
import { getLogger } from '../index.js';
|
||||
import { loadConfig } from '../config.js';
|
||||
export const generateSpeech = async (options) => {
|
||||
const logger = options.logger || getLogger({ logLevel: 4 });
|
||||
// Get API key from options or config
|
||||
const config = loadConfig(options);
|
||||
const apiKey = options.api_key || config?.elevenlabs?.key;
|
||||
if (!apiKey) {
|
||||
throw new Error('ElevenLabs API key not found. Please provide it via --api_key or in your config file under elevenlabs.key');
|
||||
}
|
||||
const client = new ElevenLabsClient({
|
||||
apiKey: apiKey
|
||||
});
|
||||
try {
|
||||
logger.info(`Generating speech with ElevenLabs...`);
|
||||
logger.debug(`Voice ID: ${options.voiceId || 'JBFqnCBsd6RMkjVDRZzb'}`);
|
||||
logger.debug(`Model: ${options.modelId || 'eleven_multilingual_v2'}`);
|
||||
logger.debug(`Output Format: ${options.outputFormat || 'mp3_44100_128'}`);
|
||||
logger.debug(`Text length: ${options.text.length} characters`);
|
||||
const audioStream = await client.textToSpeech.convert(options.voiceId || "JBFqnCBsd6RMkjVDRZzb", {
|
||||
outputFormat: options.outputFormat || "mp3_44100_128",
|
||||
text: options.text,
|
||||
modelId: options.modelId || "eleven_multilingual_v2",
|
||||
languageCode: options.languageCode,
|
||||
voiceSettings: options.voiceSettings,
|
||||
pronunciationDictionaryLocators: options.pronunciationDictionaryLocators,
|
||||
seed: options.seed,
|
||||
previousText: options.previousText,
|
||||
nextText: options.nextText,
|
||||
previousRequestIds: options.previousRequestIds,
|
||||
nextRequestIds: options.nextRequestIds,
|
||||
applyTextNormalization: options.applyTextNormalization || 'auto',
|
||||
applyLanguageTextNormalization: options.applyLanguageTextNormalization || false,
|
||||
usePvcAsIvc: options.usePvcAsIvc || false,
|
||||
});
|
||||
// The convert endpoint returns a ReadableStream, we need to collect all chunks
|
||||
const chunks = [];
|
||||
const reader = audioStream.getReader();
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done)
|
||||
break;
|
||||
chunks.push(value);
|
||||
}
|
||||
}
|
||||
finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
// Combine all chunks into a single buffer
|
||||
const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
|
||||
const audioBuffer = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of chunks) {
|
||||
audioBuffer.set(chunk, offset);
|
||||
offset += chunk.length;
|
||||
}
|
||||
const finalBuffer = Buffer.from(audioBuffer);
|
||||
logger.info(`Successfully generated ${finalBuffer.length} bytes of audio`);
|
||||
return finalBuffer;
|
||||
}
|
||||
catch (error) {
|
||||
logger.error('Failed to generate speech with ElevenLabs:', error.message);
|
||||
if (error.response?.data) {
|
||||
logger.error('API Error Details:', error.response.data);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoidHRzLWVsZXZlbmxhYnMuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL3R0cy1lbGV2ZW5sYWJzLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLE9BQU8sRUFBRSxnQkFBZ0IsRUFBRSxNQUFNLDJCQUEyQixDQUFDO0FBQzdELE9BQU8sRUFBRSxTQUFTLEVBQUUsTUFBTSxhQUFhLENBQUM7QUFDeEMsT0FBTyxFQUFFLFVBQVUsRUFBRSxNQUFNLGNBQWMsQ0FBQztBQW9DMUMsTUFBTSxDQUFDLE1BQU0sY0FBYyxHQUFHLEtBQUssRUFBRSxPQUFtQixFQUFtQixFQUFFO0lBQ3pFLE1BQU0sTUFBTSxHQUFHLE9BQU8sQ0FBQyxNQUFNLElBQUksU0FBUyxDQUFDLEVBQUUsUUFBUSxFQUFFLENBQUMsRUFBRSxDQUFDLENBQUM7SUFFNUQscUNBQXFDO0lBQ3JDLE1BQU0sTUFBTSxHQUFHLFVBQVUsQ0FBQyxPQUFPLENBQUMsQ0FBQztJQUNuQyxNQUFNLE1BQU0sR0FBRyxPQUFPLENBQUMsT0FBTyxJQUFJLE1BQU0sRUFBRSxVQUFVLEVBQUUsR0FBRyxDQUFDO0lBRTFELElBQUksQ0FBQyxNQUFNLEVBQUUsQ0FBQztRQUNWLE1BQU0sSUFBSSxLQUFLLENBQUMsMkdBQTJHLENBQUMsQ0FBQztJQUNqSSxDQUFDO0lBRUQsTUFBTSxNQUFNLEdBQUcsSUFBSSxnQkFBZ0IsQ0FBQztRQUNoQyxNQUFNLEVBQUUsTUFBTTtLQUNqQixDQUFDLENBQUM7SUFFSCxJQUFJLENBQUM7UUFDRCxNQUFNLENBQUMsSUFBSSxDQUFDLHNDQUFzQyxDQUFDLENBQUM7UUFDcEQsTUFBTSxDQUFDLEtBQUssQ0FBQyxhQUFhLE9BQU8sQ0FBQyxPQUFPLElBQUksc0JBQXNCLEVBQUUsQ0FBQyxDQUFDO1FBQ3ZFLE1BQU0sQ0FBQyxLQUFLLENBQUMsVUFBVSxPQUFPLENBQUMsT0FBTyxJQUFJLHdCQUF3QixFQUFFLENBQUMsQ0FBQztRQUN0RSxNQUFNLENBQUMsS0FBSyxDQUFDLGtCQUFrQixPQUFPLENBQUMsWUFBWSxJQUFJLGVBQWUsRUFBRSxDQUFDLENBQUM7UUFDMUUsTUFBTSxDQUFDLEtBQUssQ0FBQyxnQkFBZ0IsT0FBTyxDQUFDLElBQUksQ0FBQyxNQUFNLGFBQWEsQ0FBQyxDQUFDO1FBRS9ELE1BQU0sV0FBVyxHQUFHLE1BQU0sTUFBTSxDQUFDLFlBQVksQ0FBQyxPQUFPLENBQ2pELE9BQU8sQ0FBQyxPQUFPLElBQUksc0JBQXNCLEVBQ3pDO1lBQ0ksWUFBWSxFQUFFLE9BQU8sQ0FBQyxZQUFZLElBQUksZUFBZTtZQUNyRCxJQUFJLEVBQUUsT0FBTyxDQUFDLElBQUk7WUFDbEIsT0FBTyxFQUFFLE9BQU8sQ0FBQyxPQUFPLElBQUksd0JBQXdCO1lBQ3BELFlBQVksRUFBRSxPQUFPLENBQUMsWUFBWTtZQUNsQyxhQUFhLEVBQUUsT0FBTyxDQUFDLGFBQWE7WUFDcEMsK0JBQStCLEVBQUUsT0FBTyxDQUFDLCtCQUErQjtZQUN4RSxJQUFJLEVBQUUsT0FBTyxDQUFDLElBQUk7WUFDbEIsWUFBWSxFQUFFLE9BQU8sQ0FBQyxZQUFZO1lBQ2xDLFFBQVEsRUFBRSxPQUFPLENBQUMsUUFBUTtZQUMxQixrQkFBa0IsRUFBRSxPQUFPLENBQUMsa0JBQWtCO1
lBQzlDLGNBQWMsRUFBRSxPQUFPLENBQUMsY0FBYztZQUN0QyxzQkFBc0IsRUFBRSxPQUFPLENBQUMsc0JBQXNCLElBQUksTUFBTTtZQUNoRSw4QkFBOEIsRUFBRSxPQUFPLENBQUMsOEJBQThCLElBQUksS0FBSztZQUMvRSxXQUFXLEVBQUUsT0FBTyxDQUFDLFdBQVcsSUFBSSxLQUFLO1NBQzVDLENBQ0osQ0FBQztRQUVGLCtFQUErRTtRQUMvRSxNQUFNLE1BQU0sR0FBaUIsRUFBRSxDQUFDO1FBQ2hDLE1BQU0sTUFBTSxHQUFHLFdBQVcsQ0FBQyxTQUFTLEVBQUUsQ0FBQztRQUV2QyxJQUFJLENBQUM7WUFDRCxPQUFPLElBQUksRUFBRSxDQUFDO2dCQUNWLE1BQU0sRUFBRSxJQUFJLEVBQUUsS0FBSyxFQUFFLEdBQUcsTUFBTSxNQUFNLENBQUMsSUFBSSxFQUFFLENBQUM7Z0JBQzVDLElBQUksSUFBSTtvQkFBRSxNQUFNO2dCQUNoQixNQUFNLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxDQUFDO1lBQ3ZCLENBQUM7UUFDTCxDQUFDO2dCQUFTLENBQUM7WUFDUCxNQUFNLENBQUMsV0FBVyxFQUFFLENBQUM7UUFDekIsQ0FBQztRQUVELDBDQUEwQztRQUMxQyxNQUFNLFdBQVcsR0FBRyxNQUFNLENBQUMsTUFBTSxDQUFDLENBQUMsR0FBRyxFQUFFLEtBQUssRUFBRSxFQUFFLENBQUMsR0FBRyxHQUFHLEtBQUssQ0FBQyxNQUFNLEVBQUUsQ0FBQyxDQUFDLENBQUM7UUFDekUsTUFBTSxXQUFXLEdBQUcsSUFBSSxVQUFVLENBQUMsV0FBVyxDQUFDLENBQUM7UUFDaEQsSUFBSSxNQUFNLEdBQUcsQ0FBQyxDQUFDO1FBQ2YsS0FBSyxNQUFNLEtBQUssSUFBSSxNQUFNLEVBQUUsQ0FBQztZQUN6QixXQUFXLENBQUMsR0FBRyxDQUFDLEtBQUssRUFBRSxNQUFNLENBQUMsQ0FBQztZQUMvQixNQUFNLElBQUksS0FBSyxDQUFDLE1BQU0sQ0FBQztRQUMzQixDQUFDO1FBRUQsTUFBTSxXQUFXLEdBQUcsTUFBTSxDQUFDLElBQUksQ0FBQyxXQUFXLENBQUMsQ0FBQztRQUM3QyxNQUFNLENBQUMsSUFBSSxDQUFDLDBCQUEwQixXQUFXLENBQUMsTUFBTSxpQkFBaUIsQ0FBQyxDQUFDO1FBRTNFLE9BQU8sV0FBVyxDQUFDO0lBRXZCLENBQUM7SUFBQyxPQUFPLEtBQVUsRUFBRSxDQUFDO1FBQ2xCLE1BQU0sQ0FBQyxLQUFLLENBQUMsNENBQTRDLEVBQUUsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFDO1FBQzFFLElBQUksS0FBSyxDQUFDLFFBQVEsRUFBRSxJQUFJLEVBQUUsQ0FBQztZQUN2QixNQUFNLENBQUMsS0FBSyxDQUFDLG9CQUFvQixFQUFFLEtBQUssQ0FBQyxRQUFRLENBQUMsSUFBSSxDQUFDLENBQUM7UUFDNUQsQ0FBQztRQUNELE1BQU0sS0FBSyxDQUFDO0lBQ2hCLENBQUM7QUFDTCxDQUFDLENBQUMifQ==
|
||||
@ -12,6 +12,7 @@ import { fetch } from './commands/fetch.js';
|
||||
import { run } from './commands/run.js';
|
||||
import { transcribeCommand, TranscribeOptionsSchema } from './commands/transcribe.js';
|
||||
import { imageCommand, ImageOptionsSchema } from './commands/images.js';
|
||||
import { ttsCommand, TTSOptionsSchema } from './commands/tts.js';
|
||||
export const logger = createLogger('llm-tools');
|
||||
const modify = async (argv) => await run(argv);
|
||||
const yargOptions = {
|
||||
@ -33,6 +34,7 @@ yargs(hideBin(process.argv))
|
||||
.command('modify [prompt]', 'Modify an existing project', (yargs) => toYargs(yargs, OptionsSchema(), yargOptions), modify)
|
||||
.command('image [prompt]', 'Create or edit an image', (yargs) => toYargs(yargs, ImageOptionsSchema(), yargOptions), imageCommand)
|
||||
.command('transcribe', 'Transcribe audio files', (yargs) => toYargs(yargs, TranscribeOptionsSchema(), yargOptions), transcribeCommand)
|
||||
.command('tts', 'Convert text to speech using ElevenLabs', (yargs) => toYargs(yargs, TTSOptionsSchema(), yargOptions), ttsCommand)
|
||||
.command('types', 'Generate types', (yargs) => { }, (argv) => types())
|
||||
.command('schemas', 'Generate schemas', (yargs) => { }, (argv) => schemas())
|
||||
.command('build', 'Build kbot essentials', (yargs) => { }, (argv) => build())
|
||||
@ -43,4 +45,4 @@ yargs(hideBin(process.argv))
|
||||
.help()
|
||||
//.wrap(yargs.terminalWidth() - 20)
|
||||
.parse();
|
||||
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibWFpbi5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uL3NyYy9tYWluLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiI7QUFDQSxPQUFPLEtBQUssTUFBTSxPQUFPLENBQUE7QUFDekIsT0FBTyxFQUFFLE9BQU8sRUFBRSxNQUFNLGVBQWUsQ0FBQTtBQUN2QyxPQUFPLEVBQUUsT0FBTyxFQUFFLE1BQU0sbUJBQW1CLENBQUE7QUFDM0MsT0FBTyxFQUFFLFlBQVksRUFBRSxNQUFNLGVBQWUsQ0FBQTtBQUU1QyxPQUFPLEVBQUUsYUFBYSxFQUFFLE9BQU8sRUFBRSxLQUFLLEVBQUUsTUFBTSxpQkFBaUIsQ0FBQTtBQUcvRCxPQUFPLFdBQVcsTUFBTSxvQkFBb0IsQ0FBQTtBQUM1QyxPQUFPLEVBQUUsUUFBUSxFQUFFLE1BQU0sd0JBQXdCLENBQUE7QUFDakQsT0FBTyxFQUFFLElBQUksRUFBRSxNQUFNLG9CQUFvQixDQUFBO0FBQ3pDLE9BQU8sRUFBRSxLQUFLLEVBQUUsTUFBTSxxQkFBcUIsQ0FBQTtBQUMzQyxPQUFPLEVBQUUsS0FBSyxFQUFFLE1BQU0scUJBQXFCLENBQUE7QUFDM0MsT0FBTyxFQUFFLEdBQUcsRUFBRSxNQUFNLG1CQUFtQixDQUFBO0FBRXZDLE9BQU8sRUFBRSxpQkFBaUIsRUFBRSx1QkFBdUIsRUFBRSxNQUFNLDBCQUEwQixDQUFBO0FBQ3JGLE9BQU8sRUFBRSxZQUFZLEVBQUUsa0JBQWtCLEVBQUUsTUFBTSxzQkFBc0IsQ0FBQTtBQUV2RSxNQUFNLENBQUMsTUFBTSxNQUFNLEdBQVEsWUFBWSxDQUFDLFdBQVcsQ0FBQyxDQUFBO0FBRXBELE1BQU0sTUFBTSxHQUFHLEtBQUssRUFBRSxJQUFlLEVBQUUsRUFBRSxDQUFFLE1BQU0sR0FBRyxDQUFDLElBQWlCLENBQUMsQ0FBQTtBQUV2RSxNQUFNLFdBQVcsR0FBUTtJQUN2QixLQUFLLEVBQUUsQ0FBQyxDQUFDLE1BQU0sRUFBRSxHQUFHLEVBQUUsT0FBTyxFQUFFLEVBQUU7UUFDL0IsUUFBUSxHQUFHLEVBQUUsQ0FBQztZQUNaLEtBQUssUUFBUTtnQkFDWCxDQUFDO29CQUNDLE9BQU8sTUFBTSxDQUFDLFVBQVUsQ0FBQyxHQUFHLEVBQUUsT0FBTyxDQUFDLENBQUE7Z0JBQ3hDLENBQUM7WUFDSCxLQUFLLFNBQVM7Z0JBQ1osQ0FBQztvQkFDQyxPQUFPLE1BQU0sQ0FBQyxNQUFNLENBQUMsR0FBRyxFQUFFLEVBQUMsR0FBRyxPQUFPLEVBQUUsS0FBSyxFQUFFLEdBQUcsQ0FBQyxDQUFDLENBQUMsQ0FBQyxXQUFXLEVBQUUsRUFBQyxDQUFDLENBQUE7Z0JBQ3RFLENBQUM7UUFDTCxDQUFDO0lBQ0gsQ0FBQyxDQUFDO0NBQ0gsQ0FBQTtBQUVELEtBQUssQ0FBQyxPQUFPLENBQUMsT0FBTyxDQUFDLElBQUksQ0FBQyxDQUFDO0tBQ3pCLE9BQU8sQ0FDTixNQUFNLEVBQ04sK0JBQStCLEVBQy9CLENBQUMsS0FBSyxFQUFFLEVBQUUsQ0FBQyxPQUFPLENBQUMsS0FBSyxFQUFFLGFBQWEsRUFBRSxFQUFFLFdBQVcsQ0FBQyxFQUN2RCxJQUFJLENBQ0w7S0FDQSxPQUFPLENBQ04saUJBQWlCLEVBQ2pCLDRCQUE0QixFQUM1QixDQUFDLEtBQUssRUFBRSxFQUFFLENBQUMsT0FBTyxDQUFDLE
tBQUssRUFBRSxhQUFhLEVBQUUsRUFBRSxXQUFXLENBQUMsRUFDdkQsTUFBTSxDQUNQO0tBQ0EsT0FBTyxDQUNOLGdCQUFnQixFQUNoQix5QkFBeUIsRUFDekIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxDQUFDLE9BQU8sQ0FBQyxLQUFLLEVBQUUsa0JBQWtCLEVBQUUsRUFBRSxXQUFXLENBQUMsRUFDNUQsWUFBWSxDQUNiO0tBQ0EsT0FBTyxDQUNOLFlBQVksRUFDWix3QkFBd0IsRUFDeEIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxDQUFDLE9BQU8sQ0FBQyxLQUFLLEVBQUUsdUJBQXVCLEVBQUUsRUFBRSxXQUFXLENBQUMsRUFDakUsaUJBQWlCLENBQ2xCO0tBQ0EsT0FBTyxDQUNOLE9BQU8sRUFDUCxnQkFBZ0IsRUFDaEIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxHQUFHLENBQUMsRUFDZCxDQUFDLElBQUksRUFBRSxFQUFFLENBQUMsS0FBSyxFQUFFLENBQ2xCO0tBQ0EsT0FBTyxDQUNOLFNBQVMsRUFDVCxrQkFBa0IsRUFDbEIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxHQUFHLENBQUMsRUFDZCxDQUFDLElBQUksRUFBRSxFQUFFLENBQUMsT0FBTyxFQUFFLENBQ3BCO0tBQ0EsT0FBTyxDQUNOLE9BQU8sRUFDUCx1QkFBdUIsRUFDdkIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxHQUFHLENBQUMsRUFDZCxDQUFDLElBQUksRUFBRSxFQUFFLENBQUMsS0FBSyxFQUFFLENBQ2xCO0tBQ0EsT0FBTyxDQUNOLE9BQU8sRUFDUCwrQkFBK0IsRUFDL0IsQ0FBQyxLQUFLLEVBQUUsRUFBRSxHQUFHLENBQUMsRUFDZCxDQUFDLElBQUksRUFBRSxFQUFFLENBQUMsS0FBSyxFQUFFLENBQ2xCO0tBQ0EsT0FBTyxDQUNOLFNBQVMsRUFDVCx3QkFBd0IsRUFDeEIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxHQUFHLENBQUMsRUFDZCxXQUFXLENBQ1o7S0FDQSxPQUFPLENBQ04sVUFBVSxFQUNWLGVBQWUsRUFDZixDQUFDLEtBQUssRUFBRSxFQUFFLEdBQUcsQ0FBQyxFQUNkLFFBQVEsQ0FDVDtLQUNBLE9BQU8sQ0FBQyxDQUFDLGlCQUFpQixFQUFFLElBQUksQ0FBQyxFQUFFLHdCQUF3QixFQUMxRCxDQUFDLEtBQUssRUFBRSxFQUFFLENBQUMsT0FBTyxDQUFDLEtBQUssRUFBRSxhQUFhLEVBQUUsRUFBRSxXQUFXLENBQUMsRUFBRSxNQUFNLENBQUM7S0FDakUsSUFBSSxFQUFFO0lBQ1AsbUNBQW1DO0tBQ2xDLEtBQUssRUFBRSxDQUFBIn0=
|
||||
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibWFpbi5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uL3NyYy9tYWluLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiI7QUFDQSxPQUFPLEtBQUssTUFBTSxPQUFPLENBQUE7QUFDekIsT0FBTyxFQUFFLE9BQU8sRUFBRSxNQUFNLGVBQWUsQ0FBQTtBQUN2QyxPQUFPLEVBQUUsT0FBTyxFQUFFLE1BQU0sbUJBQW1CLENBQUE7QUFDM0MsT0FBTyxFQUFFLFlBQVksRUFBRSxNQUFNLGVBQWUsQ0FBQTtBQUU1QyxPQUFPLEVBQUUsYUFBYSxFQUFFLE9BQU8sRUFBRSxLQUFLLEVBQUUsTUFBTSxpQkFBaUIsQ0FBQTtBQUcvRCxPQUFPLFdBQVcsTUFBTSxvQkFBb0IsQ0FBQTtBQUM1QyxPQUFPLEVBQUUsUUFBUSxFQUFFLE1BQU0sd0JBQXdCLENBQUE7QUFDakQsT0FBTyxFQUFFLElBQUksRUFBRSxNQUFNLG9CQUFvQixDQUFBO0FBQ3pDLE9BQU8sRUFBRSxLQUFLLEVBQUUsTUFBTSxxQkFBcUIsQ0FBQTtBQUMzQyxPQUFPLEVBQUUsS0FBSyxFQUFFLE1BQU0scUJBQXFCLENBQUE7QUFDM0MsT0FBTyxFQUFFLEdBQUcsRUFBRSxNQUFNLG1CQUFtQixDQUFBO0FBRXZDLE9BQU8sRUFBRSxpQkFBaUIsRUFBRSx1QkFBdUIsRUFBRSxNQUFNLDBCQUEwQixDQUFBO0FBQ3JGLE9BQU8sRUFBRSxZQUFZLEVBQUUsa0JBQWtCLEVBQUUsTUFBTSxzQkFBc0IsQ0FBQTtBQUN2RSxPQUFPLEVBQUUsVUFBVSxFQUFFLGdCQUFnQixFQUFFLE1BQU0sbUJBQW1CLENBQUE7QUFFaEUsTUFBTSxDQUFDLE1BQU0sTUFBTSxHQUFRLFlBQVksQ0FBQyxXQUFXLENBQUMsQ0FBQTtBQUVwRCxNQUFNLE1BQU0sR0FBRyxLQUFLLEVBQUUsSUFBZSxFQUFFLEVBQUUsQ0FBRSxNQUFNLEdBQUcsQ0FBQyxJQUFpQixDQUFDLENBQUE7QUFFdkUsTUFBTSxXQUFXLEdBQVE7SUFDdkIsS0FBSyxFQUFFLENBQUMsQ0FBQyxNQUFNLEVBQUUsR0FBRyxFQUFFLE9BQU8sRUFBRSxFQUFFO1FBQy9CLFFBQVEsR0FBRyxFQUFFLENBQUM7WUFDWixLQUFLLFFBQVE7Z0JBQ1gsQ0FBQztvQkFDQyxPQUFPLE1BQU0sQ0FBQyxVQUFVLENBQUMsR0FBRyxFQUFFLE9BQU8sQ0FBQyxDQUFBO2dCQUN4QyxDQUFDO1lBQ0gsS0FBSyxTQUFTO2dCQUNaLENBQUM7b0JBQ0MsT0FBTyxNQUFNLENBQUMsTUFBTSxDQUFDLEdBQUcsRUFBRSxFQUFDLEdBQUcsT0FBTyxFQUFFLEtBQUssRUFBRSxHQUFHLENBQUMsQ0FBQyxDQUFDLENBQUMsV0FBVyxFQUFFLEVBQUMsQ0FBQyxDQUFBO2dCQUN0RSxDQUFDO1FBQ0wsQ0FBQztJQUNILENBQUMsQ0FBQztDQUNILENBQUE7QUFFRCxLQUFLLENBQUMsT0FBTyxDQUFDLE9BQU8sQ0FBQyxJQUFJLENBQUMsQ0FBQztLQUN6QixPQUFPLENBQ04sTUFBTSxFQUNOLCtCQUErQixFQUMvQixDQUFDLEtBQUssRUFBRSxFQUFFLENBQUMsT0FBTyxDQUFDLEtBQUssRUFBRSxhQUFhLEVBQUUsRUFBRSxXQUFXLENBQUMsRUFDdkQsSUFBSSxDQUNMO0tBQ0EsT0FBTyxDQUNOLGlCQUFpQi
xFQUNqQiw0QkFBNEIsRUFDNUIsQ0FBQyxLQUFLLEVBQUUsRUFBRSxDQUFDLE9BQU8sQ0FBQyxLQUFLLEVBQUUsYUFBYSxFQUFFLEVBQUUsV0FBVyxDQUFDLEVBQ3ZELE1BQU0sQ0FDUDtLQUNBLE9BQU8sQ0FDTixnQkFBZ0IsRUFDaEIseUJBQXlCLEVBQ3pCLENBQUMsS0FBSyxFQUFFLEVBQUUsQ0FBQyxPQUFPLENBQUMsS0FBSyxFQUFFLGtCQUFrQixFQUFFLEVBQUUsV0FBVyxDQUFDLEVBQzVELFlBQVksQ0FDYjtLQUNBLE9BQU8sQ0FDTixZQUFZLEVBQ1osd0JBQXdCLEVBQ3hCLENBQUMsS0FBSyxFQUFFLEVBQUUsQ0FBQyxPQUFPLENBQUMsS0FBSyxFQUFFLHVCQUF1QixFQUFFLEVBQUUsV0FBVyxDQUFDLEVBQ2pFLGlCQUFpQixDQUNsQjtLQUNBLE9BQU8sQ0FDTixLQUFLLEVBQ0wseUNBQXlDLEVBQ3pDLENBQUMsS0FBSyxFQUFFLEVBQUUsQ0FBQyxPQUFPLENBQUMsS0FBSyxFQUFFLGdCQUFnQixFQUFFLEVBQUUsV0FBVyxDQUFDLEVBQzFELFVBQVUsQ0FDWDtLQUNBLE9BQU8sQ0FDTixPQUFPLEVBQ1AsZ0JBQWdCLEVBQ2hCLENBQUMsS0FBSyxFQUFFLEVBQUUsR0FBRyxDQUFDLEVBQ2QsQ0FBQyxJQUFJLEVBQUUsRUFBRSxDQUFDLEtBQUssRUFBRSxDQUNsQjtLQUNBLE9BQU8sQ0FDTixTQUFTLEVBQ1Qsa0JBQWtCLEVBQ2xCLENBQUMsS0FBSyxFQUFFLEVBQUUsR0FBRyxDQUFDLEVBQ2QsQ0FBQyxJQUFJLEVBQUUsRUFBRSxDQUFDLE9BQU8sRUFBRSxDQUNwQjtLQUNBLE9BQU8sQ0FDTixPQUFPLEVBQ1AsdUJBQXVCLEVBQ3ZCLENBQUMsS0FBSyxFQUFFLEVBQUUsR0FBRyxDQUFDLEVBQ2QsQ0FBQyxJQUFJLEVBQUUsRUFBRSxDQUFDLEtBQUssRUFBRSxDQUNsQjtLQUNBLE9BQU8sQ0FDTixPQUFPLEVBQ1AsK0JBQStCLEVBQy9CLENBQUMsS0FBSyxFQUFFLEVBQUUsR0FBRyxDQUFDLEVBQ2QsQ0FBQyxJQUFJLEVBQUUsRUFBRSxDQUFDLEtBQUssRUFBRSxDQUNsQjtLQUNBLE9BQU8sQ0FDTixTQUFTLEVBQ1Qsd0JBQXdCLEVBQ3hCLENBQUMsS0FBSyxFQUFFLEVBQUUsR0FBRyxDQUFDLEVBQ2QsV0FBVyxDQUNaO0tBQ0EsT0FBTyxDQUNOLFVBQVUsRUFDVixlQUFlLEVBQ2YsQ0FBQyxLQUFLLEVBQUUsRUFBRSxHQUFHLENBQUMsRUFDZCxRQUFRLENBQ1Q7S0FDQSxPQUFPLENBQUMsQ0FBQyxpQkFBaUIsRUFBRSxJQUFJLENBQUMsRUFBRSx3QkFBd0IsRUFDMUQsQ0FBQyxLQUFLLEVBQUUsRUFBRSxDQUFDLE9BQU8sQ0FBQyxLQUFLLEVBQUUsYUFBYSxFQUFFLEVBQUUsV0FBVyxDQUFDLEVBQUUsTUFBTSxDQUFDO0tBQ2pFLElBQUksRUFBRTtJQUNQLG1DQUFtQztLQUNsQyxLQUFLLEVBQUUsQ0FBQSJ9
|
||||
BIN
packages/kbot/dist/win-64/tauri-app.exe
vendored
BIN
packages/kbot/dist/win-64/tauri-app.exe
vendored
Binary file not shown.
@ -2,6 +2,7 @@ import { useState, useEffect } from "react";
|
||||
import { invoke } from "@tauri-apps/api/core";
|
||||
import { open, save } from '@tauri-apps/plugin-dialog';
|
||||
import { readFile, writeFile, BaseDirectory } from '@tauri-apps/plugin-fs';
|
||||
import { fetch } from '@tauri-apps/plugin-http';
|
||||
// Path imports commented out since they're not currently used
|
||||
// import {
|
||||
// homeDir, audioDir, cacheDir, configDir, dataDir, localDataDir, desktopDir,
|
||||
@ -103,28 +104,20 @@ function App() {
|
||||
console.log('API key available:', !!apiKey);
|
||||
console.log('Include images count:', includeImages.length);
|
||||
|
||||
// Use the same approach as the backend - import GoogleGenerativeAI dynamically
|
||||
console.log('Importing GoogleGenerativeAI...');
|
||||
const { GoogleGenerativeAI } = await import('@google/generative-ai');
|
||||
console.log('GoogleGenerativeAI imported successfully');
|
||||
|
||||
const ai = new GoogleGenerativeAI(apiKey);
|
||||
console.log('GoogleGenerativeAI client created');
|
||||
|
||||
const model = ai.getGenerativeModel({ model: 'gemini-2.5-flash-image-preview' });
|
||||
console.log('Model obtained:', 'gemini-2.5-flash-image-preview');
|
||||
// Use Tauri's HTTP client directly instead of Google SDK (which has fetch issues in Tauri)
|
||||
console.log('Using Tauri HTTP client for API calls...');
|
||||
|
||||
// Prepare the request payload for Google Gemini API
|
||||
const parts: any[] = [];
|
||||
|
||||
if (includeImages.length > 0) {
|
||||
// Image editing - similar to editImage function
|
||||
const imageParts: any[] = [];
|
||||
|
||||
// Add image parts for editing
|
||||
for (const imageFile of includeImages) {
|
||||
// Extract base64 data from the data URL
|
||||
const base64Match = imageFile.src.match(/^data:([^;]+);base64,(.+)$/);
|
||||
if (base64Match) {
|
||||
const mimeType = base64Match[1];
|
||||
const base64Data = base64Match[2];
|
||||
imageParts.push({
|
||||
parts.push({
|
||||
inlineData: {
|
||||
mimeType,
|
||||
data: base64Data
|
||||
@ -132,58 +125,49 @@ function App() {
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const textPart = { text: promptText };
|
||||
const promptParts = [...imageParts, textPart];
|
||||
|
||||
console.log('Making API call for image editing with parts:', promptParts.length);
|
||||
const result = await model.generateContent(promptParts);
|
||||
console.log('API call completed for image editing');
|
||||
const response = result.response;
|
||||
const parts = response.candidates?.[0]?.content?.parts;
|
||||
|
||||
for (const part of parts || []) {
|
||||
if ('inlineData' in part) {
|
||||
const inlineData = part.inlineData;
|
||||
if (inlineData) {
|
||||
const generatedImage: GeneratedImage = {
|
||||
id: Date.now().toString(),
|
||||
src: `data:${inlineData.mimeType};base64,${inlineData.data}`,
|
||||
prompt: promptText,
|
||||
timestamp: Date.now(),
|
||||
saved: false
|
||||
};
|
||||
|
||||
setGeneratedImages(prev => [...prev, generatedImage]);
|
||||
console.log('Generated new image (edit):', generatedImage.id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Image creation - similar to createImage function
|
||||
console.log('Making API call for image creation with prompt:', promptText);
|
||||
const result = await model.generateContent(promptText);
|
||||
console.log('API call completed for image creation');
|
||||
const response = result.response;
|
||||
const parts = response.candidates?.[0]?.content?.parts;
|
||||
|
||||
for (const part of parts || []) {
|
||||
if ('inlineData' in part) {
|
||||
const inlineData = part.inlineData;
|
||||
if (inlineData) {
|
||||
const generatedImage: GeneratedImage = {
|
||||
id: Date.now().toString(),
|
||||
src: `data:${inlineData.mimeType};base64,${inlineData.data}`,
|
||||
prompt: promptText,
|
||||
timestamp: Date.now(),
|
||||
saved: false
|
||||
};
|
||||
|
||||
setGeneratedImages(prev => [...prev, generatedImage]);
|
||||
console.log('Generated new image (create):', generatedImage.id);
|
||||
return;
|
||||
}
|
||||
// Add text prompt
|
||||
parts.push({ text: promptText });
|
||||
|
||||
const requestBody = {
|
||||
contents: [{
|
||||
parts: parts
|
||||
}]
|
||||
};
|
||||
|
||||
console.log('Making API call with parts:', parts.length);
|
||||
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image-preview:generateContent?key=${apiKey}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(requestBody)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`API request failed: ${response.status} ${response.statusText}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
console.log('API call completed successfully');
|
||||
|
||||
// Extract generated image from response
|
||||
const candidates = data.candidates;
|
||||
if (candidates && candidates[0]?.content?.parts) {
|
||||
for (const part of candidates[0].content.parts) {
|
||||
if (part.inlineData) {
|
||||
const generatedImage: GeneratedImage = {
|
||||
id: Date.now().toString(),
|
||||
src: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`,
|
||||
prompt: promptText,
|
||||
timestamp: Date.now(),
|
||||
saved: false
|
||||
};
|
||||
|
||||
setGeneratedImages(prev => [...prev, generatedImage]);
|
||||
console.log('Generated new image:', generatedImage.id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
20
packages/kbot/package-lock.json
generated
20
packages/kbot/package-lock.json
generated
@ -10,6 +10,7 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@dmitryrechkin/json-schema-to-zod": "1.0.1",
|
||||
"@elevenlabs/elevenlabs-js": "2.15.0",
|
||||
"@google/genai": "1.19.0",
|
||||
"@google/generative-ai": "0.24.1",
|
||||
"@polymech/ai-tools": "file:../ai-tools",
|
||||
@ -427,6 +428,19 @@
|
||||
"zod": "^3.23.8"
|
||||
}
|
||||
},
|
||||
"node_modules/@elevenlabs/elevenlabs-js": {
|
||||
"version": "2.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@elevenlabs/elevenlabs-js/-/elevenlabs-js-2.15.0.tgz",
|
||||
"integrity": "sha512-YCeWBFh3FSd4Qaf2j8a1Ko1+QwT1cphktSrPL5yxUrBP73fQGjkXlwuCddm7eB/XO3VifYajt39x9eleBKO8Mw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"command-exists": "^1.2.9",
|
||||
"node-fetch": "^2.7.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.21.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz",
|
||||
@ -3806,6 +3820,12 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/command-exists": {
|
||||
"version": "1.2.9",
|
||||
"resolved": "https://registry.npmjs.org/command-exists/-/command-exists-1.2.9.tgz",
|
||||
"integrity": "sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/commander": {
|
||||
"version": "12.1.0",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz",
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
"register-commands": "pm-cli register-commands --config=salamand.json --group=kbot",
|
||||
"test": "vitest run",
|
||||
"test:basic": "vitest run tests/unit/basic.test.ts",
|
||||
"test:tts": "vitest run tests/unit/audio/tts.test.ts",
|
||||
"test:transcribe": "vitest run tests/unit/transcribe/transcribe.test.ts",
|
||||
"test:images": "vitest run tests/unit/images/images.test.ts",
|
||||
"test:math": "vitest run tests/unit/math.test.ts",
|
||||
@ -60,6 +61,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@dmitryrechkin/json-schema-to-zod": "1.0.1",
|
||||
"@elevenlabs/elevenlabs-js": "2.15.0",
|
||||
"@google/genai": "1.19.0",
|
||||
"@google/generative-ai": "0.24.1",
|
||||
"@polymech/ai-tools": "file:../ai-tools",
|
||||
|
||||
225
packages/kbot/src/commands/tts.ts
Normal file
225
packages/kbot/src/commands/tts.ts
Normal file
@ -0,0 +1,225 @@
|
||||
import { z } from 'zod';
|
||||
import * as path from 'node:path';
|
||||
import { sync as write } from '@polymech/fs/write';
|
||||
import { sync as exists } from '@polymech/fs/exists';
|
||||
import { sync as read } from '@polymech/fs/read';
|
||||
|
||||
import { isString } from '@polymech/core/primitives';
|
||||
|
||||
import { OptionsSchema } from '../zod_schema.js';
|
||||
import { generateSpeech } from '../lib/tts-elevenlabs.js';
|
||||
import { getLogger } from '../index.js';
|
||||
import { prompt as resolvePrompt } from '../prompt.js';
|
||||
import { variables } from '../variables.js';
|
||||
import { resolve } from '@polymech/commons';
|
||||
// Cache for voices data
|
||||
let voicesCache: any = null;
|
||||
|
||||
const getVoicesData = async () => {
|
||||
if (!voicesCache) {
|
||||
try {
|
||||
// Try multiple possible paths for voices.json
|
||||
const possiblePaths = [
|
||||
path.resolve('src/lib/voices.json'),
|
||||
path.resolve('lib/voices.json'),
|
||||
path.resolve(path.dirname(new URL(import.meta.url).pathname), '..', 'lib', 'voices.json'),
|
||||
path.resolve(path.dirname(new URL(import.meta.url).pathname), 'lib', 'voices.json'),
|
||||
];
|
||||
|
||||
let voicesContent = '';
|
||||
for (const voicesPath of possiblePaths) {
|
||||
const cleanPath = process.platform === 'win32' && voicesPath.startsWith('/')
|
||||
? voicesPath.substring(1)
|
||||
: voicesPath;
|
||||
|
||||
if (exists(cleanPath)) {
|
||||
voicesContent = read(cleanPath, 'string') as string;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (voicesContent) {
|
||||
voicesCache = JSON.parse(voicesContent);
|
||||
} else {
|
||||
// Fallback to empty voices list if file doesn't exist
|
||||
voicesCache = { voices: [] };
|
||||
}
|
||||
} catch (error) {
|
||||
// Fallback to empty voices list if file doesn't exist
|
||||
voicesCache = { voices: [] };
|
||||
}
|
||||
}
|
||||
return voicesCache;
|
||||
};
|
||||
|
||||
// Extract voice names and IDs for help text
|
||||
const getVoicesList = async () => {
|
||||
const voicesData = await getVoicesData();
|
||||
return voicesData.voices.map((voice: any) => `${voice.name} (${voice.voice_id})`).join(', ');
|
||||
};
|
||||
|
||||
const getVoiceNames = async () => {
|
||||
const voicesData = await getVoicesData();
|
||||
return voicesData.voices.map((voice: any) => voice.name);
|
||||
};
|
||||
|
||||
const findVoiceIdByName = async (name: string): Promise<string | undefined> => {
|
||||
const voicesData = await getVoicesData();
|
||||
const voice = voicesData.voices.find((v: any) => v.name.toLowerCase() === name.toLowerCase());
|
||||
return voice?.voice_id;
|
||||
};
|
||||
|
||||
export const TTSOptionsSchema = () => {
|
||||
const baseSchema = OptionsSchema().pick({
|
||||
prompt: true,
|
||||
include: true,
|
||||
dst: true,
|
||||
logLevel: true,
|
||||
config: true,
|
||||
api_key: true,
|
||||
alt: true,
|
||||
});
|
||||
|
||||
// Create a synchronous voices list for help text
|
||||
let voicesHelpText = 'Voice ID or name to use for speech generation. Common voices: Rachel, Clyde, Sarah, Laura, Thomas, Charlie, George (default), Callum, River, Harry, Liam, Alice, Matilda, Will, Jessica, Eric, Chris, Brian, Daniel, Lily, Bill';
|
||||
|
||||
// Try to load voices synchronously for help text
|
||||
try {
|
||||
const possiblePaths = [
|
||||
path.resolve('src/lib/voices.json'),
|
||||
path.resolve('lib/voices.json'),
|
||||
];
|
||||
|
||||
for (const voicesPath of possiblePaths) {
|
||||
if (exists(voicesPath)) {
|
||||
const voicesContent = read(voicesPath, 'string') as string;
|
||||
const voicesData = JSON.parse(voicesContent);
|
||||
const voicesList = voicesData.voices.slice(0, 10).map((voice: any) => voice.name).join(', ');
|
||||
voicesHelpText = `Voice ID or name to use for speech generation. Available voices: ${voicesList} (and ${voicesData.voices.length - 10} more)`;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Use fallback help text if loading fails
|
||||
}
|
||||
|
||||
return baseSchema.extend({
|
||||
dst: z.string().describe('Destination path for the output audio file. Required.'),
|
||||
prompt: z.string().optional().describe('The text to convert to speech.'),
|
||||
voiceId: z.string().default('JBFqnCBsd6RMkjVDRZzb').describe(voicesHelpText),
|
||||
outputFormat: z.enum(['mp3_22050_32', 'mp3_44100_32', 'mp3_44100_64', 'mp3_44100_96', 'mp3_44100_128', 'mp3_44100_192', 'pcm_16000', 'pcm_22050', 'pcm_24000', 'pcm_44100', 'ulaw_8000']).default('mp3_44100_128').describe('Output format of the generated audio.'),
|
||||
modelId: z.string().default('eleven_multilingual_v2').describe('Model ID to use for speech generation.'),
|
||||
languageCode: z.string().optional().describe('Language code (ISO 639-1) to enforce for the model.'),
|
||||
stability: z.number().min(0).max(1).optional().describe('Voice stability (0-1).'),
|
||||
similarityBoost: z.number().min(0).max(1).optional().describe('Voice similarity boost (0-1).'),
|
||||
style: z.number().min(0).max(1).optional().describe('Voice style (0-1).'),
|
||||
useSpeakerBoost: z.boolean().optional().describe('Use speaker boost for voice enhancement.'),
|
||||
seed: z.number().optional().describe('Seed for deterministic generation (0-4294967295).'),
|
||||
previousText: z.string().optional().describe('Text that came before the current text for continuity.'),
|
||||
nextText: z.string().optional().describe('Text that comes after the current text for continuity.'),
|
||||
applyTextNormalization: z.enum(['auto', 'on', 'off']).default('auto').describe('Text normalization mode.'),
|
||||
applyLanguageTextNormalization: z.boolean().default(false).describe('Apply language-specific text normalization.'),
|
||||
usePvcAsIvc: z.boolean().default(false).describe('Use PVC as IVC (deprecated).'),
|
||||
});
|
||||
}
|
||||
|
||||
export const ttsCommand = async (argv: any) => {
|
||||
const logger = getLogger(argv);
|
||||
|
||||
if (argv.include && isString(argv.include)) {
|
||||
argv.include = [argv.include];
|
||||
}
|
||||
|
||||
try {
|
||||
const parsedOptions = TTSOptionsSchema().parse(argv);
|
||||
const { include, dst, ...rest } = parsedOptions;
|
||||
|
||||
let textContent = '';
|
||||
|
||||
// Handle voice name to ID conversion
|
||||
let voiceId = parsedOptions.voiceId;
|
||||
if (voiceId && !voiceId.match(/^[a-zA-Z0-9]{20}$/)) {
|
||||
// If voiceId doesn't look like an ID (20 alphanumeric chars), treat it as a name
|
||||
const foundVoiceId = await findVoiceIdByName(voiceId);
|
||||
if (foundVoiceId) {
|
||||
voiceId = foundVoiceId;
|
||||
logger.info(`Using voice "${parsedOptions.voiceId}" (${voiceId})`);
|
||||
} else {
|
||||
const availableVoices = await getVoiceNames();
|
||||
logger.warn(`Voice name "${voiceId}" not found. Available voices: ${availableVoices.join(', ')}`);
|
||||
logger.info(`Using default voice ID: ${parsedOptions.voiceId}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Get text from --prompt or --include file
|
||||
if (parsedOptions.prompt) {
|
||||
const promptMessage = await resolvePrompt(parsedOptions);
|
||||
textContent = promptMessage?.content as string || '';
|
||||
} else if (include && include.length > 0) {
|
||||
// Read text from file(s)
|
||||
const filePath = include[0]; // Use first file
|
||||
if (!exists(filePath)) {
|
||||
logger.error(`Input file not found at: ${filePath}`);
|
||||
return;
|
||||
}
|
||||
textContent = read(filePath, 'string') as string;
|
||||
logger.info(`Reading text from file: ${filePath}`);
|
||||
}
|
||||
|
||||
if (!textContent.trim()) {
|
||||
logger.error('No text provided. Use --prompt "text" or --include path/to/textfile.txt');
|
||||
return;
|
||||
}
|
||||
|
||||
if (!dst) {
|
||||
logger.error('--dst is required to specify the output audio file path.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Prepare voice settings if any are specified
|
||||
let voiceSettings = null;
|
||||
if (parsedOptions.stability !== undefined ||
|
||||
parsedOptions.similarityBoost !== undefined ||
|
||||
parsedOptions.style !== undefined ||
|
||||
parsedOptions.useSpeakerBoost !== undefined) {
|
||||
voiceSettings = {
|
||||
stability: parsedOptions.stability,
|
||||
similarityBoost: parsedOptions.similarityBoost,
|
||||
style: parsedOptions.style,
|
||||
useSpeakerBoost: parsedOptions.useSpeakerBoost,
|
||||
};
|
||||
}
|
||||
|
||||
logger.info(`Converting text to speech: "${textContent.substring(0, 100)}${textContent.length > 100 ? '...' : ''}"`);
|
||||
|
||||
const audioBuffer = await generateSpeech({
|
||||
text: textContent,
|
||||
voiceId: voiceId,
|
||||
outputFormat: parsedOptions.outputFormat,
|
||||
modelId: parsedOptions.modelId,
|
||||
languageCode: parsedOptions.languageCode,
|
||||
voiceSettings,
|
||||
seed: parsedOptions.seed,
|
||||
previousText: parsedOptions.previousText,
|
||||
nextText: parsedOptions.nextText,
|
||||
applyTextNormalization: parsedOptions.applyTextNormalization,
|
||||
applyLanguageTextNormalization: parsedOptions.applyLanguageTextNormalization,
|
||||
usePvcAsIvc: parsedOptions.usePvcAsIvc,
|
||||
config: parsedOptions.config,
|
||||
api_key: parsedOptions.api_key,
|
||||
logger,
|
||||
});
|
||||
|
||||
if (audioBuffer) {
|
||||
const vars = variables(parsedOptions);
|
||||
const dstPath = path.resolve(resolve(dst, parsedOptions.alt, vars));
|
||||
write(dstPath, audioBuffer);
|
||||
logger.info(`Audio saved to: ${dstPath}`);
|
||||
} else {
|
||||
logger.error('Failed to generate audio.');
|
||||
}
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error('Failed to parse options or generate speech:', error.message, error.issues, error.stack);
|
||||
}
|
||||
};
|
||||
116
packages/kbot/src/lib/tts-elevenlabs.ts
Normal file
116
packages/kbot/src/lib/tts-elevenlabs.ts
Normal file
@ -0,0 +1,116 @@
|
||||
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
|
||||
import { getLogger } from '../index.js';
|
||||
import { loadConfig } from '../config.js';
|
||||
|
||||
// Output formats accepted by the ElevenLabs text-to-speech API:
// mp3_<sampleRate>_<bitrate>, raw PCM at several sample rates, and ulaw_8000.
type OutputFormat =
    | "mp3_22050_32" | "mp3_44100_32" | "mp3_44100_64" | "mp3_44100_96" | "mp3_44100_128" | "mp3_44100_192"
    | "pcm_16000" | "pcm_22050" | "pcm_24000" | "pcm_44100" | "ulaw_8000";

/**
 * Options for {@link generateSpeech}. Mirrors the parameters of the
 * ElevenLabs text-to-speech `convert` endpoint, plus local plumbing
 * fields (`config`, `api_key`, `logger`).
 */
export interface TTSOptions {
    /** Text to convert to speech (required). */
    text: string;
    /** ElevenLabs voice ID; generateSpeech falls back to 'JBFqnCBsd6RMkjVDRZzb'. */
    voiceId?: string;
    /** Audio encoding of the result; generateSpeech falls back to 'mp3_44100_128'. */
    outputFormat?: OutputFormat;
    /** Model ID; generateSpeech falls back to 'eleven_multilingual_v2'. */
    modelId?: string;
    /** Language code to enforce for the model, or null/undefined for auto. */
    languageCode?: string | null;
    /** Optional per-request voice tuning; null/undefined keeps API defaults. */
    voiceSettings?: {
        stability?: number;
        similarityBoost?: number;
        style?: number;
        useSpeakerBoost?: boolean;
    } | null;
    /** Pronunciation dictionaries to apply, by ID and version. */
    pronunciationDictionaryLocators?: Array<{
        pronunciationDictionaryId: string;
        versionId: string;
    }> | null;
    /** Seed for deterministic generation. */
    seed?: number | null;
    /** Text preceding the current text, for prosodic continuity. */
    previousText?: string | null;
    /** Text following the current text, for prosodic continuity. */
    nextText?: string | null;
    /** Request IDs of preceding generations, for continuity across requests. */
    previousRequestIds?: string[] | null;
    /** Request IDs of following generations, for continuity across requests. */
    nextRequestIds?: string[] | null;
    /** Text normalization mode; generateSpeech falls back to 'auto'. */
    applyTextNormalization?: 'auto' | 'on' | 'off';
    /** Apply language-specific text normalization; defaults to false. */
    applyLanguageTextNormalization?: boolean;
    /** Use PVC as IVC (deprecated per the options schema); defaults to false. */
    usePvcAsIvc?: boolean;
    // Local plumbing, not forwarded to the API:
    /** Config object passed to loadConfig() to locate the API key. */
    config?: any;
    /** Explicit API key; overrides the key from config (elevenlabs.key). */
    api_key?: string;
    /** Logger instance; a default logger is created when omitted. */
    logger?: any;
}
|
||||
|
||||
export const generateSpeech = async (options: TTSOptions): Promise<Buffer> => {
|
||||
const logger = options.logger || getLogger({ logLevel: 4 });
|
||||
|
||||
// Get API key from options or config
|
||||
const config = loadConfig(options);
|
||||
const apiKey = options.api_key || config?.elevenlabs?.key;
|
||||
|
||||
if (!apiKey) {
|
||||
throw new Error('ElevenLabs API key not found. Please provide it via --api_key or in your config file under elevenlabs.key');
|
||||
}
|
||||
|
||||
const client = new ElevenLabsClient({
|
||||
apiKey: apiKey
|
||||
});
|
||||
|
||||
try {
|
||||
logger.info(`Generating speech with ElevenLabs...`);
|
||||
logger.debug(`Voice ID: ${options.voiceId || 'JBFqnCBsd6RMkjVDRZzb'}`);
|
||||
logger.debug(`Model: ${options.modelId || 'eleven_multilingual_v2'}`);
|
||||
logger.debug(`Output Format: ${options.outputFormat || 'mp3_44100_128'}`);
|
||||
logger.debug(`Text length: ${options.text.length} characters`);
|
||||
|
||||
const audioStream = await client.textToSpeech.convert(
|
||||
options.voiceId || "JBFqnCBsd6RMkjVDRZzb",
|
||||
{
|
||||
outputFormat: options.outputFormat || "mp3_44100_128",
|
||||
text: options.text,
|
||||
modelId: options.modelId || "eleven_multilingual_v2",
|
||||
languageCode: options.languageCode,
|
||||
voiceSettings: options.voiceSettings,
|
||||
pronunciationDictionaryLocators: options.pronunciationDictionaryLocators,
|
||||
seed: options.seed,
|
||||
previousText: options.previousText,
|
||||
nextText: options.nextText,
|
||||
previousRequestIds: options.previousRequestIds,
|
||||
nextRequestIds: options.nextRequestIds,
|
||||
applyTextNormalization: options.applyTextNormalization || 'auto',
|
||||
applyLanguageTextNormalization: options.applyLanguageTextNormalization || false,
|
||||
usePvcAsIvc: options.usePvcAsIvc || false,
|
||||
}
|
||||
);
|
||||
|
||||
// The convert endpoint returns a ReadableStream, we need to collect all chunks
|
||||
const chunks: Uint8Array[] = [];
|
||||
const reader = audioStream.getReader();
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
chunks.push(value);
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
|
||||
// Combine all chunks into a single buffer
|
||||
const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
|
||||
const audioBuffer = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of chunks) {
|
||||
audioBuffer.set(chunk, offset);
|
||||
offset += chunk.length;
|
||||
}
|
||||
|
||||
const finalBuffer = Buffer.from(audioBuffer);
|
||||
logger.info(`Successfully generated ${finalBuffer.length} bytes of audio`);
|
||||
|
||||
return finalBuffer;
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error('Failed to generate speech with ElevenLabs:', error.message);
|
||||
if (error.response?.data) {
|
||||
logger.error('API Error Details:', error.response.data);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
3349
packages/kbot/src/lib/voices.json
Normal file
3349
packages/kbot/src/lib/voices.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,7 @@ import { run } from './commands/run.js'
|
||||
|
||||
import { transcribeCommand, TranscribeOptionsSchema } from './commands/transcribe.js'
|
||||
import { imageCommand, ImageOptionsSchema } from './commands/images.js'
|
||||
import { ttsCommand, TTSOptionsSchema } from './commands/tts.js'
|
||||
|
||||
export const logger: any = createLogger('llm-tools')
|
||||
|
||||
@ -61,6 +62,12 @@ yargs(hideBin(process.argv))
|
||||
(yargs) => toYargs(yargs, TranscribeOptionsSchema(), yargOptions),
|
||||
transcribeCommand
|
||||
)
|
||||
.command(
|
||||
'tts',
|
||||
'Convert text to speech using ElevenLabs',
|
||||
(yargs) => toYargs(yargs, TTSOptionsSchema(), yargOptions),
|
||||
ttsCommand
|
||||
)
|
||||
.command(
|
||||
'types',
|
||||
'Generate types',
|
||||
|
||||
194
packages/kbot/tests/unit/audio/tts.test.ts
Normal file
194
packages/kbot/tests/unit/audio/tts.test.ts
Normal file
@ -0,0 +1,194 @@
|
||||
import { describe, it, expect, afterAll, beforeAll } from 'vitest'
|
||||
import * as path from 'node:path'
|
||||
import * as fs from 'node:fs'
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
|
||||
import { ttsCommand } from '../../../src/commands/tts.js'
|
||||
import { generateSpeech } from '../../../src/lib/tts-elevenlabs.js'
|
||||
import { getLogger } from '../../../src/index.js'
|
||||
|
||||
const TEST_DATA_DIR = './tests/unit/audio'
|
||||
const TEST_TIMEOUT = 60000 // Increased timeout for API call
|
||||
|
||||
describe('TTS Command', () => {
|
||||
|
||||
const testTextFile = path.resolve(path.join(TEST_DATA_DIR, 'test-text.txt'))
|
||||
const promptOutputFile = path.resolve(path.join(TEST_DATA_DIR, 'prompt-speech.mp3'))
|
||||
const fileOutputFile = path.resolve(path.join(TEST_DATA_DIR, 'file-speech.mp3'))
|
||||
|
||||
const cleanupFiles = () => {
|
||||
if (fs.existsSync(promptOutputFile)) {
|
||||
fs.unlinkSync(promptOutputFile)
|
||||
}
|
||||
if (fs.existsSync(fileOutputFile)) {
|
||||
fs.unlinkSync(fileOutputFile)
|
||||
}
|
||||
if (fs.existsSync(testTextFile)) {
|
||||
fs.unlinkSync(testTextFile)
|
||||
}
|
||||
}
|
||||
|
||||
beforeAll(() => {
|
||||
if (!fs.existsSync(TEST_DATA_DIR)) {
|
||||
fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
|
||||
}
|
||||
cleanupFiles()
|
||||
|
||||
// Create test text file
|
||||
write(testTextFile, 'Hello, this is a test of the text-to-speech functionality. The quick brown fox jumps over the lazy dog.')
|
||||
})
|
||||
|
||||
afterAll(cleanupFiles)
|
||||
|
||||
it('should generate speech from a prompt and save it to a file', async () => {
|
||||
const options = {
|
||||
prompt: 'Hello world, this is a test of ElevenLabs text-to-speech integration.',
|
||||
dst: promptOutputFile,
|
||||
logLevel: 2,
|
||||
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
|
||||
outputFormat: 'mp3_44100_128' as const,
|
||||
modelId: 'eleven_multilingual_v2',
|
||||
dry: false // Set to true to skip actual API call
|
||||
}
|
||||
|
||||
await ttsCommand(options)
|
||||
|
||||
if (!options.dry) {
|
||||
expect(exists(promptOutputFile)).toBe('file')
|
||||
// Check that the file has some content
|
||||
const stats = fs.statSync(promptOutputFile)
|
||||
expect(stats.size).toBeGreaterThan(0)
|
||||
}
|
||||
|
||||
}, TEST_TIMEOUT)
|
||||
|
||||
it('should generate speech from a text file and save it to a file', async () => {
|
||||
const options = {
|
||||
include: [testTextFile],
|
||||
dst: fileOutputFile,
|
||||
logLevel: 2,
|
||||
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
|
||||
outputFormat: 'mp3_44100_128' as const,
|
||||
modelId: 'eleven_multilingual_v2',
|
||||
dry: false // Set to true to skip actual API call
|
||||
}
|
||||
|
||||
await ttsCommand(options)
|
||||
|
||||
if (!options.dry) {
|
||||
expect(exists(fileOutputFile)).toBe('file')
|
||||
// Check that the file has some content
|
||||
const stats = fs.statSync(fileOutputFile)
|
||||
expect(stats.size).toBeGreaterThan(0)
|
||||
}
|
||||
|
||||
}, TEST_TIMEOUT)
|
||||
|
||||
it('should handle different output formats', async () => {
|
||||
const pcmOutputFile = path.resolve(path.join(TEST_DATA_DIR, 'test-pcm.wav'))
|
||||
|
||||
const options = {
|
||||
prompt: 'Testing PCM output format',
|
||||
dst: pcmOutputFile,
|
||||
logLevel: 2,
|
||||
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
|
||||
outputFormat: 'pcm_44100' as const,
|
||||
modelId: 'eleven_multilingual_v2',
|
||||
dry: true // Use dry run to avoid API call
|
||||
}
|
||||
|
||||
await ttsCommand(options)
|
||||
|
||||
// In dry run, file won't be created, but command should not throw
|
||||
expect(true).toBe(true)
|
||||
|
||||
// Cleanup
|
||||
if (fs.existsSync(pcmOutputFile)) {
|
||||
fs.unlinkSync(pcmOutputFile)
|
||||
}
|
||||
|
||||
}, TEST_TIMEOUT)
|
||||
|
||||
it('should handle voice settings parameters', async () => {
|
||||
const voiceSettingsFile = path.resolve(path.join(TEST_DATA_DIR, 'voice-settings-test.mp3'))
|
||||
|
||||
const options = {
|
||||
prompt: 'Testing voice settings with stability and similarity boost',
|
||||
dst: voiceSettingsFile,
|
||||
logLevel: 2,
|
||||
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
|
||||
outputFormat: 'mp3_44100_128' as const,
|
||||
modelId: 'eleven_multilingual_v2',
|
||||
stability: 0.75,
|
||||
similarityBoost: 0.8,
|
||||
style: 0.5,
|
||||
useSpeakerBoost: true,
|
||||
dry: true // Use dry run to avoid API call
|
||||
}
|
||||
|
||||
await ttsCommand(options)
|
||||
|
||||
// In dry run, file won't be created, but command should not throw
|
||||
expect(true).toBe(true)
|
||||
|
||||
// Cleanup
|
||||
if (fs.existsSync(voiceSettingsFile)) {
|
||||
fs.unlinkSync(voiceSettingsFile)
|
||||
}
|
||||
|
||||
}, TEST_TIMEOUT)
|
||||
|
||||
it('should validate required parameters', async () => {
|
||||
// Test missing text content
|
||||
const options = {
|
||||
dst: 'test-output.mp3',
|
||||
logLevel: 2,
|
||||
}
|
||||
|
||||
// This should handle the error gracefully
|
||||
await expect(async () => {
|
||||
await ttsCommand(options)
|
||||
}).not.toThrow()
|
||||
|
||||
})
|
||||
|
||||
it('should validate missing destination', async () => {
|
||||
// Test missing destination
|
||||
const options = {
|
||||
prompt: 'Test text',
|
||||
logLevel: 2,
|
||||
}
|
||||
|
||||
// This should handle the error gracefully
|
||||
await expect(async () => {
|
||||
await ttsCommand(options)
|
||||
}).not.toThrow()
|
||||
|
||||
})
|
||||
|
||||
// Test the lib function directly
|
||||
it('should test the generateSpeech lib function with mock', async () => {
|
||||
const logger = getLogger({ logLevel: 2 })
|
||||
|
||||
// This test will fail without a real API key, but we can test the structure
|
||||
const options = {
|
||||
text: 'Hello, testing the lib function directly',
|
||||
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
|
||||
outputFormat: 'mp3_44100_128' as const,
|
||||
modelId: 'eleven_multilingual_v2',
|
||||
logger,
|
||||
api_key: 'test-key' // This will fail but we can catch the error
|
||||
}
|
||||
|
||||
try {
|
||||
await generateSpeech(options)
|
||||
// If this succeeds, great!
|
||||
expect(true).toBe(true)
|
||||
} catch (error: any) {
|
||||
// Expected to fail without real API key
|
||||
expect(error.message).toContain('API key')
|
||||
}
|
||||
|
||||
})
|
||||
})
|
||||
Loading…
Reference in New Issue
Block a user