media:cpp --transform 1/3

This commit is contained in:
lovebird 2026-04-15 20:51:41 +02:00
parent 07dbf000f3
commit 04a7bd1da4
7 changed files with 4801 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,279 @@
import { apiClient } from "@/lib/db";
// Minimal console-backed logger; every entry is tagged with the provider name.
const logger = {
  debug(message: string, data?: any) { console.debug(`[AIMLAPI] ${message}`, data); },
  info(message: string, data?: any) { console.info(`[AIMLAPI] ${message}`, data); },
  warn(message: string, data?: any) { console.warn(`[AIMLAPI] ${message}`, data); },
  error(message: string, data?: any) { console.error(`[AIMLAPI] ${message}`, data); },
};
const AIMLAPI_BASE_URL = 'https://api.aimlapi.com';
// Fetch the caller's AIML API key from the server-side secrets store.
// Resolves to null (never throws) when the key is absent or the request fails.
const getAimlApiKey = async (): Promise<string | null> => {
  try {
    const secrets = await apiClient<{ api_keys?: Record<string, any> }>('/api/me/secrets');
    const storedKey = secrets.api_keys?.aimlapi_api_key;
    if (storedKey) {
      return storedKey;
    }
    logger.error('No AIML API key found. Please add your AIML API key in your profile settings.');
    return null;
  } catch (error) {
    logger.error('Error getting AIML API key:', error);
    return null;
  }
};
// Read a File and resolve with its raw base64 payload (data-URL prefix stripped).
// Rejects when the read fails or when the result is not a data URL, instead of
// silently resolving `undefined`.
const fileToBase64 = (file: File): Promise<string> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const result = reader.result as string;
      // Everything after the first comma is the base64 payload.
      const base64 = result.split(',')[1];
      if (base64 === undefined) {
        reject(new Error('Unexpected FileReader result: not a data URL'));
        return;
      }
      resolve(base64);
    };
    reader.onerror = error => reject(error);
    // Attach handlers before starting the read so no event can be missed.
    reader.readAsDataURL(file);
  });
};
// Result of a text-to-image or image-to-image call.
interface ImageResult {
  // Raw bytes of the generated/edited image.
  imageData: ArrayBuffer;
  // Optional accompanying text; undefined for providers that return none.
  text?: string;
}
/**
 * Generate image using AIML API text-to-image.
 * Supports various models including ByteDance SeeDream v4, Flux, Stable Diffusion, etc.
 *
 * @param prompt Text prompt for the image.
 * @param model  AIML API model identifier. NOTE(review): the router's catalog lists
 *               'bytedance/seedream-v4-text-to-image'; confirm this default
 *               ('bytedance/seedream-v4') is a valid endpoint model name.
 * @param apiKey Optional explicit key; otherwise read from profile secrets.
 * @returns Image bytes (text is always undefined), or null when no API key is available.
 * @throws Error on HTTP failure or when the response contains no image data.
 */
export const createImageWithAimlApi = async (
  prompt: string,
  model: string = 'bytedance/seedream-v4',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getAimlApiKey();
  if (!key) {
    logger.error('No AIML API key found. Please provide an API key or set it in your profile.');
    return null;
  }
  try {
    logger.info('Starting AIML API image generation', {
      model,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`;
    // Build request body based on model requirements
    const requestBody: any = {
      model,
      prompt,
    };
    // Most models support these common parameters
    if (!model.includes('dall-e')) {
      requestBody.image_size = { width: 1024, height: 1024 };
      requestBody.num_images = 1;
      requestBody.sync_mode = true;
    } else {
      // DALL-E uses different parameters (n / size instead of num_images / image_size)
      requestBody.n = 1;
      requestBody.size = '1024x1024';
    }
    logger.debug('AIML API request body:', requestBody);
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });
    if (!response.ok) {
      const errorText = await response.text();
      logger.error('AIML API error:', { status: response.status, error: errorText });
      throw new Error(`AIML API error: ${response.status} - ${errorText}`);
    }
    const data = await response.json();
    logger.debug('AIML API response:', data);
    // Handle response format: { data: [{ url: "...", b64_json: "..." }] }
    if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
      throw new Error('Invalid response from AIML API: no image data');
    }
    const firstResult = data.data[0];
    // Prefer URL over base64 if both are provided
    let arrayBuffer: ArrayBuffer;
    if (firstResult.url) {
      logger.info('Image URL received from AIML API:', firstResult.url);
      // Fetch the image from URL
      const imageResponse = await fetch(firstResult.url);
      if (!imageResponse.ok) {
        throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`);
      }
      arrayBuffer = await imageResponse.arrayBuffer();
    } else if (firstResult.b64_json) {
      logger.info('Base64 image received from AIML API');
      // Convert base64 to ArrayBuffer: atob yields a binary string, copy byte-by-byte
      const binaryString = atob(firstResult.b64_json);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('No image URL or base64 data in AIML API response');
    }
    logger.info('Successfully generated image with AIML API', {
      model,
      imageSize: arrayBuffer.byteLength,
    });
    return {
      imageData: arrayBuffer,
      text: undefined, // AIML API doesn't return text descriptions
    };
  } catch (error: any) {
    // Log with context, then rethrow so callers can surface the failure.
    logger.error('AIML API image generation failed:', {
      error: error.message,
      model,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
/**
 * Edit image using AIML API image-to-image.
 * Supports models like SeeDream v4 Edit, SeedEdit 3.0, Flux i2i, etc.
 *
 * @param prompt     Edit instruction passed to the model.
 * @param imageFiles Reference image(s). All files are now forwarded via the
 *                   endpoint's array-valued `image_urls` field (previously only
 *                   the first file was sent and the rest silently dropped).
 * @param model      AIML API model identifier.
 * @param apiKey     Optional explicit key; otherwise read from profile secrets.
 * @returns Edited image bytes (text is always undefined), or null when no API key is available.
 * @throws Error when no input image is given, on HTTP failure, or when the
 *         response contains no image data.
 */
export const editImageWithAimlApi = async (
  prompt: string,
  imageFiles: File[],
  model: string = 'bytedance/seedream-v4-edit',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getAimlApiKey();
  if (!key) {
    logger.error('No AIML API key found. Please provide an API key or set it in your profile.');
    return null;
  }
  try {
    logger.info('Starting AIML API image editing', {
      model,
      imageCount: imageFiles.length,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    // Fail fast with a clear message instead of crashing on imageFiles[0].
    if (imageFiles.length === 0) {
      throw new Error('No input image provided for editing');
    }
    // Convert every reference image to a base64 data URL.
    const imageUrls = await Promise.all(
      imageFiles.map(async (file) => `data:image/png;base64,${await fileToBase64(file)}`)
    );
    const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`;
    const requestBody: any = {
      model,
      prompt,
      num_images: 1,
      sync_mode: true,
      // AIML API edit endpoint requires image_urls for all models
      image_urls: imageUrls,
    };
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });
    if (!response.ok) {
      const errorText = await response.text();
      logger.error('AIML API error:', { status: response.status, error: errorText });
      throw new Error(`AIML API error: ${response.status} - ${errorText}`);
    }
    const data = await response.json();
    logger.debug('AIML API response (edit):', data);
    // Handle response format: { data: [{ url: "...", b64_json: "..." }] }
    if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
      throw new Error('Invalid response from AIML API: no image data');
    }
    const firstResult = data.data[0];
    // Prefer URL over base64 if both are provided
    let arrayBuffer: ArrayBuffer;
    if (firstResult.url) {
      logger.info('Edited image URL received from AIML API:', firstResult.url);
      const imageResponse = await fetch(firstResult.url);
      if (!imageResponse.ok) {
        throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`);
      }
      arrayBuffer = await imageResponse.arrayBuffer();
    } else if (firstResult.b64_json) {
      logger.info('Base64 edited image received from AIML API');
      // Convert base64 to ArrayBuffer: atob yields a binary string, copy byte-by-byte
      const binaryString = atob(firstResult.b64_json);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('No image URL or base64 data in AIML API response');
    }
    logger.info('Successfully edited image with AIML API', {
      model,
      imageSize: arrayBuffer.byteLength,
    });
    return {
      imageData: arrayBuffer,
      text: undefined,
    };
  } catch (error: any) {
    // Log with context, then rethrow so callers can surface the failure.
    logger.error('AIML API image editing failed:', {
      error: error.message,
      model,
      imageCount: imageFiles.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
// Export the logger for consistency
export { logger };

View File

@ -0,0 +1,304 @@
import { apiClient } from "@/lib/db";
// Minimal console-backed logger; every entry is tagged with the provider name.
const logger = {
  debug(message: string, data?: any) { console.debug(`[BRIA] ${message}`, data); },
  info(message: string, data?: any) { console.info(`[BRIA] ${message}`, data); },
  warn(message: string, data?: any) { console.warn(`[BRIA] ${message}`, data); },
  error(message: string, data?: any) { console.error(`[BRIA] ${message}`, data); },
};
const BRIA_BASE_URL = 'https://engine.prod.bria-api.com/v1';
// Get user's Bria API key from the server-side secrets store.
// Resolves to null (never throws): a missing key and a transport error are
// both logged and reported as null so callers can bail out gracefully.
const getBriaApiKey = async (): Promise<string | null> => {
  try {
    // The secrets endpoint returns all of the user's stored API keys.
    const data = await apiClient<{ api_keys?: Record<string, any> }>('/api/me/secrets');
    const key = data.api_keys?.bria_api_key;
    if (!key) {
      logger.error('No Bria API key found. Please add your Bria API key in your profile settings.');
      return null;
    }
    return key;
  } catch (error) {
    logger.error('Error getting Bria API key:', error);
    return null;
  }
};
// Read a File and resolve with its raw base64 payload (data-URL prefix stripped).
// Rejects when the read fails or when the result is not a data URL, instead of
// silently resolving `undefined`.
const fileToBase64 = (file: File): Promise<string> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const result = reader.result as string;
      // Everything after the first comma is the base64 payload.
      const base64 = result.split(',')[1];
      if (base64 === undefined) {
        reject(new Error('Unexpected FileReader result: not a data URL'));
        return;
      }
      resolve(base64);
    };
    reader.onerror = error => reject(error);
    // Attach handlers before starting the read so no event can be missed.
    reader.readAsDataURL(file);
  });
};
// Poll a result URL with HEAD requests until the asynchronously generated
// image is available (response is OK and the body is non-empty).
// Returns true once ready, false after maxAttempts tries.
// Fixes: the catch path previously retried immediately (tight loop while the
// network was down), and the final failed attempt slept pointlessly.
const pollForImage = async (url: string, maxAttempts = 60, delayMs = 2000): Promise<boolean> => {
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const response = await fetch(url, { method: 'HEAD' });
      if (response.ok && response.headers.get('content-length') !== '0') {
        return true; // Image is ready
      }
    } catch (error) {
      logger.debug(`Poll attempt ${attempt + 1} failed, retrying...`);
    }
    // Wait before the next attempt; skip the wait after the last one.
    if (attempt < maxAttempts - 1) {
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
  return false;
};
// Result of a text-to-image or image-to-image call.
interface ImageResult {
  // Raw bytes of the generated/edited image.
  imageData: ArrayBuffer;
  // Optional accompanying text; undefined for providers that return none.
  text?: string;
}
/**
 * Generate image using Bria text-to-image API.
 *
 * NOTE(review): the original doc said "model version 3.2" but the default
 * parameter is 'bria-2.3-fast' — confirm which default is intended.
 *
 * @param prompt Text prompt for the image.
 * @param model  Encoded as "bria-{version}-{speed}", e.g. "bria-3.2-fast",
 *               "bria-2.3-base", "bria-2.2-hd".
 * @param apiKey Optional explicit key; otherwise read from profile secrets.
 * @returns Image bytes (text is always undefined), or null when no API key is available.
 * @throws Error on HTTP failure, moderation block, or polling timeout.
 */
export const createImageWithBria = async (
  prompt: string,
  model: string = 'bria-2.3-fast',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getBriaApiKey();
  if (!key) {
    logger.error('No Bria API key found. Please provide an API key or set it in your profile.');
    return null;
  }
  try {
    logger.info('Starting Bria image generation', {
      model,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    // Parse model string to determine endpoint and version
    // Format: "bria-{version}-{speed}" e.g., "bria-3.2-fast", "bria-2.3-base", "bria-2.2-hd"
    const parts = model.split('-');
    const version = parts[1] || '3.2';
    const speed = parts[2] || 'fast'; // fast, base, or hd
    const endpoint = `${BRIA_BASE_URL}/text-to-image/${speed}/${version}`;
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Bria authenticates via an api_token header, not a Bearer token.
        'api_token': key,
      },
      body: JSON.stringify({
        prompt,
        num_results: 1,
        sync: false, // Use async for better performance
        aspect_ratio: '1:1',
        // Fast mode uses fewer diffusion steps.
        steps_num: speed === 'fast' ? 8 : 30,
      }),
    });
    if (!response.ok) {
      const errorText = await response.text();
      logger.error('Bria API error:', { status: response.status, error: errorText });
      throw new Error(`Bria API error: ${response.status} - ${errorText}`);
    }
    const data = await response.json();
    logger.debug('Bria API response:', data);
    // Bria can return HTTP 200 with an error payload.
    if (data.error_code) {
      throw new Error(data.description || `Bria API error: ${data.error_code}`);
    }
    if (!data.result || !Array.isArray(data.result) || data.result.length === 0) {
      throw new Error('Invalid response from Bria API: no results');
    }
    const firstResult = data.result[0];
    // Check if result was blocked by content moderation
    if (firstResult.blocked) {
      throw new Error(firstResult.description || 'Content blocked by Bria moderation');
    }
    if (!firstResult.urls || firstResult.urls.length === 0) {
      throw new Error('No image URL in Bria response');
    }
    const imageUrl = firstResult.urls[0];
    logger.info('Image URL received from Bria:', imageUrl);
    // Async generation: the URL exists before the image is uploaded, so poll.
    logger.info('Polling for image completion...');
    const isReady = await pollForImage(imageUrl);
    if (!isReady) {
      throw new Error('Image generation timed out');
    }
    // Fetch the generated image
    const imageResponse = await fetch(imageUrl);
    if (!imageResponse.ok) {
      throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`);
    }
    const arrayBuffer = await imageResponse.arrayBuffer();
    logger.info('Successfully generated image with Bria', {
      model,
      imageSize: arrayBuffer.byteLength,
      seed: firstResult.seed,
    });
    return {
      imageData: arrayBuffer,
      text: undefined, // Bria doesn't return text descriptions
    };
  } catch (error: any) {
    // Log with context, then rethrow so callers can surface the failure.
    logger.error('Bria image generation failed:', {
      error: error.message,
      model,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
/**
 * Edit image using Bria reimagine API (structure reference).
 * Maintains the structure and depth of the input while incorporating new
 * materials, colors, and textures.
 *
 * @param prompt     Edit instruction.
 * @param imageFiles Reference images; only imageFiles[0] is used by this endpoint.
 * @param model      "bria-{version}-{speed}"; only the speed segment is used here.
 * @param apiKey     Optional explicit key; otherwise read from profile secrets.
 * @returns Edited image bytes (text is always undefined), or null when no API key is available.
 * @throws Error on HTTP failure, moderation block, or polling timeout.
 */
export const editImageWithBria = async (
  prompt: string,
  imageFiles: File[],
  model: string = 'bria-2.3-fast',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getBriaApiKey();
  if (!key) {
    logger.error('No Bria API key found. Please provide an API key or set it in your profile.');
    return null;
  }
  try {
    logger.info('Starting Bria image editing (reimagine)', {
      model,
      imageCount: imageFiles.length,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    // Convert the first image to base64 for the structure reference
    const imageBase64 = await fileToBase64(imageFiles[0]);
    const endpoint = `${BRIA_BASE_URL}/reimagine`;
    // Parse model to determine if we should use fast mode
    const parts = model.split('-');
    const speed = parts[2] || 'fast';
    const useFast = speed === 'fast';
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Bria authenticates via an api_token header, not a Bearer token.
        'api_token': key,
      },
      body: JSON.stringify({
        prompt,
        structure_image_file: imageBase64,
        structure_ref_influence: 0.75, // Good balance for maintaining structure while allowing changes
        num_results: 1,
        sync: false, // Use async for better performance
        fast: useFast,
        steps_num: useFast ? 12 : 30,
      }),
    });
    if (!response.ok) {
      const errorText = await response.text();
      logger.error('Bria API error:', { status: response.status, error: errorText });
      throw new Error(`Bria API error: ${response.status} - ${errorText}`);
    }
    const data = await response.json();
    logger.debug('Bria API response (reimagine):', data);
    // Bria can return HTTP 200 with an error payload.
    if (data.error_code) {
      throw new Error(data.description || `Bria API error: ${data.error_code}`);
    }
    if (!data.result || !Array.isArray(data.result) || data.result.length === 0) {
      throw new Error('Invalid response from Bria API: no results');
    }
    const firstResult = data.result[0];
    // Check if result was blocked by content moderation
    if (firstResult.blocked) {
      throw new Error(firstResult.description || 'Content blocked by Bria moderation');
    }
    if (!firstResult.urls || firstResult.urls.length === 0) {
      throw new Error('No image URL in Bria response');
    }
    const imageUrl = firstResult.urls[0];
    logger.info('Edited image URL received from Bria:', imageUrl);
    // Async generation: the URL exists before the image is uploaded, so poll.
    logger.info('Polling for edited image completion...');
    const isReady = await pollForImage(imageUrl);
    if (!isReady) {
      throw new Error('Image editing timed out');
    }
    // Fetch the edited image
    const imageResponse = await fetch(imageUrl);
    if (!imageResponse.ok) {
      throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`);
    }
    const arrayBuffer = await imageResponse.arrayBuffer();
    logger.info('Successfully edited image with Bria', {
      model,
      imageSize: arrayBuffer.byteLength,
      seed: firstResult.seed,
    });
    return {
      imageData: arrayBuffer,
      text: undefined,
    };
  } catch (error: any) {
    // Log with context, then rethrow so callers can surface the failure.
    logger.error('Bria image editing failed:', {
      error: error.message,
      model,
      imageCount: imageFiles.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
// Export the logger for consistency
export { logger };

View File

@ -0,0 +1,419 @@
/**
 * Image Generation Router
 * Routes image generation requests to the appropriate AI provider based on the model format.
 * Model format: "provider/model-name"
 *
 * Supported providers:
 * - google: Google Generative AI (Gemini models)
 * - bria: Bria.ai (text-to-image and reimagine editing)
 * - aimlapi: AIML API (ByteDance, Flux, Imagen, DALL-E, Stability, Recraft models)
 * - replicate: Replicate API (currently disabled; its import is commented out below)
 */
import { createImage as createImageGoogle, editImage as editImageGoogle } from '@/image-api';
//import { createImageWithReplicate, editImageWithReplicate } from '@/lib/replicate';
import { createImageWithBria, editImageWithBria } from '@/lib/bria';
import { createImageWithAimlApi, editImageWithAimlApi } from '@/lib/aimlapi';
// Minimal console-backed logger; every entry is tagged with the router name.
const logger = {
  debug(message: string, data?: any) { console.debug(`[IMAGE-ROUTER] ${message}`, data); },
  info(message: string, data?: any) { console.info(`[IMAGE-ROUTER] ${message}`, data); },
  warn(message: string, data?: any) { console.warn(`[IMAGE-ROUTER] ${message}`, data); },
  error(message: string, data?: any) { console.error(`[IMAGE-ROUTER] ${message}`, data); },
};
// Result of a text-to-image or image-to-image call, regardless of provider.
export interface ImageResult {
  // Raw bytes of the generated/edited image.
  imageData: ArrayBuffer;
  // Optional accompanying text; undefined for providers that return none.
  text?: string;
}
// Static catalog entry describing one selectable model.
export interface ModelInfo {
  // Routing key used by createImage/editImage (e.g. 'google', 'bria', 'aimlapi').
  provider: string;
  // Provider-specific model identifier (may itself contain slashes).
  modelName: string;
  // Human-readable label for UI pickers.
  displayName: string;
  // Whether the model can generate from a text prompt alone.
  supportsTextToImage: boolean;
  // Whether the model can edit/transform existing images.
  supportsImageToImage: boolean;
}
// Available models configuration.
// Each entry's "provider/modelName" pair must round-trip through
// parseModelString/getModelString and match a case in createImage/editImage.
export const AVAILABLE_MODELS: ModelInfo[] = [
  // Google Generative AI models (routed directly, not via AIML API)
  {
    provider: 'google',
    modelName: 'gemini-3-pro-image-preview',
    displayName: 'Google Gemini 3 Pro (Image Preview)',
    supportsTextToImage: true,
    supportsImageToImage: true,
  },
  {
    provider: 'google',
    modelName: 'gemini-3.1-flash-image-preview',
    displayName: 'Google Gemini 3.1 Flash (Image Preview)',
    supportsTextToImage: true,
    supportsImageToImage: true,
  },
  /* Duplicate model name causing key conflicts - temporarily disabled
  {
    provider: 'google',
    modelName: 'gemini-3-pro-image-preview',
    displayName: 'Google Gemini 2.5 Flash (Image Preview)',
    supportsTextToImage: true,
    supportsImageToImage: true,
  },
  */
  // NOTE(review): the 'replicate' provider has no case in createImage/editImage
  // (its import is commented out), so selecting this entry currently throws
  // "Unsupported provider" — confirm whether it should be disabled like the
  // duplicate Google entry above.
  {
    provider: 'replicate',
    modelName: 'bytedance/seedream-4',
    displayName: 'Replicate SeeDream-4 (Bytedance)',
    supportsTextToImage: true,
    supportsImageToImage: true,
  },
  // Bria.ai models ("bria-{version}-{speed}" is parsed by the bria client)
  {
    provider: 'bria',
    modelName: 'bria-3.2-fast',
    displayName: 'Bria.ai 3.2 Fast',
    supportsTextToImage: true,
    supportsImageToImage: true,
  },
  {
    provider: 'bria',
    modelName: 'bria-2.3-base',
    displayName: 'Bria.ai 2.3 Base (High Quality)',
    supportsTextToImage: true,
    supportsImageToImage: true,
  },
  {
    provider: 'bria',
    modelName: 'bria-2.2-hd',
    displayName: 'Bria.ai 2.2 HD (1920x1080)',
    supportsTextToImage: true,
    supportsImageToImage: false, // HD doesn't support reimagine
  },
  // AIML API - ByteDance Models
  {
    provider: 'aimlapi',
    modelName: 'bytedance/seedream-v4-text-to-image',
    displayName: 'AIML API - SeeDream v4 (4K)',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'bytedance/seedream-v4-edit',
    displayName: 'AIML API - SeeDream v4 Edit (4K)',
    supportsTextToImage: false,
    supportsImageToImage: true,
  },
  {
    provider: 'aimlapi',
    modelName: 'bytedance/seededit-3.0-i2i',
    displayName: 'AIML API - SeedEdit 3.0',
    supportsTextToImage: false,
    supportsImageToImage: true,
  },
  {
    provider: 'aimlapi',
    modelName: 'bytedance/seedream-3.0',
    displayName: 'AIML API - SeeDream 3.0',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'bytedance/uso',
    displayName: 'AIML API - USO (i2i)',
    supportsTextToImage: false,
    supportsImageToImage: true,
  },
  {
    provider: 'aimlapi',
    modelName: 'alibaba/qwen-image',
    displayName: 'AIML API - Qwen Image',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  // AIML API - Flux Models
  {
    provider: 'aimlapi',
    modelName: 'flux-pro',
    displayName: 'AIML API - Flux Pro',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'flux-pro/v1.1',
    displayName: 'AIML API - Flux Pro v1.1',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'flux-pro/v1.1-ultra',
    displayName: 'AIML API - Flux Pro v1.1 Ultra',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'flux-realism',
    displayName: 'AIML API - Flux Realism',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'flux/dev',
    displayName: 'AIML API - Flux Dev',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'flux/dev/image-to-image',
    displayName: 'AIML API - Flux Dev i2i',
    supportsTextToImage: false,
    supportsImageToImage: true,
  },
  {
    provider: 'aimlapi',
    modelName: 'flux/schnell',
    displayName: 'AIML API - Flux Schnell (Fast)',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  // AIML API - Google Models
  {
    provider: 'aimlapi',
    modelName: 'imagen-3.0-generate-002',
    displayName: 'AIML API - Google Imagen 3',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'google/imagen4/preview',
    displayName: 'AIML API - Google Imagen 4 Preview',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'google/imagen-4.0-generate-001',
    displayName: 'AIML API - Google Imagen 4.0',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'google/imagen-4.0-fast-generate-001',
    displayName: 'AIML API - Google Imagen 4.0 Fast',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'google/imagen-4.0-ultra-generate-001',
    displayName: 'AIML API - Google Imagen 4.0 Ultra',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'google/gemini-2.5-flash-image',
    displayName: 'AIML API - Gemini 2.5 Flash Image',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'google/gemini-2.5-flash-image-edit',
    displayName: 'AIML API - Gemini 2.5 Flash Edit',
    supportsTextToImage: false,
    supportsImageToImage: true,
  },
  // AIML API - OpenAI Models
  {
    provider: 'aimlapi',
    modelName: 'dall-e-2',
    displayName: 'AIML API - DALL-E 2 (OpenAI)',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'dall-e-3',
    displayName: 'AIML API - DALL-E 3 (OpenAI)',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  // AIML API - Stability AI Models
  {
    provider: 'aimlapi',
    modelName: 'stable-diffusion-v3-medium',
    displayName: 'AIML API - Stable Diffusion 3 Medium',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  {
    provider: 'aimlapi',
    modelName: 'stable-diffusion-v35-large',
    displayName: 'AIML API - Stable Diffusion 3.5 Large',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
  // AIML API - Recraft AI
  {
    provider: 'aimlapi',
    modelName: 'recraft-v3',
    displayName: 'AIML API - Recraft v3',
    supportsTextToImage: true,
    supportsImageToImage: false,
  },
];
/**
 * Split a "provider/model-name" string into its two components.
 * The provider is lowercased; the model name keeps any further slashes.
 * Falls back to the Google provider when no "/" is present.
 */
export const parseModelString = (modelString: string): { provider: string; modelName: string } => {
  const separatorIndex = modelString.indexOf('/');
  if (separatorIndex === -1) {
    // Default to Google if no provider specified
    logger.warn('Model string missing provider, defaulting to Google', { modelString });
    return {
      provider: 'google',
      modelName: modelString,
    };
  }
  const provider = modelString.slice(0, separatorIndex).toLowerCase();
  const modelName = modelString.slice(separatorIndex + 1);
  return { provider, modelName };
};
/**
 * Re-join a provider and model name into the canonical "provider/model" form.
 */
export const getModelString = (provider: string, modelName: string): string =>
  [provider, modelName].join('/');
/**
 * Create/generate a new image from a text prompt.
 * Routes to the appropriate provider based on the model string.
 *
 * @param prompt       Text prompt.
 * @param modelString  "provider/model-name"; defaults to Google Gemini.
 * @param apiKey       Optional explicit provider API key.
 * @param aspectRatio / resolution / enableSearchGrounding / enableImageSearch
 *                     Forwarded only to the Google route; bria/aimlapi ignore them.
 * @returns The provider's result, or null (providers return null on missing API key).
 * @throws Error for unsupported providers or provider failures.
 *         NOTE(review): 'replicate' models exist in AVAILABLE_MODELS but hit the
 *         default branch here — confirm intended.
 */
export const createImage = async (
  prompt: string,
  modelString: string = 'google/gemini-3-pro-image-preview',
  apiKey?: string,
  aspectRatio?: string,
  resolution?: string,
  enableSearchGrounding?: boolean,
  enableImageSearch?: boolean
): Promise<ImageResult | null> => {
  const { provider, modelName } = parseModelString(modelString);
  logger.info('Routing image creation request', {
    provider,
    modelName,
    promptLength: prompt.length,
    searchGrounding: !!enableSearchGrounding,
    imageSearch: !!enableImageSearch,
  });
  try {
    switch (provider) {
      case 'google':
        return await createImageGoogle(prompt, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
      case 'bria':
        return await createImageWithBria(prompt, modelName, apiKey);
      case 'aimlapi':
        return await createImageWithAimlApi(prompt, modelName, apiKey);
      default:
        logger.error('Unsupported provider', { provider, modelName });
        throw new Error(`Unsupported provider: ${provider}. Supported providers: google, replicate, bria, aimlapi`);
    }
  } catch (error: any) {
    // Log with routing context, then rethrow so the caller can surface it.
    logger.error('Image creation failed', {
      provider,
      modelName,
      error: error.message,
    });
    throw error;
  }
};
/**
 * Edit an existing image with a text prompt.
 * Routes to the appropriate provider based on the model string.
 *
 * @param prompt       Edit instruction.
 * @param imageFiles   Reference image(s); per-provider handling varies.
 * @param modelString  "provider/model-name"; defaults to Google Gemini.
 * @param apiKey       Optional explicit provider API key.
 * @param aspectRatio / resolution / enableSearchGrounding / enableImageSearch
 *                     Forwarded only to the Google route; bria/aimlapi ignore them.
 * @returns The provider's result, or null (providers return null on missing API key).
 * @throws Error for unsupported providers or provider failures.
 */
export const editImage = async (
  prompt: string,
  imageFiles: File[],
  modelString: string = 'google/gemini-3-pro-image-preview',
  apiKey?: string,
  aspectRatio?: string,
  resolution?: string,
  enableSearchGrounding?: boolean,
  enableImageSearch?: boolean
): Promise<ImageResult | null> => {
  const { provider, modelName } = parseModelString(modelString);
  logger.info('Routing image editing request', {
    provider,
    modelName,
    promptLength: prompt.length,
    imageCount: imageFiles.length,
    searchGrounding: !!enableSearchGrounding,
    imageSearch: !!enableImageSearch,
  });
  try {
    switch (provider) {
      case 'google':
        return await editImageGoogle(prompt, imageFiles, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
      case 'bria':
        return await editImageWithBria(prompt, imageFiles, modelName, apiKey);
      case 'aimlapi':
        return await editImageWithAimlApi(prompt, imageFiles, modelName, apiKey);
      default:
        logger.error('Unsupported provider', { provider, modelName });
        throw new Error(`Unsupported provider: ${provider}. Supported providers: google, replicate, bria, aimlapi`);
    }
  } catch (error: any) {
    // Log with routing context, then rethrow so the caller can surface it.
    logger.error('Image editing failed', {
      provider,
      modelName,
      imageCount: imageFiles.length,
      error: error.message,
    });
    throw error;
  }
};
/**
 * Look up the catalog entry whose "provider/model" string matches exactly.
 * Returns undefined when the model is not in AVAILABLE_MODELS.
 */
export const getModelInfo = (modelString: string): ModelInfo | undefined => {
  for (const model of AVAILABLE_MODELS) {
    if (getModelString(model.provider, model.modelName) === modelString) {
      return model;
    }
  }
  return undefined;
};
/**
 * List every catalog entry registered under the given provider key.
 */
export const getModelsByProvider = (provider: string): ModelInfo[] =>
  AVAILABLE_MODELS.filter((model) => model.provider === provider);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,306 @@
#include "transform.hpp"
#include "url_fetch.hpp"
#include <curl/curl.h>
#include <nlohmann/json.hpp>
#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include <vector>
namespace fs = std::filesystem;
using json = nlohmann::json;
namespace media {
// ── base64 encode/decode ────────────────────────────────────────────
static const char b64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encode `len` bytes as standard base64 with '=' padding.
static std::string base64_encode(const uint8_t* data, size_t len) {
    std::string out;
    out.reserve(((len + 2) / 3) * 4);
    size_t i = 0;
    while (i < len) {
        const size_t remain = len - i;
        // Pack up to three input bytes into a 24-bit group.
        uint32_t chunk = (uint32_t)data[i] << 16;
        if (remain > 1) chunk |= (uint32_t)data[i + 1] << 8;
        if (remain > 2) chunk |= (uint32_t)data[i + 2];
        out.push_back(b64_table[(chunk >> 18) & 0x3F]);
        out.push_back(b64_table[(chunk >> 12) & 0x3F]);
        out.push_back(remain > 1 ? b64_table[(chunk >> 6) & 0x3F] : '=');
        out.push_back(remain > 2 ? b64_table[chunk & 0x3F] : '=');
        i += 3;
    }
    return out;
}
// Map one base64 alphabet character to its 6-bit value, or -1 when the
// character is not part of the alphabet (including '=' padding).
static int b64_decode_char(char c) {
    if ('A' <= c && c <= 'Z') return c - 'A';
    if ('a' <= c && c <= 'z') return 26 + (c - 'a');
    if ('0' <= c && c <= '9') return 52 + (c - '0');
    switch (c) {
        case '+': return 62;
        case '/': return 63;
        default:  return -1;
    }
}
// Decode base64 text into raw bytes. Padding ('='), whitespace and any other
// non-alphabet characters are skipped, so lightly mangled input still decodes.
static std::vector<uint8_t> base64_decode(const std::string& in) {
    std::vector<uint8_t> out;
    out.reserve(in.size() * 3 / 4);
    uint32_t acc = 0;
    int nbits = 0;
    for (char c : in) {
        // Translate the character inline; anything outside the alphabet
        // (padding, newlines, stray bytes) is ignored.
        int v;
        if (c >= 'A' && c <= 'Z')      v = c - 'A';
        else if (c >= 'a' && c <= 'z') v = c - 'a' + 26;
        else if (c >= '0' && c <= '9') v = c - '0' + 52;
        else if (c == '+')             v = 62;
        else if (c == '/')             v = 63;
        else                           continue;
        acc = (acc << 6) | (uint32_t)v;
        nbits += 6;
        if (nbits >= 8) {
            nbits -= 8;
            out.push_back((uint8_t)(acc >> nbits));
        }
    }
    return out;
}
// ── MIME type from extension ────────────────────────────────────────
// Best-effort mapping from a file extension (with leading dot, any case)
// to an image MIME type; unknown extensions fall back to image/jpeg.
static std::string mime_from_ext(const std::string& ext) {
    std::string lower = ext;
    std::transform(lower.begin(), lower.end(), lower.begin(),
                   [](unsigned char ch) { return (char)std::tolower(ch); });
    static const std::pair<const char*, const char*> table[] = {
        {".jpg", "image/jpeg"}, {".jpeg", "image/jpeg"},
        {".png", "image/png"},  {".webp", "image/webp"},
        {".gif", "image/gif"},  {".bmp", "image/bmp"},
        {".tif", "image/tiff"}, {".tiff", "image/tiff"},
        {".avif", "image/avif"}, {".heic", "image/heic"},
    };
    for (const auto& entry : table) {
        if (lower == entry.first) return entry.second;
    }
    return "image/jpeg";
}
// ── curl helpers ────────────────────────────────────────────────────
// libcurl write callback: append the received chunk to the std::string
// passed via CURLOPT_WRITEDATA and report the whole chunk as consumed.
static size_t string_write_cb(char* ptr, size_t size, size_t nmemb, void* ud) {
    const size_t total = size * nmemb;
    static_cast<std::string*>(ud)->append(ptr, total);
    return total;
}
// ── Google Gemini generateContent ───────────────────────────────────
// Send one local image plus a text prompt to the Gemini generateContent
// endpoint and write the returned image to `output_path` (or a derived
// default path). Returns a TransformResult whose `ok`/`error` fields
// report success or the first failure encountered.
// NOTE(review): the API key is passed as a URL query parameter; it is not
// printed by the progress messages here, but confirm it cannot leak via
// any URL logging elsewhere.
static TransformResult call_gemini(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress)
{
    TransformResult res;
    // Read input image fully into memory (needed for base64 encoding).
    if (progress) progress("Reading " + input_path);
    std::ifstream ifs(input_path, std::ios::binary);
    if (!ifs) {
        res.error = "Cannot open input: " + input_path;
        return res;
    }
    std::vector<uint8_t> img_bytes((std::istreambuf_iterator<char>(ifs)),
                                   std::istreambuf_iterator<char>());
    ifs.close();
    if (img_bytes.empty()) {
        res.error = "Input file is empty: " + input_path;
        return res;
    }
    std::string mime = mime_from_ext(fs::path(input_path).extension().string());
    std::string b64 = base64_encode(img_bytes.data(), img_bytes.size());
    // Build request JSON: one content with a text part and an inlineData
    // image part; ask for both TEXT and IMAGE response modalities.
    json req_body = {
        {"contents", json::array({
            {{"parts", json::array({
                {{"text", opts.prompt}},
                {{"inlineData", {{"mimeType", mime}, {"data", b64}}}}
            })}}
        })},
        {"generationConfig", {
            {"responseModalities", json::array({"TEXT", "IMAGE"})}
        }}
    };
    std::string url = "https://generativelanguage.googleapis.com/v1beta/models/"
        + opts.model + ":generateContent?key=" + opts.api_key;
    std::string body_str = req_body.dump();
    if (progress) progress("Sending to " + opts.model + " (" +
                           std::to_string(img_bytes.size() / 1024) + " KB)...");
    ensure_curl_global();
    CURL* curl = curl_easy_init();
    if (!curl) {
        res.error = "curl_easy_init failed";
        return res;
    }
    std::string response_str;
    struct curl_slist* headers = nullptr;
    headers = curl_slist_append(headers, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body_str.c_str());
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)body_str.size());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, string_write_cb);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_str);
    // Generous overall timeout: image generation responses can be slow.
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 120L);
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);
    CURLcode cc = curl_easy_perform(curl);
    long http_code = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
    // Release curl resources before any early return below.
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
    if (cc != CURLE_OK) {
        res.error = std::string("HTTP request failed: ") + curl_easy_strerror(cc);
        return res;
    }
    if (http_code != 200) {
        res.error = "API returned HTTP " + std::to_string(http_code);
        // Try to extract error message from response JSON
        try {
            auto j = json::parse(response_str);
            if (j.contains("error") && j["error"].contains("message"))
                res.error += ": " + j["error"]["message"].get<std::string>();
        } catch (...) {
            // Not JSON; include the raw body only if it is short.
            if (response_str.size() < 500) res.error += ": " + response_str;
        }
        return res;
    }
    // Parse response
    if (progress) progress("Parsing response...");
    json resp;
    try {
        resp = json::parse(response_str);
    } catch (const std::exception& e) {
        res.error = std::string("JSON parse error: ") + e.what();
        return res;
    }
    // Extract image and text from candidates[0].content.parts[]. A part may
    // carry inlineData (the image) and/or text; multiple text parts are
    // concatenated with newlines.
    bool found_image = false;
    try {
        auto& parts = resp["candidates"][0]["content"]["parts"];
        for (auto& part : parts) {
            if (part.contains("inlineData")) {
                auto& id = part["inlineData"];
                // NOTE(review): resp_mime is read but never used — the output
                // extension is not adjusted to the returned MIME type.
                std::string resp_mime = id.value("mimeType", "image/png");
                std::string resp_b64 = id["data"].get<std::string>();
                res.image_data = base64_decode(resp_b64);
                found_image = true;
            }
            if (part.contains("text")) {
                if (!res.ai_text.empty()) res.ai_text += "\n";
                res.ai_text += part["text"].get<std::string>();
            }
        }
    } catch (const std::exception& e) {
        res.error = std::string("Response parsing error: ") + e.what();
        // Include raw response excerpt for debugging
        if (response_str.size() < 2000) res.error += "\nRaw: " + response_str;
        return res;
    }
    if (!found_image || res.image_data.empty()) {
        res.error = "No image in API response";
        if (!res.ai_text.empty()) res.error += ". Model said: " + res.ai_text;
        return res;
    }
    // Write output. Derive a default path from the input and prompt when the
    // caller did not supply one, creating parent directories as needed.
    std::string out = output_path;
    if (out.empty()) out = default_transform_output(input_path, opts.prompt);
    fs::path out_dir = fs::path(out).parent_path();
    if (!out_dir.empty()) {
        std::error_code ec;
        fs::create_directories(out_dir, ec);
    }
    if (progress) progress("Writing " + out);
    std::ofstream ofs(out, std::ios::binary);
    if (!ofs) {
        res.error = "Cannot write output: " + out;
        return res;
    }
    ofs.write(reinterpret_cast<const char*>(res.image_data.data()),
              static_cast<std::streamsize>(res.image_data.size()));
    ofs.close();
    res.ok = true;
    res.output_path = out;
    return res;
}
// ── public API ──────────────────────────────────────────────────────
// Derive an output filename from the input path and the edit prompt:
//   <dir>/<stem>_<prompt-slug><ext>
// The slug is the prompt lowercased, with non-alphanumerics dropped,
// whitespace/dashes collapsed to single underscores, and truncated to
// ~40 characters. Falls back to "_edited" when the prompt contains no
// usable characters, so the result is always well-formed and never
// collides with the input file itself.
std::string default_transform_output(const std::string& input_path, const std::string& prompt) {
    namespace fs = std::filesystem;
    fs::path p(input_path);
    std::string stem = p.stem().string();
    std::string ext = p.extension().string();
    if (ext.empty()) ext = ".png"; // extensionless inputs default to PNG
    // Pass 1: map prompt chars to filename-safe chars.
    std::string slug;
    slug.reserve(prompt.size());
    for (char c : prompt) {
        if (std::isalnum((unsigned char)c))
            slug.push_back((char)std::tolower((unsigned char)c));
        else if (c == ' ' || c == '-' || c == '_')
            slug.push_back('_');
        // all other characters are dropped
    }
    // Pass 2: collapse runs of underscores AND drop leading underscores
    // (the original kept a leading '_', yielding "stem__slug" names).
    std::string clean;
    for (char c : slug) {
        if (c == '_' && (clean.empty() || clean.back() == '_')) continue;
        clean.push_back(c);
    }
    while (!clean.empty() && clean.back() == '_') clean.pop_back();
    // truncate to ~40 chars, then re-trim in case truncation exposed a '_'
    if (clean.size() > 40) clean.resize(40);
    while (!clean.empty() && clean.back() == '_') clean.pop_back();
    // Symbol-only or empty prompts would otherwise produce "stem_.ext"
    // with a dangling underscore; use a fixed suffix instead.
    if (clean.empty()) clean = "edited";
    std::string result = stem + "_" + clean + ext;
    return (p.parent_path() / result).string();
}
// Validate the request, then dispatch to the provider-specific backend.
TransformResult transform_image(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress)
{
    // Fail fast on missing required options before any file or network work.
    if (opts.prompt.empty())
        return {false, "prompt is required"};
    if (opts.api_key.empty())
        return {false, "API key is required (set IMAGE_TRANSFORM_GOOGLE_API_KEY in .env or pass --api-key)"};

    // Provider dispatch — Google/Gemini is currently the only backend.
    if (opts.provider != "google")
        return {false, "Unsupported provider: " + opts.provider};
    return call_gemini(input_path, output_path, opts, progress);
}
} // namespace media

View File

@ -0,0 +1,39 @@
#pragma once
#include <cstdint>
#include <functional>
#include <string>
#include <vector>
namespace media {
// Settings controlling a single image-transform request.
struct TransformOptions {
    std::string provider = "google";                  // backend name; only "google" is implemented
    std::string model = "gemini-3-pro-image-preview"; // model id, appended to the generateContent URL
    std::string api_key;                              // required; request is rejected when empty
    std::string prompt;                               // required; edit instruction sent with the image
};
// Outcome of transform_image(): on success `ok` is true and `output_path`
// names the written file; on failure `ok` is false and `error` holds a
// human-readable message (HTTP / parse / filesystem failure).
struct TransformResult {
    bool ok = false;                 // set true only after the output file was written
    std::string error;               // failure description; empty on success
    std::string output_path;         // written file
    std::string ai_text;             // optional text part from model (may accompany the image)
    std::vector<uint8_t> image_data; // raw bytes (PNG/JPEG) before writing
};
using TransformProgressFn = std::function<void(const std::string& status)>;
/// Edit a single image using a generative AI model.
/// Reads `input_path`, sends image + prompt to the API, writes result to `output_path`.
/// If `output_path` is empty, derives it from input + prompt.
TransformResult transform_image(
const std::string& input_path,
const std::string& output_path,
const TransformOptions& opts,
TransformProgressFn progress = nullptr
);
/// Build a default output path from input path and prompt text.
std::string default_transform_output(const std::string& input_path, const std::string& prompt);
} // namespace media