media:cpp --transform 1/3
This commit is contained in:
parent
07dbf000f3
commit
04a7bd1da4
2006
packages/media/cpp/ref/images-ai/ImageWizard.tsx
Normal file
2006
packages/media/cpp/ref/images-ai/ImageWizard.tsx
Normal file
File diff suppressed because it is too large
Load Diff
279
packages/media/cpp/ref/images-ai/aimlapi.ts
Normal file
279
packages/media/cpp/ref/images-ai/aimlapi.ts
Normal file
import { apiClient } from "@/lib/db";

// Simple console-backed logger for user feedback; every entry is tagged
// [AIMLAPI] so provider traffic is easy to filter in devtools.
const logger = {
  debug: (message: string, data?: any) => console.debug(`[AIMLAPI] ${message}`, data),
  info: (message: string, data?: any) => console.info(`[AIMLAPI] ${message}`, data),
  warn: (message: string, data?: any) => console.warn(`[AIMLAPI] ${message}`, data),
  error: (message: string, data?: any) => console.error(`[AIMLAPI] ${message}`, data),
};

// Root URL for all AIML API requests.
const AIMLAPI_BASE_URL = 'https://api.aimlapi.com';
// Get user's AIML API key from server secrets
|
||||
const getAimlApiKey = async (): Promise<string | null> => {
|
||||
try {
|
||||
const data = await apiClient<{ api_keys?: Record<string, any> }>('/api/me/secrets');
|
||||
const key = data.api_keys?.aimlapi_api_key;
|
||||
if (!key) {
|
||||
logger.error('No AIML API key found. Please add your AIML API key in your profile settings.');
|
||||
return null;
|
||||
}
|
||||
return key;
|
||||
} catch (error) {
|
||||
logger.error('Error getting AIML API key:', error);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function to convert File to base64
|
||||
const fileToBase64 = (file: File): Promise<string> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsDataURL(file);
|
||||
reader.onload = () => {
|
||||
const result = reader.result as string;
|
||||
// Remove data URL prefix to get just the base64 string
|
||||
const base64 = result.split(',')[1];
|
||||
resolve(base64);
|
||||
};
|
||||
reader.onerror = error => reject(error);
|
||||
});
|
||||
};
|
||||
|
||||
// Result of a successful generation/edit call: raw image bytes plus an
// optional text caption (AIML API never returns one — see call sites).
interface ImageResult {
  imageData: ArrayBuffer;
  text?: string;
}
/**
 * Generate an image using AIML API text-to-image.
 * Supports various models including ByteDance SeeDream v4, Flux, Stable Diffusion, etc.
 *
 * @param prompt  Text prompt describing the desired image.
 * @param model   AIML API model identifier (default 'bytedance/seedream-v4').
 * @param apiKey  Optional explicit key; otherwise the stored user secret is used.
 * @returns Image bytes (text is always undefined), or null when no API key exists.
 * @throws  Error when the API call fails or the response has no usable image.
 */
export const createImageWithAimlApi = async (
  prompt: string,
  model: string = 'bytedance/seedream-v4',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getAimlApiKey();

  if (!key) {
    logger.error('No AIML API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting AIML API image generation', {
      model,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`;

    // Build request body based on model requirements.
    const requestBody: any = {
      model,
      prompt,
    };

    // Most models support these common parameters.
    if (!model.includes('dall-e')) {
      requestBody.image_size = { width: 1024, height: 1024 };
      requestBody.num_images = 1;
      requestBody.sync_mode = true;
    } else {
      // DALL-E uses different parameter names (OpenAI-style n/size).
      requestBody.n = 1;
      requestBody.size = '1024x1024';
    }

    logger.debug('AIML API request body:', requestBody);

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('AIML API error:', { status: response.status, error: errorText });
      throw new Error(`AIML API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('AIML API response:', data);

    // Expected response format: { data: [{ url: "...", b64_json: "..." }] }
    if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
      throw new Error('Invalid response from AIML API: no image data');
    }

    const firstResult = data.data[0];

    // Prefer URL over base64 if both are provided.
    let arrayBuffer: ArrayBuffer;

    if (firstResult.url) {
      logger.info('Image URL received from AIML API:', firstResult.url);

      // Fetch the image bytes from the returned URL.
      const imageResponse = await fetch(firstResult.url);
      if (!imageResponse.ok) {
        throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`);
      }
      arrayBuffer = await imageResponse.arrayBuffer();
    } else if (firstResult.b64_json) {
      logger.info('Base64 image received from AIML API');

      // Convert base64 to ArrayBuffer byte-by-byte.
      const binaryString = atob(firstResult.b64_json);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('No image URL or base64 data in AIML API response');
    }

    logger.info('Successfully generated image with AIML API', {
      model,
      imageSize: arrayBuffer.byteLength,
    });

    return {
      imageData: arrayBuffer,
      text: undefined, // AIML API doesn't return text descriptions
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('AIML API image generation failed:', {
      error: error.message,
      model,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
/**
 * Edit an image using AIML API image-to-image.
 * Supports models like SeeDream v4 Edit, SeedEdit 3.0, Flux i2i, etc.
 *
 * Only the FIRST file in imageFiles is sent; additional files are ignored.
 *
 * @param prompt     Edit instruction for the model.
 * @param imageFiles Source image(s); element 0 is converted to a data URL.
 * @param model      AIML API model identifier (default 'bytedance/seedream-v4-edit').
 * @param apiKey     Optional explicit key; otherwise the stored user secret is used.
 * @returns Edited image bytes, or null when no API key exists.
 * @throws  Error when the API call fails or the response has no usable image.
 */
export const editImageWithAimlApi = async (
  prompt: string,
  imageFiles: File[],
  model: string = 'bytedance/seedream-v4-edit',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getAimlApiKey();

  if (!key) {
    logger.error('No AIML API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting AIML API image editing', {
      model,
      imageCount: imageFiles.length,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    // Convert the first image to base64.
    const imageBase64 = await fileToBase64(imageFiles[0]);

    const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`;

    // Different models use different parameter names for the image.
    const requestBody: any = {
      model,
      prompt,
      num_images: 1,
      sync_mode: true,
    };

    // AIML API edit endpoint requires image_urls for all models;
    // the image is inlined as a data URL rather than hosted.
    requestBody.image_urls = [`data:image/png;base64,${imageBase64}`];

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('AIML API error:', { status: response.status, error: errorText });
      throw new Error(`AIML API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('AIML API response (edit):', data);

    // Expected response format: { data: [{ url: "...", b64_json: "..." }] }
    if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
      throw new Error('Invalid response from AIML API: no image data');
    }

    const firstResult = data.data[0];

    // Prefer URL over base64 if both are provided.
    let arrayBuffer: ArrayBuffer;

    if (firstResult.url) {
      logger.info('Edited image URL received from AIML API:', firstResult.url);

      // Fetch the image bytes from the returned URL.
      const imageResponse = await fetch(firstResult.url);
      if (!imageResponse.ok) {
        throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`);
      }
      arrayBuffer = await imageResponse.arrayBuffer();
    } else if (firstResult.b64_json) {
      logger.info('Base64 edited image received from AIML API');

      // Convert base64 to ArrayBuffer byte-by-byte.
      const binaryString = atob(firstResult.b64_json);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('No image URL or base64 data in AIML API response');
    }

    logger.info('Successfully edited image with AIML API', {
      model,
      imageSize: arrayBuffer.byteLength,
    });

    return {
      imageData: arrayBuffer,
      text: undefined,
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('AIML API image editing failed:', {
      error: error.message,
      model,
      imageCount: imageFiles.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
// Export the logger for consistency
|
||||
export { logger };
|
||||
|
||||
import { apiClient } from "@/lib/db";

// Simple console-backed logger for user feedback; every entry is tagged
// [BRIA] so provider traffic is easy to filter in devtools.
const logger = {
  debug: (message: string, data?: any) => console.debug(`[BRIA] ${message}`, data),
  info: (message: string, data?: any) => console.info(`[BRIA] ${message}`, data),
  warn: (message: string, data?: any) => console.warn(`[BRIA] ${message}`, data),
  error: (message: string, data?: any) => console.error(`[BRIA] ${message}`, data),
};

// Root URL for Bria's production engine API (v1).
const BRIA_BASE_URL = 'https://engine.prod.bria-api.com/v1';
// Get user's Bria API key from server secrets
|
||||
const getBriaApiKey = async (): Promise<string | null> => {
|
||||
try {
|
||||
const data = await apiClient<{ api_keys?: Record<string, any> }>('/api/me/secrets');
|
||||
const key = data.api_keys?.bria_api_key;
|
||||
if (!key) {
|
||||
logger.error('No Bria API key found. Please add your Bria API key in your profile settings.');
|
||||
return null;
|
||||
}
|
||||
return key;
|
||||
} catch (error) {
|
||||
logger.error('Error getting Bria API key:', error);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function to convert File to base64
|
||||
const fileToBase64 = (file: File): Promise<string> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsDataURL(file);
|
||||
reader.onload = () => {
|
||||
const result = reader.result as string;
|
||||
// Remove data URL prefix to get just the base64 string
|
||||
const base64 = result.split(',')[1];
|
||||
resolve(base64);
|
||||
};
|
||||
reader.onerror = error => reject(error);
|
||||
});
|
||||
};
|
||||
|
||||
// Helper to poll for async image generation
|
||||
const pollForImage = async (url: string, maxAttempts = 60, delayMs = 2000): Promise<boolean> => {
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
const response = await fetch(url, { method: 'HEAD' });
|
||||
if (response.ok && response.headers.get('content-length') !== '0') {
|
||||
return true; // Image is ready
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, delayMs));
|
||||
} catch (error) {
|
||||
logger.debug(`Poll attempt ${attempt + 1} failed, retrying...`);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Result of a successful generation/edit call: raw image bytes plus an
// optional text caption (Bria never returns one — see call sites).
interface ImageResult {
  imageData: ArrayBuffer;
  text?: string;
}
/**
 * Generate an image using the Bria text-to-image API.
 * The endpoint and model version are derived from the model string
 * (default 'bria-2.3-fast' — fast variant for a good speed/quality balance).
 *
 * @param prompt Text prompt describing the desired image.
 * @param model  Format "bria-{version}-{speed}", e.g. 'bria-3.2-fast', 'bria-2.3-base'.
 * @param apiKey Optional explicit key; otherwise the stored user secret is used.
 * @returns Image bytes (text is always undefined), or null when no API key exists.
 * @throws  Error on API failure, content moderation block, or polling timeout.
 */
export const createImageWithBria = async (
  prompt: string,
  model: string = 'bria-2.3-fast',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getBriaApiKey();

  if (!key) {
    logger.error('No Bria API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting Bria image generation', {
      model,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    // Parse model string to determine endpoint and version.
    // Format: "bria-{version}-{speed}" e.g., "bria-3.2-fast", "bria-2.3-base", "bria-2.2-hd"
    const parts = model.split('-');
    const version = parts[1] || '3.2';
    const speed = parts[2] || 'fast'; // fast, base, or hd

    const endpoint = `${BRIA_BASE_URL}/text-to-image/${speed}/${version}`;

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'api_token': key,
      },
      body: JSON.stringify({
        prompt,
        num_results: 1,
        sync: false, // Use async for better performance
        aspect_ratio: '1:1',
        steps_num: speed === 'fast' ? 8 : 30,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('Bria API error:', { status: response.status, error: errorText });
      throw new Error(`Bria API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('Bria API response:', data);

    // Bria reports some failures in-band via error_code.
    if (data.error_code) {
      throw new Error(data.description || `Bria API error: ${data.error_code}`);
    }

    if (!data.result || !Array.isArray(data.result) || data.result.length === 0) {
      throw new Error('Invalid response from Bria API: no results');
    }

    const firstResult = data.result[0];

    // Check if result was blocked by content moderation.
    if (firstResult.blocked) {
      throw new Error(firstResult.description || 'Content blocked by Bria moderation');
    }

    if (!firstResult.urls || firstResult.urls.length === 0) {
      throw new Error('No image URL in Bria response');
    }

    const imageUrl = firstResult.urls[0];
    logger.info('Image URL received from Bria:', imageUrl);

    // Poll for the image to be ready (async generation: sync=false above
    // means the URL may not serve content immediately).
    logger.info('Polling for image completion...');
    const isReady = await pollForImage(imageUrl);

    if (!isReady) {
      throw new Error('Image generation timed out');
    }

    // Fetch the generated image.
    const imageResponse = await fetch(imageUrl);
    if (!imageResponse.ok) {
      throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`);
    }

    const arrayBuffer = await imageResponse.arrayBuffer();

    logger.info('Successfully generated image with Bria', {
      model,
      imageSize: arrayBuffer.byteLength,
      seed: firstResult.seed,
    });

    return {
      imageData: arrayBuffer,
      text: undefined, // Bria doesn't return text descriptions
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('Bria image generation failed:', {
      error: error.message,
      model,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
/**
 * Edit an image using the Bria reimagine API (structure reference).
 * Maintains the structure and depth of the input while incorporating new
 * materials, colors, and textures.
 *
 * Only the FIRST file in imageFiles is used as the structure reference.
 *
 * @param prompt     Edit instruction for the model.
 * @param imageFiles Source image(s); element 0 becomes the structure reference.
 * @param model      Format "bria-{version}-{speed}"; only the speed segment is
 *                   consulted here (fast vs. not).
 * @param apiKey     Optional explicit key; otherwise the stored user secret is used.
 * @returns Edited image bytes, or null when no API key exists.
 * @throws  Error on API failure, content moderation block, or polling timeout.
 */
export const editImageWithBria = async (
  prompt: string,
  imageFiles: File[],
  model: string = 'bria-2.3-fast',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getBriaApiKey();

  if (!key) {
    logger.error('No Bria API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting Bria image editing (reimagine)', {
      model,
      imageCount: imageFiles.length,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    // Convert the first image to base64 for the structure reference.
    const imageBase64 = await fileToBase64(imageFiles[0]);

    const endpoint = `${BRIA_BASE_URL}/reimagine`;

    // Parse model to determine if we should use fast mode.
    const parts = model.split('-');
    const speed = parts[2] || 'fast';
    const useFast = speed === 'fast';

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'api_token': key,
      },
      body: JSON.stringify({
        prompt,
        structure_image_file: imageBase64,
        structure_ref_influence: 0.75, // Good balance for maintaining structure while allowing changes
        num_results: 1,
        sync: false, // Use async for better performance
        fast: useFast,
        steps_num: useFast ? 12 : 30,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('Bria API error:', { status: response.status, error: errorText });
      throw new Error(`Bria API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('Bria API response (reimagine):', data);

    // Bria reports some failures in-band via error_code.
    if (data.error_code) {
      throw new Error(data.description || `Bria API error: ${data.error_code}`);
    }

    if (!data.result || !Array.isArray(data.result) || data.result.length === 0) {
      throw new Error('Invalid response from Bria API: no results');
    }

    const firstResult = data.result[0];

    // Check if result was blocked by content moderation.
    if (firstResult.blocked) {
      throw new Error(firstResult.description || 'Content blocked by Bria moderation');
    }

    if (!firstResult.urls || firstResult.urls.length === 0) {
      throw new Error('No image URL in Bria response');
    }

    const imageUrl = firstResult.urls[0];
    logger.info('Edited image URL received from Bria:', imageUrl);

    // Poll for the image to be ready (async generation).
    logger.info('Polling for edited image completion...');
    const isReady = await pollForImage(imageUrl);

    if (!isReady) {
      throw new Error('Image editing timed out');
    }

    // Fetch the edited image.
    const imageResponse = await fetch(imageUrl);
    if (!imageResponse.ok) {
      throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`);
    }

    const arrayBuffer = await imageResponse.arrayBuffer();

    logger.info('Successfully edited image with Bria', {
      model,
      imageSize: arrayBuffer.byteLength,
      seed: firstResult.seed,
    });

    return {
      imageData: arrayBuffer,
      text: undefined,
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('Bria image editing failed:', {
      error: error.message,
      model,
      imageCount: imageFiles.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
// Export the logger for consistency
|
||||
export { logger };
|
||||
|
||||
/**
 * Image Generation Router
 * Routes image generation requests to the appropriate AI provider based on the model format.
 * Model format: "provider/model-name"
 *
 * Supported providers:
 * - google: Google Generative AI (Gemini models)
 * - bria: Bria.ai (text-to-image and reimagine)
 * - aimlapi: AIML API (ByteDance, Flux, Imagen, DALL-E, SD, Recraft, ...)
 * - replicate: NOTE(review) — its handler import is commented out below, so
 *   replicate model strings currently fall through to the "unsupported" error.
 */

import { createImage as createImageGoogle, editImage as editImageGoogle } from '@/image-api';
//import { createImageWithReplicate, editImageWithReplicate } from '@/lib/replicate';
import { createImageWithBria, editImageWithBria } from '@/lib/bria';
import { createImageWithAimlApi, editImageWithAimlApi } from '@/lib/aimlapi';

// Console-backed logger for debugging; entries are tagged [IMAGE-ROUTER].
const logger = {
  debug: (message: string, data?: any) => console.debug(`[IMAGE-ROUTER] ${message}`, data),
  info: (message: string, data?: any) => console.info(`[IMAGE-ROUTER] ${message}`, data),
  warn: (message: string, data?: any) => console.warn(`[IMAGE-ROUTER] ${message}`, data),
  error: (message: string, data?: any) => console.error(`[IMAGE-ROUTER] ${message}`, data),
};
// Result of a successful generation/edit: raw image bytes plus an optional
// text caption (provider-dependent; may be undefined).
export interface ImageResult {
  imageData: ArrayBuffer;
  text?: string;
}

// Describes one selectable model and which operations it supports.
export interface ModelInfo {
  provider: string;      // provider id, e.g. 'google', 'bria', 'aimlapi'
  modelName: string;     // provider-specific model identifier
  displayName: string;   // human-readable label for the UI
  supportsTextToImage: boolean;
  supportsImageToImage: boolean;
}
// Available models configuration.
// Each entry pairs a provider id with a provider-specific model name; the
// routed model string is "provider/modelName" (see getModelString).
export const AVAILABLE_MODELS: ModelInfo[] = [
  // Google Generative AI (direct)
  { provider: 'google', modelName: 'gemini-3-pro-image-preview', displayName: 'Google Gemini 3 Pro (Image Preview)', supportsTextToImage: true, supportsImageToImage: true },
  { provider: 'google', modelName: 'gemini-3.1-flash-image-preview', displayName: 'Google Gemini 3.1 Flash (Image Preview)', supportsTextToImage: true, supportsImageToImage: true },
  /* Duplicate model name causing key conflicts - temporarily disabled
  { provider: 'google', modelName: 'gemini-3-pro-image-preview', displayName: 'Google Gemini 2.5 Flash (Image Preview)', supportsTextToImage: true, supportsImageToImage: true },
  */
  // Replicate — NOTE(review): listed here but the replicate handler import is
  // commented out in this file, so selecting it will throw at routing time.
  { provider: 'replicate', modelName: 'bytedance/seedream-4', displayName: 'Replicate SeeDream-4 (Bytedance)', supportsTextToImage: true, supportsImageToImage: true },
  // Bria.ai
  { provider: 'bria', modelName: 'bria-3.2-fast', displayName: 'Bria.ai 3.2 Fast', supportsTextToImage: true, supportsImageToImage: true },
  { provider: 'bria', modelName: 'bria-2.3-base', displayName: 'Bria.ai 2.3 Base (High Quality)', supportsTextToImage: true, supportsImageToImage: true },
  { provider: 'bria', modelName: 'bria-2.2-hd', displayName: 'Bria.ai 2.2 HD (1920x1080)', supportsTextToImage: true, supportsImageToImage: false }, // HD doesn't support reimagine
  // AIML API - ByteDance Models
  { provider: 'aimlapi', modelName: 'bytedance/seedream-v4-text-to-image', displayName: 'AIML API - SeeDream v4 (4K)', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'bytedance/seedream-v4-edit', displayName: 'AIML API - SeeDream v4 Edit (4K)', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'bytedance/seededit-3.0-i2i', displayName: 'AIML API - SeedEdit 3.0', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'bytedance/seedream-3.0', displayName: 'AIML API - SeeDream 3.0', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'bytedance/uso', displayName: 'AIML API - USO (i2i)', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'alibaba/qwen-image', displayName: 'AIML API - Qwen Image', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Flux Models
  { provider: 'aimlapi', modelName: 'flux-pro', displayName: 'AIML API - Flux Pro', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux-pro/v1.1', displayName: 'AIML API - Flux Pro v1.1', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux-pro/v1.1-ultra', displayName: 'AIML API - Flux Pro v1.1 Ultra', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux-realism', displayName: 'AIML API - Flux Realism', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux/dev', displayName: 'AIML API - Flux Dev', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux/dev/image-to-image', displayName: 'AIML API - Flux Dev i2i', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'flux/schnell', displayName: 'AIML API - Flux Schnell (Fast)', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Google Models
  { provider: 'aimlapi', modelName: 'imagen-3.0-generate-002', displayName: 'AIML API - Google Imagen 3', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen4/preview', displayName: 'AIML API - Google Imagen 4 Preview', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen-4.0-generate-001', displayName: 'AIML API - Google Imagen 4.0', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen-4.0-fast-generate-001', displayName: 'AIML API - Google Imagen 4.0 Fast', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen-4.0-ultra-generate-001', displayName: 'AIML API - Google Imagen 4.0 Ultra', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/gemini-2.5-flash-image', displayName: 'AIML API - Gemini 2.5 Flash Image', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/gemini-2.5-flash-image-edit', displayName: 'AIML API - Gemini 2.5 Flash Edit', supportsTextToImage: false, supportsImageToImage: true },
  // AIML API - OpenAI Models
  { provider: 'aimlapi', modelName: 'dall-e-2', displayName: 'AIML API - DALL-E 2 (OpenAI)', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'dall-e-3', displayName: 'AIML API - DALL-E 3 (OpenAI)', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Stability AI Models
  { provider: 'aimlapi', modelName: 'stable-diffusion-v3-medium', displayName: 'AIML API - Stable Diffusion 3 Medium', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'stable-diffusion-v35-large', displayName: 'AIML API - Stable Diffusion 3.5 Large', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Recraft AI
  { provider: 'aimlapi', modelName: 'recraft-v3', displayName: 'AIML API - Recraft v3', supportsTextToImage: true, supportsImageToImage: false },
];
/**
|
||||
* Parse model string into provider and model name
|
||||
* @param modelString Format: "provider/model-name"
|
||||
* @returns { provider, modelName }
|
||||
*/
|
||||
export const parseModelString = (modelString: string): { provider: string; modelName: string } => {
|
||||
const parts = modelString.split('/');
|
||||
|
||||
if (parts.length < 2) {
|
||||
// Default to Google if no provider specified
|
||||
logger.warn('Model string missing provider, defaulting to Google', { modelString });
|
||||
return {
|
||||
provider: 'google',
|
||||
modelName: modelString,
|
||||
};
|
||||
}
|
||||
|
||||
const provider = parts[0].toLowerCase();
|
||||
const modelName = parts.slice(1).join('/'); // Handle models with multiple slashes
|
||||
|
||||
return { provider, modelName };
|
||||
};
|
||||
|
||||
/**
|
||||
* Get full model string from provider and model name
|
||||
*/
|
||||
export const getModelString = (provider: string, modelName: string): string => {
|
||||
return `${provider}/${modelName}`;
|
||||
};
|
||||
|
||||
/**
|
||||
* Create/generate a new image from text prompt
|
||||
* Routes to the appropriate provider based on model string
|
||||
*/
|
||||
export const createImage = async (
|
||||
prompt: string,
|
||||
modelString: string = 'google/gemini-3-pro-image-preview',
|
||||
apiKey?: string,
|
||||
aspectRatio?: string,
|
||||
resolution?: string,
|
||||
enableSearchGrounding?: boolean,
|
||||
enableImageSearch?: boolean
|
||||
): Promise<ImageResult | null> => {
|
||||
const { provider, modelName } = parseModelString(modelString);
|
||||
|
||||
logger.info('Routing image creation request', {
|
||||
provider,
|
||||
modelName,
|
||||
promptLength: prompt.length,
|
||||
searchGrounding: !!enableSearchGrounding,
|
||||
imageSearch: !!enableImageSearch,
|
||||
});
|
||||
|
||||
try {
|
||||
switch (provider) {
|
||||
case 'google':
|
||||
return await createImageGoogle(prompt, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
|
||||
|
||||
case 'bria':
|
||||
return await createImageWithBria(prompt, modelName, apiKey);
|
||||
|
||||
case 'aimlapi':
|
||||
return await createImageWithAimlApi(prompt, modelName, apiKey);
|
||||
|
||||
default:
|
||||
logger.error('Unsupported provider', { provider, modelName });
|
||||
throw new Error(`Unsupported provider: ${provider}. Supported providers: google, replicate, bria, aimlapi`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
logger.error('Image creation failed', {
|
||||
provider,
|
||||
modelName,
|
||||
error: error.message,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Edit an existing image with a text prompt
|
||||
* Routes to the appropriate provider based on model string
|
||||
*/
|
||||
export const editImage = async (
|
||||
prompt: string,
|
||||
imageFiles: File[],
|
||||
modelString: string = 'google/gemini-3-pro-image-preview',
|
||||
apiKey?: string,
|
||||
aspectRatio?: string,
|
||||
resolution?: string,
|
||||
enableSearchGrounding?: boolean,
|
||||
enableImageSearch?: boolean
|
||||
): Promise<ImageResult | null> => {
|
||||
const { provider, modelName } = parseModelString(modelString);
|
||||
|
||||
logger.info('Routing image editing request', {
|
||||
provider,
|
||||
modelName,
|
||||
promptLength: prompt.length,
|
||||
imageCount: imageFiles.length,
|
||||
searchGrounding: !!enableSearchGrounding,
|
||||
imageSearch: !!enableImageSearch,
|
||||
});
|
||||
|
||||
try {
|
||||
switch (provider) {
|
||||
case 'google':
|
||||
return await editImageGoogle(prompt, imageFiles, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
|
||||
|
||||
case 'bria':
|
||||
return await editImageWithBria(prompt, imageFiles, modelName, apiKey);
|
||||
|
||||
case 'aimlapi':
|
||||
return await editImageWithAimlApi(prompt, imageFiles, modelName, apiKey);
|
||||
|
||||
default:
|
||||
logger.error('Unsupported provider', { provider, modelName });
|
||||
throw new Error(`Unsupported provider: ${provider}. Supported providers: google, replicate, bria, aimlapi`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
logger.error('Image editing failed', {
|
||||
provider,
|
||||
modelName,
|
||||
imageCount: imageFiles.length,
|
||||
error: error.message,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Get model info by model string
|
||||
*/
|
||||
export const getModelInfo = (modelString: string): ModelInfo | undefined => {
|
||||
return AVAILABLE_MODELS.find(
|
||||
(m) => getModelString(m.provider, m.modelName) === modelString
|
||||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* Get all models for a specific provider
|
||||
*/
|
||||
export const getModelsByProvider = (provider: string): ModelInfo[] => {
|
||||
return AVAILABLE_MODELS.filter((m) => m.provider === provider);
|
||||
};
|
||||
|
||||
|
||||
1448
packages/media/cpp/ref/images-ai/openai.ts
Normal file
1448
packages/media/cpp/ref/images-ai/openai.ts
Normal file
File diff suppressed because it is too large
Load Diff
306
packages/media/cpp/src/core/transform.cpp
Normal file
306
packages/media/cpp/src/core/transform.cpp
Normal file
@ -0,0 +1,306 @@
|
||||
#include "transform.hpp"
|
||||
#include "url_fetch.hpp"
|
||||
|
||||
#include <curl/curl.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace media {
|
||||
|
||||
// ── base64 encode/decode ────────────────────────────────────────────
|
||||
|
||||
static const char b64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encode `len` raw bytes as standard base64 with '=' padding.
static std::string base64_encode(const uint8_t* data, size_t len) {
    std::string encoded;
    encoded.reserve(((len + 2) / 3) * 4);

    size_t i = 0;
    // Full 3-byte groups -> 4 output characters each, no padding.
    while (i + 3 <= len) {
        uint32_t group = ((uint32_t)data[i] << 16) |
                         ((uint32_t)data[i + 1] << 8) |
                         (uint32_t)data[i + 2];
        encoded.push_back(b64_table[(group >> 18) & 0x3F]);
        encoded.push_back(b64_table[(group >> 12) & 0x3F]);
        encoded.push_back(b64_table[(group >> 6) & 0x3F]);
        encoded.push_back(b64_table[group & 0x3F]);
        i += 3;
    }

    // Trailing 1 or 2 bytes are padded out with '='.
    const size_t rem = len - i;
    if (rem == 1) {
        uint32_t group = (uint32_t)data[i] << 16;
        encoded.push_back(b64_table[(group >> 18) & 0x3F]);
        encoded.push_back(b64_table[(group >> 12) & 0x3F]);
        encoded.append("==");
    } else if (rem == 2) {
        uint32_t group = ((uint32_t)data[i] << 16) | ((uint32_t)data[i + 1] << 8);
        encoded.push_back(b64_table[(group >> 18) & 0x3F]);
        encoded.push_back(b64_table[(group >> 12) & 0x3F]);
        encoded.push_back(b64_table[(group >> 6) & 0x3F]);
        encoded.push_back('=');
    }
    return encoded;
}
|
||||
|
||||
// Map one base64 alphabet character to its 6-bit value, or -1 when the
// character is outside the standard alphabet.
static int b64_decode_char(char c) {
    switch (c) {
        case '+': return 62;
        case '/': return 63;
        default:
            if ('A' <= c && c <= 'Z') return c - 'A';
            if ('a' <= c && c <= 'z') return 26 + (c - 'a');
            if ('0' <= c && c <= '9') return 52 + (c - '0');
            return -1;
    }
}

// Decode a base64 string. '=' padding, whitespace, and any other
// non-alphabet byte are silently skipped.
static std::vector<uint8_t> base64_decode(const std::string& in) {
    std::vector<uint8_t> bytes;
    bytes.reserve((in.size() / 4) * 3);

    uint32_t acc = 0;   // bit accumulator
    int acc_bits = 0;   // number of valid bits currently in acc
    for (char c : in) {
        const int v = b64_decode_char(c);
        if (v < 0) continue;  // padding, whitespace, or junk
        acc = (acc << 6) | (uint32_t)v;
        acc_bits += 6;
        if (acc_bits >= 8) {
            acc_bits -= 8;
            bytes.push_back((uint8_t)(acc >> acc_bits));
        }
    }
    return bytes;
}
|
||||
|
||||
// ── MIME type from extension ────────────────────────────────────────
|
||||
|
||||
// Guess an image MIME type from a file extension (case-insensitive,
// leading dot included). Unknown extensions fall back to "image/jpeg".
static std::string mime_from_ext(const std::string& ext) {
    std::string lowered = ext;
    std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                   [](unsigned char c) { return (char)std::tolower(c); });

    struct Entry { const char* ext; const char* mime; };
    static const Entry table[] = {
        {".jpg",  "image/jpeg"}, {".jpeg", "image/jpeg"},
        {".png",  "image/png"},  {".webp", "image/webp"},
        {".gif",  "image/gif"},  {".bmp",  "image/bmp"},
        {".tif",  "image/tiff"}, {".tiff", "image/tiff"},
        {".avif", "image/avif"}, {".heic", "image/heic"},
    };
    for (const auto& e : table) {
        if (lowered == e.ext) return e.mime;
    }
    return "image/jpeg";  // safe default for unrecognised extensions
}
|
||||
|
||||
// ── curl helpers ────────────────────────────────────────────────────
|
||||
|
||||
// libcurl write callback: append the received chunk to the std::string
// passed via CURLOPT_WRITEDATA. Returning the full byte count signals
// success to libcurl.
static size_t string_write_cb(char* ptr, size_t size, size_t nmemb, void* ud) {
    const size_t total = size * nmemb;
    static_cast<std::string*>(ud)->append(ptr, total);
    return total;
}
|
||||
|
||||
// ── Google Gemini generateContent ───────────────────────────────────
|
||||
|
||||
// Send one image + prompt to Google's Gemini generateContent endpoint
// and write the returned image to disk.
//
// Flow: read input file -> base64-encode into the request JSON ->
// POST via libcurl -> parse candidates[0].content.parts[] for an
// inlineData image (plus any text parts) -> write image bytes to the
// output path. On any failure, returns a TransformResult with
// ok == false and a populated `error`; never throws.
static TransformResult call_gemini(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress)
{
    TransformResult res;

    // Read the whole input image into memory.
    if (progress) progress("Reading " + input_path);
    std::ifstream ifs(input_path, std::ios::binary);
    if (!ifs) {
        res.error = "Cannot open input: " + input_path;
        return res;
    }
    std::vector<uint8_t> img_bytes((std::istreambuf_iterator<char>(ifs)),
                                   std::istreambuf_iterator<char>());
    ifs.close();

    if (img_bytes.empty()) {
        res.error = "Input file is empty: " + input_path;
        return res;
    }

    // MIME type is inferred from the file extension, not the content.
    std::string mime = mime_from_ext(fs::path(input_path).extension().string());
    std::string b64 = base64_encode(img_bytes.data(), img_bytes.size());

    // Build request JSON: prompt text plus the image as inlineData.
    // responseModalities asks the model to return both text and an image.
    json req_body = {
        {"contents", json::array({
            {{"parts", json::array({
                {{"text", opts.prompt}},
                {{"inlineData", {{"mimeType", mime}, {"data", b64}}}}
            })}}
        })},
        {"generationConfig", {
            {"responseModalities", json::array({"TEXT", "IMAGE"})}
        }}
    };

    // NOTE(review): the API key travels as a URL query parameter, so it can
    // leak into logs/proxies; consider the x-goog-api-key header instead.
    std::string url = "https://generativelanguage.googleapis.com/v1beta/models/"
                      + opts.model + ":generateContent?key=" + opts.api_key;

    std::string body_str = req_body.dump();

    if (progress) progress("Sending to " + opts.model + " (" +
                           std::to_string(img_bytes.size() / 1024) + " KB)...");

    ensure_curl_global();
    CURL* curl = curl_easy_init();
    if (!curl) {
        res.error = "curl_easy_init failed";
        return res;
    }

    std::string response_str;
    struct curl_slist* headers = nullptr;
    headers = curl_slist_append(headers, "Content-Type: application/json");

    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body_str.c_str());
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)body_str.size());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, string_write_cb);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_str);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 120L);        // image generation can be slow
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);

    CURLcode cc = curl_easy_perform(curl);
    long http_code = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);

    if (cc != CURLE_OK) {
        res.error = std::string("HTTP request failed: ") + curl_easy_strerror(cc);
        return res;
    }

    if (http_code != 200) {
        res.error = "API returned HTTP " + std::to_string(http_code);
        // Try to extract error message from response JSON
        try {
            auto j = json::parse(response_str);
            if (j.contains("error") && j["error"].contains("message"))
                res.error += ": " + j["error"]["message"].get<std::string>();
        } catch (...) {
            // Response was not JSON; include it verbatim only if small.
            if (response_str.size() < 500) res.error += ": " + response_str;
        }
        return res;
    }

    // Parse response
    if (progress) progress("Parsing response...");
    json resp;
    try {
        resp = json::parse(response_str);
    } catch (const std::exception& e) {
        res.error = std::string("JSON parse error: ") + e.what();
        return res;
    }

    // Extract image and text from candidates[0].content.parts[]
    bool found_image = false;
    try {
        auto& parts = resp["candidates"][0]["content"]["parts"];
        for (auto& part : parts) {
            if (part.contains("inlineData")) {
                auto& id = part["inlineData"];
                // NOTE(review): resp_mime is read but never used — the output
                // file keeps the input path's extension even if the API
                // returns a different format; confirm this is intended.
                std::string resp_mime = id.value("mimeType", "image/png");
                std::string resp_b64 = id["data"].get<std::string>();
                // If multiple image parts arrive, the last one wins.
                res.image_data = base64_decode(resp_b64);
                found_image = true;
            }
            if (part.contains("text")) {
                // Concatenate all text parts, newline-separated.
                if (!res.ai_text.empty()) res.ai_text += "\n";
                res.ai_text += part["text"].get<std::string>();
            }
        }
    } catch (const std::exception& e) {
        res.error = std::string("Response parsing error: ") + e.what();
        // Include raw response excerpt for debugging
        if (response_str.size() < 2000) res.error += "\nRaw: " + response_str;
        return res;
    }

    if (!found_image || res.image_data.empty()) {
        res.error = "No image in API response";
        if (!res.ai_text.empty()) res.error += ". Model said: " + res.ai_text;
        return res;
    }

    // Write output; derive a name from input + prompt if none was given.
    std::string out = output_path;
    if (out.empty()) out = default_transform_output(input_path, opts.prompt);

    fs::path out_dir = fs::path(out).parent_path();
    if (!out_dir.empty()) {
        std::error_code ec;
        fs::create_directories(out_dir, ec);  // best-effort; write below reports failure
    }

    if (progress) progress("Writing " + out);
    std::ofstream ofs(out, std::ios::binary);
    if (!ofs) {
        res.error = "Cannot write output: " + out;
        return res;
    }
    ofs.write(reinterpret_cast<const char*>(res.image_data.data()),
              static_cast<std::streamsize>(res.image_data.size()));
    ofs.close();

    res.ok = true;
    res.output_path = out;
    return res;
}
|
||||
|
||||
// ── public API ──────────────────────────────────────────────────────
|
||||
|
||||
// Build a default output path from the input path and the prompt:
//   <dir>/<stem>_<sanitized-prompt><ext>
// The prompt is lowercased; alphanumerics are kept, space/'-'/'_' become
// '_', everything else is dropped; runs of '_' collapse; the slug is
// capped at ~40 chars. If sanitising leaves nothing (e.g. the prompt was
// all punctuation), the literal slug "edited" is used so the name never
// degenerates to "<stem>_<ext>" and never collides with the input file.
std::string default_transform_output(const std::string& input_path, const std::string& prompt) {
    namespace fs = std::filesystem;
    fs::path p(input_path);
    std::string stem = p.stem().string();
    std::string ext = p.extension().string();
    if (ext.empty()) ext = ".png";  // sensible default when input has no extension

    // Sanitize and truncate prompt for filename
    std::string slug;
    slug.reserve(prompt.size());
    for (char c : prompt) {
        if (std::isalnum((unsigned char)c))
            slug.push_back((char)std::tolower((unsigned char)c));
        else if (c == ' ' || c == '-' || c == '_')
            slug.push_back('_');
        // skip other chars
    }
    // collapse consecutive underscores
    std::string clean;
    for (char c : slug) {
        if (c == '_' && !clean.empty() && clean.back() == '_') continue;
        clean.push_back(c);
    }
    // trim trailing underscore
    while (!clean.empty() && clean.back() == '_') clean.pop_back();
    // truncate to ~40 chars (re-trim in case truncation exposed a '_')
    if (clean.size() > 40) clean.resize(40);
    while (!clean.empty() && clean.back() == '_') clean.pop_back();

    // Fallback when the prompt contained no usable characters; avoids a
    // dangling "<stem>_.<ext>" name.
    if (clean.empty()) clean = "edited";

    std::string result = stem + "_" + clean + ext;
    return (p.parent_path() / result).string();
}
|
||||
|
||||
// Validate options, then dispatch to the provider-specific backend.
// All failures are reported through TransformResult::error.
TransformResult transform_image(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress)
{
    // Guard clauses: both fields are mandatory for any provider.
    if (opts.prompt.empty())
        return {false, "prompt is required"};
    if (opts.api_key.empty())
        return {false, "API key is required (set IMAGE_TRANSFORM_GOOGLE_API_KEY in .env or pass --api-key)"};

    if (opts.provider != "google")
        return {false, "Unsupported provider: " + opts.provider};

    // Only the Google Gemini backend exists today.
    return call_gemini(input_path, output_path, opts, progress);
}
|
||||
|
||||
} // namespace media
|
||||
39
packages/media/cpp/src/core/transform.hpp
Normal file
39
packages/media/cpp/src/core/transform.hpp
Normal file
@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
namespace media {
|
||||
|
||||
// Configuration for a single image-transform request.
struct TransformOptions {
    std::string provider = "google";                  // backend selector; only "google" is dispatched by transform_image
    std::string model = "gemini-3-pro-image-preview"; // provider-specific model identifier
    std::string api_key;                              // required; transform_image fails fast when empty
    std::string prompt;                               // required edit instruction sent alongside the image
};
|
||||
|
||||
// Outcome of transform_image(): on success ok == true and output_path /
// image_data are filled in; on failure ok == false and `error` holds a
// human-readable message.
struct TransformResult {
    bool ok = false;                 // overall success flag
    std::string error;               // populated only when ok == false
    std::string output_path; // written file
    std::string ai_text;     // optional text part from model
    std::vector<uint8_t> image_data; // raw bytes (PNG/JPEG) before writing
};
|
||||
|
||||
// Callback invoked with short human-readable status updates as the
// transform progresses (reading input, sending request, writing output).
using TransformProgressFn = std::function<void(const std::string& status)>;

/// Edit a single image using a generative AI model.
/// Reads `input_path`, sends image + prompt to the API, writes result to `output_path`.
/// If `output_path` is empty, derives it from input + prompt.
/// Failures are reported via TransformResult::error; this does not throw.
TransformResult transform_image(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress = nullptr
);

/// Build a default output path from input path and prompt text.
/// The prompt is sanitized into a short slug appended to the input stem.
std::string default_transform_output(const std::string& input_path, const std::string& prompt);
|
||||
|
||||
} // namespace media
|
||||
Loading…
Reference in New Issue
Block a user