media:cpp --transform 1/3
This commit is contained in:
parent
07dbf000f3
commit
04a7bd1da4
2006
packages/media/cpp/ref/images-ai/ImageWizard.tsx
Normal file
2006
packages/media/cpp/ref/images-ai/ImageWizard.tsx
Normal file
File diff suppressed because it is too large
Load Diff
279
packages/media/cpp/ref/images-ai/aimlapi.ts
Normal file
279
packages/media/cpp/ref/images-ai/aimlapi.ts
Normal file
import { apiClient } from "@/lib/db";

// Simple console-backed logger for user feedback; every entry is tagged
// [AIMLAPI] so provider traffic is easy to filter in devtools.
const logger = {
  debug: (message: string, data?: any) => console.debug(`[AIMLAPI] ${message}`, data),
  info: (message: string, data?: any) => console.info(`[AIMLAPI] ${message}`, data),
  warn: (message: string, data?: any) => console.warn(`[AIMLAPI] ${message}`, data),
  error: (message: string, data?: any) => console.error(`[AIMLAPI] ${message}`, data),
};

// Root URL for all AIML API requests.
const AIMLAPI_BASE_URL = 'https://api.aimlapi.com';
// Get user's AIML API key from server secrets
|
||||
const getAimlApiKey = async (): Promise<string | null> => {
|
||||
try {
|
||||
const data = await apiClient<{ api_keys?: Record<string, any> }>('/api/me/secrets');
|
||||
const key = data.api_keys?.aimlapi_api_key;
|
||||
if (!key) {
|
||||
logger.error('No AIML API key found. Please add your AIML API key in your profile settings.');
|
||||
return null;
|
||||
}
|
||||
return key;
|
||||
} catch (error) {
|
||||
logger.error('Error getting AIML API key:', error);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function to convert File to base64
|
||||
const fileToBase64 = (file: File): Promise<string> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsDataURL(file);
|
||||
reader.onload = () => {
|
||||
const result = reader.result as string;
|
||||
// Remove data URL prefix to get just the base64 string
|
||||
const base64 = result.split(',')[1];
|
||||
resolve(base64);
|
||||
};
|
||||
reader.onerror = error => reject(error);
|
||||
});
|
||||
};
|
||||
|
||||
// Result of a successful generation/edit call: raw image bytes plus an
// optional text caption (AIML API never returns one — see call sites).
interface ImageResult {
  imageData: ArrayBuffer;
  text?: string;
}
/**
 * Generate an image using AIML API text-to-image.
 * Supports various models including ByteDance SeeDream v4, Flux, Stable Diffusion, etc.
 *
 * @param prompt  Text prompt describing the desired image.
 * @param model   AIML API model identifier (default 'bytedance/seedream-v4').
 * @param apiKey  Optional explicit key; otherwise the stored user secret is used.
 * @returns Image bytes (text is always undefined), or null when no API key exists.
 * @throws  Error when the API call fails or the response has no usable image.
 */
export const createImageWithAimlApi = async (
  prompt: string,
  model: string = 'bytedance/seedream-v4',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getAimlApiKey();

  if (!key) {
    logger.error('No AIML API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting AIML API image generation', {
      model,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`;

    // Build request body based on model requirements.
    const requestBody: any = {
      model,
      prompt,
    };

    // Most models support these common parameters.
    if (!model.includes('dall-e')) {
      requestBody.image_size = { width: 1024, height: 1024 };
      requestBody.num_images = 1;
      requestBody.sync_mode = true;
    } else {
      // DALL-E uses different parameter names (OpenAI-style n/size).
      requestBody.n = 1;
      requestBody.size = '1024x1024';
    }

    logger.debug('AIML API request body:', requestBody);

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('AIML API error:', { status: response.status, error: errorText });
      throw new Error(`AIML API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('AIML API response:', data);

    // Expected response format: { data: [{ url: "...", b64_json: "..." }] }
    if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
      throw new Error('Invalid response from AIML API: no image data');
    }

    const firstResult = data.data[0];

    // Prefer URL over base64 if both are provided.
    let arrayBuffer: ArrayBuffer;

    if (firstResult.url) {
      logger.info('Image URL received from AIML API:', firstResult.url);

      // Fetch the image bytes from the returned URL.
      const imageResponse = await fetch(firstResult.url);
      if (!imageResponse.ok) {
        throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`);
      }
      arrayBuffer = await imageResponse.arrayBuffer();
    } else if (firstResult.b64_json) {
      logger.info('Base64 image received from AIML API');

      // Convert base64 to ArrayBuffer byte-by-byte.
      const binaryString = atob(firstResult.b64_json);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('No image URL or base64 data in AIML API response');
    }

    logger.info('Successfully generated image with AIML API', {
      model,
      imageSize: arrayBuffer.byteLength,
    });

    return {
      imageData: arrayBuffer,
      text: undefined, // AIML API doesn't return text descriptions
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('AIML API image generation failed:', {
      error: error.message,
      model,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
/**
 * Edit an image using AIML API image-to-image.
 * Supports models like SeeDream v4 Edit, SeedEdit 3.0, Flux i2i, etc.
 *
 * Only the FIRST file in imageFiles is sent; additional files are ignored.
 *
 * @param prompt     Edit instruction for the model.
 * @param imageFiles Source image(s); element 0 is converted to a data URL.
 * @param model      AIML API model identifier (default 'bytedance/seedream-v4-edit').
 * @param apiKey     Optional explicit key; otherwise the stored user secret is used.
 * @returns Edited image bytes, or null when no API key exists.
 * @throws  Error when the API call fails or the response has no usable image.
 */
export const editImageWithAimlApi = async (
  prompt: string,
  imageFiles: File[],
  model: string = 'bytedance/seedream-v4-edit',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getAimlApiKey();

  if (!key) {
    logger.error('No AIML API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting AIML API image editing', {
      model,
      imageCount: imageFiles.length,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    // Convert the first image to base64.
    const imageBase64 = await fileToBase64(imageFiles[0]);

    const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`;

    // Different models use different parameter names for the image.
    const requestBody: any = {
      model,
      prompt,
      num_images: 1,
      sync_mode: true,
    };

    // AIML API edit endpoint requires image_urls for all models;
    // the image is inlined as a data URL rather than hosted.
    requestBody.image_urls = [`data:image/png;base64,${imageBase64}`];

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('AIML API error:', { status: response.status, error: errorText });
      throw new Error(`AIML API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('AIML API response (edit):', data);

    // Expected response format: { data: [{ url: "...", b64_json: "..." }] }
    if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
      throw new Error('Invalid response from AIML API: no image data');
    }

    const firstResult = data.data[0];

    // Prefer URL over base64 if both are provided.
    let arrayBuffer: ArrayBuffer;

    if (firstResult.url) {
      logger.info('Edited image URL received from AIML API:', firstResult.url);

      // Fetch the image bytes from the returned URL.
      const imageResponse = await fetch(firstResult.url);
      if (!imageResponse.ok) {
        throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`);
      }
      arrayBuffer = await imageResponse.arrayBuffer();
    } else if (firstResult.b64_json) {
      logger.info('Base64 edited image received from AIML API');

      // Convert base64 to ArrayBuffer byte-by-byte.
      const binaryString = atob(firstResult.b64_json);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('No image URL or base64 data in AIML API response');
    }

    logger.info('Successfully edited image with AIML API', {
      model,
      imageSize: arrayBuffer.byteLength,
    });

    return {
      imageData: arrayBuffer,
      text: undefined,
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('AIML API image editing failed:', {
      error: error.message,
      model,
      imageCount: imageFiles.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
// Export the logger for consistency
|
||||
export { logger };
|
||||
|
||||
import { apiClient } from "@/lib/db";

// Simple console-backed logger for user feedback; every entry is tagged
// [BRIA] so provider traffic is easy to filter in devtools.
const logger = {
  debug: (message: string, data?: any) => console.debug(`[BRIA] ${message}`, data),
  info: (message: string, data?: any) => console.info(`[BRIA] ${message}`, data),
  warn: (message: string, data?: any) => console.warn(`[BRIA] ${message}`, data),
  error: (message: string, data?: any) => console.error(`[BRIA] ${message}`, data),
};

// Root URL for Bria's production engine API (v1).
const BRIA_BASE_URL = 'https://engine.prod.bria-api.com/v1';
// Get user's Bria API key from server secrets
|
||||
const getBriaApiKey = async (): Promise<string | null> => {
|
||||
try {
|
||||
const data = await apiClient<{ api_keys?: Record<string, any> }>('/api/me/secrets');
|
||||
const key = data.api_keys?.bria_api_key;
|
||||
if (!key) {
|
||||
logger.error('No Bria API key found. Please add your Bria API key in your profile settings.');
|
||||
return null;
|
||||
}
|
||||
return key;
|
||||
} catch (error) {
|
||||
logger.error('Error getting Bria API key:', error);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function to convert File to base64
|
||||
const fileToBase64 = (file: File): Promise<string> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsDataURL(file);
|
||||
reader.onload = () => {
|
||||
const result = reader.result as string;
|
||||
// Remove data URL prefix to get just the base64 string
|
||||
const base64 = result.split(',')[1];
|
||||
resolve(base64);
|
||||
};
|
||||
reader.onerror = error => reject(error);
|
||||
});
|
||||
};
|
||||
|
||||
// Helper to poll for async image generation
|
||||
const pollForImage = async (url: string, maxAttempts = 60, delayMs = 2000): Promise<boolean> => {
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
const response = await fetch(url, { method: 'HEAD' });
|
||||
if (response.ok && response.headers.get('content-length') !== '0') {
|
||||
return true; // Image is ready
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, delayMs));
|
||||
} catch (error) {
|
||||
logger.debug(`Poll attempt ${attempt + 1} failed, retrying...`);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Result of a successful generation/edit call: raw image bytes plus an
// optional text caption (Bria never returns one — see call sites).
interface ImageResult {
  imageData: ArrayBuffer;
  text?: string;
}
/**
 * Generate an image using the Bria text-to-image API.
 * The endpoint and model version are derived from the model string
 * (default 'bria-2.3-fast' — fast variant for a good speed/quality balance).
 *
 * @param prompt Text prompt describing the desired image.
 * @param model  Format "bria-{version}-{speed}", e.g. 'bria-3.2-fast', 'bria-2.3-base'.
 * @param apiKey Optional explicit key; otherwise the stored user secret is used.
 * @returns Image bytes (text is always undefined), or null when no API key exists.
 * @throws  Error on API failure, content moderation block, or polling timeout.
 */
export const createImageWithBria = async (
  prompt: string,
  model: string = 'bria-2.3-fast',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getBriaApiKey();

  if (!key) {
    logger.error('No Bria API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting Bria image generation', {
      model,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    // Parse model string to determine endpoint and version.
    // Format: "bria-{version}-{speed}" e.g., "bria-3.2-fast", "bria-2.3-base", "bria-2.2-hd"
    const parts = model.split('-');
    const version = parts[1] || '3.2';
    const speed = parts[2] || 'fast'; // fast, base, or hd

    const endpoint = `${BRIA_BASE_URL}/text-to-image/${speed}/${version}`;

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'api_token': key,
      },
      body: JSON.stringify({
        prompt,
        num_results: 1,
        sync: false, // Use async for better performance
        aspect_ratio: '1:1',
        steps_num: speed === 'fast' ? 8 : 30,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('Bria API error:', { status: response.status, error: errorText });
      throw new Error(`Bria API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('Bria API response:', data);

    // Bria reports some failures in-band via error_code.
    if (data.error_code) {
      throw new Error(data.description || `Bria API error: ${data.error_code}`);
    }

    if (!data.result || !Array.isArray(data.result) || data.result.length === 0) {
      throw new Error('Invalid response from Bria API: no results');
    }

    const firstResult = data.result[0];

    // Check if result was blocked by content moderation.
    if (firstResult.blocked) {
      throw new Error(firstResult.description || 'Content blocked by Bria moderation');
    }

    if (!firstResult.urls || firstResult.urls.length === 0) {
      throw new Error('No image URL in Bria response');
    }

    const imageUrl = firstResult.urls[0];
    logger.info('Image URL received from Bria:', imageUrl);

    // Poll for the image to be ready (async generation: sync=false above
    // means the URL may not serve content immediately).
    logger.info('Polling for image completion...');
    const isReady = await pollForImage(imageUrl);

    if (!isReady) {
      throw new Error('Image generation timed out');
    }

    // Fetch the generated image.
    const imageResponse = await fetch(imageUrl);
    if (!imageResponse.ok) {
      throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`);
    }

    const arrayBuffer = await imageResponse.arrayBuffer();

    logger.info('Successfully generated image with Bria', {
      model,
      imageSize: arrayBuffer.byteLength,
      seed: firstResult.seed,
    });

    return {
      imageData: arrayBuffer,
      text: undefined, // Bria doesn't return text descriptions
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('Bria image generation failed:', {
      error: error.message,
      model,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
/**
 * Edit an image using the Bria reimagine API (structure reference).
 * Maintains the structure and depth of the input while incorporating new
 * materials, colors, and textures.
 *
 * Only the FIRST file in imageFiles is used as the structure reference.
 *
 * @param prompt     Edit instruction for the model.
 * @param imageFiles Source image(s); element 0 becomes the structure reference.
 * @param model      Format "bria-{version}-{speed}"; only the speed segment is
 *                   consulted here (fast vs. not).
 * @param apiKey     Optional explicit key; otherwise the stored user secret is used.
 * @returns Edited image bytes, or null when no API key exists.
 * @throws  Error on API failure, content moderation block, or polling timeout.
 */
export const editImageWithBria = async (
  prompt: string,
  imageFiles: File[],
  model: string = 'bria-2.3-fast',
  apiKey?: string
): Promise<ImageResult | null> => {
  const key = apiKey || await getBriaApiKey();

  if (!key) {
    logger.error('No Bria API key found. Please provide an API key or set it in your profile.');
    return null;
  }

  try {
    logger.info('Starting Bria image editing (reimagine)', {
      model,
      imageCount: imageFiles.length,
      promptLength: prompt.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });

    // Convert the first image to base64 for the structure reference.
    const imageBase64 = await fileToBase64(imageFiles[0]);

    const endpoint = `${BRIA_BASE_URL}/reimagine`;

    // Parse model to determine if we should use fast mode.
    const parts = model.split('-');
    const speed = parts[2] || 'fast';
    const useFast = speed === 'fast';

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'api_token': key,
      },
      body: JSON.stringify({
        prompt,
        structure_image_file: imageBase64,
        structure_ref_influence: 0.75, // Good balance for maintaining structure while allowing changes
        num_results: 1,
        sync: false, // Use async for better performance
        fast: useFast,
        steps_num: useFast ? 12 : 30,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('Bria API error:', { status: response.status, error: errorText });
      throw new Error(`Bria API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    logger.debug('Bria API response (reimagine):', data);

    // Bria reports some failures in-band via error_code.
    if (data.error_code) {
      throw new Error(data.description || `Bria API error: ${data.error_code}`);
    }

    if (!data.result || !Array.isArray(data.result) || data.result.length === 0) {
      throw new Error('Invalid response from Bria API: no results');
    }

    const firstResult = data.result[0];

    // Check if result was blocked by content moderation.
    if (firstResult.blocked) {
      throw new Error(firstResult.description || 'Content blocked by Bria moderation');
    }

    if (!firstResult.urls || firstResult.urls.length === 0) {
      throw new Error('No image URL in Bria response');
    }

    const imageUrl = firstResult.urls[0];
    logger.info('Edited image URL received from Bria:', imageUrl);

    // Poll for the image to be ready (async generation).
    logger.info('Polling for edited image completion...');
    const isReady = await pollForImage(imageUrl);

    if (!isReady) {
      throw new Error('Image editing timed out');
    }

    // Fetch the edited image.
    const imageResponse = await fetch(imageUrl);
    if (!imageResponse.ok) {
      throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`);
    }

    const arrayBuffer = await imageResponse.arrayBuffer();

    logger.info('Successfully edited image with Bria', {
      model,
      imageSize: arrayBuffer.byteLength,
      seed: firstResult.seed,
    });

    return {
      imageData: arrayBuffer,
      text: undefined,
    };

  } catch (error: any) {
    // Log with context, then rethrow so the caller can handle/report.
    logger.error('Bria image editing failed:', {
      error: error.message,
      model,
      imageCount: imageFiles.length,
      promptPreview: prompt.substring(0, 100) + '...'
    });
    throw error;
  }
};
// Export the logger for consistency
|
||||
export { logger };
|
||||
|
||||
/**
 * Image Generation Router
 * Routes image generation requests to the appropriate AI provider based on the model format.
 * Model format: "provider/model-name"
 *
 * Supported providers:
 * - google: Google Generative AI (Gemini models)
 * - bria: Bria.ai (text-to-image and reimagine)
 * - aimlapi: AIML API (ByteDance, Flux, Imagen, DALL-E, SD, Recraft, ...)
 * - replicate: NOTE(review) — its handler import is commented out below, so
 *   replicate model strings currently fall through to the "unsupported" error.
 */

import { createImage as createImageGoogle, editImage as editImageGoogle } from '@/image-api';
//import { createImageWithReplicate, editImageWithReplicate } from '@/lib/replicate';
import { createImageWithBria, editImageWithBria } from '@/lib/bria';
import { createImageWithAimlApi, editImageWithAimlApi } from '@/lib/aimlapi';

// Console-backed logger for debugging; entries are tagged [IMAGE-ROUTER].
const logger = {
  debug: (message: string, data?: any) => console.debug(`[IMAGE-ROUTER] ${message}`, data),
  info: (message: string, data?: any) => console.info(`[IMAGE-ROUTER] ${message}`, data),
  warn: (message: string, data?: any) => console.warn(`[IMAGE-ROUTER] ${message}`, data),
  error: (message: string, data?: any) => console.error(`[IMAGE-ROUTER] ${message}`, data),
};
// Result of a successful generation/edit: raw image bytes plus an optional
// text caption (provider-dependent; may be undefined).
export interface ImageResult {
  imageData: ArrayBuffer;
  text?: string;
}

// Describes one selectable model and which operations it supports.
export interface ModelInfo {
  provider: string;      // provider id, e.g. 'google', 'bria', 'aimlapi'
  modelName: string;     // provider-specific model identifier
  displayName: string;   // human-readable label for the UI
  supportsTextToImage: boolean;
  supportsImageToImage: boolean;
}
// Available models configuration.
// Each entry pairs a provider id with a provider-specific model name; the
// routed model string is "provider/modelName" (see getModelString).
export const AVAILABLE_MODELS: ModelInfo[] = [
  // Google Generative AI (direct)
  { provider: 'google', modelName: 'gemini-3-pro-image-preview', displayName: 'Google Gemini 3 Pro (Image Preview)', supportsTextToImage: true, supportsImageToImage: true },
  { provider: 'google', modelName: 'gemini-3.1-flash-image-preview', displayName: 'Google Gemini 3.1 Flash (Image Preview)', supportsTextToImage: true, supportsImageToImage: true },
  /* Duplicate model name causing key conflicts - temporarily disabled
  { provider: 'google', modelName: 'gemini-3-pro-image-preview', displayName: 'Google Gemini 2.5 Flash (Image Preview)', supportsTextToImage: true, supportsImageToImage: true },
  */
  // Replicate — NOTE(review): listed here but the replicate handler import is
  // commented out in this file, so selecting it will throw at routing time.
  { provider: 'replicate', modelName: 'bytedance/seedream-4', displayName: 'Replicate SeeDream-4 (Bytedance)', supportsTextToImage: true, supportsImageToImage: true },
  // Bria.ai
  { provider: 'bria', modelName: 'bria-3.2-fast', displayName: 'Bria.ai 3.2 Fast', supportsTextToImage: true, supportsImageToImage: true },
  { provider: 'bria', modelName: 'bria-2.3-base', displayName: 'Bria.ai 2.3 Base (High Quality)', supportsTextToImage: true, supportsImageToImage: true },
  { provider: 'bria', modelName: 'bria-2.2-hd', displayName: 'Bria.ai 2.2 HD (1920x1080)', supportsTextToImage: true, supportsImageToImage: false }, // HD doesn't support reimagine
  // AIML API - ByteDance Models
  { provider: 'aimlapi', modelName: 'bytedance/seedream-v4-text-to-image', displayName: 'AIML API - SeeDream v4 (4K)', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'bytedance/seedream-v4-edit', displayName: 'AIML API - SeeDream v4 Edit (4K)', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'bytedance/seededit-3.0-i2i', displayName: 'AIML API - SeedEdit 3.0', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'bytedance/seedream-3.0', displayName: 'AIML API - SeeDream 3.0', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'bytedance/uso', displayName: 'AIML API - USO (i2i)', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'alibaba/qwen-image', displayName: 'AIML API - Qwen Image', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Flux Models
  { provider: 'aimlapi', modelName: 'flux-pro', displayName: 'AIML API - Flux Pro', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux-pro/v1.1', displayName: 'AIML API - Flux Pro v1.1', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux-pro/v1.1-ultra', displayName: 'AIML API - Flux Pro v1.1 Ultra', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux-realism', displayName: 'AIML API - Flux Realism', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux/dev', displayName: 'AIML API - Flux Dev', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'flux/dev/image-to-image', displayName: 'AIML API - Flux Dev i2i', supportsTextToImage: false, supportsImageToImage: true },
  { provider: 'aimlapi', modelName: 'flux/schnell', displayName: 'AIML API - Flux Schnell (Fast)', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Google Models
  { provider: 'aimlapi', modelName: 'imagen-3.0-generate-002', displayName: 'AIML API - Google Imagen 3', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen4/preview', displayName: 'AIML API - Google Imagen 4 Preview', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen-4.0-generate-001', displayName: 'AIML API - Google Imagen 4.0', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen-4.0-fast-generate-001', displayName: 'AIML API - Google Imagen 4.0 Fast', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/imagen-4.0-ultra-generate-001', displayName: 'AIML API - Google Imagen 4.0 Ultra', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/gemini-2.5-flash-image', displayName: 'AIML API - Gemini 2.5 Flash Image', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'google/gemini-2.5-flash-image-edit', displayName: 'AIML API - Gemini 2.5 Flash Edit', supportsTextToImage: false, supportsImageToImage: true },
  // AIML API - OpenAI Models
  { provider: 'aimlapi', modelName: 'dall-e-2', displayName: 'AIML API - DALL-E 2 (OpenAI)', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'dall-e-3', displayName: 'AIML API - DALL-E 3 (OpenAI)', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Stability AI Models
  { provider: 'aimlapi', modelName: 'stable-diffusion-v3-medium', displayName: 'AIML API - Stable Diffusion 3 Medium', supportsTextToImage: true, supportsImageToImage: false },
  { provider: 'aimlapi', modelName: 'stable-diffusion-v35-large', displayName: 'AIML API - Stable Diffusion 3.5 Large', supportsTextToImage: true, supportsImageToImage: false },
  // AIML API - Recraft AI
  { provider: 'aimlapi', modelName: 'recraft-v3', displayName: 'AIML API - Recraft v3', supportsTextToImage: true, supportsImageToImage: false },
];
/**
|
||||
* Parse model string into provider and model name
|
||||
* @param modelString Format: "provider/model-name"
|
||||
* @returns { provider, modelName }
|
||||
*/
|
||||
export const parseModelString = (modelString: string): { provider: string; modelName: string } => {
|
||||
const parts = modelString.split('/');
|
||||
|
||||
if (parts.length < 2) {
|
||||
// Default to Google if no provider specified
|
||||
logger.warn('Model string missing provider, defaulting to Google', { modelString });
|
||||
return {
|
||||
provider: 'google',
|
||||
modelName: modelString,
|
||||
};
|
||||
}
|
||||
|
||||
const provider = parts[0].toLowerCase();
|
||||
const modelName = parts.slice(1).join('/'); // Handle models with multiple slashes
|
||||
|
||||
return { provider, modelName };
|
||||
};
|
||||
|
||||
/**
|
||||
* Get full model string from provider and model name
|
||||
*/
|
||||
export const getModelString = (provider: string, modelName: string): string => {
|
||||
return `${provider}/${modelName}`;
|
||||
};
|
||||
|
||||
/**
|
||||
* Create/generate a new image from text prompt
|
||||
* Routes to the appropriate provider based on model string
|
||||
*/
|
||||
export const createImage = async (
|
||||
prompt: string,
|
||||
modelString: string = 'google/gemini-3-pro-image-preview',
|
||||
apiKey?: string,
|
||||
aspectRatio?: string,
|
||||
resolution?: string,
|
||||
enableSearchGrounding?: boolean,
|
||||
enableImageSearch?: boolean
|
||||
): Promise<ImageResult | null> => {
|
||||
const { provider, modelName } = parseModelString(modelString);
|
||||
|
||||
logger.info('Routing image creation request', {
|
||||
provider,
|
||||
modelName,
|
||||
promptLength: prompt.length,
|
||||
searchGrounding: !!enableSearchGrounding,
|
||||
imageSearch: !!enableImageSearch,
|
||||
});
|
||||
|
||||
try {
|
||||
switch (provider) {
|
||||
case 'google':
|
||||
return await createImageGoogle(prompt, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
|
||||
|
||||
case 'bria':
|
||||
return await createImageWithBria(prompt, modelName, apiKey);
|
||||
|
||||
case 'aimlapi':
|
||||
return await createImageWithAimlApi(prompt, modelName, apiKey);
|
||||
|
||||
default:
|
||||
logger.error('Unsupported provider', { provider, modelName });
|
||||
throw new Error(`Unsupported provider: ${provider}. Supported providers: google, replicate, bria, aimlapi`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
logger.error('Image creation failed', {
|
||||
provider,
|
||||
modelName,
|
||||
error: error.message,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Edit an existing image with a text prompt
|
||||
* Routes to the appropriate provider based on model string
|
||||
*/
|
||||
export const editImage = async (
|
||||
prompt: string,
|
||||
imageFiles: File[],
|
||||
modelString: string = 'google/gemini-3-pro-image-preview',
|
||||
apiKey?: string,
|
||||
aspectRatio?: string,
|
||||
resolution?: string,
|
||||
enableSearchGrounding?: boolean,
|
||||
enableImageSearch?: boolean
|
||||
): Promise<ImageResult | null> => {
|
||||
const { provider, modelName } = parseModelString(modelString);
|
||||
|
||||
logger.info('Routing image editing request', {
|
||||
provider,
|
||||
modelName,
|
||||
promptLength: prompt.length,
|
||||
imageCount: imageFiles.length,
|
||||
searchGrounding: !!enableSearchGrounding,
|
||||
imageSearch: !!enableImageSearch,
|
||||
});
|
||||
|
||||
try {
|
||||
switch (provider) {
|
||||
case 'google':
|
||||
return await editImageGoogle(prompt, imageFiles, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
|
||||
|
||||
case 'bria':
|
||||
return await editImageWithBria(prompt, imageFiles, modelName, apiKey);
|
||||
|
||||
case 'aimlapi':
|
||||
return await editImageWithAimlApi(prompt, imageFiles, modelName, apiKey);
|
||||
|
||||
default:
|
||||
logger.error('Unsupported provider', { provider, modelName });
|
||||
throw new Error(`Unsupported provider: ${provider}. Supported providers: google, replicate, bria, aimlapi`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
logger.error('Image editing failed', {
|
||||
provider,
|
||||
modelName,
|
||||
imageCount: imageFiles.length,
|
||||
error: error.message,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Get model info by model string
|
||||
*/
|
||||
export const getModelInfo = (modelString: string): ModelInfo | undefined => {
|
||||
return AVAILABLE_MODELS.find(
|
||||
(m) => getModelString(m.provider, m.modelName) === modelString
|
||||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* Get all models for a specific provider
|
||||
*/
|
||||
export const getModelsByProvider = (provider: string): ModelInfo[] => {
|
||||
return AVAILABLE_MODELS.filter((m) => m.provider === provider);
|
||||
};
|
||||
|
||||
|
||||
1448
packages/media/cpp/ref/images-ai/openai.ts
Normal file
1448
packages/media/cpp/ref/images-ai/openai.ts
Normal file
File diff suppressed because it is too large
Load Diff
306
packages/media/cpp/src/core/transform.cpp
Normal file
306
packages/media/cpp/src/core/transform.cpp
Normal file
@ -0,0 +1,306 @@
|
||||
#include "transform.hpp"
|
||||
#include "url_fetch.hpp"
|
||||
|
||||
#include <curl/curl.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace media {
|
||||
|
||||
// ── base64 encode/decode ────────────────────────────────────────────
|
||||
|
||||
static const char b64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encode `len` raw bytes as standard base64 with '=' padding.
static std::string base64_encode(const uint8_t* data, size_t len) {
    std::string encoded;
    encoded.reserve(((len + 2) / 3) * 4);

    size_t i = 0;
    // Full 3-byte groups -> 4 output characters each, no padding.
    while (i + 3 <= len) {
        uint32_t group = ((uint32_t)data[i] << 16) |
                         ((uint32_t)data[i + 1] << 8) |
                         (uint32_t)data[i + 2];
        encoded.push_back(b64_table[(group >> 18) & 0x3F]);
        encoded.push_back(b64_table[(group >> 12) & 0x3F]);
        encoded.push_back(b64_table[(group >> 6) & 0x3F]);
        encoded.push_back(b64_table[group & 0x3F]);
        i += 3;
    }

    // Trailing 1 or 2 bytes are padded out with '='.
    const size_t rem = len - i;
    if (rem == 1) {
        uint32_t group = (uint32_t)data[i] << 16;
        encoded.push_back(b64_table[(group >> 18) & 0x3F]);
        encoded.push_back(b64_table[(group >> 12) & 0x3F]);
        encoded.append("==");
    } else if (rem == 2) {
        uint32_t group = ((uint32_t)data[i] << 16) | ((uint32_t)data[i + 1] << 8);
        encoded.push_back(b64_table[(group >> 18) & 0x3F]);
        encoded.push_back(b64_table[(group >> 12) & 0x3F]);
        encoded.push_back(b64_table[(group >> 6) & 0x3F]);
        encoded.push_back('=');
    }
    return encoded;
}
|
||||
|
||||
// Map one base64 alphabet character to its 6-bit value, or -1 when the
// character is outside the standard alphabet.
static int b64_decode_char(char c) {
    switch (c) {
        case '+': return 62;
        case '/': return 63;
        default:
            if ('A' <= c && c <= 'Z') return c - 'A';
            if ('a' <= c && c <= 'z') return 26 + (c - 'a');
            if ('0' <= c && c <= '9') return 52 + (c - '0');
            return -1;
    }
}

// Decode a base64 string. '=' padding, whitespace, and any other
// non-alphabet byte are silently skipped.
static std::vector<uint8_t> base64_decode(const std::string& in) {
    std::vector<uint8_t> bytes;
    bytes.reserve((in.size() / 4) * 3);

    uint32_t acc = 0;   // bit accumulator
    int acc_bits = 0;   // number of valid bits currently in acc
    for (char c : in) {
        const int v = b64_decode_char(c);
        if (v < 0) continue;  // padding, whitespace, or junk
        acc = (acc << 6) | (uint32_t)v;
        acc_bits += 6;
        if (acc_bits >= 8) {
            acc_bits -= 8;
            bytes.push_back((uint8_t)(acc >> acc_bits));
        }
    }
    return bytes;
}
|
||||
|
||||
// ── MIME type from extension ────────────────────────────────────────
|
||||
|
||||
// Guess an image MIME type from a file extension (case-insensitive,
// leading dot included). Unknown extensions fall back to "image/jpeg".
static std::string mime_from_ext(const std::string& ext) {
    std::string lowered = ext;
    std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                   [](unsigned char c) { return (char)std::tolower(c); });

    struct Entry { const char* ext; const char* mime; };
    static const Entry table[] = {
        {".jpg",  "image/jpeg"}, {".jpeg", "image/jpeg"},
        {".png",  "image/png"},  {".webp", "image/webp"},
        {".gif",  "image/gif"},  {".bmp",  "image/bmp"},
        {".tif",  "image/tiff"}, {".tiff", "image/tiff"},
        {".avif", "image/avif"}, {".heic", "image/heic"},
    };
    for (const auto& e : table) {
        if (lowered == e.ext) return e.mime;
    }
    return "image/jpeg";  // safe default for unrecognised extensions
}
|
||||
|
||||
// ── curl helpers ────────────────────────────────────────────────────
|
||||
|
||||
// libcurl write callback: append the received chunk to the std::string
// passed via CURLOPT_WRITEDATA. Returning the full byte count signals
// success to libcurl.
static size_t string_write_cb(char* ptr, size_t size, size_t nmemb, void* ud) {
    const size_t total = size * nmemb;
    static_cast<std::string*>(ud)->append(ptr, total);
    return total;
}
|
||||
|
||||
// ── Google Gemini generateContent ───────────────────────────────────
|
||||
|
||||
// Send one image + prompt to Google's Gemini generateContent endpoint
// and write the returned image to disk.
//
// Flow: read input file -> base64-encode into the request JSON ->
// POST via libcurl -> parse candidates[0].content.parts[] for an
// inlineData image (plus any text parts) -> write image bytes to the
// output path. On any failure, returns a TransformResult with
// ok == false and a populated `error`; never throws.
static TransformResult call_gemini(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress)
{
    TransformResult res;

    // Read the whole input image into memory.
    if (progress) progress("Reading " + input_path);
    std::ifstream ifs(input_path, std::ios::binary);
    if (!ifs) {
        res.error = "Cannot open input: " + input_path;
        return res;
    }
    std::vector<uint8_t> img_bytes((std::istreambuf_iterator<char>(ifs)),
                                   std::istreambuf_iterator<char>());
    ifs.close();

    if (img_bytes.empty()) {
        res.error = "Input file is empty: " + input_path;
        return res;
    }

    // MIME type is inferred from the file extension, not the content.
    std::string mime = mime_from_ext(fs::path(input_path).extension().string());
    std::string b64 = base64_encode(img_bytes.data(), img_bytes.size());

    // Build request JSON: prompt text plus the image as inlineData.
    // responseModalities asks the model to return both text and an image.
    json req_body = {
        {"contents", json::array({
            {{"parts", json::array({
                {{"text", opts.prompt}},
                {{"inlineData", {{"mimeType", mime}, {"data", b64}}}}
            })}}
        })},
        {"generationConfig", {
            {"responseModalities", json::array({"TEXT", "IMAGE"})}
        }}
    };

    // NOTE(review): the API key travels as a URL query parameter, so it can
    // leak into logs/proxies; consider the x-goog-api-key header instead.
    std::string url = "https://generativelanguage.googleapis.com/v1beta/models/"
                      + opts.model + ":generateContent?key=" + opts.api_key;

    std::string body_str = req_body.dump();

    if (progress) progress("Sending to " + opts.model + " (" +
                           std::to_string(img_bytes.size() / 1024) + " KB)...");

    ensure_curl_global();
    CURL* curl = curl_easy_init();
    if (!curl) {
        res.error = "curl_easy_init failed";
        return res;
    }

    std::string response_str;
    struct curl_slist* headers = nullptr;
    headers = curl_slist_append(headers, "Content-Type: application/json");

    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body_str.c_str());
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)body_str.size());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, string_write_cb);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_str);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 120L);        // image generation can be slow
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);

    CURLcode cc = curl_easy_perform(curl);
    long http_code = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);

    if (cc != CURLE_OK) {
        res.error = std::string("HTTP request failed: ") + curl_easy_strerror(cc);
        return res;
    }

    if (http_code != 200) {
        res.error = "API returned HTTP " + std::to_string(http_code);
        // Try to extract error message from response JSON
        try {
            auto j = json::parse(response_str);
            if (j.contains("error") && j["error"].contains("message"))
                res.error += ": " + j["error"]["message"].get<std::string>();
        } catch (...) {
            // Response was not JSON; include it verbatim only if small.
            if (response_str.size() < 500) res.error += ": " + response_str;
        }
        return res;
    }

    // Parse response
    if (progress) progress("Parsing response...");
    json resp;
    try {
        resp = json::parse(response_str);
    } catch (const std::exception& e) {
        res.error = std::string("JSON parse error: ") + e.what();
        return res;
    }

    // Extract image and text from candidates[0].content.parts[]
    bool found_image = false;
    try {
        auto& parts = resp["candidates"][0]["content"]["parts"];
        for (auto& part : parts) {
            if (part.contains("inlineData")) {
                auto& id = part["inlineData"];
                // NOTE(review): resp_mime is read but never used — the output
                // file keeps the input path's extension even if the API
                // returns a different format; confirm this is intended.
                std::string resp_mime = id.value("mimeType", "image/png");
                std::string resp_b64 = id["data"].get<std::string>();
                // If multiple image parts arrive, the last one wins.
                res.image_data = base64_decode(resp_b64);
                found_image = true;
            }
            if (part.contains("text")) {
                // Concatenate all text parts, newline-separated.
                if (!res.ai_text.empty()) res.ai_text += "\n";
                res.ai_text += part["text"].get<std::string>();
            }
        }
    } catch (const std::exception& e) {
        res.error = std::string("Response parsing error: ") + e.what();
        // Include raw response excerpt for debugging
        if (response_str.size() < 2000) res.error += "\nRaw: " + response_str;
        return res;
    }

    if (!found_image || res.image_data.empty()) {
        res.error = "No image in API response";
        if (!res.ai_text.empty()) res.error += ". Model said: " + res.ai_text;
        return res;
    }

    // Write output; derive a name from input + prompt if none was given.
    std::string out = output_path;
    if (out.empty()) out = default_transform_output(input_path, opts.prompt);

    fs::path out_dir = fs::path(out).parent_path();
    if (!out_dir.empty()) {
        std::error_code ec;
        fs::create_directories(out_dir, ec);  // best-effort; write below reports failure
    }

    if (progress) progress("Writing " + out);
    std::ofstream ofs(out, std::ios::binary);
    if (!ofs) {
        res.error = "Cannot write output: " + out;
        return res;
    }
    ofs.write(reinterpret_cast<const char*>(res.image_data.data()),
              static_cast<std::streamsize>(res.image_data.size()));
    ofs.close();

    res.ok = true;
    res.output_path = out;
    return res;
}
|
||||
|
||||
// ── public API ──────────────────────────────────────────────────────
|
||||
|
||||
// Build a default output path from the input path and the prompt:
//   <dir>/<stem>_<sanitized-prompt><ext>
// The prompt is lowercased; alphanumerics are kept, space/'-'/'_' become
// '_', everything else is dropped; runs of '_' collapse; the slug is
// capped at ~40 chars. If sanitising leaves nothing (e.g. the prompt was
// all punctuation), the literal slug "edited" is used so the name never
// degenerates to "<stem>_<ext>" and never collides with the input file.
std::string default_transform_output(const std::string& input_path, const std::string& prompt) {
    namespace fs = std::filesystem;
    fs::path p(input_path);
    std::string stem = p.stem().string();
    std::string ext = p.extension().string();
    if (ext.empty()) ext = ".png";  // sensible default when input has no extension

    // Sanitize and truncate prompt for filename
    std::string slug;
    slug.reserve(prompt.size());
    for (char c : prompt) {
        if (std::isalnum((unsigned char)c))
            slug.push_back((char)std::tolower((unsigned char)c));
        else if (c == ' ' || c == '-' || c == '_')
            slug.push_back('_');
        // skip other chars
    }
    // collapse consecutive underscores
    std::string clean;
    for (char c : slug) {
        if (c == '_' && !clean.empty() && clean.back() == '_') continue;
        clean.push_back(c);
    }
    // trim trailing underscore
    while (!clean.empty() && clean.back() == '_') clean.pop_back();
    // truncate to ~40 chars (re-trim in case truncation exposed a '_')
    if (clean.size() > 40) clean.resize(40);
    while (!clean.empty() && clean.back() == '_') clean.pop_back();

    // Fallback when the prompt contained no usable characters; avoids a
    // dangling "<stem>_.<ext>" name.
    if (clean.empty()) clean = "edited";

    std::string result = stem + "_" + clean + ext;
    return (p.parent_path() / result).string();
}
|
||||
|
||||
// Validate options, then dispatch to the provider-specific backend.
// All failures are reported through TransformResult::error.
TransformResult transform_image(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress)
{
    // Guard clauses: both fields are mandatory for any provider.
    if (opts.prompt.empty())
        return {false, "prompt is required"};
    if (opts.api_key.empty())
        return {false, "API key is required (set IMAGE_TRANSFORM_GOOGLE_API_KEY in .env or pass --api-key)"};

    if (opts.provider != "google")
        return {false, "Unsupported provider: " + opts.provider};

    // Only the Google Gemini backend exists today.
    return call_gemini(input_path, output_path, opts, progress);
}
|
||||
|
||||
} // namespace media
|
||||
39
packages/media/cpp/src/core/transform.hpp
Normal file
39
packages/media/cpp/src/core/transform.hpp
Normal file
@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
namespace media {
|
||||
|
||||
// Configuration for a single image-transform request.
struct TransformOptions {
    std::string provider = "google";                  // backend selector; only "google" is dispatched by transform_image
    std::string model = "gemini-3-pro-image-preview"; // provider-specific model identifier
    std::string api_key;                              // required; transform_image fails fast when empty
    std::string prompt;                               // required edit instruction sent alongside the image
};
|
||||
|
||||
// Outcome of transform_image(): on success ok == true and output_path /
// image_data are filled in; on failure ok == false and `error` holds a
// human-readable message.
struct TransformResult {
    bool ok = false;                 // overall success flag
    std::string error;               // populated only when ok == false
    std::string output_path; // written file
    std::string ai_text;     // optional text part from model
    std::vector<uint8_t> image_data; // raw bytes (PNG/JPEG) before writing
};
|
||||
|
||||
// Callback invoked with short human-readable status updates as the
// transform progresses (reading input, sending request, writing output).
using TransformProgressFn = std::function<void(const std::string& status)>;

/// Edit a single image using a generative AI model.
/// Reads `input_path`, sends image + prompt to the API, writes result to `output_path`.
/// If `output_path` is empty, derives it from input + prompt.
/// Failures are reported via TransformResult::error; this does not throw.
TransformResult transform_image(
    const std::string& input_path,
    const std::string& output_path,
    const TransformOptions& opts,
    TransformProgressFn progress = nullptr
);

/// Build a default output path from input path and prompt text.
/// The prompt is sanitized into a short slug appended to the input stem.
std::string default_transform_output(const std::string& input_path, const std::string& prompt);
|
||||
|
||||
} // namespace media
|
||||
Loading…
Reference in New Issue
Block a user