diff --git a/packages/media/cpp/ref/images-ai/ImageWizard.tsx b/packages/media/cpp/ref/images-ai/ImageWizard.tsx new file mode 100644 index 00000000..43dd9515 --- /dev/null +++ b/packages/media/cpp/ref/images-ai/ImageWizard.tsx @@ -0,0 +1,2006 @@ +import React, { useEffect, useMemo } from "react"; +import { fetchPostById } from '@/modules/posts/client-posts'; +import { Button } from "@/components/ui/button"; +import { useWizardContext } from "@/hooks/useWizardContext"; +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle +} from "@/components/ui/alert-dialog"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { + Wand2, + Save, + ArrowLeft, + Plus, + Trash2 +} from "lucide-react"; +import { toast } from "sonner"; +import { useAuth } from "@/hooks/useAuth"; +import { useNavigate } from "react-router-dom"; +import { createImage, editImage } from "@/lib/image-router"; +import PublishDialog from "@/components/PublishDialog"; + +import { runTools } from "@/lib/openai"; +import { T, translate } from "@/i18n"; +import { DEFAULT_QUICK_ACTIONS, QuickAction as QuickActionType } from "@/constants"; +import { PromptPreset } from "@/components/PresetManager"; +import VoiceRecordingPopup from "@/components/VoiceRecordingPopup"; +import { Workflow } from "@/components/WorkflowManager"; +import { useLog } from "@/contexts/LogContext"; +import LogViewer from "@/components/LogViewer"; +import ImageLightbox from "@/components/ImageLightbox"; +import { uploadInternalVideo } from '@/utils/uploadUtils'; +import { ImageEditor } from "@/components/ImageEditor"; +import EditImageModal from "@/components/EditImageModal"; +import PostPicker from "@/modules/posts/components/PostPicker"; + +// Import types and handlers +import { 
ImageFile, ImageWizardProps } from "./ImageWizard/types"; +import { getUserApiKeys as getUserSecrets } from "@/modules/user/client-user"; +type QuickAction = QuickActionType; // Re-export for compatibility +import { + handleFileUpload, + toggleImageSelection as toggleImageSelectionUtil, + removeImageRequest, + confirmDeleteImage as confirmDeleteImageUtil, + setAsSelected as setAsSelectedUtil, + handleDownloadImage, +} from "./ImageWizard/handlers/imageHandlers"; +import { + handleOptimizePrompt as handleOptimizePromptUtil, + buildFullPrompt, + abortGeneration, +} from "./ImageWizard/handlers/generationHandlers"; +import { + publishImage as publishImageUtil, + quickPublishAsNew as quickPublishAsNewUtil, + publishToGallery as publishToGalleryUtil, + saveWizardImageAsVfsFile as saveWizardImageAsVfsFileUtil, +} from "./ImageWizard/handlers/publishHandlers"; +import { + loadFamilyVersions as loadFamilyVersionsUtil, + loadAvailableImages as loadAvailableImagesUtil, +} from "./ImageWizard/handlers/dataHandlers"; +// Settings handlers are now used via useUserSettings hook +import { + handleMicrophone as handleMicrophoneUtil, + handleVoiceToImage as handleVoiceToImageUtil, +} from "./ImageWizard/handlers/voiceHandlers"; +import { + handleAgentGeneration as handleAgentGenerationUtil, +} from "./ImageWizard/handlers/agentHandlers"; +import { + generateImageSplit as generateImageSplitUtil, +} from "./ImageWizard/handlers/promptHandlers"; +import { + handleDragEnter, + handleDragOver, + handleDragLeave, + handleDrop, +} from "./ImageWizard/handlers/dropHandlers"; +import { + WizardSidebar, + Prompt, + ImageGalleryPanel, +} from "./ImageWizard/components"; +import { PostComposer } from "@/modules/posts/components/PostComposer"; +import { createLogger } from "./ImageWizard/utils/logger"; +import { useImageWizardState } from "./ImageWizard/hooks/useImageWizardState"; +import * as wizardDb from "./ImageWizard/db"; +import { + loadPromptTemplates, + savePromptTemplates, + 
loadPromptPresets, + loadWorkflows, + loadQuickActions, + saveQuickActions, + loadPromptHistory, + addToPromptHistory, + navigatePromptHistory, + savePromptPreset, + updatePromptPreset, + deletePromptPreset, + saveWorkflow, + updateWorkflow, + deleteWorkflow, +} from "./ImageWizard/handlers/settingsHandlers"; +import { WizardProvider } from "./ImageWizard/context/WizardContext"; + +const ImageWizard: React.FC = ({ + isOpen, + onClose, + initialImages = [], + onPublish, + originalImageId, + mode = 'default', + initialPostTitle = "", + initialPostDescription = "", + initialPostSettings, + editingPostId = undefined +}) => { + const { user, session } = useAuth(); + const navigate = useNavigate(); + const { addLog, isLoggerVisible, setLoggerVisible } = useLog(); + + // Create logger instance for this component + const logger = createLogger(addLog, 'ImageWizard'); + + // Centralized state management - flat destructuring + const { + // Image state + images, setImages, availableImages, setAvailableImages, generatedImage, setGeneratedImage, + // Generation state + isGenerating, setIsGenerating, isAgentMode, setIsAgentMode, isSplitMode, setIsSplitMode, + isOptimizingPrompt, setIsOptimizingPrompt, abortControllerRef, + // UI state + dragIn, setDragIn, loadingImages, setLoadingImages, isPublishing, setIsPublishing, + dropZoneRef, dragLeaveTimeoutRef, + // Form state + prompt, setPrompt, postTitle, setPostTitle, postDescription, setPostDescription, + selectedModel, setSelectedModel, + // New state for Gemini 3 Pro + aspectRatio, setAspectRatio, + resolution, setResolution, + searchGrounding, setSearchGrounding, + imageSearch, setImageSearch, + // Dialog state + showDeleteConfirmDialog, setShowDeleteConfirmDialog, imageToDelete, setImageToDelete, + showSaveTemplateDialog, setShowSaveTemplateDialog, newTemplateName, setNewTemplateName, + showTemplateManager, setShowTemplateManager, showEditActionsDialog, setShowEditActionsDialog, + showLightboxPublishDialog, 
setShowLightboxPublishDialog, showVoicePopup, setShowVoicePopup, + // Lightbox state + lightboxOpen, setLightboxOpen, currentImageIndex, setCurrentImageIndex, + lightboxPrompt, setLightboxPrompt, selectedOriginalImageId, setSelectedOriginalImageId, + // Settings state + promptTemplates, setPromptTemplates, loadingTemplates, setLoadingTemplates, + promptPresets, setPromptPresets, selectedPreset, setSelectedPreset, + loadingPresets, setLoadingPresets, workflows, setWorkflows, + loadingWorkflows, setLoadingWorkflows, quickActions, setQuickActions, + editingActions, setEditingActions, loadingActions, setLoadingActions, + promptHistory, setPromptHistory, historyIndex, setHistoryIndex, + // Voice state + isRecording, setIsRecording, isTranscribing, setIsTranscribing, + mediaRecorderRef, audioChunksRef, voicePopupRef, + // Add editingPostId to destructuring + editingPostId: currentEditingPostId, + } = useImageWizardState(initialImages, initialPostTitle, initialPostDescription, editingPostId); + + const [settings, setSettings] = React.useState(initialPostSettings || { visibility: 'public' }); // Post settings + const [lastError, setLastError] = React.useState(null); + + // Auto-retry state for 503 "high demand" errors + const retryTimerRef = React.useRef | null>(null); + const retryCountdownRef = React.useRef | null>(null); + const retryCountRef = React.useRef(0); + const isRetryCallRef = React.useRef(false); + const [retryInfo, setRetryInfo] = React.useState<{ attempt: number; secondsLeft: number } | null>(null); + + const cancelRetry = React.useCallback(() => { + if (retryTimerRef.current) { clearTimeout(retryTimerRef.current); retryTimerRef.current = null; } + if (retryCountdownRef.current) { clearInterval(retryCountdownRef.current); retryCountdownRef.current = null; } + retryCountRef.current = 0; + isRetryCallRef.current = false; + setRetryInfo(null); + }, []); + + const scheduleRetry = React.useCallback((retryFn: () => void, errorMsg: string) => { + const MAX_RETRIES 
= 5; + const RETRY_SECS = 120; + if (retryCountRef.current >= MAX_RETRIES) { + retryCountRef.current = 0; + setLastError(`${errorMsg} — gave up after ${MAX_RETRIES} attempts`); + return; + } + retryCountRef.current++; + const attempt = retryCountRef.current; + let seconds = RETRY_SECS; + setRetryInfo({ attempt, secondsLeft: seconds }); + retryCountdownRef.current = setInterval(() => { + seconds--; + if (seconds > 0) { setRetryInfo({ attempt, secondsLeft: seconds }); } + else if (retryCountdownRef.current) { clearInterval(retryCountdownRef.current); retryCountdownRef.current = null; } + }, 1000); + retryTimerRef.current = setTimeout(() => { + if (retryCountdownRef.current) { clearInterval(retryCountdownRef.current); retryCountdownRef.current = null; } + retryTimerRef.current = null; + setRetryInfo(null); + setLastError(null); + isRetryCallRef.current = true; + retryFn(); + }, RETRY_SECS * 1000); + }, []); + + // Cleanup retry on unmount + React.useEffect(() => () => cancelRetry(), [cancelRetry]); + + // Editor and Settings state + const [editingImage, setEditingImage] = React.useState<{ url: string; id: string } | null>(null); + const [configuringImageId, setConfiguringImageId] = React.useState<{ id: string; title: string; description: string; visible: boolean } | null>(null); + const [showPostPicker, setShowPostPicker] = React.useState(false); + // const [isPublishing, setIsPublishing] = React.useState(false); // Using hook state or other defined state + useEffect(() => { + if (user?.id) { + loadPromptTemplates(user.id, setPromptTemplates, setLoadingTemplates); + loadPromptPresets(user.id, setPromptPresets, setLoadingPresets); + loadWorkflows(user.id, setWorkflows, setLoadingWorkflows); + loadQuickActions(user.id, setQuickActions, setLoadingActions, DEFAULT_QUICK_ACTIONS); + loadPromptHistory(user.id, setPromptHistory); + + // Load saved model selection from user_secrets + wizardDb.loadWizardModel(user.id).then(model => { + if (model) setSelectedModel(model); + }); 
+ } + }, [user?.id]); + + // Persist model selection when changed + const modelSaveTimeoutRef = React.useRef | null>(null); + useEffect(() => { + if (!user?.id) return; + // Debounce to avoid saving on initial load + if (modelSaveTimeoutRef.current) clearTimeout(modelSaveTimeoutRef.current); + modelSaveTimeoutRef.current = setTimeout(() => { + wizardDb.saveWizardModel(user.id, selectedModel); + }, 500); + return () => { if (modelSaveTimeoutRef.current) clearTimeout(modelSaveTimeoutRef.current); }; + }, [selectedModel, user?.id]); + + // Auto-upload effect for videos that come from share target (or other sources) without upload status + React.useEffect(() => { + // Find videos that have a file but no upload status (meaning they were just added via prop/state) + const pendingVideos = images.filter(img => + img.type === 'video' && + img.file && + !img.uploadStatus + ); + + if (pendingVideos.length > 0) { + console.log('Found pending videos for upload:', pendingVideos.length); + + pendingVideos.forEach(async (video) => { + if (!video.file || !user?.id) return; + + // Mark as uploading immediately + setImages(prev => prev.map(img => + img.id === video.id ? { ...img, uploadStatus: 'uploading', uploadProgress: 0 } : img + )); + + try { + // Perform upload + await uploadInternalVideo(video.file, user.id, (progress) => { + setImages(prev => prev.map(img => + img.id === video.id ? { ...img, uploadProgress: progress } : img + )); + }).then(data => { + setImages(prev => prev.map(img => + img.id === video.id ? { + ...img, + id: data.dbId || video.id, // Update ID to DB ID + realDatabaseId: data.dbId, + uploadStatus: 'ready', + src: data.thumbnailUrl || video.src || '', + } : img + )); + toast.success(translate('Video uploaded successfully')); + }); + } catch (err) { + console.error("Auto-upload video failed", err); + setImages(prev => prev.map(img => + img.id === video.id ? 
{ ...img, uploadStatus: 'error' } : img + )); + toast.error(translate('Failed to upload video')); + } + }); + } + }, [images, user?.id, setImages]); + + // Drop zone handlers are used directly from imported functions + + // Load initial image from Zustand store (replaces sessionStorage) + const { wizardInitialImage } = useWizardContext(); + + useEffect(() => { + if (wizardInitialImage) { + setImages(prev => { + // Load family versions for this image first, then add the initial image + if (wizardInitialImage.realDatabaseId) { + // Check the current selection status from database for this image + wizardDb.getImageSelectionStatus(wizardInitialImage.realDatabaseId) + .then((isSelected) => { + const updatedImageData = { ...wizardInitialImage, selected: isSelected }; + setImages(currentImages => { + // Check if image already exists to avoid duplicates + const exists = currentImages.find(img => img.id === updatedImageData.id); + if (exists) return currentImages; + return [...currentImages, updatedImageData]; + }); + }); + loadFamilyVersions([wizardInitialImage]); + } else { + // For non-database images, just add as is + const exists = prev.find(img => img.id === wizardInitialImage.id); + if (!exists) { + return [...prev, wizardInitialImage]; + } + } + return prev; + }); + + // Clear wizard image after loading (optional - keep it if you want to persist) + // clearWizardImage(); + } + }, [wizardInitialImage]); + + // Load complete family versions when initial images are provided + useEffect(() => { + if (initialImages.length > 0) { + loadFamilyVersions(initialImages); + } + }, [initialImages]); + + // Load available images from user and others + useEffect(() => { + loadAvailableImages(); + }, []); + + // Settings are now loaded in useUserSettings hook + + // Use extracted handlers with local state + const loadFamilyVersions = (parentImages: ImageFile[]) => + loadFamilyVersionsUtil(parentImages, setImages, logger); + + const loadAvailableImages = () => + 
loadAvailableImagesUtil(setAvailableImages, setLoadingImages); + + // Template operations + const applyTemplate = (template: string) => { + if (lightboxOpen) { + setLightboxPrompt(template); + } else { + setPrompt(template); + } + }; + + const deleteTemplate = async (index: number) => { + if (!user?.id) { + toast.error(translate('User not authenticated')); + return; + } + const newTemplates = promptTemplates.filter((_, i) => i !== index); + await savePromptTemplates(user.id, newTemplates); + setPromptTemplates(newTemplates); + }; + + const executeWorkflow = async (workflow: Workflow) => { + if (!user) { + toast.error(translate('User not authenticated')); + return; + } + toast.info(`🔄 Executing workflow: ${workflow.name}`); + logger.info(`Starting workflow: ${workflow.name} (${workflow.actions.length} actions)`); + try { + for (let i = 0; i < workflow.actions.length; i++) { + const action = workflow.actions[i]; + toast.info(`⚙️ Step ${i + 1}/${workflow.actions.length}: ${action.label}`); + logger.info(`Workflow step ${i + 1}: ${action.label} (${action.type})`); + + switch (action.type) { + case 'optimize_prompt': + await handleOptimizePrompt(); + break; + + case 'generate_image': + await generateImage(); + break; + + case 'generate_metadata': + // Generate title and description using AI + if (prompt.trim()) { + const apiKey = await wizardDb.getUserOpenAIKey(user.id); + + const result = await runTools({ + prompt: `Generate a title and description for this image prompt: "${prompt}"`, + preset: 'metadata-only', + apiKey, + }); + + const metadataToolCall = result.toolCalls?.find( + tc => 'function' in tc && tc.function?.name === 'generate_image_metadata' + ); + + if (metadataToolCall && 'function' in metadataToolCall) { + const metadata = JSON.parse(metadataToolCall.function.arguments || '{}'); + if (metadata.title) setPostTitle(metadata.title); + if (metadata.description) setPostDescription(metadata.description); + } + } + break; + + case 'quick_publish': + await 
quickPublishAsNew(); + break; + + /* Removed publish_to_gallery from workflow actions as it's not supported in type */ + + case 'download_image': + if (images.length > 0) { + const lastImage = images[images.length - 1]; + await handleDownloadImage(lastImage); + } + break; + + case 'enhance_image': + if (images.filter(img => img.selected).length > 0) { + setPrompt('Enhance and improve this image with better quality and details'); + await generateImage(); + } + break; + + case 'apply_style': + if (images.filter(img => img.selected).length > 0) { + setPrompt('Apply artistic style transformation to this image'); + await generateImage(); + } + break; + } + + // Small delay between actions + await new Promise(resolve => setTimeout(resolve, 500)); + } + toast.success(`✅ Workflow "${workflow.name}" completed successfully!`); + logger.success(`Workflow "${workflow.name}" completed successfully`); + } catch (error: any) { + console.error('Error executing workflow:', error); + logger.error(`Workflow "${workflow.name}" failed: ${error.message}`); + toast.error(`Workflow failed: ${error.message}`); + } + }; + + + const handleSaveCurrentPromptAsTemplate = () => { + const currentPrompt = lightboxOpen ? lightboxPrompt : prompt; + if (!currentPrompt.trim()) { + toast.error(translate('Please enter a prompt first')); + return; + } + setShowSaveTemplateDialog(true); + }; + + const confirmSaveTemplate = async () => { + if (!newTemplateName.trim()) { + toast.error(translate('Please enter a template name')); + return; + } + + if (!user?.id) { + toast.error(translate('User not authenticated')); + return; + } + + const currentPrompt = lightboxOpen ? 
lightboxPrompt : prompt; + const newTemplates = [...promptTemplates, { name: newTemplateName.trim(), template: currentPrompt }]; + await savePromptTemplates(user.id, newTemplates); + setPromptTemplates(newTemplates); + setNewTemplateName(""); + setShowSaveTemplateDialog(false); + }; + + const toggleImageSelection = (imageId: string, isMultiSelect = false, fromGallery = false) => + toggleImageSelectionUtil(imageId, isMultiSelect, fromGallery, availableImages, images, setImages); + + const removeImage = (imageId: string) => + removeImageRequest(imageId, setImageToDelete, setShowDeleteConfirmDialog); + + const confirmDeleteImage = () => + confirmDeleteImageUtil(imageToDelete, setImages, setImageToDelete, setShowDeleteConfirmDialog, initialImages, loadFamilyVersions); + + const deleteSelectedImages = () => { + const selectedImages = images.filter(img => img.selected); + if (selectedImages.length === 0) { + toast.error(translate('No images selected')); + return; + } + + // Show confirmation for bulk delete + const count = selectedImages.length; + setShowDeleteConfirmDialog(true); + setImageToDelete(`bulk:${count}`); // Special marker for bulk delete + }; + + const confirmBulkDelete = () => { + const selectedImages = images.filter(img => img.selected); + const selectedIds = selectedImages.map(img => img.id); + + // Remove all selected images + setImages(prev => prev.filter(img => !selectedIds.includes(img.id))); + + toast.success(translate(`${selectedImages.length} image(s) deleted`)); + logger.info(`Deleted ${selectedImages.length} selected images`); + + setShowDeleteConfirmDialog(false); + setImageToDelete(null); + }; + + const setAsSelected = (imageId: string) => + setAsSelectedUtil(imageId, images, setImages, initialImages, loadFamilyVersions); + + const handleMicrophone = () => + handleMicrophoneUtil( + isRecording, + mediaRecorderRef, + setIsRecording, + setIsTranscribing, + audioChunksRef, + lightboxOpen, + setLightboxPrompt, + setPrompt + ); + + const 
handleOptimizePrompt = () => + handleOptimizePromptUtil( + lightboxOpen ? lightboxPrompt : prompt, + lightboxOpen, + setLightboxPrompt, + setPrompt, + setIsOptimizingPrompt + ); + + const handleVoiceTranscription = (transcribedText: string) => { + setPrompt(transcribedText); + toast.success(translate('Voice transcribed successfully!')); + }; + + const handlePresetSelect = (preset: PromptPreset) => { + setSelectedPreset(preset); + toast.success(`Preset "${preset.name}" activated as context`); + }; + + const handlePresetClear = () => { + setSelectedPreset(null); + toast.info(translate('Preset context cleared')); + }; + + const handleVoiceToImage = (transcribedText: string) => + handleVoiceToImageUtil( + transcribedText, + user?.id, + selectedPreset, + selectedModel, + setPrompt, + setImages, + setPostTitle, + setPostDescription, + voicePopupRef, + logger + ); + + const handleAgentGeneration = () => + handleAgentGenerationUtil( + prompt, + user?.id, + buildFullPrompt(selectedPreset, prompt), + selectedModel, + setIsAgentMode, + setIsGenerating, + setImages, + setPostTitle, + setPostDescription, + logger + ); + + + const handlePublishToGallery = async () => { + await publishToGalleryUtil({ + user: user, + generatedImage: typeof generatedImage === 'string' ? generatedImage : (generatedImage as any)?.src || null, + images: images, + lightboxOpen: lightboxOpen, + currentImageIndex: currentImageIndex, + postTitle: '', + postDescription: '', + prompt: prompt, + isOrgContext: false, + orgSlug: null, + onPublish: (url) => { + // No navigation needed for gallery publish, maybe just stay or go specific place? + // For now stay in wizard or close? Implementation plan said "Verify success message" so staying might be fine, but closing is better UX usually. + // Let's close. 
+ onClose(); + navigate('/'); // Or to gallery view if possible + } + }, setIsPublishing); + }; + + const handleAppendToPost = () => { + setShowPostPicker(true); + }; + + const handlePostSelected = async (postId: string) => { + setShowPostPicker(false); + + // Switch to edit mode for this post + // This logic mimics CreationWizardPopup's handlePostSelected + try { + const toastId = toast.loading(translate('Loading post...')); + + // Fetch full post via API (returns FeedPost with pictures) + const post = await fetchPostById(postId); + + if (!post) throw new Error('Post not found'); + + // Transform existing pictures + const existingImages = (post.pictures || []) + .sort((a: any, b: any) => ((a.position || 0) - (b.position || 0))) + .map((p: any) => ({ + id: p.id, + path: p.id, + src: p.image_url, + title: p.title, + description: p.description || '', + selected: false, + realDatabaseId: p.id, + type: p.type || 'image', + isGenerated: false + })); + + // Merge current wizard images as new + const newImages = images.map(img => ({ + ...img, + selected: true + })); + + const combinedImages = [...existingImages, ...newImages]; + + setImages(combinedImages); + setPostTitle(post.title); + setPostDescription(post.description || ''); + + navigate('/wizard', { + state: { + mode: 'post', + editingPostId: postId, + initialImages: combinedImages, + postTitle: post.title, + postDescription: post.description, + postSettings: post.settings + }, + replace: true + }); + + toast.dismiss(toastId); + toast.success(translate('Switched to post editing mode')); + + } catch (err) { + console.error("Error loading post for append:", err); + toast.error(translate('Failed to load post')); + } + }; + + + + const executeQuickAction = (action: QuickAction) => { + const selectedImages = images.filter(img => img.selected); + if (selectedImages.length === 0) { + toast.error(translate('Please select at least one image for this action')); + return; + } + setPrompt(action.prompt); + }; + + const 
openEditActionsDialog = () => { + setEditingActions([...quickActions]); + setShowEditActionsDialog(true); + }; + + const addQuickAction = () => { + const newAction: QuickAction = { + id: `action_${Date.now()}`, + name: 'New Action', + prompt: '', + icon: '⭐' + }; + setEditingActions([...editingActions, newAction]); + }; + + const updateQuickAction = (id: string, field: keyof QuickAction, value: string) => { + setEditingActions(editingActions.map(action => + action.id === id ? { ...action, [field]: value } : action + )); + }; + + const deleteQuickAction = (id: string) => { + setEditingActions(editingActions.filter(action => action.id !== id)); + }; + + const saveEditedActions = async () => { + // Validate + const invalid = editingActions.find(a => !a.name.trim() || !a.prompt.trim()); + if (invalid) { + toast.error(translate('All actions must have a name and prompt')); + return; + } + + if (!user?.id) { + toast.error(translate('User not authenticated')); + return; + } + + await saveQuickActions(user.id, editingActions); + setQuickActions(editingActions); + setShowEditActionsDialog(false); + }; + + const handleLightboxPromptSubmit = async (promptText: string) => { + if (!lightboxOpen || currentImageIndex >= images.length) return; + + // Set the prompt and get the current lightbox image for editing + setPrompt(promptText); + + const targetImage = images[currentImageIndex]; + if (targetImage) { + + // Store current lightbox state + const wasLightboxOpen = lightboxOpen; + + // Directly pass the target image to avoid state timing issues + await generateImageWithSpecificImage(promptText, targetImage); + setLightboxPrompt(""); // Clear the lightbox prompt + + // Keep lightbox open if it was open during generation + if (wasLightboxOpen && !lightboxOpen) { + setLightboxOpen(true); + } + } + }; + + const openLightbox = (index: number) => { + setCurrentImageIndex(index); + setLightboxOpen(true); + + // Set the parent ID for version creation when opening lightbox + const 
currentImage = images[index]; + let parentId = null; + + if (currentImage) { + // If this image has a parentForNewVersions (from version map), use that + if (currentImage.parentForNewVersions) { + parentId = currentImage.parentForNewVersions; + } + // If this is the initial image from a database post, use its real ID + else if (currentImage.realDatabaseId) { + parentId = currentImage.realDatabaseId; + } + // If this is a saved image (has UUID), use it as parent + else if (currentImage.id && currentImage.id.match(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i)) { + parentId = currentImage.id; + } + // Otherwise, find the first non-generated image in the array + else { + const originalImage = images.find(img => !img.isGenerated && img.id && img.id.match(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i)); + parentId = originalImage?.id || null; + } + } + + setSelectedOriginalImageId(parentId); + + // Pre-populate lightbox prompt with main prompt for quick publish context + // Only if lightbox prompt is empty to avoid overwriting user's edits + if (!lightboxPrompt.trim() && prompt.trim()) { + setLightboxPrompt(prompt); + } + }; + + // Auto-update lightbox when images change + useEffect(() => { + if (lightboxOpen && currentImageIndex >= images.length && images.length > 0) { + // If current index is out of bounds, go to the last image (newest) + const newIndex = images.length - 1; + setCurrentImageIndex(newIndex); + } + }, [images.length, lightboxOpen, currentImageIndex]); + + const generateImageWithSpecificImage = async (promptText: string, targetImage: ImageFile) => { + if (!isRetryCallRef.current) { cancelRetry(); } + isRetryCallRef.current = false; + setIsGenerating(true); + setLastError(null); + try { + + let result: { imageData: ArrayBuffer; text?: string } | null = null; + + // Resolve correct API key for the selected provider + let apiKey: string | undefined = undefined; + if (user?.id) { + const secrets = await 
getUserSecrets(user.id); + if (secrets) { + const provider = selectedModel.split('/')[0]?.toLowerCase(); + switch (provider) { + case 'aimlapi': apiKey = secrets.aimlapi_api_key; break; + case 'replicate': apiKey = secrets.replicate_api_key; break; + case 'bria': apiKey = secrets.bria_api_key; break; + default: apiKey = secrets.google_api_key; break; + } + } + } + + // Edit the specific target image + if (targetImage.file) { + result = await editImage( + promptText, + [targetImage.file], + selectedModel, + apiKey, + aspectRatio, + resolution, + searchGrounding, + imageSearch + ); + } else { + // Convert any image source (data URL or HTTP URL) to File for editing + try { + const response = await fetch(targetImage.src); + const blob = await response.blob(); + const file = new File([blob], targetImage.title || 'image.png', { + type: blob.type || 'image/png' + }); + result = await editImage( + promptText, + [file], + selectedModel, + apiKey, + aspectRatio, + resolution, + searchGrounding, + imageSearch + ); + } catch (error) { + console.error('Error converting image:', error); + toast.error(translate('Failed to convert image for editing')); + return; + } + } + + if (result) { + // Convert ArrayBuffer to base64 data URL + const uint8Array = new Uint8Array(result.imageData); + const blob = new Blob([uint8Array], { type: 'image/png' }); + const reader = new FileReader(); + + reader.onload = () => { + const dataUrl = reader.result as string; + + // Add generated image to the images list and auto-select it + const newImage: ImageFile = { + id: `generated-${Date.now()}`, + src: dataUrl, + title: promptText.substring(0, 50) + (promptText.length > 50 ? '...' : ''), + selected: true, + isGenerated: true, + aiText: result.text, // Store AI description + parentForNewVersions: targetImage.parentForNewVersions || targetImage.realDatabaseId || (targetImage.id && /^[0-9a-f]{8}-/.test(targetImage.id) ? 
targetImage.id : undefined) + }; + const newIndex = images.length; // Calculate the new index BEFORE updating state + + // Deselect all other images and add the new one as selected + setImages(prev => { + const updated = [...prev.map(img => ({ ...img, selected: false })), newImage]; + + // If lightbox is open, update to show the new image immediately after state update + if (lightboxOpen) { + // Use setTimeout to ensure React has processed the state update + setTimeout(() => { + setCurrentImageIndex(newIndex); + }, 50); // Reduced timeout + } + + return updated; + }); + }; + + reader.readAsDataURL(blob); + } + } catch (error: any) { + console.error('Error generating image:', error); + const errMsg = error?.message || String(error); + // Extract meaningful message from GoogleGenerativeAI errors + // Pattern: "[GoogleGenerativeAI Error]: Error fetching from : [503 ] Actual message" + const httpMatch = errMsg.match(/\[\d{3}\s*\]\s*(.+)/s); + const userMessage = httpMatch ? httpMatch[1].trim() : errMsg.replace(/\[GoogleGenerativeAI Error\][:\s]*/i, '').trim(); + setLastError(userMessage || 'Failed to generate image'); + // Auto-retry for "high demand" / rate-limit errors + if (userMessage.toLowerCase().includes('later')) { + scheduleRetry(() => generateImageWithSpecificImage(promptText, targetImage), userMessage); + } else { + toast.error(userMessage || translate('Failed to generate image')); + } + } finally { + setIsGenerating(false); + } + }; + + const generateImageSplit = async () => { + await generateImageSplitUtil( + prompt, + images, + selectedModel, + abortControllerRef, + setIsGenerating, + setImages, + async (promptText: string) => { + if (user?.id) { + await addToPromptHistory(user.id, promptText, setPromptHistory, setHistoryIndex); + } + }, + logger + ); + }; + + const generateImage = async () => { + const fullPrompt = buildFullPrompt(selectedPreset, prompt); + + if (!fullPrompt.trim()) { + toast.error(translate('Please enter a prompt')); + return; + } + + 
// Add to history before generating + if (prompt.trim() && user?.id) { + await addToPromptHistory(user.id, prompt.trim(), setPromptHistory, setHistoryIndex); + } + + // Create new abort controller + abortControllerRef.current = new AbortController(); + + // Clear retry state only for manual triggers (not auto-retries) + if (!isRetryCallRef.current) { cancelRetry(); } + isRetryCallRef.current = false; + setLastError(null); + setIsGenerating(true); + logger.info(`Starting image generation with prompt: "${fullPrompt.substring(0, 50)}..."`); + // Create placeholder image with loading state + const placeholderId = `placeholder-${Date.now()}`; + const placeholderImage: ImageFile = { + id: placeholderId, + src: 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', // Transparent pixel — spinner overlay provides visual + title: 'Generating...', + selected: false, + isGenerated: false, + }; + // Remember which images were selected before we deselect for the placeholder + const previouslySelectedIds = images.filter(img => img.selected).map(img => img.id); + setImages(prev => [...prev.map(img => ({ ...img, selected: false })), placeholderImage]); + + try { + // Check if aborted + if (abortControllerRef.current.signal.aborted) { + // Remove placeholder on abort + setImages(prev => prev.filter(img => img.id !== placeholderId)); + return; + } + + // Get API Key – resolve the correct key based on the selected model's provider + let apiKey: string | undefined = undefined; + if (user?.id) { + const secrets = await getUserSecrets(user.id); + if (secrets) { + const provider = selectedModel.split('/')[0]?.toLowerCase(); + switch (provider) { + case 'aimlapi': apiKey = secrets.aimlapi_api_key; break; + case 'replicate': apiKey = secrets.replicate_api_key; break; + case 'bria': apiKey = secrets.bria_api_key; break; + default: apiKey = secrets.google_api_key; break; + } + } + } + + + const selectedImages = images.filter(img => img.selected); + + let result: { 
imageData: ArrayBuffer; text?: string } | null = null; + + if (selectedImages.length > 0) { + // Edit existing images - ensure we have File objects for ALL selected images + const files = selectedImages + .map(img => img.file) + .filter((file): file is File => file !== undefined); + + // Check if ALL selected images have File objects + if (files.length === selectedImages.length) { + // All images have files, use them directly + result = await editImage( + fullPrompt, + files, + selectedModel, + apiKey, + aspectRatio, + resolution, + searchGrounding, + imageSearch + ); + } else { + // Some images don't have File objects, convert all from src to ensure consistency + const convertedFiles: File[] = []; + for (const img of selectedImages) { + try { + const response = await fetch(img.src); + const blob = await response.blob(); + const file = new File([blob], img.title || 'image.png', { + type: blob.type || 'image/png' + }); + convertedFiles.push(file); + } catch (error) { + console.error('Error converting image:', img.title, error); + } + } + + if (convertedFiles.length > 0) { + result = await editImage( + fullPrompt, + convertedFiles, + selectedModel, + apiKey, + aspectRatio, + resolution, + searchGrounding, + imageSearch + ); + } else { + toast.error(translate('Failed to convert selected images for editing')); + return; + } + } + } else { + // Generate new image + result = await createImage( + fullPrompt, + selectedModel, + apiKey, + aspectRatio, + resolution, + searchGrounding, + imageSearch + ); + } + + if (result) { + logger.success('Image generated successfully'); + // Convert ArrayBuffer to base64 data URL + const uint8Array = new Uint8Array(result.imageData); + const blob = new Blob([uint8Array], { type: 'image/png' }); + const reader = new FileReader(); + + reader.onload = () => { + const dataUrl = reader.result as string; + setGeneratedImage(dataUrl); + + // Determine parent ID inheritance from selected image (if any) + let inheritedParentId: string | 
undefined; + if (selectedImages.length === 1) { + // If selected image is a Real DB image, it becomes the parent. + // If it's a generated image that has a parent pointer, we inherit that (sibling/child logic). + // However, strictly speaking, if we edit 'A', 'B' is a child of 'A'. + // But if 'A' is unsaved, we can't link to it. We fall back to 'A's parent if available. + inheritedParentId = selectedImages[0].realDatabaseId || selectedImages[0].parentForNewVersions; + } + + // Add generated image to the images list and auto-select it + const newImage: ImageFile = { + id: `generated-${Date.now()}`, + src: dataUrl, + title: (selectedPreset ? `[${selectedPreset.name}] ` : '') + prompt.substring(0, 40) + (prompt.length > 40 ? '...' : ''), + selected: true, + isGenerated: true, + aiText: result.text, // Store AI description + parentForNewVersions: inheritedParentId // Propagate parent ID + }; + logger.debug(`Added new image: ${newImage.title}`); + const newIndex = images.length; // Calculate the new index BEFORE updating state + + // Replace placeholder with actual image + setImages(prev => { + const withoutPlaceholder = prev.filter(img => img.id !== placeholderId); + const updated = [...withoutPlaceholder.map(img => ({ ...img, selected: false })), newImage]; + + // If lightbox is open, update to show the new image immediately after state update + if (lightboxOpen) { + // Use setTimeout to ensure React has processed the state update + setTimeout(() => { + setCurrentImageIndex(newIndex); + }, 50); // Reduced timeout + } + + return updated; + }); + }; + + reader.readAsDataURL(blob); + } + } catch (error: any) { + console.error('Error generating image:', error); + const errMsg = error?.message || String(error); + logger.error(`Failed to generate image: ${errMsg}`); + // Extract meaningful message from GoogleGenerativeAI errors + // Pattern: "[GoogleGenerativeAI Error]: Error fetching from : [503 ] Actual message" + const httpMatch = errMsg.match(/\[\d{3}\s*\]\s*(.+)/s); + 
const userMessage = httpMatch ? httpMatch[1].trim() : errMsg.replace(/\[GoogleGenerativeAI Error\][:\s]*/i, '').trim(); + setLastError(userMessage || 'Failed to generate image'); + // Auto-retry for "high demand" / rate-limit errors + if (userMessage.toLowerCase().includes('later')) { + scheduleRetry(() => generateImage(), userMessage); + } else { + toast.error(userMessage || translate('Failed to generate image')); + } + // Remove placeholder on error and restore previous selection + setImages(prev => prev + .filter(img => img.id !== placeholderId) + .map(img => ({ ...img, selected: previouslySelectedIds.includes(img.id) })) + ); + } finally { + setIsGenerating(false); + } + }; + + const publishImage = () => + publishImageUtil( + { + user, + generatedImage, + images, + lightboxOpen, + currentImageIndex, + postTitle, + prompt, + isOrgContext: false, + orgSlug: null, + onPublish, + }, + setIsPublishing + ); + + // Quick publish function that uses prompt as description + const quickPublishAsNew = () => + quickPublishAsNewUtil( + { + user, + generatedImage, + images, + lightboxOpen, + currentImageIndex, + postTitle, + prompt, + isOrgContext: false, + orgSlug: null, + onPublish, + }, + setIsPublishing + ); + + const handleSaveAsVfsFile = () => + saveWizardImageAsVfsFileUtil( + { + user, + generatedImage, + images, + lightboxOpen, + currentImageIndex, + postTitle, + prompt, + isOrgContext: false, + orgSlug: null, + accessToken: session?.access_token, + }, + setIsPublishing, + ); + + const hasVfsFileContext = useMemo(() => images.some((img) => img.meta?.vfs), [images]); + + const handleAddToPost = async (imageSrc: string, title: string, description?: string) => { + if (!currentEditingPostId) { + toast.error("No active post to add to"); + return; + } + + if (!user) { + toast.error(translate('User not authenticated')); + return; + } + + setIsPublishing(true); + try { + // Fetch the image + const response = await fetch(imageSrc); + const blob = await response.blob(); + + 
await wizardDb.publishImageToPost({ + userId: user.id, + blob, + title: title || prompt, // Fallback to prompt if no title + description: description, + postId: currentEditingPostId, + isOrgContext: false, + orgSlug: null, + collectionIds: [] + }); + // Navigate back success + navigate(`/post/${currentEditingPostId}`); + } catch (error: any) { + console.error('Error adding to post:', error); + + // Reset loading state + if (currentImageIndex !== -1) { + setImages(prev => { + const newImgs = [...prev]; + // Only reset if it still exists at that index (simple check) + if (newImgs[currentImageIndex]) { + newImgs[currentImageIndex] = { ...newImgs[currentImageIndex], isAddingToPost: false }; + } + return newImgs; + }); + } + + if (error.code === '23503') { + toast.error(translate('Could not add to this post. The post might have been deleted or the link is invalid.')); + } else { + toast.error(translate('Failed to add image to post')); + } + } + }; + + const handleLightboxPublish = async (option: 'overwrite' | 'new' | 'version' | 'add-to-post', imageUrl: string, title: string, description?: string, parentId?: string, collectionIds?: string[]) => { + if (!user) { + toast.error(translate('User not authenticated')); + return; + } + + if (currentImageIndex >= images.length) { + toast.error(translate('No image selected')); + return; + } + + const currentImage = images[currentImageIndex]; + if (!currentImage) { + toast.error(translate('No image available to publish')); + return; + } + + // Handle Add to Post + if (option === 'add-to-post' && currentEditingPostId) { + await handleAddToPost(currentImage.src, title, description); + setShowLightboxPublishDialog(false); + return; + } + + setIsPublishing(true); + try { + // Convert image to blob for upload + const response = await fetch(currentImage.src); + const blob = await response.blob(); + + if (option === 'overwrite') { + toast.info(translate('Overwrite not supported in wizard, creating new image instead')); + option = 'new'; 
+ } + + if (option === 'new') { + await wizardDb.publishImageAsNew({ + userId: user.id, + blob, + title, + description, + isOrgContext: false, + orgSlug: null, + collectionIds, + }); + } else if (option === 'version' && parentId) { + await wizardDb.publishImageAsVersion({ + userId: user.id, + blob, + title, + description, + parentId, + isOrgContext: false, + orgSlug: null, + collectionIds, + }); + } else if (option === 'version' && !parentId) { + toast.info(translate('No parent image found, creating as new post instead')); + // Recursive call with 'new' + // FIX: call self but ensure we don't loop if logic is wrong. + // Here it changes option to 'new', so it should be fine. + // We need to re-call handleLightboxPublish logic essentially. + // For simplicity, just run the new logic here: + await wizardDb.publishImageAsNew({ + userId: user.id, + blob, + title, + description, + isOrgContext: false, + orgSlug: null, + collectionIds, + }); + } + + setShowLightboxPublishDialog(false); + // Only navigate/close if NOT "Add to Post" (which handled navigation) + // For standard publish, we usually close wizard or call onPublish prop + onPublish?.(currentImage.src, lightboxPrompt); + } catch (error) { + console.error('Error publishing image:', error); + toast.error(translate('Failed to publish image')); + } finally { + setIsPublishing(false); + } + }; + + const handleEditImage = (index: number) => { + const img = images[index]; + if (img) { + setEditingImage({ url: img.src, id: img.realDatabaseId || img.id }); + } + }; + + const handleConfigureImage = (index: number) => { + const img = images[index]; + if (img && img.realDatabaseId) { + setConfiguringImageId({ + id: img.realDatabaseId, + title: img.title, + description: img.aiText || null, + visible: true // Default + }); + } else { + toast.error(translate("Please save the image first to configure settings")); + } + }; + + // Prepare context value + const contextValue = { + // User + userId: user?.id, + + // Images + images, 
+ setImages, + availableImages, + generatedImage, + + // Generation state + isGenerating, + isAgentMode, + isSplitMode, + setIsSplitMode, + isOptimizingPrompt, + + // Form state + prompt, + setPrompt, + postTitle, + setPostTitle, + selectedModel, + setSelectedModel, + aspectRatio, + setAspectRatio, + resolution, + setResolution, + searchGrounding, + setSearchGrounding, + + // Settings + selectedPreset, + promptPresets, + loadingPresets, + workflows, + loadingWorkflows, + promptTemplates, + quickActions, + + // Prompt history + promptHistory, + historyIndex, + setHistoryIndex, + + // Voice + isRecording, + isTranscribing, + + // UI state + isPublishing, + dragIn, + setDragIn, + loadingImages, + dragLeaveTimeoutRef, + + // Actions - Image operations + toggleImageSelection, + openLightbox, + setAsSelected, + removeImage, + deleteSelectedImages, + + // Actions - Generation + generateImage, + generateImageSplit, + handleAgentGeneration, + handleOptimizePrompt, + handleMicrophone, + + // Actions - Settings + handlePresetSelect, + handlePresetClear, + savePreset: async (preset: Omit) => { + if (user?.id) await savePromptPreset(user.id, preset, setPromptPresets); + }, + updatePreset: async (id: string, preset: Omit) => { + if (user?.id) await updatePromptPreset(user.id, id, preset, setPromptPresets); + }, + deletePreset: async (id: string) => { + if (user?.id) await deletePromptPreset(user.id, id, setPromptPresets); + }, + + saveWorkflow: async (workflow: Omit) => { + if (user?.id) await saveWorkflow(user.id, workflow, setWorkflows); + }, + updateWorkflow: async (id: string, workflow: Omit) => { + if (user?.id) await updateWorkflow(user.id, id, workflow, setWorkflows); + }, + deleteWorkflow: async (id: string) => { + if (user?.id) await deleteWorkflow(user.id, id, setWorkflows); + }, + executeWorkflow, + + applyTemplate, + deleteTemplate, + handleSaveCurrentPromptAsTemplate, + + executeQuickAction, + openEditActionsDialog, + + navigateHistory: (direction: 'up' | 'down') => 
+ navigatePromptHistory(direction, promptHistory, historyIndex, setHistoryIndex, setPrompt), + + // Actions - Publishing + quickPublishAsNew, + publishImage, + + // Actions - Misc + setShowVoicePopup, + + // Logger + logger, + + // Add To Post State + currentEditingPostId, + handleAddToPost: (imageSrc: string) => handleAddToPost(imageSrc, postTitle || prompt, postDescription) + }; + + if (!isOpen) return null; + + return ( + + {/* Editor Overlay */} + {editingImage && ( +
+ setEditingImage(null)} + onSave={(newUrl) => { + setImages(prev => prev.map(img => + (img.id === editingImage.id || img.realDatabaseId === editingImage.id) + ? { ...img, src: newUrl, isGenerated: false } + : img + )); + setEditingImage(null); + }} + /> +
+ )} + + {/* Settings Modal */} + {configuringImageId && ( + !open && setConfiguringImageId(null)} + pictureId={configuringImageId.id} + currentTitle={configuringImageId.title} + currentDescription={configuringImageId.description} + currentVisible={configuringImageId.visible} + imageUrl={images.find(i => i.realDatabaseId === configuringImageId.id)?.src} + onUpdateSuccess={() => { + setConfiguringImageId(null); + toast.success(translate("Image settings updated")); + }} + /> + )} + +
+ {/* Header */} +
+
+ +
+ +

+ {mode === 'post' ? Post Wizard : AI Image Wizard} +

+
+
+ + {/* Unified Lightbox Component */} + 0} + onClose={() => setLightboxOpen(false)} + imageUrl={images[currentImageIndex]?.src || ''} + imageTitle={images[currentImageIndex]?.title || 'Generated Image'} + originalImageId={selectedOriginalImageId || undefined} + onPromptSubmit={(promptText) => handleLightboxPromptSubmit(promptText)} + onPublish={handleLightboxPublish} + isGenerating={isGenerating} + isPublishing={isPublishing} + showPrompt={true} + showPublish={images[currentImageIndex]?.isGenerated} + generatedImageUrl={undefined} + currentIndex={currentImageIndex} + totalCount={images.length} + onNavigate={(direction) => { + const newIndex = direction === 'next' ? currentImageIndex + 1 : currentImageIndex - 1; + if (newIndex >= 0 && newIndex < images.length) { + setCurrentImageIndex(newIndex); + } + }} + // Wizard features + showWizardFeatures={true} + promptTemplates={promptTemplates} + onApplyTemplate={(template) => setLightboxPrompt(template)} + onSaveTemplate={handleSaveCurrentPromptAsTemplate} + onDeleteTemplate={deleteTemplate} + onOptimizePrompt={handleOptimizePrompt} + isOptimizing={isOptimizingPrompt} + onMicrophoneToggle={handleMicrophone} + isRecording={isRecording} + isTranscribing={isTranscribing} + showQuickPublish={images[currentImageIndex]?.isGenerated && lightboxPrompt.trim().length > 0} + onQuickPublish={quickPublishAsNew} + prompt={lightboxPrompt} + onPromptChange={setLightboxPrompt} + // Prompt history + promptHistory={promptHistory} + historyIndex={historyIndex} + onNavigateHistory={(direction) => navigatePromptHistory(direction, promptHistory, historyIndex, setHistoryIndex, setLightboxPrompt)} + onManualPromptEdit={() => setHistoryIndex(-1)} + /> +
+ + {/* Logger Panel - Collapsible */} + {isLoggerVisible && ( +
+
+ setLoggerVisible(false)} /> +
+
+ )} + +
+ {mode === 'post' ? ( + handleFileUpload(event, setImages, user)} + dropZoneRef={dropZoneRef} + isDragging={dragIn} + onDragEnter={(e) => handleDragEnter(e, dragLeaveTimeoutRef, setDragIn)} + onDragOver={(e) => handleDragOver(e, dragIn, setDragIn)} + onDragLeave={(e) => handleDragLeave(e, dragLeaveTimeoutRef, setDragIn)} + onDrop={(e) => handleDrop(e, dragLeaveTimeoutRef, setDragIn, setImages, user)} + postTitle={postTitle} + setPostTitle={setPostTitle} + postDescription={postDescription} + setPostDescription={setPostDescription} + isEditing={!!currentEditingPostId} + postId={currentEditingPostId} + settings={settings} + setSettings={setSettings} + onPublish={() => { + // Auto-inject link into settings if present + const externalPage = images.find(img => img.type === 'page-external'); + const publishSettings = { ...settings }; + + if (externalPage && externalPage.path && !publishSettings.link) { + publishSettings.link = externalPage.path; + } + + publishImageUtil({ + user: user, + generatedImage: typeof generatedImage === 'string' ? 
generatedImage : (generatedImage as any)?.src || null, + images: images, + lightboxOpen: lightboxOpen, + currentImageIndex: currentImageIndex, + postTitle: postTitle, + postDescription: postDescription, + settings: publishSettings, // Pass enriched settings + prompt: prompt, + isOrgContext: false, + orgSlug: null, + publishAll: mode === 'post', + editingPostId: currentEditingPostId, + onPublish: (url, postId) => { + onClose(); + // If we have a postId (passed as 2nd arg for posts), navigate to it + if (postId && (mode === 'post' || currentEditingPostId)) { + navigate(`/post/${postId}`); + } else { + navigate('/'); + } + } + }, setIsPublishing); + }} + onPublishToGallery={handlePublishToGallery} + onAppendToPost={handleAppendToPost} + isPublishing={isPublishing} + /> + ) : ( + <> + {/* Left Panel - Controls */} + user?.id && savePromptPreset(user.id, preset, setPromptPresets)} + onUpdatePreset={(id, preset) => user?.id && updatePromptPreset(user.id, id, preset, setPromptPresets)} + onDeletePreset={(id) => user?.id && deletePromptPreset(user.id, id, setPromptPresets)} + workflows={workflows} + loadingWorkflows={loadingWorkflows} + onSaveWorkflow={(workflow) => user?.id && saveWorkflow(user.id, workflow, setWorkflows)} + onUpdateWorkflow={(id, workflow) => user?.id && updateWorkflow(user.id, id, workflow, setWorkflows)} + onDeleteWorkflow={(id) => user?.id && deleteWorkflow(user.id, id, setWorkflows)} + onExecuteWorkflow={executeWorkflow} + isGenerating={isGenerating} + isAgentMode={isAgentMode} + isSplitMode={isSplitMode} + prompt={prompt} + onGenerate={generateImage} + onGenerateSplit={generateImageSplit} + onAgentGenerate={handleAgentGeneration} + onVoiceGenerate={() => setShowVoicePopup(true)} + onAbort={() => { cancelRetry(); abortGeneration(abortControllerRef, setIsGenerating, setIsAgentMode, setImages, logger); }} + images={images} + generatedImage={generatedImage} + postTitle={postTitle} + onPostTitleChange={setPostTitle} + isPublishing={isPublishing} + 
onQuickPublish={quickPublishAsNew} + onPublish={() => setShowLightboxPublishDialog(true)} + onPublishToGallery={handlePublishToGallery} + onAppendToPost={handleAppendToPost} + showSaveAsVfsFile={hasVfsFileContext} + onSaveAsVfsFile={handleSaveAsVfsFile} + onAddToPost={() => { + const currentImage = images[images.length - 1]; // Default to most recent for sidebar action + if (currentImage) handleAddToPost(currentImage.src, postTitle || prompt, postDescription) + }} + editingPostId={currentEditingPostId} + lastError={lastError} + retryInfo={retryInfo} + onDismissError={() => { cancelRetry(); setLastError(null); }} + > + navigatePromptHistory(direction, promptHistory, historyIndex, setHistoryIndex, setPrompt)} + onManualEdit={() => setHistoryIndex(-1)} + isGenerating={isGenerating} + onGenerate={generateImage} + onImagePaste={(image) => setImages(prev => [...prev, image])} + /> + + + {/* Right Panel - Images */} + handleFileUpload(event, setImages, user)} + onDeleteSelected={deleteSelectedImages} + onDownload={handleDownloadImage} + onSetAsSelected={setAsSelected} + onSaveAsVersion={(img, idx) => { + setCurrentImageIndex(idx); + const currentImage = images[idx]; + let parentId = null; + + if (currentImage.parentForNewVersions) { + parentId = currentImage.parentForNewVersions; + } else if (currentImage.realDatabaseId) { + parentId = currentImage.realDatabaseId; + } else { + const originalImage = images.find(img => !img.isGenerated && img.id && img.id.match(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i)); + parentId = originalImage?.id || null; + } + + setSelectedOriginalImageId(parentId); + setShowLightboxPublishDialog(true); + }} + onEdit={handleEditImage} + onRemove={removeImage} + availableImages={availableImages} + loadingImages={loadingImages} + onGalleryImageSelect={(imageId, isMultiSelect) => toggleImageSelection(imageId, isMultiSelect, true)} + quickActions={quickActions} + onExecuteAction={executeQuickAction} + 
onEditActions={openEditActionsDialog} + onSettings={handleConfigureImage} + isGenerating={isGenerating} + dragIn={dragIn} + onDragEnter={(e) => handleDragEnter(e, dragLeaveTimeoutRef, setDragIn)} + onDragOver={(e) => handleDragOver(e, dragIn, setDragIn)} + onDragLeave={(e) => handleDragLeave(e, dragLeaveTimeoutRef, setDragIn)} + onDrop={(e) => handleDrop(e, dragLeaveTimeoutRef, setDragIn, setImages, user)} + dropZoneRef={dropZoneRef} + onAddToPost={(image) => handleAddToPost(image.src, postTitle || prompt, postDescription)} + editingPostId={currentEditingPostId} + /> + + )} +
+ + + + + Select a Post to Append To + + Choose one of your existing posts to add these images to. + + +
+ +
+
+
+ + {/* Lightbox Publish Dialog */} + {/* Delete Confirmation Dialog */} + + + + + {imageToDelete?.startsWith('bulk:') ? ( + Delete Multiple Images + ) : ( + Delete Image Version + )} + + + {imageToDelete?.startsWith('bulk:') ? ( + <> + Are you sure you want to delete {imageToDelete.split(':')[1]} selected image(s)? This action cannot be undone. + + ) : ( + Are you sure you want to delete this image version? This action cannot be undone and will permanently remove the image from your account. + )} + + + + setShowDeleteConfirmDialog(false)}> + Cancel + + + Delete + + + + + + setShowLightboxPublishDialog(false)} + onPublish={(option, title, description, parentId, collectionIds) => { + const currentImage = images[currentImageIndex]; + if (currentImage) { + // Now accepts 'add-to-post' as option + handleLightboxPublish(option as any, currentImage.src, title || '', description, parentId, collectionIds); + } + }} + originalTitle={images[currentImageIndex]?.title || 'Generated Image'} + originalImageId={selectedOriginalImageId || (originalImageId && originalImageId.match(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i) ? originalImageId : undefined)} + isPublishing={isPublishing} + editingPostId={currentEditingPostId} // Pass prop + /> + + {/* Save Template Dialog */} + + + + Save Prompt Template + + Give your prompt template a name to save it for later use. + + +
+
+ + setNewTemplateName(e.target.value)} + placeholder={translate("e.g. Cyberpunk Portrait")} + onKeyDown={(e) => { + if (e.key === 'Enter') { + e.preventDefault(); + confirmSaveTemplate(); + } + }} + autoFocus + /> +
+
+ +
+ {lightboxOpen ? lightboxPrompt : prompt} +
+
+
+ + + + +
+
+ + {/* Voice Recording Popup */} + setShowVoicePopup(false)} + onTranscriptionComplete={handleVoiceTranscription} + onGenerateImage={handleVoiceToImage} + showToolCalls={true} + /> + + {/* Edit Quick Actions Dialog */} + + + + Edit Quick Actions + + Customize the quick actions that appear above your images. Add, edit, or remove actions. + + + +
+ {editingActions.map((action, index) => ( +
+ updateQuickAction(action.id, 'icon', e.target.value)} + className="w-16 text-center" + maxLength={2} + /> + updateQuickAction(action.id, 'name', e.target.value)} + className="flex-1" + maxLength={30} + /> + updateQuickAction(action.id, 'prompt', e.target.value)} + className="flex-[2]" + maxLength={200} + /> + +
+ ))} + + +
+ + + + + + +
+
+ +
+
+ ); +}; + +export default ImageWizard; diff --git a/packages/media/cpp/ref/images-ai/aimlapi.ts b/packages/media/cpp/ref/images-ai/aimlapi.ts new file mode 100644 index 00000000..532ceae9 --- /dev/null +++ b/packages/media/cpp/ref/images-ai/aimlapi.ts @@ -0,0 +1,279 @@ +import { apiClient } from "@/lib/db"; + +// Simple logger for user feedback +const logger = { + debug: (message: string, data?: any) => console.debug(`[AIMLAPI] ${message}`, data), + info: (message: string, data?: any) => console.info(`[AIMLAPI] ${message}`, data), + warn: (message: string, data?: any) => console.warn(`[AIMLAPI] ${message}`, data), + error: (message: string, data?: any) => console.error(`[AIMLAPI] ${message}`, data), +}; + +const AIMLAPI_BASE_URL = 'https://api.aimlapi.com'; + +// Get user's AIML API key from server secrets +const getAimlApiKey = async (): Promise => { + try { + const data = await apiClient<{ api_keys?: Record }>('/api/me/secrets'); + const key = data.api_keys?.aimlapi_api_key; + if (!key) { + logger.error('No AIML API key found. Please add your AIML API key in your profile settings.'); + return null; + } + return key; + } catch (error) { + logger.error('Error getting AIML API key:', error); + return null; + } +}; + +// Helper function to convert File to base64 +const fileToBase64 = (file: File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.readAsDataURL(file); + reader.onload = () => { + const result = reader.result as string; + // Remove data URL prefix to get just the base64 string + const base64 = result.split(',')[1]; + resolve(base64); + }; + reader.onerror = error => reject(error); + }); +}; + +interface ImageResult { + imageData: ArrayBuffer; + text?: string; +} + +/** + * Generate image using AIML API text-to-image + * Supports various models including ByteDance SeeDream v4, Flux, Stable Diffusion, etc. 
+ */ +export const createImageWithAimlApi = async ( + prompt: string, + model: string = 'bytedance/seedream-v4', + apiKey?: string +): Promise => { + const key = apiKey || await getAimlApiKey(); + + if (!key) { + logger.error('No AIML API key found. Please provide an API key or set it in your profile.'); + return null; + } + + try { + logger.info('Starting AIML API image generation', { + model, + promptLength: prompt.length, + promptPreview: prompt.substring(0, 100) + '...' + }); + + const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`; + + // Build request body based on model requirements + const requestBody: any = { + model, + prompt, + }; + + // Most models support these common parameters + if (!model.includes('dall-e')) { + requestBody.image_size = { width: 1024, height: 1024 }; + requestBody.num_images = 1; + requestBody.sync_mode = true; + } else { + // DALL-E uses different parameters + requestBody.n = 1; + requestBody.size = '1024x1024'; + } + + logger.debug('AIML API request body:', requestBody); + + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${key}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + const errorText = await response.text(); + logger.error('AIML API error:', { status: response.status, error: errorText }); + throw new Error(`AIML API error: ${response.status} - ${errorText}`); + } + + const data = await response.json(); + logger.debug('AIML API response:', data); + + // Handle response format: { data: [{ url: "...", b64_json: "..." 
}] } + if (!data.data || !Array.isArray(data.data) || data.data.length === 0) { + throw new Error('Invalid response from AIML API: no image data'); + } + + const firstResult = data.data[0]; + + // Prefer URL over base64 if both are provided + let arrayBuffer: ArrayBuffer; + + if (firstResult.url) { + logger.info('Image URL received from AIML API:', firstResult.url); + + // Fetch the image from URL + const imageResponse = await fetch(firstResult.url); + if (!imageResponse.ok) { + throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`); + } + arrayBuffer = await imageResponse.arrayBuffer(); + } else if (firstResult.b64_json) { + logger.info('Base64 image received from AIML API'); + + // Convert base64 to ArrayBuffer + const binaryString = atob(firstResult.b64_json); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + arrayBuffer = bytes.buffer; + } else { + throw new Error('No image URL or base64 data in AIML API response'); + } + + logger.info('Successfully generated image with AIML API', { + model, + imageSize: arrayBuffer.byteLength, + }); + + return { + imageData: arrayBuffer, + text: undefined, // AIML API doesn't return text descriptions + }; + + } catch (error: any) { + logger.error('AIML API image generation failed:', { + error: error.message, + model, + promptPreview: prompt.substring(0, 100) + '...' + }); + throw error; + } +}; + +/** + * Edit image using AIML API image-to-image + * Supports models like SeeDream v4 Edit, SeedEdit 3.0, Flux i2i, etc. + */ +export const editImageWithAimlApi = async ( + prompt: string, + imageFiles: File[], + model: string = 'bytedance/seedream-v4-edit', + apiKey?: string +): Promise => { + const key = apiKey || await getAimlApiKey(); + + if (!key) { + logger.error('No AIML API key found. 
Please provide an API key or set it in your profile.'); + return null; + } + + try { + logger.info('Starting AIML API image editing', { + model, + imageCount: imageFiles.length, + promptLength: prompt.length, + promptPreview: prompt.substring(0, 100) + '...' + }); + + // Convert the first image to base64 + const imageBase64 = await fileToBase64(imageFiles[0]); + + const endpoint = `${AIMLAPI_BASE_URL}/v1/images/generations`; + + // Different models use different parameter names for the image + const requestBody: any = { + model, + prompt, + num_images: 1, + sync_mode: true, + }; + + // AIML API edit endpoint requires image_urls for all models + requestBody.image_urls = [`data:image/png;base64,${imageBase64}`]; + + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${key}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + const errorText = await response.text(); + logger.error('AIML API error:', { status: response.status, error: errorText }); + throw new Error(`AIML API error: ${response.status} - ${errorText}`); + } + + const data = await response.json(); + logger.debug('AIML API response (edit):', data); + + // Handle response format + if (!data.data || !Array.isArray(data.data) || data.data.length === 0) { + throw new Error('Invalid response from AIML API: no image data'); + } + + const firstResult = data.data[0]; + + // Prefer URL over base64 if both are provided + let arrayBuffer: ArrayBuffer; + + if (firstResult.url) { + logger.info('Edited image URL received from AIML API:', firstResult.url); + + // Fetch the image from URL + const imageResponse = await fetch(firstResult.url); + if (!imageResponse.ok) { + throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`); + } + arrayBuffer = await imageResponse.arrayBuffer(); + } else if (firstResult.b64_json) { + logger.info('Base64 edited image received from AIML API'); + + // Convert 
base64 to ArrayBuffer + const binaryString = atob(firstResult.b64_json); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + arrayBuffer = bytes.buffer; + } else { + throw new Error('No image URL or base64 data in AIML API response'); + } + + logger.info('Successfully edited image with AIML API', { + model, + imageSize: arrayBuffer.byteLength, + }); + + return { + imageData: arrayBuffer, + text: undefined, + }; + + } catch (error: any) { + logger.error('AIML API image editing failed:', { + error: error.message, + model, + imageCount: imageFiles.length, + promptPreview: prompt.substring(0, 100) + '...' + }); + throw error; + } +}; + +// Export the logger for consistency +export { logger }; + diff --git a/packages/media/cpp/ref/images-ai/bria.ts b/packages/media/cpp/ref/images-ai/bria.ts new file mode 100644 index 00000000..d3b6c21d --- /dev/null +++ b/packages/media/cpp/ref/images-ai/bria.ts @@ -0,0 +1,304 @@ +import { apiClient } from "@/lib/db"; + +// Simple logger for user feedback +const logger = { + debug: (message: string, data?: any) => console.debug(`[BRIA] ${message}`, data), + info: (message: string, data?: any) => console.info(`[BRIA] ${message}`, data), + warn: (message: string, data?: any) => console.warn(`[BRIA] ${message}`, data), + error: (message: string, data?: any) => console.error(`[BRIA] ${message}`, data), +}; + +const BRIA_BASE_URL = 'https://engine.prod.bria-api.com/v1'; + +// Get user's Bria API key from server secrets +const getBriaApiKey = async (): Promise => { + try { + const data = await apiClient<{ api_keys?: Record }>('/api/me/secrets'); + const key = data.api_keys?.bria_api_key; + if (!key) { + logger.error('No Bria API key found. 
Please add your Bria API key in your profile settings.'); + return null; + } + return key; + } catch (error) { + logger.error('Error getting Bria API key:', error); + return null; + } +}; + +// Helper function to convert File to base64 +const fileToBase64 = (file: File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.readAsDataURL(file); + reader.onload = () => { + const result = reader.result as string; + // Remove data URL prefix to get just the base64 string + const base64 = result.split(',')[1]; + resolve(base64); + }; + reader.onerror = error => reject(error); + }); +}; + +// Helper to poll for async image generation +const pollForImage = async (url: string, maxAttempts = 60, delayMs = 2000): Promise => { + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + const response = await fetch(url, { method: 'HEAD' }); + if (response.ok && response.headers.get('content-length') !== '0') { + return true; // Image is ready + } + await new Promise(resolve => setTimeout(resolve, delayMs)); + } catch (error) { + logger.debug(`Poll attempt ${attempt + 1} failed, retrying...`); + } + } + return false; +}; + +interface ImageResult { + imageData: ArrayBuffer; + text?: string; +} + +/** + * Generate image using Bria text-to-image API + * Uses the fast endpoint with model version 3.2 for good balance of speed and quality + */ +export const createImageWithBria = async ( + prompt: string, + model: string = 'bria-2.3-fast', + apiKey?: string +): Promise => { + const key = apiKey || await getBriaApiKey(); + + if (!key) { + logger.error('No Bria API key found. Please provide an API key or set it in your profile.'); + return null; + } + + try { + logger.info('Starting Bria image generation', { + model, + promptLength: prompt.length, + promptPreview: prompt.substring(0, 100) + '...' 
+ }); + + // Parse model string to determine endpoint and version + // Format: "bria-{version}-{speed}" e.g., "bria-3.2-fast", "bria-2.3-base", "bria-2.2-hd" + const parts = model.split('-'); + const version = parts[1] || '3.2'; + const speed = parts[2] || 'fast'; // fast, base, or hd + + const endpoint = `${BRIA_BASE_URL}/text-to-image/${speed}/${version}`; + + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'api_token': key, + }, + body: JSON.stringify({ + prompt, + num_results: 1, + sync: false, // Use async for better performance + aspect_ratio: '1:1', + steps_num: speed === 'fast' ? 8 : 30, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + logger.error('Bria API error:', { status: response.status, error: errorText }); + throw new Error(`Bria API error: ${response.status} - ${errorText}`); + } + + const data = await response.json(); + logger.debug('Bria API response:', data); + + // Handle response format + if (data.error_code) { + throw new Error(data.description || `Bria API error: ${data.error_code}`); + } + + if (!data.result || !Array.isArray(data.result) || data.result.length === 0) { + throw new Error('Invalid response from Bria API: no results'); + } + + const firstResult = data.result[0]; + + // Check if result was blocked by content moderation + if (firstResult.blocked) { + throw new Error(firstResult.description || 'Content blocked by Bria moderation'); + } + + if (!firstResult.urls || firstResult.urls.length === 0) { + throw new Error('No image URL in Bria response'); + } + + const imageUrl = firstResult.urls[0]; + logger.info('Image URL received from Bria:', imageUrl); + + // Poll for the image to be ready (async generation) + logger.info('Polling for image completion...'); + const isReady = await pollForImage(imageUrl); + + if (!isReady) { + throw new Error('Image generation timed out'); + } + + // Fetch the generated image + const imageResponse = await 
fetch(imageUrl); + if (!imageResponse.ok) { + throw new Error(`Failed to fetch generated image: ${imageResponse.statusText}`); + } + + const arrayBuffer = await imageResponse.arrayBuffer(); + + logger.info('Successfully generated image with Bria', { + model, + imageSize: arrayBuffer.byteLength, + seed: firstResult.seed, + }); + + return { + imageData: arrayBuffer, + text: undefined, // Bria doesn't return text descriptions + }; + + } catch (error: any) { + logger.error('Bria image generation failed:', { + error: error.message, + model, + promptPreview: prompt.substring(0, 100) + '...' + }); + throw error; + } +}; + +/** + * Edit image using Bria reimagine API (structure reference) + * Maintains the structure and depth of the input while incorporating new materials, colors, and textures + */ +export const editImageWithBria = async ( + prompt: string, + imageFiles: File[], + model: string = 'bria-2.3-fast', + apiKey?: string +): Promise => { + const key = apiKey || await getBriaApiKey(); + + if (!key) { + logger.error('No Bria API key found. Please provide an API key or set it in your profile.'); + return null; + } + + try { + logger.info('Starting Bria image editing (reimagine)', { + model, + imageCount: imageFiles.length, + promptLength: prompt.length, + promptPreview: prompt.substring(0, 100) + '...' 
+ }); + + // Convert the first image to base64 for the structure reference + const imageBase64 = await fileToBase64(imageFiles[0]); + + const endpoint = `${BRIA_BASE_URL}/reimagine`; + + // Parse model to determine if we should use fast mode + const parts = model.split('-'); + const speed = parts[2] || 'fast'; + const useFast = speed === 'fast'; + + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'api_token': key, + }, + body: JSON.stringify({ + prompt, + structure_image_file: imageBase64, + structure_ref_influence: 0.75, // Good balance for maintaining structure while allowing changes + num_results: 1, + sync: false, // Use async for better performance + fast: useFast, + steps_num: useFast ? 12 : 30, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + logger.error('Bria API error:', { status: response.status, error: errorText }); + throw new Error(`Bria API error: ${response.status} - ${errorText}`); + } + + const data = await response.json(); + logger.debug('Bria API response (reimagine):', data); + + // Handle response format + if (data.error_code) { + throw new Error(data.description || `Bria API error: ${data.error_code}`); + } + + if (!data.result || !Array.isArray(data.result) || data.result.length === 0) { + throw new Error('Invalid response from Bria API: no results'); + } + + const firstResult = data.result[0]; + + // Check if result was blocked by content moderation + if (firstResult.blocked) { + throw new Error(firstResult.description || 'Content blocked by Bria moderation'); + } + + if (!firstResult.urls || firstResult.urls.length === 0) { + throw new Error('No image URL in Bria response'); + } + + const imageUrl = firstResult.urls[0]; + logger.info('Edited image URL received from Bria:', imageUrl); + + // Poll for the image to be ready (async generation) + logger.info('Polling for edited image completion...'); + const isReady = await pollForImage(imageUrl); + 
+ if (!isReady) { + throw new Error('Image editing timed out'); + } + + // Fetch the edited image + const imageResponse = await fetch(imageUrl); + if (!imageResponse.ok) { + throw new Error(`Failed to fetch edited image: ${imageResponse.statusText}`); + } + + const arrayBuffer = await imageResponse.arrayBuffer(); + + logger.info('Successfully edited image with Bria', { + model, + imageSize: arrayBuffer.byteLength, + seed: firstResult.seed, + }); + + return { + imageData: arrayBuffer, + text: undefined, + }; + + } catch (error: any) { + logger.error('Bria image editing failed:', { + error: error.message, + model, + imageCount: imageFiles.length, + promptPreview: prompt.substring(0, 100) + '...' + }); + throw error; + } +}; + +// Export the logger for consistency +export { logger }; + diff --git a/packages/media/cpp/ref/images-ai/image-router.ts b/packages/media/cpp/ref/images-ai/image-router.ts new file mode 100644 index 00000000..b844e438 --- /dev/null +++ b/packages/media/cpp/ref/images-ai/image-router.ts @@ -0,0 +1,419 @@ +/** + * Image Generation Router + * Routes image generation requests to the appropriate AI provider based on the model format. 
+ * Model format: "provider/model-name" + * + * Supported providers: + * - google: Google Generative AI (Gemini models) + * - replicate: Replicate API (various models) + * - bria: Bria.ai (coming soon) + */ + +import { createImage as createImageGoogle, editImage as editImageGoogle } from '@/image-api'; +//import { createImageWithReplicate, editImageWithReplicate } from '@/lib/replicate'; +import { createImageWithBria, editImageWithBria } from '@/lib/bria'; +import { createImageWithAimlApi, editImageWithAimlApi } from '@/lib/aimlapi'; + +// Logger for debugging +const logger = { + debug: (message: string, data?: any) => console.debug(`[IMAGE-ROUTER] ${message}`, data), + info: (message: string, data?: any) => console.info(`[IMAGE-ROUTER] ${message}`, data), + warn: (message: string, data?: any) => console.warn(`[IMAGE-ROUTER] ${message}`, data), + error: (message: string, data?: any) => console.error(`[IMAGE-ROUTER] ${message}`, data), +}; + +export interface ImageResult { + imageData: ArrayBuffer; + text?: string; +} + +export interface ModelInfo { + provider: string; + modelName: string; + displayName: string; + supportsTextToImage: boolean; + supportsImageToImage: boolean; +} + +// Available models configuration +export const AVAILABLE_MODELS: ModelInfo[] = [ + { + provider: 'google', + modelName: 'gemini-3-pro-image-preview', + displayName: 'Google Gemini 3 Pro (Image Preview)', + supportsTextToImage: true, + supportsImageToImage: true, + }, + { + provider: 'google', + modelName: 'gemini-3.1-flash-image-preview', + displayName: 'Google Gemini 3.1 Flash (Image Preview)', + supportsTextToImage: true, + supportsImageToImage: true, + }, + /* Duplicate model name causing key conflicts - temporarily disabled + { + provider: 'google', + modelName: 'gemini-3-pro-image-preview', + displayName: 'Google Gemini 2.5 Flash (Image Preview)', + supportsTextToImage: true, + supportsImageToImage: true, + }, + */ + { + provider: 'replicate', + modelName: 'bytedance/seedream-4', + 
displayName: 'Replicate SeeDream-4 (Bytedance)', + supportsTextToImage: true, + supportsImageToImage: true, + }, + { + provider: 'bria', + modelName: 'bria-3.2-fast', + displayName: 'Bria.ai 3.2 Fast', + supportsTextToImage: true, + supportsImageToImage: true, + }, + { + provider: 'bria', + modelName: 'bria-2.3-base', + displayName: 'Bria.ai 2.3 Base (High Quality)', + supportsTextToImage: true, + supportsImageToImage: true, + }, + { + provider: 'bria', + modelName: 'bria-2.2-hd', + displayName: 'Bria.ai 2.2 HD (1920x1080)', + supportsTextToImage: true, + supportsImageToImage: false, // HD doesn't support reimagine + }, + // AIML API - ByteDance Models + { + provider: 'aimlapi', + modelName: 'bytedance/seedream-v4-text-to-image', + displayName: 'AIML API - SeeDream v4 (4K)', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'bytedance/seedream-v4-edit', + displayName: 'AIML API - SeeDream v4 Edit (4K)', + supportsTextToImage: false, + supportsImageToImage: true, + }, + { + provider: 'aimlapi', + modelName: 'bytedance/seededit-3.0-i2i', + displayName: 'AIML API - SeedEdit 3.0', + supportsTextToImage: false, + supportsImageToImage: true, + }, + { + provider: 'aimlapi', + modelName: 'bytedance/seedream-3.0', + displayName: 'AIML API - SeeDream 3.0', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'bytedance/uso', + displayName: 'AIML API - USO (i2i)', + supportsTextToImage: false, + supportsImageToImage: true, + }, + { + provider: 'aimlapi', + modelName: 'alibaba/qwen-image', + displayName: 'AIML API - Qwen Image', + supportsTextToImage: true, + supportsImageToImage: false, + }, + // AIML API - Flux Models + { + provider: 'aimlapi', + modelName: 'flux-pro', + displayName: 'AIML API - Flux Pro', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'flux-pro/v1.1', + displayName: 'AIML API - Flux Pro v1.1', + 
supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'flux-pro/v1.1-ultra', + displayName: 'AIML API - Flux Pro v1.1 Ultra', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'flux-realism', + displayName: 'AIML API - Flux Realism', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'flux/dev', + displayName: 'AIML API - Flux Dev', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'flux/dev/image-to-image', + displayName: 'AIML API - Flux Dev i2i', + supportsTextToImage: false, + supportsImageToImage: true, + }, + { + provider: 'aimlapi', + modelName: 'flux/schnell', + displayName: 'AIML API - Flux Schnell (Fast)', + supportsTextToImage: true, + supportsImageToImage: false, + }, + // AIML API - Google Models + { + provider: 'aimlapi', + modelName: 'imagen-3.0-generate-002', + displayName: 'AIML API - Google Imagen 3', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'google/imagen4/preview', + displayName: 'AIML API - Google Imagen 4 Preview', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'google/imagen-4.0-generate-001', + displayName: 'AIML API - Google Imagen 4.0', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'google/imagen-4.0-fast-generate-001', + displayName: 'AIML API - Google Imagen 4.0 Fast', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'google/imagen-4.0-ultra-generate-001', + displayName: 'AIML API - Google Imagen 4.0 Ultra', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'google/gemini-2.5-flash-image', + displayName: 'AIML API - Gemini 2.5 Flash Image', + 
supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'google/gemini-2.5-flash-image-edit', + displayName: 'AIML API - Gemini 2.5 Flash Edit', + supportsTextToImage: false, + supportsImageToImage: true, + }, + // AIML API - OpenAI Models + { + provider: 'aimlapi', + modelName: 'dall-e-2', + displayName: 'AIML API - DALL-E 2 (OpenAI)', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'dall-e-3', + displayName: 'AIML API - DALL-E 3 (OpenAI)', + supportsTextToImage: true, + supportsImageToImage: false, + }, + // AIML API - Stability AI Models + { + provider: 'aimlapi', + modelName: 'stable-diffusion-v3-medium', + displayName: 'AIML API - Stable Diffusion 3 Medium', + supportsTextToImage: true, + supportsImageToImage: false, + }, + { + provider: 'aimlapi', + modelName: 'stable-diffusion-v35-large', + displayName: 'AIML API - Stable Diffusion 3.5 Large', + supportsTextToImage: true, + supportsImageToImage: false, + }, + // AIML API - Recraft AI + { + provider: 'aimlapi', + modelName: 'recraft-v3', + displayName: 'AIML API - Recraft v3', + supportsTextToImage: true, + supportsImageToImage: false, + }, +]; + +/** + * Parse model string into provider and model name + * @param modelString Format: "provider/model-name" + * @returns { provider, modelName } + */ +export const parseModelString = (modelString: string): { provider: string; modelName: string } => { + const parts = modelString.split('/'); + + if (parts.length < 2) { + // Default to Google if no provider specified + logger.warn('Model string missing provider, defaulting to Google', { modelString }); + return { + provider: 'google', + modelName: modelString, + }; + } + + const provider = parts[0].toLowerCase(); + const modelName = parts.slice(1).join('/'); // Handle models with multiple slashes + + return { provider, modelName }; +}; + +/** + * Get full model string from provider and model name + */ +export const 
getModelString = (provider: string, modelName: string): string => {
+  return `${provider}/${modelName}`;
+};
+
+/**
+ * Create/generate a new image from text prompt.
+ * Routes to the appropriate provider based on the "provider/model-name" string.
+ *
+ * @param prompt       text prompt forwarded to the provider
+ * @param modelString  "provider/model-name" (see parseModelString)
+ * @param apiKey       optional explicit key; otherwise each provider reads the
+ *                     user's stored secret
+ * @param aspectRatio,resolution,enableSearchGrounding,enableImageSearch
+ *                     forwarded to the google provider only; bria/aimlapi
+ *                     routes ignore them
+ * @returns provider result, or null when the provider could not resolve a key
+ * @throws on provider/API failure or an unsupported provider prefix
+ */
+export const createImage = async (
+  prompt: string,
+  modelString: string = 'google/gemini-3-pro-image-preview',
+  apiKey?: string,
+  aspectRatio?: string,
+  resolution?: string,
+  enableSearchGrounding?: boolean,
+  enableImageSearch?: boolean
+): Promise<ImageResult | null> => {
+  const { provider, modelName } = parseModelString(modelString);
+
+  logger.info('Routing image creation request', {
+    provider,
+    modelName,
+    promptLength: prompt.length,
+    searchGrounding: !!enableSearchGrounding,
+    imageSearch: !!enableImageSearch,
+  });
+
+  try {
+    switch (provider) {
+      case 'google':
+        return await createImageGoogle(prompt, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
+
+      case 'bria':
+        return await createImageWithBria(prompt, modelName, apiKey);
+
+      case 'aimlapi':
+        return await createImageWithAimlApi(prompt, modelName, apiKey);
+
+      default:
+        logger.error('Unsupported provider', { provider, modelName });
+        // FIX: dropped 'replicate' from the message — its import is commented
+        // out and there is no replicate case, so advertising it was misleading.
+        throw new Error(`Unsupported provider: ${provider}. Supported providers: google, bria, aimlapi`);
+    }
+  } catch (error: any) {
+    logger.error('Image creation failed', {
+      provider,
+      modelName,
+      error: error.message,
+    });
+    throw error;
+  }
+};
+
+/**
+ * Edit an existing image with a text prompt.
+ * Routes to the appropriate provider based on the "provider/model-name" string.
+ * Same contract as createImage, plus the source images to edit.
+ */
+export const editImage = async (
+  prompt: string,
+  imageFiles: File[],
+  modelString: string = 'google/gemini-3-pro-image-preview',
+  apiKey?: string,
+  aspectRatio?: string,
+  resolution?: string,
+  enableSearchGrounding?: boolean,
+  enableImageSearch?: boolean
+): Promise<ImageResult | null> => {
+  const { provider, modelName } = parseModelString(modelString);
+
+  logger.info('Routing image editing request', {
+    provider,
+    modelName,
+    promptLength: prompt.length,
+    imageCount: imageFiles.length,
+    searchGrounding: !!enableSearchGrounding,
+    imageSearch: !!enableImageSearch,
+  });
+
+  try {
+    switch (provider) {
+      case 'google':
+        return await editImageGoogle(prompt, imageFiles, modelName, apiKey, aspectRatio, resolution, enableSearchGrounding, enableImageSearch);
+
+      case 'bria':
+        return await editImageWithBria(prompt, imageFiles, modelName, apiKey);
+
+      case 'aimlapi':
+        return await editImageWithAimlApi(prompt, imageFiles, modelName, apiKey);
+
+      default:
+        logger.error('Unsupported provider', { provider, modelName });
+        // FIX: same as createImage — 'replicate' is not actually routed.
+        throw new Error(`Unsupported provider: ${provider}. Supported providers: google, bria, aimlapi`);
+    }
+  } catch (error: any) {
+    logger.error('Image editing failed', {
+      provider,
+      modelName,
+      imageCount: imageFiles.length,
+      error: error.message,
+    });
+    throw error;
+  }
+};
+
+/**
+ * Get model info by full model string ("provider/model-name").
+ */
+export const getModelInfo = (modelString: string): ModelInfo | undefined => {
+  return AVAILABLE_MODELS.find(
+    (m) => getModelString(m.provider, m.modelName) === modelString
+  );
+};
+
+/**
+ * Get all models for a specific provider.
+ */
+export const getModelsByProvider = (provider: string): ModelInfo[] => {
+  return AVAILABLE_MODELS.filter((m) => m.provider === provider);
+};
+
+
diff --git a/packages/media/cpp/ref/images-ai/openai.ts b/packages/media/cpp/ref/images-ai/openai.ts
new file mode 100644
index 00000000..b1aae378
--- /dev/null
+++ b/packages/media/cpp/ref/images-ai/openai.ts
@@ -0,0 +1,1448 @@
+/**
+ * OpenAI Integration with Tool Presets
+ *
+ * PRESET QUICK REFERENCE:
+ * ----------------------
+ * Agent/Voice Agent: 'smart-generation' (optimize → generate → metadata, NO publish)
+ * Workflows: 'metadata-only' for metadata step
+ * Auto workflows: 'auto-publish' (includes publish tool)
+ *
+ * NOTE(review): 'smart-generation' and 'auto-publish' are not keys of
+ * PRESET_TOOLS below — they look like stale names for 'optimize-generate' /
+ * 'generate-publish'; confirm before relying on this table.
+ *
+ * See PRESET_TOOLS mapping below for tool combinations.
+ */ +import OpenAI from 'openai'; +import { getAuthToken as getZitadelToken, serverUrl } from "@/lib/db"; +import { z } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { RunnableToolFunctionWithParse } from 'openai/lib/RunnableFunction'; +import { JSONSchema } from 'openai/lib/jsonschema'; +import { createImage as createImageRouter, editImage as editImageRouter } from '@/lib/image-router'; +import { generateTextWithImagesTool } from '@/lib/markdownImageTools'; +import { createPageTool } from '@/lib/pageTools'; +import { createWidgetsTool } from '@/lib/tools-layout'; +import { encodeWav } from '@/lib/audioUtils'; + +type LogFunction = (level: string, message: string, data?: any) => void; + +// Simple logger for user feedback +const consoleLogger = { + debug: (message: string, data?: any) => console.debug(`[OPENAI-DEBUG] ${message}`, data), + info: (message: string, data?: any) => console.info(`[OPENAI-INFO] ${message}`, data), + warn: (message: string, data?: any) => console.warn(`[OPENAI-WARN] ${message}`, data), + error: (message: string, data?: any) => console.error(`[OPENAI-ERROR] ${message}`, data), +}; + +/** + * SIMPLE TOOL PRESET MAPPING + * + * This mapping defines common tool combinations: + * - generate-only: [generate] + * - generate-metadata: [generate, metadata] + * - generate-publish: [generate, metadata, publish] + * - metadata-only: [metadata] + * - optimize-generate: [optimize, generate, metadata] + * + * Use these to avoid calling unwanted tools (e.g., publish when user wants manual control) + */ +export type PresetType = + | 'generate-only' // Just generate image + | 'generate-metadata' // Generate + metadata + | 'generate-publish' // Generate + metadata + publish + | 'metadata-only' // Only metadata + | 'optimize-generate' // Optimize + generate + metadata + | 'layout-generator'; // Generate widget layout fragments + +const PRESET_TOOLS: Record RunnableToolFunctionWithParse[]> = { + 'generate-only': (apiKey) => [ + 
generateImageTool(apiKey) + ], + 'generate-metadata': (apiKey) => [ + generateImageTool(apiKey), + generateImageMetadataTool(apiKey) + ], + 'generate-publish': (apiKey) => [ + generateImageTool(apiKey), + generateImageMetadataTool(apiKey), + publishImageTool() + ], + 'metadata-only': (apiKey) => [ + generateImageMetadataTool(apiKey) + ], + 'optimize-generate': (apiKey) => [ + optimizePromptTool(apiKey), + generateImageTool(apiKey), + generateImageMetadataTool(apiKey) + ], + 'layout-generator': () => [ + createWidgetsTool(), + ], +}; + +// Get user's session token for proxy authentication +const getAuthToken = async (): Promise => { + try { + const token = await getZitadelToken(); + if (!token) { + consoleLogger.error('No authenticated session found'); + return null; + } + return token; + } catch (error) { + consoleLogger.error('Error getting auth token:', error); + return null; + } +}; + + +// Create OpenAI client +export const createOpenAIClient = async (apiKey?: string): Promise => { + let token = apiKey; + + if (!token || token.startsWith('sk-')) { + if (token?.startsWith('sk-')) { + consoleLogger.warn('Legacy OpenAI key detected and ignored. Using Zitadel session token for proxy.'); + } + token = (await getAuthToken()) || undefined; + } + + if (!token) { + consoleLogger.error('No authentication token found. Please sign in.'); + return null; + } + + try { + console.log('[createOpenAIClient] apiKey arg:', apiKey ? apiKey.substring(0, 10) + '...' : 'undefined'); + console.log('[createOpenAIClient] resolved token:', token ? token.substring(0, 10) + '...' 
: 'null'); + return new OpenAI({ + apiKey: token, // This is sent as Bearer token to our proxy + baseURL: `${serverUrl}/api/openai/v1`, + dangerouslyAllowBrowser: true // Required for client-side usage + }); + } catch (error) { + consoleLogger.error('Error creating OpenAI client:', error); + return null; + } +}; + +// Helper function to check if OpenAI is available (non-throwing) +export const isOpenAIAvailable = async (apiKey?: string): Promise => { + try { + const client = await createOpenAIClient(apiKey); + return client !== null; + } catch (error) { + consoleLogger.debug('OpenAI not available:', error); + return false; + } +}; + +// Safe wrapper for OpenAI operations that handles missing API key gracefully +export const withOpenAI = async ( + operation: (client: OpenAI) => Promise, + fallback?: T, + apiKey?: string +): Promise => { + try { + const client = await createOpenAIClient(apiKey); + if (!client) { + consoleLogger.warn('OpenAI client not available, using fallback or returning null'); + return fallback ?? null; + } + + return await operation(client); + } catch (error) { + consoleLogger.error('OpenAI operation failed:', error); + return fallback ?? 
null; + } +}; + +// Simple text completion function +export const generateText = async ( + input: string, + model: string = "gpt-5", + apiKey?: string, + + signal?: AbortSignal, + onChunk?: (chunk: string) => void, + webSearch?: boolean +): Promise => { + return withOpenAI(async (client) => { + try { + // If web search is enabled, we MUST use the responses API + if (webSearch) { + consoleLogger.info('Using web_search with responses API', { model }); + + const response = await (client as any).responses?.create({ + model, + tools: [{ type: "web_search" }], + input, + }); + + // Parse response.output array structure + if (response?.output && Array.isArray(response.output)) { + const messageItem = response.output.find((item: any) => item.type === 'message'); + + if (messageItem?.content && Array.isArray(messageItem.content)) { + const textItem = messageItem.content.find((c: any) => c.type === 'output_text'); + + if (textItem?.text) { + consoleLogger.info('Successfully generated text with web search', { + inputLength: input.length, + outputLength: textItem.text.length, + model + }); + return textItem.text; + } + } + } + + consoleLogger.warn('Web search response had no valid text content', { response }); + } + + // Streaming implementation + if (onChunk) { + const stream = await client.chat.completions.create({ + model, + messages: [{ role: "user", content: input }], + stream: true, + }, { signal }); + + let fullContent = ''; + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + fullContent += delta; + onChunk(delta); + } + } + + consoleLogger.info('Successfully streamed text response', { + inputLength: input.length, + outputLength: fullContent.length, + model + }); + + return fullContent; + } + + // Non-streaming implementation + const response = await client.chat.completions.create({ + model, + messages: [{ role: "user", content: input }] + }, { + signal, // Pass abort signal to OpenAI client + }); + + const 
content = response.choices[0]?.message?.content; + if (!content) { + consoleLogger.warn('No content returned from OpenAI'); + return null; + } + + consoleLogger.info('Successfully generated text response', { + inputLength: input.length, + outputLength: content.length, + model, + content + }); + + return content; + } catch (error: any) { + // Handle abort error specifically + if (error.name === 'AbortError' || error.message?.includes('aborted')) { + consoleLogger.info('Text generation aborted by user'); + return null; + } + + consoleLogger.error('OpenAI text generation failed:', { + error: error.message, + model, + inputPreview: input.substring(0, 100) + '...' + }); + throw error; + } + }, null, apiKey); +}; + +// Alternative function using the responses API (if available) +export const generateResponse = async ( + input: string, + model: string = "gpt-4o-mini", + apiKey?: string +): Promise => { + return withOpenAI(async (client) => { + try { + // Note: The responses API might not be available in all OpenAI versions + // This is a placeholder for the API structure you mentioned + const response = await (client as any).responses?.create({ + model, + input, + }); + + if (!response) { + consoleLogger.warn('Responses API not available, falling back to chat completions'); + return generateText(input, model, apiKey); + } + + consoleLogger.info('Successfully generated response', { + inputLength: input.length, + model + }); + + return response; + } catch (error: any) { + consoleLogger.error('OpenAI responses API failed, falling back to chat completions:', error.message); + // Fallback to standard chat completions + return generateText(input, model, apiKey); + } + }, null, apiKey); +}; + +// Helper to convert File to base64 (browser-compatible) +const fileToBase64 = (file: File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.readAsDataURL(file); + reader.onload = () => { + const result = reader.result as string; + // 
Remove data URL prefix to get just the base64 string + const base64 = result.split(',')[1]; + resolve(base64); + }; + reader.onerror = error => reject(error); + }); +}; + +// Get MIME type from file +const getImageMimeType = (file: File): string => { + return file.type || 'image/jpeg'; +}; + +// Image analysis function for generating descriptions and titles +export const analyzeImages = async ( + imageFiles: File[], + prompt: string = "Analyze this image and provide a detailed description and suggest a creative title.", + model: string = "gpt-4o-mini", + apiKey?: string +): Promise<{ description: string; title: string } | null> => { + return withOpenAI(async (client) => { + try { + if (imageFiles.length === 0) { + consoleLogger.warn('No images provided for analysis'); + return null; + } + + // Convert all images to base64 + const imageContents = await Promise.all( + imageFiles.map(async (file) => { + const base64 = await fileToBase64(file); + const mimeType = getImageMimeType(file); + return { + type: "input_image" as const, + image_url: `data:${mimeType};base64,${base64}`, + }; + }) + ); + + // Create the content array with text prompt and images + const content = [ + { type: "input_text" as const, text: prompt }, + ...imageContents + ]; + + // Use responses API for image analysis + const response = await (client as any).responses?.create({ + model, + input: [ + { + role: "user", + content, + }, + ], + }); + + if (!response?.output_text) { + consoleLogger.warn('No output text returned from OpenAI image analysis'); + return null; + } + + const outputText = response.output_text; + + // Try to parse description and title from the response + // This assumes the AI will format it properly, but we'll add some parsing logic + const lines = outputText.split('\n').filter(line => line.trim()); + + let description = outputText; + let title = `Analysis of ${imageFiles.length} image${imageFiles.length > 1 ? 
's' : ''}`; + let titleLineIndex = -1; + + // Simple parsing - look for title patterns + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lowerLine = line.toLowerCase(); + if (lowerLine.includes('title:') || lowerLine.includes('suggested title:')) { + title = line.replace(/^.*title:\s*/i, '').trim(); + titleLineIndex = i; + break; + } + } + + // Remove title line from description + if (titleLineIndex >= 0) { + const descriptionLines = lines.filter((_, index) => index !== titleLineIndex); + description = descriptionLines.join('\n').trim(); + } + + consoleLogger.info('Successfully analyzed images', { + imageCount: imageFiles.length, + descriptionLength: description.length, + title: title.substring(0, 50) + '...', + model + }); + + return { description, title }; + + } catch (error: any) { + consoleLogger.error('OpenAI image analysis failed:', { + error: error.message, + model, + imageCount: imageFiles.length, + promptPreview: prompt.substring(0, 100) + '...' + }); + + // Fallback to chat completions if responses API fails + try { + consoleLogger.info('Falling back to chat completions for image analysis'); + + const imageContents = await Promise.all( + imageFiles.map(async (file) => { + const base64 = await fileToBase64(file); + const mimeType = getImageMimeType(file); + return { + type: "image_url" as const, + image_url: { + url: `data:${mimeType};base64,${base64}`, + }, + }; + }) + ); + + const response = await client.chat.completions.create({ + model: "gpt-4-vision-preview", // Use vision model for fallback + messages: [ + { + role: "user", + content: [ + { type: "text", text: prompt }, + ...imageContents + ], + }, + ] + }); + + const content = response.choices[0]?.message?.content; + if (!content) { + consoleLogger.warn('No content returned from fallback image analysis'); + return null; + } + + // Parse the fallback response + const lines = content.split('\n').filter(line => line.trim()); + let description = content; + let title = `Analysis 
of ${imageFiles.length} image${imageFiles.length > 1 ? 's' : ''}`; + let titleLineIndex = -1; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lowerLine = line.toLowerCase(); + if (lowerLine.includes('title:') || lowerLine.includes('suggested title:')) { + title = line.replace(/^.*title:\s*/i, '').trim(); + titleLineIndex = i; + break; + } + } + + // Remove title line from description + if (titleLineIndex >= 0) { + const descriptionLines = lines.filter((_, index) => index !== titleLineIndex); + description = descriptionLines.join('\n').trim(); + } + + return { description, title }; + + } catch (fallbackError: any) { + consoleLogger.error('Fallback image analysis also failed:', fallbackError.message); + throw error; // Throw original error + } + } + }, null, apiKey); +}; + +// Simplified function for wizard integration (disabled for now) +export const generateImageMetadata = async ( + imageFiles: File[], + apiKey?: string +): Promise<{ description: string; title: string } | null> => { + // This function is disabled for now as mentioned in requirements + consoleLogger.info('Image metadata generation is currently disabled'); + return null; + + // When enabled, uncomment this: + // return analyzeImages( + // imageFiles, + // "Analyze these images and provide: 1) A detailed description of what you see, and 2) A creative, engaging title. 
Format your response with 'Title: [your title]' on the first line, followed by the description.", + // "gpt-4.1-mini", + // apiKey + // ); +}; + +// Audio transcription function using OpenRouter Multimodal Chat Completions +export const transcribeAudio = async ( + audioFile: File, + model: string = "google/gemini-2.5-flash", + apiKey?: string +): Promise => { + try { + consoleLogger.info('Starting audio transcription via OpenRouter', { + fileName: audioFile.name, + fileSize: audioFile.size, + fileType: audioFile.type, + model + }); + + // Get auth token for proxy + let token = apiKey; + if (!token || token.startsWith('sk-')) { + token = (await getAuthToken()) || undefined; + } + + if (!token) { + consoleLogger.error('No authentication token found. Please sign in.'); + return null; + } + + // Convert audio file to base64 + const base64Audio = await encodeWav(audioFile); + + // Use the OpenRouter proxy endpoint + const response = await fetch(`${serverUrl}/api/openrouter/v1/chat/completions`, { + method: "POST", + headers: { + "Authorization": `Bearer ${token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Please transcribe this audio file exactly as spoken, without any additional commentary.", + }, + { + type: "input_audio", + input_audio: { + data: base64Audio, + format: "wav", + }, + }, + ], + }, + ], + }), + }); + + const data = await response.json(); + + if (!response.ok) { + throw new Error(`OpenRouter API error: ${data.error?.message || response.statusText}`); + } + + const transcribedText = data.choices?.[0]?.message?.content; + + if (!transcribedText) { + consoleLogger.warn('No text returned from audio transcription'); + return null; + } + + consoleLogger.info('Successfully transcribed audio', { + textLength: transcribedText.length, + textPreview: transcribedText.substring(0, 100) + '...', + model + }); + + return transcribedText; + } catch 
(error: any) { + consoleLogger.error('OpenRouter audio transcription failed:', { + error: error.message, + fileName: audioFile.name, + model + }); + throw error; + } +}; + +// Optimize prompt for image generation +export const optimizePrompt = async ( + userPrompt: string, + model: string = "gpt-5", + apiKey?: string +): Promise => { + return withOpenAI(async (client) => { + try { + consoleLogger.info('Starting prompt optimization', { + originalPromptLength: userPrompt.length, + originalPromptPreview: userPrompt.substring(0, 100) + '...', + model + }); + + const systemPrompt = `You are an expert at writing prompts for AI image generation models. Your task is to optimize user prompts to produce better, more detailed, and more visually striking images. + +Guidelines for optimization: +- Add specific visual details (lighting, composition, style, mood, colors) +- Include technical photography/art terms when relevant (e.g., "bokeh", "golden hour", "rule of thirds") +- Specify artistic styles if not mentioned (e.g., "cinematic", "photorealistic", "digital art") +- Add quality enhancers (e.g., "highly detailed", "4k", "masterpiece") +- Keep the core intent of the user's prompt +- Make it concise but descriptive (aim for 1-3 sentences) +- Return ONLY the optimized prompt, no explanations or additional text + +Example: +User: "a cat sitting on a chair" +Optimized: "A fluffy tabby cat sitting gracefully on a vintage wooden chair, soft natural lighting from a nearby window creating gentle shadows, photorealistic style, highly detailed fur texture, warm color palette, shallow depth of field"`; + + const response = await client.chat.completions.create({ + model, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt } + ] + }); + + const optimizedPrompt = response.choices[0]?.message?.content?.trim(); + + if (!optimizedPrompt) { + consoleLogger.warn('No optimized prompt returned from OpenAI'); + return null; + } + + 
      consoleLogger.info('Successfully optimized prompt', {
        originalLength: userPrompt.length,
        optimizedLength: optimizedPrompt.length,
        optimizedPreview: optimizedPrompt.substring(0, 100) + '...',
        model
      });

      return optimizedPrompt;
    } catch (error: any) {
      consoleLogger.error('OpenAI prompt optimization failed:', {
        error: error.message,
        promptPreview: userPrompt.substring(0, 100) + '...',
        model
      });
      throw error;
    }
  }, null, apiKey);
};

// Generate HTML snippet with Tailwind CSS
// NOTE(review): the missing generic arguments on `Record` and `Promise` below
// look like they were stripped by text extraction (probably
// `Record<string, any>` and `Promise<string | null>`) — confirm against the
// original file; as written these annotations will not type-check.
export const generateHtmlSnippet = async (
  userPrompt: string,
  contextVariables: Record = {},
  pageContext: any = null,
  apiKey?: string
): Promise => {
  return withOpenAI(async (client) => {
    try {
      consoleLogger.info('Starting HTML snippet generation', {
        promptLength: userPrompt.length,
        hasContext: Object.keys(contextVariables).length > 0,
        hasPageContext: !!pageContext
      });

      // Human-readable list of ${var} placeholders the model may embed.
      const variableList = Object.keys(contextVariables).map(k => `\${${k}}`).join(', ');

      const contextPrompt = Object.keys(contextVariables).length > 0
        ? `\nAvailable Variables: You can use these variables in your HTML: ${variableList}. Use the syntax \${variableName} to insert them.`
        : '';

      // Page context JSON is truncated to 5000 chars to keep the prompt bounded.
      const pageContextPrompt = pageContext
        ? `\nPage Context (JSON): Use this to understand the surrounding page structure/data if relevant:\n\`\`\`json\n${JSON.stringify(pageContext, null, 2).slice(0, 5000)}\n\`\`\``
        : '';

      // NOTE(review): the element examples in rule 4 and the React-component
      // example in rule 6 appear to have been HTML tags that were destroyed
      // during extraction (e.g. "<div>, <section>, or <article>") — restore
      // them from the original source before shipping this prompt.
      const systemPrompt = `You are an expert Tailwind CSS and HTML developer.
Your task is to generate or modify a standalone HTML snippet based on the user's request.

Rules:
1. Return ONLY the HTML code. Do NOT wrap it in markdown code blocks (\`\`\`html ... \`\`\`\`).
2. Do NOT include any explanations, comments, or conversational text.
3. Use Tailwind CSS classes for styling.
4. The HTML should be a document fragment (e.g., a
,
, or
), NOT a full document.
5. Make the design modern, clean, and responsive.${contextPrompt}${pageContextPrompt}
6. If icons are needed, use valid inline elements with Tailwind classes. Do NOT use React component names (like ) as they will not render.
7. Ensure strict accessibility compliance (aria-labels, roles).`;

      console.log('System prompt:', systemPrompt);
      console.log('User prompt:', userPrompt);

      const response = await client.chat.completions.create({
        model: "gpt-4o", // Stronger model for code generation
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: userPrompt }
        ]
      });

      let content = response.choices[0]?.message?.content?.trim();

      if (!content) {
        consoleLogger.warn('No HTML returned from OpenAI');
        return null;
      }

      // Cleanup if the model ignored the "no markdown" rule
      content = content.replace(/^```html\s*/i, '').replace(/^```\s*/, '').replace(/\s*```$/, '');

      return content;
    } catch (error: any) {
      consoleLogger.error('OpenAI HTML generation failed:', error);
      throw error;
    }
  }, null, apiKey);
};

/**
 * Helper function to create Zod-validated OpenAI tools
 * Based on ref/tools/index.ts
 *
 * NOTE(review): the generic parameter list (likely `<T>`) and the type
 * arguments on `Promise`, `z.ZodSchema` and `RunnableToolFunctionWithParse`
 * appear stripped by extraction — confirm against the original file.
 */
export const zodFunction = ({
  function: fn,
  schema,
  description = '',
  name,
}: {
  function: (args: T) => Promise;
  schema: z.ZodSchema;
  description?: string;
  name?: string;
}): RunnableToolFunctionWithParse => {
  return {
    type: 'function',
    function: {
      function: fn,
      // Fall back to the wrapped function's own name when none is given.
      name: name ??
 fn.name,
      description: description,
      parameters: zodToJsonSchema(schema, { target: 'openAi' }) as JSONSchema,
      // Parse and validate the raw JSON argument string produced by the model.
      parse(input: string): T {
        const obj = JSON.parse(input);
        return schema.parse(obj);
      },
    },
  };
};

// ====================================================================
// TOOL DEFINITIONS
// ====================================================================

/**
 * Tool: Generate Image
 * Creates a new image from a text prompt using the specified AI model
 */
export const generateImageTool = (apiKey?: string) =>
  zodFunction({
    name: 'generate_image',
    description: 'Generate one or more images from a text prompt using AI image generation models. Supports multiple providers (Google, Replicate, Bria, AIML API). Can generate multiple images if count is specified.',
    schema: z.object({
      prompt: z.string().describe('The text prompt describing the image to generate'),
      count: z.number().optional().describe('Number of images to generate (1-4). Default: 1'),
      model: z.string().optional().describe('Model string in format "provider/model-name". Default: "google/gemini-3-pro-image-preview"'),
    }),
    function: async (args) => {
      try {
        const count = Math.min(Math.max(args.count || 1, 1), 4); // Clamp between 1-4
        consoleLogger.info('Tool::GenerateImage called', {
          prompt: args.prompt,
          model: args.model,
          count
        });

        // Image Router will fetch the appropriate provider API key from user profile
        // Don't pass OpenAI API key to image generation
        const results = [];

        // Generate multiple images if count > 1
        // Note: failed generations are skipped rather than aborting the batch.
        for (let i = 0; i < count; i++) {
          const result = await createImageRouter(
            args.prompt,
            args.model || 'google/gemini-3-pro-image-preview',
            undefined // Let Image Router fetch the correct provider API key
          );

          if (!result) {
            consoleLogger.warn(`Image ${i + 1}/${count} generation failed`);
            continue;
          }

          // Convert ArrayBuffer to blob URL
          const uint8Array = new Uint8Array(result.imageData);
          const blob = new Blob([uint8Array], { type: 'image/png' });
          const imageUrl = URL.createObjectURL(blob);

          results.push({
            imageUrl,
            text: result.text,
          });
        }

        if (results.length === 0) {
          return { success: false, error: 'Failed to generate any images' };
        }

        return {
          success: true,
          images: results,
          count: results.length,
          message: `${results.length} image${results.length > 1 ? 's' : ''} generated successfully`,
        };
      } catch (error: any) {
        consoleLogger.error('Tool::GenerateImage failed', error);
        return { success: false, error: error.message };
      }
    },
  });

/**
 * Tool: Transcribe Audio
 * Converts speech/audio to text using OpenAI Whisper
 */
export const transcribeAudioTool = (apiKey?: string) =>
  zodFunction({
    name: 'transcribe_audio',
    description: 'Transcribe audio or speech to text using OpenAI Whisper model. Accepts audio files in various formats.',
    schema: z.object({
      audioFile: z.any().describe('The audio file to transcribe (File object)'),
      model: z.string().optional().describe('Whisper model to use. Default: "whisper-1"'),
    }),
    function: async (args) => {
      try {
        consoleLogger.info('Tool::TranscribeAudio called', { model: args.model });

        const text = await transcribeAudio(args.audioFile, args.model, apiKey);

        if (!text) {
          return { success: false, error: 'Failed to transcribe audio' };
        }

        return {
          success: true,
          text,
          message: 'Audio transcribed successfully',
        };
      } catch (error: any) {
        consoleLogger.error('Tool::TranscribeAudio failed', error);
        return { success: false, error: error.message };
      }
    },
  });

/**
 * Tool: Optimize Prompt
 * Enhances a user's prompt to produce better image generation results
 */
export const optimizePromptTool = (apiKey?: string) =>
  zodFunction({
    name: 'optimize_prompt',
    description: 'Optimize and enhance a text prompt for better image generation results. Adds specific visual details, technical terms, and quality enhancers.',
    schema: z.object({
      prompt: z.string().describe('The original user prompt to optimize'),
      model: z.string().optional().describe('GPT model to use for optimization. Default: "gpt-4o-mini"'),
    }),
    function: async (args) => {
      try {
        consoleLogger.info('Tool::OptimizePrompt called', { promptLength: args.prompt.length });

        const optimized = await optimizePrompt(args.prompt, args.model, apiKey);

        if (!optimized) {
          return { success: false, error: 'Failed to optimize prompt' };
        }

        return {
          success: true,
          originalPrompt: args.prompt,
          optimizedPrompt: optimized,
          message: 'Prompt optimized successfully',
        };
      } catch (error: any) {
        consoleLogger.error('Tool::OptimizePrompt failed', error);
        return { success: false, error: error.message };
      }
    },
  });

/**
 * Tool: Generate Text
 * Generates text completion using GPT models
 */
export const generateTextTool = (apiKey?: string) =>
  zodFunction({
    name: 'generate_text',
    description: 'Generate text completion using OpenAI GPT models. Useful for creating descriptions, titles, or any text content.',
    schema: z.object({
      input: z.string().describe('The input text or prompt'),
      model: z.string().optional().describe('GPT model to use. Default: "gpt-4o-mini"'),
    }),
    function: async (args) => {
      try {
        consoleLogger.info('Tool::GenerateText called', { inputLength: args.input.length });

        const text = await generateText(args.input, args.model, apiKey);

        if (!text) {
          return { success: false, error: 'Failed to generate text' };
        }

        return {
          success: true,
          text,
          message: 'Text generated successfully',
        };
      } catch (error: any) {
        consoleLogger.error('Tool::GenerateText failed', error);
        return { success: false, error: error.message };
      }
    },
  });

/**
 * Tool: Generate Image Metadata
 * Generates title and description for an image based on the prompt or image content
 */
export const generateImageMetadataTool = (apiKey?: string) =>
  zodFunction({
    name: 'generate_image_metadata',
    description: 'Generate a creative title and detailed description for an image based on the prompt or image concept. Returns both title and description.',
    schema: z.object({
      prompt: z.string().describe('The image prompt or concept to generate metadata for'),
      style: z.string().optional().describe('Optional style hint for the metadata (e.g., "creative", "professional", "poetic")'),
    }),
    function: async (args) => {
      try {
        consoleLogger.info('Tool::GenerateImageMetadata called', { promptLength: args.prompt.length });

        const style = args.style || 'creative';
        const metadataPrompt = `Based on this image concept: "${args.prompt}"

Generate a ${style} title and description for this image. 

Requirements:
- Title: Short, catchy, and descriptive (5-8 words max)
- Description: Engaging description that captures the essence (2-3 sentences)

Format your response EXACTLY as:
Title: [your title here]
Description: [your description here]`;

        const response = await generateText(metadataPrompt, 'gpt-4o-mini', apiKey);

        if (!response) {
          return { success: false, error: 'Failed to generate metadata' };
        }

        // Parse title and description from response; fall back to the prompt
        // itself when the model ignored the requested format.
        const titleMatch = response.match(/Title:\s*(.+?)(?:\n|$)/i);
        const descMatch = response.match(/Description:\s*(.+?)$/is);

        const title = titleMatch ? titleMatch[1].trim() : `Generated: ${args.prompt.substring(0, 50)}`;
        const description = descMatch ? descMatch[1].trim() : args.prompt;

        consoleLogger.info('Successfully generated image metadata', {
          titleLength: title.length,
          descriptionLength: description.length,
        });

        return {
          success: true,
          title,
          description,
          message: 'Image metadata generated successfully',
        };
      } catch (error: any) {
        consoleLogger.error('Tool::GenerateImageMetadata failed', error);
        return { success: false, error: error.message };
      }
    },
  });

/**
 * Tool: Publish Image to Gallery
 * Publishes a generated image to the user's gallery with title and description
 */
export const publishImageTool = () =>
  zodFunction({
    name: 'publish_image',
    description: 'Publish a generated image to the gallery with title, description, and optional tags. This saves the image permanently to the user\'s account.',
    schema: z.object({
      imageUrl: z.string().describe('The URL or blob URL of the image to publish'),
      title: z.string().describe('The title for the image'),
      description: z.string().optional().describe('Optional description for the image'),
      tags: z.array(z.string()).optional().describe('Optional array of tags for the image'),
      prompt: z.string().optional().describe('The prompt used to generate the image'),
    }),
    function: async (args) => {
      try {
        consoleLogger.info('Tool::PublishImage called', { title: args.title });

        // Note: This is a placeholder. The actual publishing logic needs to be
        // implemented in the component that calls runTools, as it requires
        // access to Supabase client, user context, and File handling.
        // The tool returns the data needed for publishing.

        return {
          success: true,
          publishData: {
            imageUrl: args.imageUrl,
            title: args.title,
            description: args.description || '',
            tags: args.tags || [],
            prompt: args.prompt || '',
          },
          message: 'Image ready to publish. Please confirm publication in the UI.',
          requiresConfirmation: true,
        };
      } catch (error: any) {
        consoleLogger.error('Tool::PublishImage failed', error);
        return { success: false, error: error.message };
      }
    },
  });

// ====================================================================
// TOOL PRESETS
// ====================================================================

// A preset bundles a display name, default model, tool set and optional
// system prompt into one reusable agent configuration.
// NOTE(review): the type argument on `RunnableToolFunctionWithParse[]` looks
// stripped by extraction — confirm against the original file.
export interface ToolPreset {
  name: string;
  description: string;
  model: string;
  tools: RunnableToolFunctionWithParse[];
  systemPrompt?: string;
}

/**
 * Create a simple custom preset using the preset type mapping
 */
export const createSimplePreset = (
  type: PresetType,
  systemPrompt: string,
  apiKey?: string
): ToolPreset => ({
  name: `Custom ${type}`,
  description: type,
  model: 'openai/gpt-5.2',
  tools: PRESET_TOOLS[type](apiKey),
  systemPrompt,
});

/**
 * Create tool presets with API key.
 * Returns the named preset map consumed by runTools; userId/addLog are only
 * needed by the page/layout presets whose tools write content and log.
 * NOTE(review): `Record` below is missing its type arguments (likely
 * `Record<string, ToolPreset>`) — stripped by extraction.
 */
export const createToolPresets = (apiKey?: string, userId?: string, addLog?: LogFunction): Record => ({
  'image-wizard': {
    name: 'Image Generation Wizard',
    description: 'Optimize → generate → metadata',
    model: 'openai/gpt-5.2',
    tools: PRESET_TOOLS['optimize-generate'](apiKey),
    systemPrompt: `You are an AI image generation assistant. Your role is to:
1. Optimize prompts for better results
2. Generate images using optimized prompts
3. Create metadata (title + description)

Execute these steps automatically. Do NOT publish - user handles publishing.`,
  },

  'speech-to-image': {
    name: 'Speech to Image',
    description: 'Complete workflow: transcribe audio → optimize prompt → generate image',
    model: 'openai/gpt-5.2',
    tools: [
      transcribeAudioTool(apiKey),
      optimizePromptTool(apiKey),
      generateImageTool(apiKey),
    ],
    systemPrompt: `You are an AI assistant that converts speech to images. Your workflow:
1. Transcribe audio input to text (using transcribe_audio tool)
2. Optimize the transcribed text as an image prompt (using optimize_prompt tool)
3. Generate an image from the optimized prompt (using generate_image tool)
4. Provide clear feedback at each step

Execute all steps automatically when given audio input.`,
  },

  'text-to-image': {
    name: 'Text to Image',
    description: 'Optimize → generate → metadata',
    model: 'openai/gpt-5.2',
    tools: PRESET_TOOLS['optimize-generate'](apiKey),
    systemPrompt: `You are a text-to-image generation assistant.
- Optimize prompts and generate images
- Create metadata automatically
- Do NOT publish - user handles that`,
  },

  'smart-generation': {
    name: 'Smart Generation',
    description: 'Optimize → generate → metadata (no publish)',
    model: 'openai/gpt-5.2',
    tools: PRESET_TOOLS['optimize-generate'](apiKey),
    systemPrompt: `You are an intelligent image generation assistant.

Workflow: Optimize prompt → Generate image → Create metadata

Execute automatically. Do NOT publish - user handles publishing.
Keep responses brief.`,
  },

  'auto-publish': {
    name: 'Auto-Publish Workflow',
    description: 'Optimize → generate → metadata → publish (full automation)',
    model: 'openai/gpt-5.2',
    tools: PRESET_TOOLS['generate-publish'](apiKey),
    systemPrompt: `You are an auto-publishing assistant.

Workflow: Optimize → Generate → Metadata → Publish

Execute all steps automatically. Always include metadata.`,
  },

  'metadata-generator': {
    name: 'Metadata Generator',
    description: 'Generate title and description for image concepts or existing images',
    model: 'openai/gpt-5.2',
    tools: [
      generateImageMetadataTool(apiKey),
    ],
    systemPrompt: `You are a creative metadata generator for images.
Your job is to create engaging titles and descriptions that capture the essence of the image. 

For titles: Make them short, catchy, and memorable (5-8 words)
For descriptions: Create 2-3 sentences that are engaging and descriptive

Be creative and match the style of the image concept.`,
  },

  'metadata-only': {
    name: 'Metadata Only',
    description: 'Only generate title and description',
    model: 'openai/gpt-5.2',
    tools: PRESET_TOOLS['metadata-only'](apiKey),
    systemPrompt: `Generate title and description for the prompt.
Title: 5-8 words. Description: 2-3 sentences.`,
  },
  'page-generator': {
    name: 'Page Generator',
    description: 'Generate a complete page with text, images, and metadata from a single voice command.',
    model: 'openai/gpt-5.2',
    tools: [
      generateTextWithImagesTool(userId || 'anonymous-user', addLog),
      createPageTool(userId || 'anonymous-user', addLog),
    ],
    systemPrompt: `You are an AI assistant that creates well-structured pages with text and images. Your only task is to use the 'generate_text_with_images' tool to create rich, comprehensive markdown content based on the user's request. Include at least one relevant image unless otherwise specified.`,
  },
  'page-generator-text-only': {
    name: 'Page Generator (Text Only)',
    description: 'Generate a complete page with text from a single command, without images.',
    model: 'openai/gpt-5.2',
    tools: [
      createPageTool(userId || 'anonymous-user', addLog),
    ],
    systemPrompt: `You are an AI assistant that writes well-structured, comprehensive markdown documents. Based on the user's request, generate the full text content for a page, then call the 'create_page' tool with that content and a suitable title and tags. Your final response must be only the 'create_page' tool call.`,
  },
  'layout-generator': {
    name: 'Layout Generator',
    description: 'Generate widget layout fragments (containers + widgets) from a text description.',
    model: 'openai/gpt-5.2',
    tools: [
      createWidgetsTool(addLog),
    ],
    systemPrompt: `You are a layout generation assistant for a widget-based page editor. When the user describes a layout, call the create_widgets tool with the correct containers and widgets. Follow the widget schema exactly. Use appropriate columns, widget types, and props. Be creative with HTML widgets when no specific widget type fits. Always return valid JSON.`,
  }
});

// ====================================================================
// RUN TOOLS - Main Orchestration Function
// ====================================================================

export interface RunToolsOptions {
  prompt: string;
  preset?: string | ToolPreset;
  apiKey?: string;
  onMessage?: (message: any) => void;
  onToolCall?: (toolCall: any) => void;
  onContent?: (content: string) => void;
  model?: string;
  /** Provider name (e.g. 'openai', 'openrouter'). Determines which proxy base URL to use. 
   */
  provider?: string;
  maxIterations?: number;
  userId?: string;
  images?: string[];
  toolsEnabled?: boolean;
  webSearchEnabled?: boolean;
  pageToolsEnabled?: boolean;
  imageToolsEnabled?: boolean;
  vfsToolsEnabled?: boolean;
  addLog?: LogFunction;
}

export interface RunToolsResult {
  success: boolean;
  content?: string;
  messages: any[];
  toolCalls: any[];
  error?: string;
}

/**
 * Run OpenAI with tools - main orchestration function
 * Based on ref/run-tools.ts
 *
 * @example
 * // Use preset
 * const result = await runTools({
 *   prompt: "Create a beautiful sunset over mountains",
 *   preset: "image-wizard"
 * });
 *
 * @example
 * // Custom tools
 * const result = await runTools({
 *   prompt: "Generate an image of a cat",
 *   preset: {
 *     name: "custom",
 *     model: "gpt-4o-mini",
 *     tools: [generateImageTool()],
 *     systemPrompt: "You are a helpful assistant"
 *   }
 * });
 */
export const runTools = async (options: RunToolsOptions): Promise => {
  // NOTE(review): `Promise` above is missing its type argument (presumably
  // `Promise<RunToolsResult>`) — looks stripped by extraction; confirm.
  // NOTE(review): onMessage/onToolCall/onContent and maxIterations are
  // destructured but never used in this visible body — dead options or
  // unfinished streaming support? Verify against callers.
  const {
    prompt,
    preset,
    apiKey,
    onMessage,
    onToolCall,
    onContent,
    maxIterations = 10,
    userId,
    images,
    addLog,
  } = options;

  // Route log lines through the caller-supplied sink when provided,
  // otherwise fall back to the module-level console logger.
  const logger = addLog ? {
    debug: (m: string, d?: any) => addLog('debug', `[OPENAI] ${m}`, d),
    info: (m: string, d?: any) => addLog('info', `[OPENAI] ${m}`, d),
    warn: (m: string, d?: any) => addLog('warn', `[OPENAI] ${m}`, d),
    error: (m: string, d?: any) => addLog('error', `[OPENAI] ${m}`, d),
  } : consoleLogger;

  const messages: any[] = [];
  const toolCalls: any[] = [];

  try {
    // Get preset configuration (needed for model fallback)
    const presets = createToolPresets(apiKey, userId, addLog);
    // Unknown preset names and missing presets both fall back to 'image-wizard'.
    const presetConfig: ToolPreset =
      typeof preset === 'string'
        ? presets[preset] || { ...presets['image-wizard'] }
        : preset ? { ...preset } : { ...presets['image-wizard'] };
    // Clone tools array to avoid modifying shared preset object
    presetConfig.tools = [...presetConfig.tools];

    // Determine effective provider — auto-detect OpenRouter from model format (contains '/')
    const effectiveModel = options.model || presetConfig.model;
    const effectiveProvider = options.provider
      || (effectiveModel.includes('/') ? 'openrouter' : 'openai');

    // Create client — route through the correct proxy based on provider
    let client: OpenAI | null = null;

    if (effectiveProvider === 'openrouter') {
      // OpenRouter models (e.g. 'google/gemini-2.5-pro') need the OpenRouter proxy
      let token: string | undefined = undefined;
      try {
        token = await getZitadelToken() ?? undefined;
      } catch { }
      if (!token) {
        return {
          success: false,
          messages,
          toolCalls,
          error: 'No authentication token available for OpenRouter',
        };
      }
      client = new OpenAI({
        apiKey: token,
        baseURL: `${serverUrl}/api/openrouter/v1`,
        dangerouslyAllowBrowser: true,
      });
    } else {
      // Default: OpenAI proxy
      client = await createOpenAIClient(apiKey);
    }

    if (!client) {
      return {
        success: false,
        messages,
        toolCalls,
        error: 'Failed to create AI client - no API key available',
      };
    }

    // Inject extra tools based on options
    if (options.webSearchEnabled) {
      const { webSearchTool, markdownScraperTool } = await import('@/modules/ai/searchTools');
      presetConfig.tools.push(webSearchTool(logger.info));
      presetConfig.tools.push(markdownScraperTool(logger.info));
      logger.info('Injected Web Search and Markdown Scraper tools');
    }

    // Determine effective provider — auto-detect OpenRouter from model format (contains '/')
    // Note: If we use openrouter, we might need to add special tools, but for function calling we just pass them. 

    logger.info('Using preset', {
      name: presetConfig.name,
      model: presetConfig.model,
      toolCount: presetConfig.tools.length,
      toolsEnabled: options.toolsEnabled,
      webSearchEnabled: options.webSearchEnabled,
      pageToolsEnabled: options.pageToolsEnabled,
      vfsToolsEnabled: options.vfsToolsEnabled
    });

    // Build messages
    const initialMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [];

    if (presetConfig.systemPrompt) {
      initialMessages.push({
        role: 'system',
        content: presetConfig.systemPrompt,
      });
    }

    // Handle multimodal input if images are provided
    if (images && images.length > 0) {
      logger.info('Adding images to request', { count: images.length });

      const content: any[] = [
        { type: "text", text: prompt }
      ];

      images.forEach(url => {
        content.push({
          type: "image_url",
          image_url: {
            url,
            detail: "auto"
          }
        });
      });

      initialMessages.push({
        role: 'user',
        content: content,
      });
    } else {
      initialMessages.push({
        role: 'user',
        content: prompt,
      });
    }

    // Run tools — the SDK runner loops model turns and tool executions
    // until the model produces a final answer.
    const runner = client.chat.completions
      .runTools({
        model: options.model || presetConfig.model,
        messages: initialMessages,
        tools: presetConfig.tools
      });

    logger.info('RunTools started', {
      prompt: prompt.substring(0, 100),
      preset: typeof preset === 'string' ? preset : 'custom',
      model: options.model || presetConfig.model,
      toolCount: presetConfig.tools.length,
      initialMessages
    });

    await runner.done();

    const finalMessages = runner.messages;
    const finalContent = await runner.finalContent();

    // Manually reconstruct tool calls with their output from the message history
    // This is more reliable than depending on the runner's final state. 
    const completedToolCalls = finalMessages
      .filter((msg): msg is OpenAI.Chat.Completions.ChatCompletionMessage & { tool_calls: OpenAI.Chat.Completions.ChatCompletionMessageToolCall[] } => msg.role === 'assistant' && !!msg.tool_calls)
      .flatMap(msg => msg.tool_calls);

    // Attach each tool's output (the matching role:'tool' message) onto the
    // call object so callers get call + result in one place.
    for (const toolCall of completedToolCalls) {
      const toolOutputMessage = finalMessages.find(
        (msg): msg is OpenAI.Chat.Completions.ChatCompletionMessage => msg.role === 'tool' && msg.tool_call_id === toolCall.id
      );
      if (toolOutputMessage && 'function' in toolCall && toolCall.function) {
        (toolCall.function as any).output = toolOutputMessage.content;
      }
    }

    logger.info('RunTools completed', {
      messageCount: finalMessages.length,
      toolCallCount: completedToolCalls.length,
      completedToolCalls,
      finalMessages: finalMessages,
      finalContent: finalContent,
    });

    // The SDK should handle parsing, but as a safeguard, parse stringified outputs.
    for (const toolCall of completedToolCalls) {
      if ('function' in toolCall && toolCall.function && 'output' in toolCall.function && typeof toolCall.function.output === 'string') {
        try {
          // Attempt to parse the output, but don't fail if it's just a raw string.
          const parsedOutput = JSON.parse(toolCall.function.output);
          (toolCall.function as any).output = parsedOutput;
        } catch (e) {
          logger.debug('Tool output was a non-JSON string.', { output: toolCall.function.output });
        }
      }
    }

    return {
      success: true,
      content: finalContent ??
undefined,
+    messages: finalMessages,
+    toolCalls: completedToolCalls,
+  };
+  } catch (error: any) {
+
+    logger.error('RunTools failed', {
+      error: error.message,
+      prompt: prompt.substring(0, 100),
+    });
+
+    return {
+      success: false,
+      messages,
+      toolCalls,
+      error: error.message || 'Unknown error occurred',
+    };
+  }
+};
diff --git a/packages/media/cpp/src/core/transform.cpp b/packages/media/cpp/src/core/transform.cpp
new file mode 100644
index 00000000..3b3bce09
--- /dev/null
+++ b/packages/media/cpp/src/core/transform.cpp
@@ -0,0 +1,306 @@
+#include "transform.hpp"
+#include "url_fetch.hpp"
+
+#include <curl/curl.h>
+#include <nlohmann/json.hpp>
+
+#include <cctype>
+#include <cstdint>
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace fs = std::filesystem;
+using json = nlohmann::json;
+
+namespace media {
+
+// ── base64 encode/decode ────────────────────────────────────────────
+
+static const char b64_table[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+// Standard base64 (RFC 4648 alphabet) with '=' padding.
+static std::string base64_encode(const uint8_t* data, size_t len) {
+    std::string out;
+    out.reserve(((len + 2) / 3) * 4);
+    for (size_t i = 0; i < len; i += 3) {
+        uint32_t n = (uint32_t)data[i] << 16;
+        if (i + 1 < len) n |= (uint32_t)data[i + 1] << 8;
+        if (i + 2 < len) n |= (uint32_t)data[i + 2];
+        out.push_back(b64_table[(n >> 18) & 0x3F]);
+        out.push_back(b64_table[(n >> 12) & 0x3F]);
+        out.push_back((i + 1 < len) ? b64_table[(n >> 6) & 0x3F] : '=');
+        out.push_back((i + 2 < len) ? b64_table[n & 0x3F] : '=');
+    }
+    return out;
+}
+
+// Maps one base64 alphabet character to its 6-bit value, -1 if invalid.
+static int b64_decode_char(char c) {
+    if (c >= 'A' && c <= 'Z') return c - 'A';
+    if (c >= 'a' && c <= 'z') return c - 'a' + 26;
+    if (c >= '0' && c <= '9') return c - '0' + 52;
+    if (c == '+') return 62;
+    if (c == '/') return 63;
+    return -1;
+}
+
+// Lenient decoder: padding, CR/LF, spaces and any other non-alphabet
+// characters are silently skipped rather than treated as errors.
+static std::vector<uint8_t> base64_decode(const std::string& in) {
+    std::vector<uint8_t> out;
+    out.reserve(in.size() * 3 / 4);
+    uint32_t buf = 0;
+    int bits = 0;
+    for (char c : in) {
+        if (c == '=' || c == '\n' || c == '\r' || c == ' ') continue;
+        int v = b64_decode_char(c);
+        if (v < 0) continue;
+        buf = (buf << 6) | (uint32_t)v;
+        bits += 6;
+        if (bits >= 8) {
+            bits -= 8;
+            out.push_back((uint8_t)(buf >> bits));
+        }
+    }
+    return out;
+}
+
+// ── MIME type from extension ────────────────────────────────────────
+
+// Case-insensitive lookup; unknown extensions fall back to image/jpeg.
+static std::string mime_from_ext(const std::string& ext) {
+    std::string e = ext;
+    for (auto& c : e) c = (char)std::tolower((unsigned char)c);
+    if (e == ".jpg" || e == ".jpeg") return "image/jpeg";
+    if (e == ".png") return "image/png";
+    if (e == ".webp") return "image/webp";
+    if (e == ".gif") return "image/gif";
+    if (e == ".bmp") return "image/bmp";
+    if (e == ".tif" || e == ".tiff") return "image/tiff";
+    if (e == ".avif") return "image/avif";
+    if (e == ".heic") return "image/heic";
+    return "image/jpeg";
+}
+
+// ── curl helpers ────────────────────────────────────────────────────
+
+// libcurl CURLOPT_WRITEFUNCTION callback: appends the received chunk to
+// the std::string passed via CURLOPT_WRITEDATA.
+static size_t string_write_cb(char* ptr, size_t size, size_t nmemb, void* ud) {
+    auto* s = static_cast<std::string*>(ud);
+    s->append(ptr, size * nmemb);
+    return size * nmemb;
+}
+
+// ── Google Gemini generateContent ───────────────────────────────────
+
+// Sends input image + prompt to the Gemini generateContent REST endpoint
+// and writes the returned image (plus any text part) into TransformResult.
+static TransformResult call_gemini(
+    const std::string& input_path,
+    const std::string& output_path,
+    const TransformOptions& opts,
+    TransformProgressFn progress)
+{
+    TransformResult res;
+
+    // Read input image
+    if (progress) progress("Reading " + input_path);
+    std::ifstream ifs(input_path, std::ios::binary);
+    if (!ifs) {
+        res.error = "Cannot open input: " + input_path;
+        return res;
+    }
+    std::vector<uint8_t> img_bytes((std::istreambuf_iterator<char>(ifs)),
+                                   std::istreambuf_iterator<char>());
+    ifs.close();
+
+    if (img_bytes.empty()) {
+        res.error = "Input file is empty: " + input_path;
+        return res;
+    }
+
+    std::string mime = mime_from_ext(fs::path(input_path).extension().string());
+    std::string b64 = base64_encode(img_bytes.data(), img_bytes.size());
+
+    // Build request JSON: one user turn with a text part and an inline
+    // image part; request both TEXT and IMAGE response modalities.
+    json req_body = {
+        {"contents", json::array({
+            {{"parts", json::array({
+                {{"text", opts.prompt}},
+                {{"inlineData", {{"mimeType", mime}, {"data", b64}}}}
+            })}}
+        })},
+        {"generationConfig", {
+            {"responseModalities", json::array({"TEXT", "IMAGE"})}
+        }}
+    };
+
+    // NOTE(review): API key travels in the URL query string — it may end
+    // up in proxy/server logs; consider the x-goog-api-key header instead.
+    std::string url = "https://generativelanguage.googleapis.com/v1beta/models/"
+                    + opts.model + ":generateContent?key=" + opts.api_key;
+
+    std::string body_str = req_body.dump();
+
+    if (progress) progress("Sending to " + opts.model + " ("
+                           + std::to_string(img_bytes.size() / 1024) + " KB)...");
+
+    ensure_curl_global();
+    CURL* curl = curl_easy_init();
+    if (!curl) {
+        res.error = "curl_easy_init failed";
+        return res;
+    }
+
+    std::string response_str;
+    struct curl_slist* headers = nullptr;
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+
+    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body_str.c_str());
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)body_str.size());
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, string_write_cb);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_str);
+    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 120L);
+    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);
+
+    CURLcode cc = curl_easy_perform(curl);
+    long http_code = 0;
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
+    curl_slist_free_all(headers);
+    curl_easy_cleanup(curl);
+
+    if (cc != CURLE_OK) {
+        res.error = std::string("HTTP request failed: ") + curl_easy_strerror(cc);
+        return res;
+    }
+
+    if (http_code != 200) {
+        res.error = "API returned HTTP " + std::to_string(http_code);
+        // Try to extract error message from response JSON
+        try {
+            auto j = json::parse(response_str);
+            if (j.contains("error") && j["error"].contains("message"))
+                res.error += ": " + j["error"]["message"].get<std::string>();
+        } catch (...) {
+            if (response_str.size() < 500) res.error += ": " + response_str;
+        }
+        return res;
+    }
+
+    // Parse response
+    if (progress) progress("Parsing response...");
+    json resp;
+    try {
+        resp = json::parse(response_str);
+    } catch (const std::exception& e) {
+        res.error = std::string("JSON parse error: ") + e.what();
+        return res;
+    }
+
+    // Extract image and text from candidates[0].content.parts[].
+    // If several inlineData parts are present, the last one wins.
+    bool found_image = false;
+    try {
+        auto& parts = resp["candidates"][0]["content"]["parts"];
+        for (auto& part : parts) {
+            if (part.contains("inlineData")) {
+                auto& id = part["inlineData"];
+                // NOTE(review): resp_mime is currently unused — the output
+                // file keeps the caller-chosen/input extension regardless.
+                std::string resp_mime = id.value("mimeType", "image/png");
+                std::string resp_b64 = id["data"].get<std::string>();
+                res.image_data = base64_decode(resp_b64);
+                found_image = true;
+            }
+            if (part.contains("text")) {
+                if (!res.ai_text.empty()) res.ai_text += "\n";
+                res.ai_text += part["text"].get<std::string>();
+            }
+        }
+    } catch (const std::exception& e) {
+        res.error = std::string("Response parsing error: ") + e.what();
+        // Include raw response excerpt for debugging
+        if (response_str.size() < 2000) res.error += "\nRaw: " + response_str;
+        return res;
+    }
+
+    if (!found_image || res.image_data.empty()) {
+        res.error = "No image in API response";
+        if (!res.ai_text.empty()) res.error += ". Model said: " + res.ai_text;
+        return res;
+    }
+
+    // Write output
+    std::string out = output_path;
+    if (out.empty()) out = default_transform_output(input_path, opts.prompt);
+
+    fs::path out_dir = fs::path(out).parent_path();
+    if (!out_dir.empty()) {
+        std::error_code ec;
+        fs::create_directories(out_dir, ec);
+    }
+
+    if (progress) progress("Writing " + out);
+    std::ofstream ofs(out, std::ios::binary);
+    if (!ofs) {
+        res.error = "Cannot write output: " + out;
+        return res;
+    }
+    ofs.write(reinterpret_cast<const char*>(res.image_data.data()),
+              static_cast<std::streamsize>(res.image_data.size()));
+    ofs.close();
+
+    res.ok = true;
+    res.output_path = out;
+    return res;
+}
+
+// ── public API ──────────────────────────────────────────────────────
+
+// Derives "<stem>_<slugified-prompt><ext>" next to the input file.
+std::string default_transform_output(const std::string& input_path, const std::string& prompt) {
+    fs::path p(input_path);
+    std::string stem = p.stem().string();
+    std::string ext = p.extension().string();
+    if (ext.empty()) ext = ".png";
+
+    // Sanitize and truncate prompt for filename
+    std::string slug;
+    slug.reserve(prompt.size());
+    for (char c : prompt) {
+        if (std::isalnum((unsigned char)c))
+            slug.push_back((char)std::tolower((unsigned char)c));
+        else if (c == ' ' || c == '-' || c == '_')
+            slug.push_back('_');
+        // skip other chars
+    }
+    // collapse consecutive underscores
+    std::string clean;
+    for (char c : slug) {
+        if (c == '_' && !clean.empty() && clean.back() == '_') continue;
+        clean.push_back(c);
+    }
+    // trim trailing underscore
+    while (!clean.empty() && clean.back() == '_') clean.pop_back();
+    // truncate to ~40 chars
+    if (clean.size() > 40) clean.resize(40);
+    while (!clean.empty() && clean.back() == '_') clean.pop_back();
+
+    std::string result = stem + "_" + clean + ext;
+    return (p.parent_path() / result).string();
+}
+
+// Validates options and dispatches to the provider implementation.
+TransformResult transform_image(
+    const std::string& input_path,
+    const std::string& output_path,
+    const TransformOptions& opts,
+    TransformProgressFn progress)
+{
+    if (opts.prompt.empty()) {
+        return {false, "prompt is required"};
+    }
+    if (opts.api_key.empty()) {
+        return {false, "API key is required (set IMAGE_TRANSFORM_GOOGLE_API_KEY in .env or pass --api-key)"};
+    }
+
+    if (opts.provider == "google") {
+        return call_gemini(input_path, output_path, opts, progress);
+    }
+
+    return {false, "Unsupported provider: " + opts.provider};
+}
+
+} // namespace media
diff --git a/packages/media/cpp/src/core/transform.hpp b/packages/media/cpp/src/core/transform.hpp
new file mode 100644
index 00000000..5ebf51a1
--- /dev/null
+++ b/packages/media/cpp/src/core/transform.hpp
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <string>
+#include <vector>
+
+namespace media {
+
+struct TransformOptions {
+    std::string provider = "google";
+    std::string model = "gemini-3-pro-image-preview";
+    std::string api_key;
+    std::string prompt;
+};
+
+struct TransformResult {
+    bool ok = false;
+    std::string error;
+    std::string output_path;          // written file
+    std::string ai_text;              // optional text part from model
+    std::vector<uint8_t> image_data;  // raw bytes (PNG/JPEG) before writing
+};
+
+// Progress callback: receives a short human-readable status message.
+using TransformProgressFn = std::function<void(const std::string&)>;
+
+/// Edit a single image using a generative AI model.
+/// Reads `input_path`, sends image + prompt to the API, writes result to `output_path`.
+/// If `output_path` is empty, derives it from input + prompt.
+TransformResult transform_image(
+    const std::string& input_path,
+    const std::string& output_path,
+    const TransformOptions& opts,
+    TransformProgressFn progress = nullptr
+);
+
+/// Build a default output path from input path and prompt text.
+std::string default_transform_output(const std::string& input_path, const std::string& prompt);
+
+} // namespace media