diff --git a/packages/kbot/.gitignore b/packages/kbot/.gitignore index f53b683a..9b65460b 100644 --- a/packages/kbot/.gitignore +++ b/packages/kbot/.gitignore @@ -30,5 +30,5 @@ systems/.code-server/code-server-ipc.sock systems/.code-server/User/workspaceStorage/ systems/code-server-defaults systems/.code-server - -packages/kbot/systems/gptr/gpt-researcher \ No newline at end of file +tests/assets/ +packages/kbot/systems/gptr/gpt-researcher diff --git a/packages/kbot/dist/win-64/tauri-app.exe b/packages/kbot/dist/win-64/tauri-app.exe index a4a453ee..d4eb472a 100644 Binary files a/packages/kbot/dist/win-64/tauri-app.exe and b/packages/kbot/dist/win-64/tauri-app.exe differ diff --git a/packages/kbot/docs/inpaint.md b/packages/kbot/docs/inpaint.md new file mode 100644 index 00000000..b7e606e7 --- /dev/null +++ b/packages/kbot/docs/inpaint.md @@ -0,0 +1,80 @@ +# Image Inpainting and Masking Options + +This document outlines potential approaches for implementing an inpainting feature, allowing a user to brush over an area of an image to create a mask that guides the AI for object placement or editing. + +## Core Concept: Image Masking + +The fundamental requirement for inpainting is to create a **mask**. This is typically a black-and-white image where the white (or black, depending on the AI model's requirements) area indicates the region to be modified by the AI. The original image and this mask are then sent to the AI model. + +![Inpainting Concept](https://raw.githubusercontent.com/tauri-apps/tauri-docs/dev/static/img/guides/features/menu-bar.gif) + +--- + +## Option 1: Frontend (Client-Side) Approach (Recommended) + +This approach handles the mask creation entirely in the user's browser or the Tauri webview. + +### How it Works + +1. **Display Image**: The source image is loaded and displayed to the user. +2. **Canvas Overlay**: An HTML `<canvas>` element is placed directly over the image. +3. **Brush Interaction**: The user can "paint" on the canvas. 
The brush strokes are rendered as white shapes on a transparent or black background. +4. **Mask Generation**: When the user is done, the contents of the canvas are exported as a base64 encoded PNG image. This PNG is the mask. +5. **API Call**: The original image and the newly generated mask image are sent to the AI for inpainting. + +### Libraries & Implementation + +* **Custom Canvas Logic**: A simple implementation can be achieved with plain JavaScript and the HTML Canvas API to handle mouse events (`mousedown`, `mousemove`, `mouseup`) and draw lines. This is the most lightweight option. +* **Fabric.js / Konva.js**: These are powerful canvas libraries that simplify drawing, shapes, and user interaction. They provide a more robust feature set if more advanced editing tools are needed in the future. +* **React Components**: Libraries like `react-canvas-draw` or `react-sketch-canvas` offer pre-built components that can be integrated quickly. + +### Pros + +* **Lightweight**: No heavy native dependencies are needed on the user's machine. The entire experience is handled by the webview. +* **Interactive & Fast**: The user gets immediate visual feedback as they draw the mask. +* **Cross-Platform**: Works everywhere the Tauri application runs without changes. +* **Simpler Backend**: The backend (`images.ts`) only needs to receive the image and the mask, without needing to perform any image processing itself. + +### Cons + +* **Frontend Complexity**: Requires implementing the drawing logic in the React application. + +--- + +## Option 2: Backend (Server-Side) Approach + +This approach offloads the mask creation to the Node.js backend. + +### How it Works + +1. **Capture Coordinates**: The frontend captures the user's brush strokes as a series of coordinates (e.g., `[{x: 10, y: 20}, {x: 11, y: 21}]`). +2. **Send to Backend**: These coordinates, along with the original image path, are sent to the `images.ts` script. +3. 
**Process with Sharp/Jimp**: A Node.js library like `sharp` or `Jimp` is used to: + * Read the original image to get its dimensions. + * Create a new blank (black) image of the same size. + * Draw white lines or shapes onto the blank image using the coordinates received from the frontend. + * Save this new image as the mask. +4. **API Call**: The backend then sends the original image and the generated mask to the AI. + +### Libraries + +* **`sharp`**: Very fast and powerful, but it is a native Node.js module. This means it requires compilation during `npm install` and can introduce cross-platform compatibility issues (e.g., needing different binaries for Windows, macOS, Linux, and different architectures like ARM vs. x86). This adds significant complexity to the build and distribution process. +* **`Jimp`**: Pure JavaScript, so it has no native dependencies. It's much easier to install and more portable than `sharp`, but it is significantly slower, which could be a problem for large images or complex masks. + +### Pros + +* **Thinner Client**: Keeps the image processing logic out of the frontend application. + +### Cons + +* **Native Dependencies**: Using `sharp` introduces significant build and maintenance complexity. +* **Performance/Latency**: There is a delay between drawing and seeing the final mask. Sending large arrays of coordinates can also be slow. +* **Less Interactive**: The user doesn't get a "live" view of the mask as they are drawing it. + +--- + +## Recommendation + +The **Frontend (Client-Side) Approach** is strongly recommended for this application. + +Given the interactive nature of the task and the user's explicit concern about native dependencies, a client-side solution using the HTML Canvas is the most practical and efficient choice. It provides the best user experience, avoids the complexities of native modules, and keeps the backend logic simpler. 
diff --git a/packages/kbot/gui/tauri-app/src/App.tsx b/packages/kbot/gui/tauri-app/src/App.tsx index 013090e9..00967b76 100644 --- a/packages/kbot/gui/tauri-app/src/App.tsx +++ b/packages/kbot/gui/tauri-app/src/App.tsx @@ -35,6 +35,37 @@ function App() { await tauriApi.requestFileDeletion({ path: pathToDelete }); }; + const saveImageAs = async (imagePath: string) => { + const imageFile = files.find(f => f.path === imagePath); + if (!imageFile) { + addDebugMessage('error', `Could not find image to save: ${imagePath}`); + return; + } + + try { + const defaultPath = imagePath.split(/[/\\]/).pop() || 'saved_image.png'; + const newPath = await tauriApi.dialog.save({ + defaultPath, + filters: [{ name: 'Images', extensions: ['png', 'jpg', 'jpeg'] }] + }); + + if (newPath) { + // Convert data URL to binary using fetch + const response = await fetch(imageFile.src); + const blob = await response.blob(); + const buffer = await blob.arrayBuffer(); + const uint8Array = new Uint8Array(buffer); + + await tauriApi.fs.writeFile(newPath, uint8Array); + addDebugMessage('info', `✅ Image saved successfully to: ${newPath}`); + } else { + addDebugMessage('info', 'Save dialog was cancelled.'); + } + } catch (error) { + addDebugMessage('error', `Failed to save image: ${(error as Error).message}`); + } + }; + const generateDefaultDst = (fileCount: number, firstFilePath?: string) => { if (fileCount === 1 && firstFilePath) { const parsedPath = firstFilePath.split(/[/\\]/).pop() || 'image'; @@ -127,13 +158,20 @@ function App() { setFiles([]); }; - const toggleImageSelection = (imagePath: string) => { - setFiles(prev => - prev.map(file => - file.path === imagePath - ? { ...file, selected: !file.selected } - : file - ) + const handleImageSelection = (imagePath: string, isMultiSelect: boolean) => { + setFiles(prev => + prev.map(file => { + if (file.path === imagePath) { + // For multi-select, toggle the current state. For single-select, always select it. 
+ return { ...file, selected: isMultiSelect ? !file.selected : true }; + } + // For single-select, deselect all other images. + if (!isMultiSelect) { + return { ...file, selected: false }; + } + // For multi-select, leave other images as they are. + return file; + }) ); }; @@ -295,9 +333,8 @@ function App() { if (apiKey) { // Generate image via backend (always chat mode now) - // Use selected images if any, otherwise use all files - const selectedImages = getSelectedImages(); - const imagesToUse = selectedImages.length > 0 ? selectedImages : files.filter(f => !f.path.startsWith('generating_')); + // Only use explicitly selected images. If none are selected, generate from prompt alone. + const imagesToUse = getSelectedImages(); await generateImage(prompt, imagesToUse); // Don't clear prompt - let user iterate @@ -444,13 +481,15 @@ function App() { files={files} getSelectedImages={getSelectedImages} clearAllFiles={clearAllFiles} - toggleImageSelection={toggleImageSelection} + handleImageSelection={handleImageSelection} removeFile={removeFile} isGenerating={isGenerating} saveAndClose={saveAndClose} submit={submit} addImageFromUrl={addImageFromUrl} onImageDelete={deleteFilePermanently} + onImageSaveAs={saveImageAs} + addFiles={addFiles} /> {/* Debug Panel */} diff --git a/packages/kbot/gui/tauri-app/src/components/ImageGallery.tsx b/packages/kbot/gui/tauri-app/src/components/ImageGallery.tsx index 3970ec4c..48e1870d 100644 --- a/packages/kbot/gui/tauri-app/src/components/ImageGallery.tsx +++ b/packages/kbot/gui/tauri-app/src/components/ImageGallery.tsx @@ -3,17 +3,19 @@ import { ImageFile } from '../types'; interface ImageGalleryProps { images: ImageFile[]; - onImageSelect?: (imagePath: string) => void; + onImageSelection?: (imagePath: string, isMultiSelect: boolean) => void; onImageRemove?: (imagePath: string) => void; onImageDelete?: (imagePath: string) => void; + onImageSaveAs?: (imagePath: string) => void; showSelection?: boolean; } export default function 
ImageGallery({ images, - onImageSelect, + onImageSelection, onImageRemove, onImageDelete, + onImageSaveAs, showSelection = false }: ImageGalleryProps) { const [currentIndex, setCurrentIndex] = useState(0); @@ -72,11 +74,13 @@ export default function ImageGallery({ }; const handleThumbnailClick = (event: React.MouseEvent, imagePath: string, index: number) => { - if (event.ctrlKey || event.metaKey) { - if (showSelection && onImageSelect) { - onImageSelect(imagePath); - } - } else { + const isMultiSelect = event.ctrlKey || event.metaKey; + + if (showSelection && onImageSelection) { + onImageSelection(imagePath, isMultiSelect); + } + + if (!isMultiSelect) { setCurrentIndex(index); } }; @@ -211,39 +215,39 @@ export default function ImageGallery({ )} - {/* Remove button */} - {!thumbIsGenerating && onImageRemove && ( - - )} - - {/* Delete button (permanent) */} - {!thumbIsGenerating && onImageDelete && onImageRemove && ( + {/* Save button */} + {!thumbIsGenerating && onImageSaveAs && ( )} + + {/* Delete Button */} + {!thumbIsGenerating && onImageDelete && ( + + )} ); })} diff --git a/packages/kbot/gui/tauri-app/src/components/PromptForm.tsx b/packages/kbot/gui/tauri-app/src/components/PromptForm.tsx index c5765019..fcbeea0d 100644 --- a/packages/kbot/gui/tauri-app/src/components/PromptForm.tsx +++ b/packages/kbot/gui/tauri-app/src/components/PromptForm.tsx @@ -1,6 +1,7 @@ import React from 'react'; import { ImageFile } from '../types'; import ImageGallery from './ImageGallery'; +import { useDropZone } from '../hooks/useDropZone'; interface PromptFormProps { prompt: string; @@ -12,13 +13,15 @@ interface PromptFormProps { files: ImageFile[]; getSelectedImages: () => ImageFile[]; clearAllFiles: () => void; - toggleImageSelection: (path: string) => void; + handleImageSelection: (path: string, isMultiSelect: boolean) => void; removeFile: (path: string) => void; isGenerating: boolean; saveAndClose: () => void; submit: () => void; addImageFromUrl: (url: string) => void; 
onImageDelete?: (path: string) => void; + onImageSaveAs?: (path: string) => void; + addFiles: (paths: string[]) => void; } const PromptForm: React.FC = ({ @@ -31,15 +34,18 @@ const PromptForm: React.FC = ({ files, getSelectedImages, clearAllFiles, - toggleImageSelection, + handleImageSelection, removeFile, isGenerating, saveAndClose, submit, addImageFromUrl, - onImageDelete + onImageDelete, + onImageSaveAs, + addFiles }) => { const selectedCount = getSelectedImages().length; + const { ref: dropZoneRef, dragIn } = useDropZone({ onDrop: addFiles }); return (
= ({ -
-