inpaint

2025-09-18 16:36:06 +02:00 · 2025-09-18 16:36:06 +02:00 · 002542ac34
commit 002542ac34
parent ea12aae7c8
8 changed files with 240 additions and 51 deletions
--- a/packages/kbot/.gitignore
+++ b/packages/kbot/.gitignore
@ -30,5 +30,5 @@ systems/.code-server/code-server-ipc.sock
 systems/.code-server/User/workspaceStorage/
 systems/code-server-defaults
 systems/.code-server
-
-packages/kbot/systems/gptr/gpt-researcher
+tests/assets/
+packages/kbot/systems/gptr/gpt-researcher
--- a/packages/kbot/dist/win-64/tauri-app.exe
+++ b/packages/kbot/dist/win-64/tauri-app.exe
--- a/packages/kbot/docs/inpaint.md
+++ b/packages/kbot/docs/inpaint.md
@ -0,0 +1,80 @@
+# Image Inpainting and Masking Options
+
+This document outlines potential approaches for implementing an inpainting feature, allowing a user to brush over an area of an image to create a mask that guides the AI for object placement or editing.
+
+## Core Concept: Image Masking
+
+The fundamental requirement for inpainting is to create a **mask**. This is typically a black-and-white image where the white (or black, depending on the AI model's requirements) area indicates the region to be modified by the AI. The original image and this mask are then sent to the AI model.
+
+![Placeholder image (Tauri menu-bar demo) — to be replaced with an illustration of an inpainting mask](https://raw.githubusercontent.com/tauri-apps/tauri-docs/dev/static/img/guides/features/menu-bar.gif)
+
+---
+
+## Option 1: Frontend (Client-Side) Approach (Recommended)
+
+This approach handles the mask creation entirely in the user's browser or the Tauri webview.
+
+### How it Works
+
+1.  **Display Image**: The source image is loaded and displayed to the user.
+2.  **Canvas Overlay**: An HTML `<canvas>` element is placed directly over the image.
+3.  **Brush Interaction**: The user can "paint" on the canvas. The brush strokes are rendered as white shapes on a transparent or black background.
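+4.  **Mask Generation**: When the user is done, the contents of the canvas are exported as a base64-encoded PNG image. This PNG is the mask. If the strokes were drawn on a transparent background, fill the background with black before exporting so that the result is a true black-and-white image rather than white strokes with an alpha channel.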
+5.  **API Call**: The original image and the newly generated mask image are sent to the AI for inpainting.
+
+### Libraries & Implementation
+
+*   **Custom Canvas Logic**: A simple implementation can be achieved with plain JavaScript and the HTML Canvas API to handle mouse events (`mousedown`, `mousemove`, `mouseup`) and draw lines. This is the most lightweight option.
+*   **Fabric.js / Konva.js**: These are powerful canvas libraries that simplify drawing, shapes, and user interaction. They provide a more robust feature set if more advanced editing tools are needed in the future.
+*   **React Components**: Libraries like `react-canvas-draw` or `react-sketch-canvas` offer pre-built components that can be integrated quickly.
+
+### Pros
+
+*   **Lightweight**: No heavy native dependencies are needed on the user's machine. The entire experience is handled by the webview.
+*   **Interactive & Fast**: The user gets immediate visual feedback as they draw the mask.
+*   **Cross-Platform**: Works everywhere the Tauri application runs without changes.
+*   **Simpler Backend**: The backend (`images.ts`) only needs to receive the image and the mask, without needing to perform any image processing itself.
+
+### Cons
+
+*   **Frontend Complexity**: Requires implementing the drawing logic in the React application.
+
+---
+
+## Option 2: Backend (Server-Side) Approach
+
+This approach offloads the mask creation to the Node.js backend.
+
+### How it Works
+
+1.  **Capture Coordinates**: The frontend captures the user's brush strokes as a series of coordinates (e.g., `[{x: 10, y: 20}, {x: 11, y: 21}]`).
+2.  **Send to Backend**: These coordinates, along with the original image path, are sent to the `images.ts` script.
+3.  **Process with Sharp/Jimp**: A Node.js library like `sharp` or `Jimp` is used to:
+    *   Read the original image to get its dimensions.
+    *   Create a new blank (black) image of the same size.
+    *   Draw white lines or shapes onto the blank image using the coordinates received from the frontend.
+    *   Save this new image as the mask.
+4.  **API Call**: The backend then sends the original image and the generated mask to the AI.
+
+### Libraries
+
+*   **`sharp`**: Very fast and powerful, but it is a native Node.js module. It depends on platform-specific binaries that are downloaded (or, when no prebuilt binary is available, compiled) during `npm install`, which can introduce cross-platform compatibility issues (e.g., needing different binaries for Windows, macOS, Linux, and different architectures like ARM vs. x86). This adds significant complexity to the build and distribution process.
+*   **`Jimp`**: Pure JavaScript, so it has no native dependencies. It's much easier to install and more portable than `sharp`, but it is significantly slower, which could be a problem for large images or complex masks.
+
+### Pros
+
+*   **Thinner Client**: Keeps the image processing logic out of the frontend application.
+
+### Cons
+
+*   **Native Dependencies**: Using `sharp` introduces significant build and maintenance complexity.
+*   **Performance/Latency**: There is a delay between drawing and seeing the final mask. Sending large arrays of coordinates can also be slow.
+*   **Less Interactive**: The user doesn't get a "live" view of the mask as they are drawing it.
+
+---
+
+## Recommendation
+
+The **Frontend (Client-Side) Approach** is strongly recommended for this application.
+
+Given the interactive nature of the task and the explicit goal of avoiding native dependencies, a client-side solution using the HTML Canvas is the most practical and efficient choice. It provides the best user experience, avoids the complexities of native modules, and keeps the backend logic simpler.
--- a/packages/kbot/gui/tauri-app/src/App.tsx
+++ b/packages/kbot/gui/tauri-app/src/App.tsx
@ -35,6 +35,37 @@ function App() {
    await tauriApi.requestFileDeletion({ path: pathToDelete });
  };

+  /**
+   * Prompts for a destination and writes a copy of a loaded image there.
+   *
+   * Looks the image up in `files` by path, opens the save dialog with the
+   * source file name as the suggestion, fetches `imageFile.src` and writes the
+   * resulting bytes to the chosen path. Every outcome (unknown image,
+   * cancelled dialog, success, failure) is reported through `addDebugMessage`;
+   * nothing is thrown to the caller.
+   *
+   * @param imagePath - `path` of the entry in `files` to save.
+   */
+  const saveImageAs = async (imagePath: string) => {
+    const imageFile = files.find(f => f.path === imagePath);
+    if (!imageFile) {
+      addDebugMessage('error', `Could not find image to save: ${imagePath}`);
+      return;
+    }
+
+    try {
+      // Suggest the last path segment (split on `/` or `\`) as the file name.
+      const defaultPath = imagePath.split(/[/\\]/).pop() || 'saved_image.png';
+      const newPath = await tauriApi.dialog.save({
+        defaultPath,
+        filters: [{ name: 'Images', extensions: ['png', 'jpg', 'jpeg'] }]
+      });
+
+      if (!newPath) {
+        addDebugMessage('info', 'Save dialog was cancelled.');
+        return;
+      }
+
+      // Convert data URL to binary using fetch
+      const response = await fetch(imageFile.src);
+      if (!response.ok) {
+        // Never write an error body to disk as if it were the image.
+        throw new Error(`Could not read image data (HTTP ${response.status})`);
+      }
+      // NOTE(review): the bytes are written unchanged, so choosing a different
+      // extension in the dialog does not convert the format — confirm this is intended.
+      const uint8Array = new Uint8Array(await response.arrayBuffer());
+
+      await tauriApi.fs.writeFile(newPath, uint8Array);
+      addDebugMessage('info', `✅ Image saved successfully to: ${newPath}`);
+    } catch (error) {
+      // A rejection is not guaranteed to be an Error instance; a blind cast
+      // would log "undefined" for string or object rejections.
+      const reason = error instanceof Error ? error.message : String(error);
+      addDebugMessage('error', `Failed to save image: ${reason}`);
+    }
+  };
+
  const generateDefaultDst = (fileCount: number, firstFilePath?: string) => {
    if (fileCount === 1 && firstFilePath) {
      const parsedPath = firstFilePath.split(/[/\\]/).pop() || 'image';
@ -127,13 +158,20 @@ function App() {
    setFiles([]);
  };

-  const toggleImageSelection = (imagePath: string) => {
-    setFiles(prev => 
-      prev.map(file => 
-        file.path === imagePath 
-          ? { ...file, selected: !file.selected }
-          : file
-      )
+  /**
+   * Recomputes the `selected` flag of every entry in `files` after a click.
+   *
+   * Multi-select: only the clicked image changes, and its flag is flipped.
+   * Single-select: the clicked image becomes selected and all others are cleared.
+   *
+   * @param imagePath - `path` of the clicked image.
+   * @param isMultiSelect - Whether the click should toggle instead of replace.
+   */
+  const handleImageSelection = (imagePath: string, isMultiSelect: boolean) => {
+    setFiles(prev =>
+      prev.map(file => {
+        const isClicked = file.path === imagePath;
+        if (isMultiSelect) {
+          // Untouched entries keep their identity in multi-select mode.
+          return isClicked ? { ...file, selected: !file.selected } : file;
+        }
+        // Single-select: exactly the clicked entry ends up selected.
+        return { ...file, selected: isClicked };
+      })
    );
  };

@ -295,9 +333,8 @@ function App() {
    
    if (apiKey) {
      // Generate image via backend (always chat mode now)
-      // Use selected images if any, otherwise use all files
-      const selectedImages = getSelectedImages();
-      const imagesToUse = selectedImages.length > 0 ? selectedImages : files.filter(f => !f.path.startsWith('generating_'));
+      // Only use explicitly selected images. If none are selected, generate from prompt alone.
+      const imagesToUse = getSelectedImages();
      
      await generateImage(prompt, imagesToUse);
      // Don't clear prompt - let user iterate
@ -444,13 +481,15 @@ function App() {
          files={files}
          getSelectedImages={getSelectedImages}
          clearAllFiles={clearAllFiles}
-          toggleImageSelection={toggleImageSelection}
+          handleImageSelection={handleImageSelection}
          removeFile={removeFile}
          isGenerating={isGenerating}
          saveAndClose={saveAndClose}
          submit={submit}
          addImageFromUrl={addImageFromUrl}
          onImageDelete={deleteFilePermanently}
+          onImageSaveAs={saveImageAs}
+          addFiles={addFiles}
        />

        {/* Debug Panel */}
--- a/packages/kbot/gui/tauri-app/src/components/ImageGallery.tsx
+++ b/packages/kbot/gui/tauri-app/src/components/ImageGallery.tsx
@ -3,17 +3,19 @@ import { ImageFile } from '../types';

 interface ImageGalleryProps {
  images: ImageFile[];
-  onImageSelect?: (imagePath: string) => void;
+  onImageSelection?: (imagePath: string, isMultiSelect: boolean) => void;
  onImageRemove?: (imagePath: string) => void;
  onImageDelete?: (imagePath: string) => void;
+  onImageSaveAs?: (imagePath: string) => void;
  showSelection?: boolean;
 }

 export default function ImageGallery({ 
  images, 
-  onImageSelect, 
+  onImageSelection, 
  onImageRemove, 
  onImageDelete,
+  onImageSaveAs,
  showSelection = false 
 }: ImageGalleryProps) {
  const [currentIndex, setCurrentIndex] = useState(0);
@ -72,11 +74,13 @@ export default function ImageGallery({
  };

  const handleThumbnailClick = (event: React.MouseEvent<HTMLButtonElement>, imagePath: string, index: number) => {
-    if (event.ctrlKey || event.metaKey) {
-      if (showSelection && onImageSelect) {
-        onImageSelect(imagePath);
-      }
-    } else {
+    // Ctrl or Cmd held: the click only adjusts the selection.
+    const modifierHeld = event.ctrlKey || event.metaKey;
+    if (showSelection) {
+      // Every click is reported; the callback itself is optional.
+      onImageSelection?.(imagePath, modifierHeld);
+    }
+
+    // A plain click also moves the current index to the clicked thumbnail.
+    if (modifierHeld === false) {
      setCurrentIndex(index);
    }
  };
@ -211,39 +215,39 @@ export default function ImageGallery({
                  </div>
                )}
                
-                {/* Remove button */}
-                {!thumbIsGenerating && onImageRemove && (
-                  <button
-                    onClick={(e) => {
-                      e.stopPropagation();
-                      onImageRemove(image.path);
-                    }}
-                    className="absolute top-1 right-1 bg-red-500/90 hover:bg-red-600 text-white rounded-full w-5 h-5 flex items-center justify-center text-xs opacity-0 group-hover:opacity-100 transition-all duration-200"
-                    title="Remove from view"
-                  >
-                    ×
-                  </button>
-                )}
-
-                {/* Delete button (permanent) */}
-                {!thumbIsGenerating && onImageDelete && onImageRemove && (
+                {/* Save button */}
+                {!thumbIsGenerating && onImageSaveAs && (
                  <button
                    type="button"
                    onClick={(e) => {
                      e.stopPropagation();
-                      if (window.confirm('Are you sure you want to permanently delete this file from your disk?')) {
-                        onImageDelete(image.path);
-                        onImageRemove(image.path);
-                      }
+                      onImageSaveAs(image.path);
                    }}
-                    className="absolute bottom-1 right-1 bg-red-600/70 hover:bg-red-600 text-white rounded-full w-5 h-5 flex items-center justify-center text-xs transition-all duration-200"
-                    title="Delete File Permanently"
+                    className="absolute bottom-1 left-1 bg-blue-600/70 hover:bg-blue-700 text-white rounded-full w-5 h-5 flex items-center justify-center text-xs transition-all duration-200"
+                    title="Save Image As..."
                  >
                    <svg xmlns="http://www.w3.org/2000/svg" className="h-3 w-3" viewBox="0 0 20 20" fill="currentColor">
-                      <path fillRule="evenodd" d="M9 2a1 1 0 00-.894.553L7.382 4H4a1 1 0 000 2v10a2 2 0 002 2h8a2 2 0 002-2V6a1 1 0 100-2h-3.382l-.724-1.447A1 1 0 0011 2H9zM7 8a1 1 0 012 0v6a1 1 0 11-2 0V8zm4 0a1 1 0 012 0v6a1 1 0 11-2 0V8z" clipRule="evenodd" />
+                      <path d="M5 2a1 1 0 00-1 1v14a1 1 0 001 1h10a1 1 0 001-1V7.414a1 1 0 00-.293-.707l-4-4A1 1 0 0011.586 2H5zm5 2a2 2 0 11-4 0 2 2 0 014 0zm-2 9a1 1 0 00-1 1v1a1 1 0 102 0v-1a1 1 0 00-1-1z" />
                    </svg>
                  </button>
                )}
+
+                {/* Delete Button */}
+                {!thumbIsGenerating && onImageDelete && (
+                  <button
+                    type="button"
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      if (window.confirm('Are you sure you want to permanently delete this file? This action cannot be undone.')) {
+                        onImageDelete(image.path);
+                      }
+                    }}
+                    className="absolute bottom-1 right-1 bg-red-600/80 hover:bg-red-700 text-white rounded-full w-5 h-5 flex items-center justify-center text-xs transition-all duration-200"
+                    title="Delete File Permanently"
+                  >
+                    ×
+                  </button>
+                )}
              </button>
            );
          })}
--- a/packages/kbot/gui/tauri-app/src/components/PromptForm.tsx
+++ b/packages/kbot/gui/tauri-app/src/components/PromptForm.tsx
@ -1,6 +1,7 @@
 import React from 'react';
 import { ImageFile } from '../types';
 import ImageGallery from './ImageGallery';
+import { useDropZone } from '../hooks/useDropZone';

 interface PromptFormProps {
  prompt: string;
@ -12,13 +13,15 @@ interface PromptFormProps {
  files: ImageFile[];
  getSelectedImages: () => ImageFile[];
  clearAllFiles: () => void;
-  toggleImageSelection: (path: string) => void;
+  handleImageSelection: (path: string, isMultiSelect: boolean) => void;
  removeFile: (path: string) => void;
  isGenerating: boolean;
  saveAndClose: () => void;
  submit: () => void;
  addImageFromUrl: (url: string) => void;
  onImageDelete?: (path: string) => void;
+  onImageSaveAs?: (path: string) => void;
+  addFiles: (paths: string[]) => void;
 }

 const PromptForm: React.FC<PromptFormProps> = ({
@ -31,15 +34,18 @@ const PromptForm: React.FC<PromptFormProps> = ({
  files,
  getSelectedImages,
  clearAllFiles,
-  toggleImageSelection,
+  handleImageSelection,
  removeFile,
  isGenerating,
  saveAndClose,
  submit,
  addImageFromUrl,
-  onImageDelete
+  onImageDelete,
+  onImageSaveAs,
+  addFiles
 }) => {
  const selectedCount = getSelectedImages().length;
+  const { ref: dropZoneRef, dragIn } = useDropZone({ onDrop: addFiles });

  return (
    <form
@ -88,17 +94,20 @@ const PromptForm: React.FC<PromptFormProps> = ({
          </div>
        </div>

-        <div>
-          <label className="block text-sm font-semibold text-slate-700 dark:text-slate-300 mb-2">
+        <div 
+          ref={dropZoneRef}
+          className={`p-4 rounded-xl border-2 border-dashed transition-all duration-300 ${dragIn ? 'border-blue-500 bg-blue-500/10' : 'border-slate-300/50 dark:border-slate-600/50'}`}
+        >
+          <label className="block text-sm font-semibold text-slate-700 dark:text-slate-300 mb-2 text-center">
            Source Images
          </label>
          <div className="flex gap-3">
            <button
              type="button"
              onClick={openFilePicker}
-              className="w-full glass-button font-semibold py-4 px-6 rounded-xl border-dashed border-2 border-slate-300/50 dark:border-slate-600/50 hover:border-slate-400/60 dark:hover-border-slate-500/60"
+              className="w-full glass-button font-semibold py-4 px-6 rounded-xl hover:border-slate-400/60 dark:hover-border-slate-500/60"
            >
-              📸 Select Images to Edit
+              📸 Select Images to Edit (or Drop Here)
            </button>
            <button
              type="button"
@ -138,8 +147,9 @@ const PromptForm: React.FC<PromptFormProps> = ({
          <div className="glass-card p-4">
            <ImageGallery
              images={files}
-              onImageSelect={toggleImageSelection}
+              onImageSelection={handleImageSelection}
              onImageRemove={removeFile}
+              onImageSaveAs={onImageSaveAs}
              showSelection={true}
              onImageDelete={onImageDelete}
            />
--- a/packages/kbot/gui/tauri-app/src/hooks/useDropZone.ts
+++ b/packages/kbot/gui/tauri-app/src/hooks/useDropZone.ts
@ -0,0 +1,47 @@
+import { listen } from "@tauri-apps/api/event";
+import { useEffect, useRef, useState } from "react";
+
+/** Payload shape of the Tauri drag events this hook listens to. */
+type TauriDragDropEvent = {
+  /** Paths of the dragged files; only read for `tauri://drag-drop`. */
+  paths: string[];
+  /** Position reported with the event, passed to `document.elementFromPoint`. */
+  position: {
+    x: number;
+    y: number;
+  };
+};
+
+/**
+ * Turns an element into a drop target for files dragged into the Tauri window.
+ *
+ * Attach the returned `ref` to the element. `dragIn` is `true` while a drag is
+ * over the element or any of its descendants. When files are dropped there,
+ * `onDrop` is called with their paths.
+ *
+ * @param onDrop - Receives the dropped paths. The most recently passed
+ *   callback is the one invoked, so it does not need to be memoized.
+ * @returns The `ref` to attach and the `dragIn` hover flag.
+ */
+const useDropZone = ({ onDrop }: { onDrop: (paths: string[]) => void }) => {
+  const ref = useRef<HTMLDivElement>(null);
+  const [dragIn, setDragIn] = useState(false);
+
+  // Hold the newest callback in a ref so the listeners are registered once
+  // instead of being torn down and re-created whenever `onDrop` changes identity.
+  const onDropRef = useRef(onDrop);
+  useEffect(() => {
+    onDropRef.current = onDrop;
+  }, [onDrop]);
+
+  useEffect(() => {
+    // NOTE(review): x/y are used directly as viewport coordinates; confirm
+    // whether they need scaling by `window.devicePixelRatio` on high-DPI displays.
+    const isInsideZone = ({ x, y }: TauriDragDropEvent["position"]) => {
+      const zone = ref.current;
+      const hit = document.elementFromPoint(x, y);
+      // `contains` also matches children of the zone (labels, buttons, ...),
+      // and the null checks stop an unset ref from matching a missing element.
+      return zone !== null && hit !== null && zone.contains(hit);
+    };
+
+    const subscriptions = [
+      listen<TauriDragDropEvent>("tauri://drag-over", (e) => {
+        setDragIn(isInsideZone(e.payload.position));
+      }),
+      listen<TauriDragDropEvent>("tauri://drag-drop", (e) => {
+        // The drag is over wherever it landed, so always clear the highlight.
+        setDragIn(false);
+        if (isInsideZone(e.payload.position)) {
+          onDropRef.current(e.payload.paths);
+        }
+      }),
+      // Without this the highlight would stay on when the drag leaves the
+      // window while hovering over the zone.
+      listen("tauri://drag-leave", () => {
+        setDragIn(false);
+      }),
+    ];
+
+    return () => {
+      for (const subscription of subscriptions) {
+        // `listen` resolves asynchronously; unsubscribe once it has.
+        void subscription.then((unlisten) => unlisten());
+      }
+    };
+  }, []);
+
+  return { ref, dragIn };
+};
+
+export { useDropZone };
--- a/packages/kbot/gui/tauri-app/src/lib/tauriApi.ts
+++ b/packages/kbot/gui/tauri-app/src/lib/tauriApi.ts
@ -9,6 +9,7 @@ let writeFile: any;
 let BaseDirectory: any;
 let listen: any;
 let getCurrentWindow: any;
+let getCurrentWebview: any;
 let fetch: any;

 let isTauri = false;
@ -20,6 +21,8 @@ const apiInitializationPromise = (async () => {
    try {
        const windowApi = await import('@tauri-apps/api/window');
        getCurrentWindow = windowApi.getCurrentWindow;
+        const webviewApi = await import('@tauri-apps/api/webview');
+        getCurrentWebview = webviewApi.getCurrentWebview;
        isTauri = true;
        console.log('✅ Tauri window API loaded');

@ -114,6 +117,12 @@ export const tauriApi = {
            return getCurrentWindow ? getCurrentWindow() : null;
        },
    },
+    webview: {
+        getCurrent: async () => {
+            await ensureTauriApi();
+            return getCurrentWebview ? getCurrentWebview() : null;
+        },
+    },
    // Add typed wrappers for your app's specific commands
    resolvePathRelativeToHome: (absolutePath: string) =>
        safeInvoke<string>(TauriCommand.RESOLVE_PATH_RELATIVE_TO_HOME, { absolutePath }),