From 84834a4b1a403e39026914bb84da031a711c1aea Mon Sep 17 00:00:00 2001 From: babayaga Date: Mon, 7 Apr 2025 18:52:59 +0200 Subject: [PATCH] kbot docs --- packages/kbot/{docs_ => docs}/apis.md | 0 packages/kbot/{docs_ => docs}/docker.md | 0 packages/kbot/{docs_ => docs}/docker.sh | 0 packages/kbot/docs_/advanced.md | 60 +- packages/kbot/docs_/commands.md | 37 +- packages/kbot/docs_/examples.md | 202 ++--- packages/kbot/docs_/iterator-report.md | 376 --------- packages/kbot/docs_/iterator.md | 245 +++++- packages/kbot/docs_/modes.md | 308 +++----- packages/kbot/docs_/parameters.md | 66 +- packages/kbot/docs_/todos.md | 35 - packages/kbot/preferences.md | 3 - .../kbot/{ => src/examples}/test_web_urls.js | 0 .../tests/test-data/core/md-test-out.json | 725 ++++++++++++++++++ 14 files changed, 1270 insertions(+), 787 deletions(-) rename packages/kbot/{docs_ => docs}/apis.md (100%) rename packages/kbot/{docs_ => docs}/docker.md (100%) rename packages/kbot/{docs_ => docs}/docker.sh (100%) delete mode 100644 packages/kbot/docs_/iterator-report.md delete mode 100644 packages/kbot/docs_/todos.md delete mode 100644 packages/kbot/preferences.md rename packages/kbot/{ => src/examples}/test_web_urls.js (100%) create mode 100644 packages/kbot/tests/test-data/core/md-test-out.json diff --git a/packages/kbot/docs_/apis.md b/packages/kbot/docs/apis.md similarity index 100% rename from packages/kbot/docs_/apis.md rename to packages/kbot/docs/apis.md diff --git a/packages/kbot/docs_/docker.md b/packages/kbot/docs/docker.md similarity index 100% rename from packages/kbot/docs_/docker.md rename to packages/kbot/docs/docker.md diff --git a/packages/kbot/docs_/docker.sh b/packages/kbot/docs/docker.sh similarity index 100% rename from packages/kbot/docs_/docker.sh rename to packages/kbot/docs/docker.sh diff --git a/packages/kbot/docs_/advanced.md b/packages/kbot/docs_/advanced.md index 7a09b576..29f906bd 100644 --- a/packages/kbot/docs_/advanced.md +++ b/packages/kbot/docs_/advanced.md @@ 
-1,28 +1,54 @@ +# Advanced Topics -# Working on Larger Directories +This section covers more advanced usage patterns and concepts. -Since LLMs (Large Language Models) and providers are limited to very small 'context windows', it's necessary to feed them with smaller chunks instead. This document explains how to process larger directories efficiently. +## Processing Multiple Items (`--each`) -## Directory Processing Example +Instead of relying on external scripting for batch processing, `kbot` provides the built-in `--each` parameter. This allows you to iterate a task over multiple inputs efficiently. -Here's an example of how to walk through files and process them: +**How it Works:** + +The `--each` parameter accepts: + +* A comma-separated list of strings (e.g., `--each="file1.txt,file2.txt"`). +* A file path to a JSON file containing an array of strings. +* A GLOB pattern matching multiple files (e.g., `--each="./src/**/*.ts"`). +* A list of model IDs to test a prompt against different models (e.g., `--each="openai/gpt-4o,anthropic/claude-3.5-sonnet"`). + +**Using the `${ITEM}` Variable:** + +Within the loop initiated by `--each`, the current item being processed is available as the `${ITEM}` variable. You can use this variable in other parameters, such as `--dst`, `--include`, or within the `--prompt` itself. + +**Example: Generating Documentation for Multiple Files** ```bash -osr-cli each --main='kbot \"read ${KEY} and translate to german, save in docs/language code/filename.md\" --include=\"${REL}\" --include=\".kbot/preferences.md\"' --list="./docs/*.md" --cwd=. 
+kbot --each "./src/modules/*.ts" \ + --dst "./docs/api/${ITEM}.md" \ + --prompt "Generate API documentation in Markdown format for the module defined in ${ITEM}" ``` -### Parameter Explanation +This command will: -- `each`: Command to process multiple files iteratively -- `--main`: The main command (`kbot`) to execute for each file - - `--include=\"${REL}\"` instructs kbot to include the current selected path - - `--include=\".kbot/preferences.md\"` instructs kbot to include additional preferences about the task (eg: translation specifics) -- `--list`: Specifies the file pattern to match - - Supports include patterns (e.g., `"./docs/*.md"`) -- `--cwd`: Sets the current working directory for the command execution. Default is the current directory (`.`) +1. Find all `.ts` files in `./src/modules/`. +2. For each file (e.g., `moduleA.ts`): + * Set `${ITEM}` to the file path (`./src/modules/moduleA.ts`). + * Execute `kbot` with the prompt, including the specific file via `${ITEM}`. + * Save the output to `./docs/api/./src/modules/moduleA.ts.md` (Note: path handling might vary). -**Note** requires `@plastichub/osr-cli-commons` to be installed globally: +Refer to the [Examples](./examples.md#iterating-with---each) for more use cases. -```bash -npm i -g @plastichub/osr-cli-commons -``` \ No newline at end of file +## Choosing a Transformation Method: `transform` vs. `createIterator` + +When transforming data structures (often JSON) using LLMs, you have two primary approaches: + +1. **`transform` Helper Function:** + * **Pros:** Simple, minimal setup, good for basic field transformations. + * **Cons:** Less control over network, caching, logging details. + * **Use Case:** Quickly applying straightforward transformations to data fields without needing deep customization. + +2. 
**`createIterator` Factory:** + * **Pros:** Full control over network options (retries, concurrency), caching (namespace, expiration), logging, custom transformer logic, and callbacks (`onTransform`, `onTransformed`). + * **Cons:** More verbose setup required. + * **Use Case:** Complex transformations requiring fine-tuned control over the entire process, advanced caching strategies, or integration with custom logging/transformation logic. + +Consult the [Iterator Documentation](./iterator.md) for detailed explanations and code examples of both methods. \ No newline at end of file diff --git a/packages/kbot/docs_/commands.md b/packages/kbot/docs_/commands.md index 7da289ba..d0c338e1 100644 --- a/packages/kbot/docs_/commands.md +++ b/packages/kbot/docs_/commands.md @@ -1,25 +1,32 @@ -## Commands +# Main Commands -### Prompt +The primary way to interact with `kbot` for processing tasks is by invoking it with a prompt and various options. While often used implicitly, this typically corresponds to the `run` command. -```kbot "create Astro minimal boilerplate, use starlight theme. Install dependencies via NPM tool"``` +## Running Tasks -### Fetch latest models +```bash +kbot run [options...] "Your prompt here..." +# or simply (if 'run' is the default): +kbot [options...] "Your prompt here..." +``` -```kbot fetch``` +This command executes the main AI processing pipeline based on the provided prompt and options. Key aspects controlled by options include: -### Print examples +* **Input:** Specified via `--include` (files, directories, web URLs), `--path`. +* **Task:** Defined by the `--prompt`. +* **Behavior:** Controlled by `--mode` (e.g., `tools`, `completion`). +* **Output:** Directed using `--dst` or `--output`. +* **Model & API:** Configured with `--model`, `--router`, `--api_key`, etc. -```kbot examples``` +Refer to [Parameters](./parameters.md) and [Modes](./modes.md) for detailed options. 
-### Print extended help +## Utility Commands -```kbot help-md``` +Other potential utility commands might include: -### Initialize folder +* `kbot fetch`: Fetch updated information, such as the latest available models. +* `kbot init`: Initialize a directory or project for use with `kbot` (e.g., create default config files). +* `kbot help-md`: Generate extended help documentation in Markdown format. +* `kbot examples`: Show example usage patterns. -```kbot init``` - -### Internal : Build - -```kbot build``` +*(Note: Availability and exact behavior of utility commands may vary.)* diff --git a/packages/kbot/docs_/examples.md b/packages/kbot/docs_/examples.md index 991f3ed1..07d903c4 100644 --- a/packages/kbot/docs_/examples.md +++ b/packages/kbot/docs_/examples.md @@ -1,138 +1,148 @@ - # CLI Examples -## Basic Commands +This page shows various ways to use `kbot` from the command line. -### Modify Project Files +## Basic Modifications + +Modify project files based on a prompt: ```bash -# Basic project modification -kbot "Add error handling to API endpoints" +# Request a change in the current directory context +kbot "Add comprehensive error handling to all API route handlers" -# Using stdin for prompt -echo "Add error handling to API endpoints" | kbot +# Use stdin for the prompt +echo "Refactor the database connection logic into a reusable module" | kbot -# Pipe file content as prompt -cat prompt.txt | kbot +# Use a file containing the prompt +kbot --prompt ./prompts/refactor_request.md +# Specify target files using glob patterns +kbot --include "src/controllers/**/*.ts" "Ensure all controller methods return consistent JSON responses" -# Specify files using include patterns -kbot --include "src/**/*.ts" "Update TypeScript types" - -kbot "Add unit tests for src/commands/*" --include="./src/commands/*.ts" - +# Target specific files +kbot --include "src/utils/auth.ts,src/middleware/auth.js" "Update authentication logic to support JWT refresh tokens" ``` -### Node.js API 
Projects +## Working with Different Input Sources + +`kbot` can process various types of input specified via the `--include` parameter: ```bash -# Add API endpoints -kbot --include "src/routes/*.ts" "Add authentication middleware" +# Include local files and directories +kbot --include "./docs/api.md,./src/server.ts" "Update the API documentation based on the server code" -# Update API models -kbot --include "src/models/*.ts" "Add validation" +# Include content from a web URL +kbot --include "https://raw.githubusercontent.com/user/repo/main/README.md" "Summarize this project's README" + +# Combine different sources +kbot --include "./config.yaml,https://example.com/api/spec.json" "Generate documentation based on the config and the API spec" ``` -## Advanced Features +## Advanced Data Transformation & Iteration -### Using Profiles +These examples demonstrate more complex scenarios involving data processing, structured output, and iteration. -Profiles allow you to define variables that can be used across your project and templates. These variables can be accessed using `${VARIABLE_NAME}` syntax in your tools and template partials. +### Transforming Data with the Iterator Factory + +The `iterator-factory-example.ts` demonstrates advanced usage including transforming specific fields within complex JSON objects, handling arrays, caching results, and generating structured output using the `--format` option. 
+ +To run this example: ```bash -# Use a specific profile file -kbot "Update configuration" --profile=./profiles/profile.json +# Ensure dependencies are installed, then run: +npm run examples:iterator-factory -# Use environment-specific variables -kbot "Configure for Hugo release" --profile=./profiles/profile.json --env=hugo-release +# Run without caching: +npm run examples:iterator-factory -- --no-cache ``` -Example profile.json structure: -```json -{ - "variables" : { - "foo": "bar" - }, - "env": { - "hugo-release": { - "variables":{ - "GIT_USER": "hugo-deployer", - "DEPLOY_TARGET": "production" - } - } - } -} -``` +This example uses the `createIterator` function for fine-grained control. Explore the code in `src/examples/core/iterator-factory-example.ts` for details. -- Top-level variables are used as defaults -- Environment-specific variables (specified by --env) override defaults -- Variables can be used in tools and template partials using ${VARIABLE_NAME} syntax +### Structured Output with `--format` -### Custom Output Path +Force the AI to return data in a specific JSON structure. You can provide the schema as a JSON string, a file path, or a Zod schema string. ```bash -# Save modifications to different directory -kbot --output ./modified "Refactor code" +# Using a JSON schema file +kbot --include ./product_review.txt \ + --format file:./schemas/review_analysis_schema.json \ + "Analyze this review and extract sentiment, pros, and cons according to the schema" + +# Using an inline JSON schema string +kbot --include ./data.json \ + --format '{"type":"object","properties":{"summary":{"type":"string"},"keywords":{"type":"array","items":{"type":"string"}}},"required":["summary","keywords"]}' \ + "Summarize this data and extract keywords" ``` -### AI Model Selection +### Iterating with `--each` + +Process multiple items (files, models, strings) in sequence. 
```bash -# Use specific OpenAI model -kbot --router openai --model gpt-4 "Optimize code" +# Iterate over specific files and output to separate files +kbot --each "./src/moduleA.ts,./src/moduleB.ts" \ + --dst "./docs/${ITEM}.md" \ + "Generate documentation for the module defined in ${ITEM}" -# Use Anthropic Claude -kbot --model anthropic/claude-3-opus "Add documentation" +# Test a prompt against multiple models +kbot --each "openai/gpt-4o,anthropic/claude-3-sonnet" \ + --include ./my_code.py \ + --dst "${ITEM}_review.txt" \ + "Review this Python code for potential bugs and suggest improvements" + +# Iterate over files matching a glob pattern +kbot --each "./tests/**/*.test.js" \ + --dst "./coverage/${ITEM}.txt" \ + "Analyze the test coverage for ${ITEM}" +``` + +## Configuration and Control + +### Using Profiles and Environments + +Load variables from configuration files for different environments. + +```bash +# Load variables from a profile and specify an environment +kbot --profile ./config/profiles.json --env staging "Deploy the staging configuration using variables from the profile" +``` + +### Custom Output Paths + +Control where results are saved. + +```bash +# Save modifications to a different directory (tool mode) +kbot --output ./refactored_code "Refactor all functions to use async/await" + +# Save completion result to a specific file (completion mode) +kbot --mode completion --dst ./summary.md "Summarize the included documents" +``` + +### Model and Router Selection + +Choose the AI provider and specific model. 
+ +```bash +# Use a specific OpenAI model +kbot --router openai --model gpt-4o "Translate the included text to French" + +# Use a model via OpenRouter (default router) +kbot --model anthropic/claude-3.5-sonnet "Write a poem about the included text" ``` ### Tool Control -```bash -# Disable specific tools -kbot --disable git "Update code without git commits" - -# Disable multiple tool categories -kbot --disable fs,npm,git "Analyze code only" -``` - -### File Selection +Enable or disable specific tools or categories. ```bash -# Multiple include patterns -kbot --include "src/**/*.ts" --include "test/**/*.ts" "Update types" +# Disable the 'git' category tools +kbot --disable git "Update code documentation without committing changes" -# Exclude patterns -kbot --include "src/**/*.ts" --include "!src/generated/**" "Refactor code" +# Disable specific tools by name +kbot --disableTools "npm_install,file_save" "Analyze dependencies but do not install or save files" ``` -### Environment and Profile +## More Examples -```bash -# Use specific environment -kbot --env production "Add production configs" - -# Custom profile path -kbot --profile ./custom-profile.json --env production -``` - -### Scripting - -```bash -# Generate modification script -kbot --dump ./modify-script.sh "Add types" -``` - -### Input Types - -The tool supports different types of input: - -```bash -# Text input through stdin -echo "Add error handling" | kbot - -# Piping files -cat my-prompt.md | kbot - -# Specifying a file -kbot my-prompt.md -``` +For more in-depth examples demonstrating various features like custom tools, different modes, and advanced configurations, please explore the code within the `src/examples/` directory of this package. 
diff --git a/packages/kbot/docs_/iterator-report.md b/packages/kbot/docs_/iterator-report.md deleted file mode 100644 index 6f020595..00000000 --- a/packages/kbot/docs_/iterator-report.md +++ /dev/null @@ -1,376 +0,0 @@ -# Iterator Implementation Review - -## Potential Bugs and Edge Cases - -### Type Safety Issues -1. Excessive use of `any` type in key functions: - - `removeEmptyObjects` function uses `any` return type and parameter (line 19) - - Limited type checking in cache key generation and object cloning - -### Error Handling -1. Inconsistent error handling: - - In `createLLMTransformer`, errors are caught but only logged (line 106) without retry mechanism outside of `transformPath` - - Retry mechanism in `transformPath` uses exponential backoff but lacks circuit breaking capability - -2. API errors not properly categorized: - - No distinction between transient errors (like rate limits) and permanent errors (like invalid requests) - - Missing status code handling from LLM API responses - - No handling of network timeouts for long-running LLM requests - -### Cache Implementation -1. Cache key generation issues: - - Cache key for `createObjectCacheKey` (line 137) uses JSON.stringify on full data objects, which may: - - Create extremely large cache keys - - Fail with circular references - - Generate different keys for identical logical objects if properties are in different orders - -2. Cache expiration: - - Fixed default expiration time (7 days) might not be suitable for all use cases - - No mechanism to force refresh or invalidate specific cache entries - -3. Cache isolation: - - No isolation between different versions of models (newer models might give better results) - - No context-based cache namespacing (different applications using same cache) - -### Concurrency and Performance -1. 
Fixed throttling implementation: - - `throttleDelay` is applied globally without considering API rate limits - - Default concurrency of 1 may be overly cautious for some APIs - - No adaptability to different LLM providers' rate limit policies - -2. JSON parsing overhead: - - Deep cloning via `JSON.parse(JSON.stringify())` in multiple places (lines 189, 208) can cause: - - Performance issues with large objects - - Loss of data for values that don't serialize to JSON (e.g., Date objects, functions) - - Memory spikes during transformation - -3. Inefficient parallel execution: - - The iterator processes field mappings sequentially rather than in parallel batches - - No priority system for more important transformations - -### Data Integrity -1. Deep merge implementation risks: - - The custom `deepMerge` function (line 144) doesn't properly handle arrays - - No protection against prototype pollution - - May overwrite existing values unexpectedly - -2. JSONPath implementation limitations: - - No validation of JSONPath syntax - - No handling for missing paths - - Potential for duplicate updates when JSONPath matches multiple nodes - -### Integration Issues -1. LLM integration rigidity: - - Tight coupling to specific LLM API structure in `createLLMTransformer` - - Limited flexibility for different output formats (assumes string response) - - No streaming support for larger transformations - -2. Missing validation for prompt templates: - - No checking if prompts exceed token limits - - Prompts are concatenated with input without token awareness - - No handling of LLM context windows - -## Suggested Improvements - -### Type Safety -1. Replace uses of `any` with proper type definitions: -```typescript -export const removeEmptyObjects = (obj: T): T => { - // Implementation with proper type checking -} -``` - -2. 
Define stricter interfaces for cache keys and values: -```typescript -interface CacheKey { - prompt: string; - model?: string; - router?: string; - mode?: string; -} -``` - -### Error Handling -1. Implement consistent error handling strategy: -```typescript -// Add proper error classes -export class TransformError extends Error { - constructor(public path: string, public originalValue: string, public cause: Error) { - super(`Error transforming ${path}: ${cause.message}`); - this.name = 'TransformError'; - } -} -``` - -2. Add circuit breaker pattern for API calls: -```typescript -// In createLLMTransformer -const circuitBreaker = new CircuitBreaker({ - failureThreshold: 3, - resetTimeout: 30000 -}); - -return async (input: string, jsonPath: string): Promise => { - return circuitBreaker.fire(() => callLLMAPI(input, jsonPath)); -}; -``` - -3. Categorize and handle API errors appropriately: -```typescript -async function handleLLMRequest(task: IKBotTask, input: string): Promise { - try { - return await run(task); - } catch (error) { - if (error.status === 429) { - // Rate limit - back off and retry - return await retryWithExponentialBackoff(() => run(task)); - } else if (error.status >= 400 && error.status < 500) { - // Client error - fix request or abort - throw new ClientError(error.message); - } else { - // Server error - retry with caution - return await retryWithLinearBackoff(() => run(task)); - } - } -} -``` - -### Cache Implementation -1. Improve cache key generation: -```typescript -const createCacheKey = (task: IKBotTask, input: string): string => { - // Create deterministic hash of relevant properties only - const keyObj = { - prompt: task.prompt, - model: task.model, - input: input.substring(0, 100) // Limit input size in key - }; - return createHash('sha256').update(JSON.stringify(keyObj)).digest('hex'); -}; -``` - -2. 
Add cache control capabilities: -```typescript -export interface CacheConfig { - enabled?: boolean; - namespace?: string; - expiration?: number; - forceRefresh?: boolean; - keyGenerator?: (task: IKBotTask, input: string) => string; - versionStrategy?: 'model-based' | 'time-based' | 'none'; -} -``` - -3. Implement context-aware cache namespacing: -```typescript -function createContextualNamespace(config: CacheConfig, options: IKBotTask): string { - const appId = options.appId || 'default'; - const modelVersion = options.model?.replace(/[^\w]/g, '-') || 'unknown-model'; - return `${config.namespace || 'llm-responses'}-${appId}-${modelVersion}`; -} -``` - -### Concurrency and Performance -1. Replace deep cloning with structured cloning or immutable data libraries: -```typescript -import { structuredClone } from 'node:util'; // Node.js 17+ - -// Replace JSON.parse(JSON.stringify(obj)) with: -const transformedObj = structuredClone(obj); -``` - -2. Add adaptive throttling based on API responses: -```typescript -const adaptiveThrottle = createAdaptiveThrottle({ - initialLimit: 10, - initialInterval: 1000, - maxLimit: 50, - adjustOnError: (err) => { - // Check rate limit errors and adjust accordingly - } -}); -``` - -3. Implement parallel batch processing: -```typescript -// Process mappings in parallel batches -async function transformInBatches(obj: Record, mappings: FieldMapping[], batchSize: number = 3) { - const batches = []; - for (let i = 0; i < mappings.length; i += batchSize) { - batches.push(mappings.slice(i, i + batchSize)); - } - - for (const batch of batches) { - await Promise.all(batch.map(mapping => processMapping(obj, mapping))); - } -} -``` - -### Interface Improvements -1. Simplify the API for common use cases: -```typescript -// Simple transform helper -export async function transform( - data: T, - mapping: FieldMapping | FieldMapping[], - options?: Partial -): Promise { - const mappings = Array.isArray(mapping) ? 
mapping : [mapping]; - const result = structuredClone(data); - await createIterator(result, options || {}).transform(mappings); - return result; -} -``` - -2. Add typesafe JSONPath: -```typescript -// Type-safe JSONPath function -export function createTypeSafePath( - path: string, - validator: (value: unknown) => value is R -): JSONPathSelector { - // Implementation -} -``` - -3. Support streaming transformations: -```typescript -export interface StreamOptions extends IOptions { - onProgress?: (current: number, total: number) => void; - onFieldTransform?: (path: string, before: string, after: string) => void; -} - -export function createStreamingIterator( - obj: Record, - optionsMixin: Partial, - streamOptions: StreamOptions -): IteratorFactory { - // Implementation with callbacks for progress updates -} -``` - -## Alternative Libraries - -### Lightweight Alternatives - -1. **JSONata** instead of JSONPath - - More expressive query language - - Smaller footprint (54KB vs 120KB) - - Built-in transformation capabilities - - Example conversion: - ```typescript - // Instead of JSONPath: - const paths = JSONPath({ path: '$.products.fruits[*].description', json: obj }); - - // With JSONata: - const result = jsonata('products.fruits.description').evaluate(obj); - ``` - -2. **p-limit** instead of p-throttle and p-map - - Simpler API - - More focused functionality - - Smaller bundle size - - Example conversion: - ```typescript - // Instead of: - const throttle = pThrottle({ - limit: 1, - interval: throttleDelay, - }); - await pMap(items, async (item) => throttle(transform)(item)); - - // With p-limit: - const limit = pLimit(concurrentTasks); - await Promise.all(items.map(item => - limit(() => new Promise(r => setTimeout(() => r(transform(item)), throttleDelay))) - )); - ``` - -3. 
**fast-copy** instead of JSON.parse/stringify - - 2-3x faster than JSON method - - Handles circular references - - Preserves prototypes - - Example conversion: - ```typescript - // Instead of: - const copy = JSON.parse(JSON.stringify(obj)); - - // With fast-copy: - import copy from 'fast-copy'; - const objCopy = copy(obj); - ``` - -4. **object-path** instead of custom path traversal - - Well-tested library for object access by path - - Simpler error handling - - Better performance - - Example conversion: - ```typescript - // Instead of custom path traversal: - let current = obj; - for (const key of keys) { - if (current[key] === undefined) return; - current = current[key]; - } - - // With object-path: - import objectPath from 'object-path'; - const value = objectPath.get(obj, path); - objectPath.set(obj, path, newValue); - ``` - -5. **oazapfts** or **openapi-typescript** for LLM API clients - - Type-safe API clients generated from OpenAPI specs - - Consistent error handling - - Proper request/response typing - - Example: - ```typescript - import { createClient } from './generated/openai-client'; - - const client = createClient({ - apiKey: process.env.OPENAI_API_KEY, - }); - - const response = await client.createChatCompletion({ - model: 'gpt-4', - messages: [{ role: 'user', content: prompt }] - }); - ``` - -## Enhanced Interface Suggestions - -```typescript -// Strongly typed transform function -export async function transform>( - data: T, - options: { - paths: { - source: string; - target?: string; - prompt: string; - }[]; - model?: string; - router?: string; - cache?: boolean | Partial; - concurrency?: number; - logger?: Partial; - } -): Promise; - -// Simplified usage example: -const result = await transform(myData, { - paths: [ - { - source: '$.description', - prompt: 'Make this more engaging' - }, - { - source: '$.title', - target: 'seoTitle', - prompt: 'Create an SEO-optimized version' - } - ], - model: 'gpt-4', - concurrency: 5 -}); -``` \ No newline at end 
of file diff --git a/packages/kbot/docs_/iterator.md b/packages/kbot/docs_/iterator.md index dd7c6b63..f7388541 100644 --- a/packages/kbot/docs_/iterator.md +++ b/packages/kbot/docs_/iterator.md @@ -1,6 +1,247 @@ -# Iterator Documentation +# Data Transformation with Iterators -The Iterator module provides a powerful way to transform data structures using asynchronous operations, particularly suited for applying LLM-based transformations to JSON data. This document covers the core functionality, usage patterns, and examples. +The Iterator module provides a powerful and flexible way to transform complex data structures (like JSON objects) using asynchronous operations, especially useful for applying LLM transformations to specific fields. + +## Key Concepts + +* **Targeted Transformation:** Use JSONPath expressions to select specific fields or elements within your data for transformation. +* **LLM Integration:** Seamlessly integrate Large Language Models (LLMs) to modify text, generate content, or analyze data based on prompts. +* **Structured Output:** Define JSON schemas using the `format` option to ensure LLM outputs conform to a required structure. +* **In-Place or New Fields:** Choose whether to modify data directly or add transformed results to new fields. +* **Customization:** Control concurrency, rate limiting, error handling, filtering, and caching. +* **Callbacks:** Hook into the transformation process using `onTransform` (before LLM call) and `onTransformed` (after LLM call) callbacks. + +## Core Components & Usage + +There are two main ways to use the transformation capabilities: + +1. **`transform` Helper Function:** A simplified approach for common use cases. +2. **`createIterator` Factory:** Provides more control and customization options. + +### 1. 
Simplified Usage: `transform` Function + +For straightforward transformations, the `transform` function offers a minimal setup: + +```typescript +import { transform, FieldMapping, E_Mode } from '@polymech/kbot'; + +// Sample Data +const data = { product: { description: "Old description" } }; + +// Field mapping definition +const mappings: FieldMapping[] = [ + { + jsonPath: '$.product.description', + targetPath: null, // Transform in-place + options: { + prompt: 'Rewrite this description to be more exciting' + } + } +]; + +// Global LLM options +const llmOptions = { + model: 'openai/gpt-4o', + router: 'openai', + mode: E_Mode.COMPLETION +}; + +// Optional callbacks +const callbacks = { + onTransform: async (path, value) => { console.log(`Transforming: ${path}`); return value; }, + onTransformed: async (path, newValue) => { console.log(`Transformed: ${path}`); return newValue; } +}; + +// Perform the transformation +await transform(data, mappings, llmOptions, callbacks); + +console.log(data.product.description); // Output: The exciting new description +``` + +See the `simpleTransformExample` function within `src/examples/core/iterator-factory-example.ts` for a runnable demonstration. + +### 2. Advanced Usage: `createIterator` Factory + +The `createIterator` factory provides maximum flexibility for complex scenarios, including fine-grained control over network options, caching, logging, and transformer creation. 
+ +```typescript +import { + createIterator, + createLLMTransformer, + FieldMapping, + IOptions, + CacheConfig, + INetworkOptions, + E_Mode +} from '@polymech/kbot'; +import { getLogger } from '@polymech/kbot'; // Assuming logger setup + +const logger = getLogger({ logLevel: 4 }); + +// Sample Data +const data = { products: [{ id: 'p1', name: 'Apple', details: { nutrition: 'Rich in fiber' } }] }; + +// Global LLM options mixin +const globalOptionsMixin = { + model: 'anthropic/claude-3.5-sonnet', + router: 'openrouter', + mode: E_Mode.COMPLETION +}; + +// Field Mappings +const fieldMappings: FieldMapping[] = [ + { + jsonPath: '$.products[*].name', + targetPath: 'marketingName', // Add a new field + options: { prompt: 'Create a catchy marketing name' } + }, + { + jsonPath: '$.products[*].details.nutrition', + targetPath: null, // Transform in-place + options: { prompt: 'Expand nutrition info with health benefits (20 words)' } + } +]; + +// Network Configuration +const networkOptions: INetworkOptions = { + throttleDelay: 500, + concurrentTasks: 2, + maxRetries: 3, + retryDelay: 1000 +}; + +// Cache Configuration +const cacheConfig: CacheConfig = { + enabled: true, + namespace: 'product-info-transforms', + expiration: 3600 // 1 hour in seconds +}; + +// Iterator Options +const iteratorOptions: IOptions = { + network: networkOptions, + errorCallback: (path, value, error) => logger.error(`Error at ${path}: ${error.message}`), + filterCallback: async () => true, // Accepts every value; swap in a real predicate (e.g., strings only) to filter + transformerFactory: (opts) => createLLMTransformer(opts, logger, cacheConfig), + logger, + cacheConfig, + onTransform: async (path, value, opts) => { logger.debug(`About to transform ${path}`); return value; }, + onTransformed: async (path, transformedValue, opts) => { logger.debug(`Finished transforming ${path}`); return transformedValue; } +}; + +// Create the iterator instance +const iterator = createIterator( + data, + globalOptionsMixin, + iteratorOptions +); + +// Apply
transformations +await iterator.transform(fieldMappings); + +console.log(JSON.stringify(data, null, 2)); +/* Output might look like: +{ + "products": [ + { + "id": "p1", + "name": "Apple", + "details": { + "nutrition": "Rich in fiber, supporting digestion and heart health. Apples provide essential vitamins for overall well-being." + }, + "marketingName": "Orchard Crisp Delight" + } + ] +} +*/ +``` + +Refer to the `factoryExample` function in `src/examples/core/iterator-factory-example.ts` for a comprehensive, runnable example demonstrating caching, structured output, and callbacks. + +## Field Mappings (`FieldMapping`) + +Define *what* to transform and *how*: + +```typescript +interface FieldMapping { + jsonPath: string; // JSONPath expression identifying data to transform. + targetPath: string | null; // Where to put the result. `null` for in-place, or a string for a new relative field name. + options?: Partial; // LLM options (prompt, model, format, etc.) specific to this mapping. +} +``` + +## Callbacks (`onTransform`, `onTransformed`) + +Inject custom logic before and after the core transformation (e.g., the LLM call): + +* **`onTransform(jsonPath, originalValue, options)`:** Called just before the value is sent to the transformer (LLM). You can modify the `originalValue` before it's processed. + * **Note:** For non-string values (like arrays), `originalValue` will be the original non-string data type. Ensure your callback handles this or stringifies if necessary before returning. +* **`onTransformed(jsonPath, transformedValue, options)`:** Called after the transformer returns a result. You can modify the `transformedValue` before it's written back to the data object. + +## Structured Output (`format` Option) + +The `format` option within a `FieldMapping`'s `options` ensures the LLM's output conforms to a specific JSON schema. This is crucial for reliable data extraction and processing. 
+ +```typescript +// Example FieldMapping using 'format' +{ + jsonPath: '$.reviewText', + targetPath: 'analysis', + options: { + prompt: 'Analyze sentiment, pros, and cons from this review.', + format: { + type: "object", + properties: { + sentiment: { type: "string", enum: ["positive", "neutral", "negative"] }, + pros: { type: "array", items: { type: "string" } }, + cons: { type: "array", items: { type: "string" } } + }, + required: ["sentiment", "pros", "cons"] + } + } +} +``` + +* The LLM is instructed to return a JSON object matching the schema. +* The result assigned to `data.analysis` will typically be a *string* containing the JSON. You'll likely need to `JSON.parse()` it. +* See the `iterator-factory-example.ts` for a full example of defining and handling formatted output. + +## Caching (`CacheConfig`) + +Improve performance and reduce costs by caching transformation results. + +```typescript +interface CacheConfig { + enabled: boolean; // Master switch for caching + namespace?: string; // Optional prefix for cache keys (recommended) + expiration?: number; // Cache duration in seconds + // implementation?: CacheInterface; // Advanced: Provide a custom cache backend +} +``` + +* Caching is configured within the `IOptions` passed to `createIterator`. +* The default cache uses `@polymech/cache`. +* Cache keys are generated based on the input value and transformation options. +* The `iterator-factory-example.ts` includes logic to demonstrate caching and how to clear specific cache entries using `rm_cached_object` from `@polymech/cache` for testing purposes. 
+ +## Network Configuration (`INetworkOptions`) + +Fine-tune network behavior for API calls: + +```typescript +interface INetworkOptions { + throttleDelay?: number; // ms delay between requests + concurrentTasks?: number; // Max parallel requests + maxRetries?: number; // Retries on failure + retryDelay?: number; // Base delay (ms) between retries (exponential backoff applied) +} +``` + +## Examples + +Explore the source code for detailed, runnable examples: + +* **`src/examples/core/iterator-factory-example.ts`**: Demonstrates `createIterator`, `transform`, callbacks, caching, structured output (`format`), and handling different data types (strings, number arrays). ## Overview diff --git a/packages/kbot/docs_/modes.md b/packages/kbot/docs_/modes.md index 390b5906..815fcc5c 100644 --- a/packages/kbot/docs_/modes.md +++ b/packages/kbot/docs_/modes.md @@ -1,8 +1,10 @@ -# KBot Run Modes +# KBot Operational Modes -KBot supports several run modes that determine how it processes inputs and generates outputs. This document provides an overview of each mode and how they work. +`kbot` operates in different modes specified by the `--mode` parameter. Each mode dictates how the AI processes input, whether it can use tools, and how output is handled. -## Command Execution Flow +## Execution Flow Overview + +(The Mermaid sequence diagram below illustrates the general flow across the different modes; it may need updating if it falls out of sync with the implementation.) ```mermaid sequenceDiagram @@ -12,243 +14,111 @@ sequenceDiagram participant AI participant FileSystem - User->>KBot: Run command with options + User->>KBot: Run command with options (prompt, mode, include, etc.) 
KBot->>KBot: Parse options (OptionsSchema) - KBot->>KBot: Process 'each' parameter (if present) - + KBot->>KBot: Process `--each` parameter (if present) + loop For each item (if --each specified) - KBot->>KBot: Create client + KBot->>KBot: Create API client (based on router, key) KBot->>KBot: Load profile & variables - KBot->>FileSystem: Gather files (--include) - KBot->>KBot: Prepare messages - KBot->>KBot: Configure tools (based on mode) - KBot->>Client: Create request params + KBot->>FileSystem: Gather input content (--include: files, URLs) + KBot->>KBot: Prepare messages (prompt, preferences, input content) + KBot->>KBot: Load & configure tools (if mode supports tools) + KBot->>Client: Create API request parameters (model, messages, tools) Client->>AI: Send API request - + alt mode=completion - AI->>Client: Generate text output - Client->>FileSystem: Save to --dst location + AI->>Client: Generate text response + Client->>KBot: Return response + KBot->>KBot: Apply filters (if specified) + KBot->>FileSystem: Save filtered response to --dst location else mode=tools - AI->>Client: Tool function calls - Client->>FileSystem: Perform file operations - Client->>FileSystem: Optional save to --output + AI->>Client: Request tool calls + Client->>KBot: Execute requested tools (e.g., FileSystem access) + KBot->>Client: Provide tool results + Client->>AI: Send tool results + AI->>Client: Final response (may include summary of actions) + Client->>KBot: Return final response + KBot->>User: Output response (modified files handled by tools, optionally guided by --output) else mode=assistant - AI->>Client: Process document content - Client->>FileSystem: Save to --dst location + AI->>Client: Process document content (using built-in retrieval/analysis) + Client->>KBot: Return response + KBot->>FileSystem: Save response to --dst location + else mode=custom + Note over KBot, AI: Custom interaction flow defined by implementation + KBot->>User: Return custom result end - - AI->>Client: 
Final response - Client->>KBot: Return result - KBot->>KBot: Apply filters (if specified) - KBot->>KBot: Collect result in array + + AI->>Client: (Error Handling / Retries) + Client->>KBot: Return final result/error for the item + KBot->>KBot: Collect item result end - - KBot->>User: Return results + + KBot->>User: Return aggregated results (if --each used) ``` -## Run Modes Overview +## Mode Descriptions -```mermaid -graph TD - A[KBot Command] --> B{Mode Selection} - B -->|mode=completion| C[Completion Mode] - B -->|mode=tools| D[Tools Mode] - B -->|mode=assistant| E[Assistant Mode] - B -->|mode=custom| F[Custom Mode] - - C --> G[Simple Text Output] - D --> H[Tool-Assisted Output] - E --> I[Document Processing] - F --> J[User-Defined Behavior] -``` +### 1. `completion` Mode (`--mode=completion`) -### Completion Mode - -The simplest mode that generates text without using tools. Best for straightforward prompts. - -```mermaid -graph LR - A[Input] --> B[Completion Mode] - B --> C[Text Output] - C --> D[Save to --dst] - C --> E[Apply Filters] -``` - -**Key features:** -- No support for tools -- Requires `--dst` parameter to save output -- Can use filters to process output -- Best for: summaries, transformations, simple Q&A - -**Example command:** -```bash -kbot --mode=completion --prompt="Summarize this document" --include=README.md --dst=summary.md -``` - -### Tools Mode - -Allows the model to use tools to perform actions like modifying files or accessing external information. - -```mermaid -graph TD - A[Input] --> B[Tools Mode] - B --> C{Tool Selection} - C --> D[File Operations] - C --> E[Web Search] - C --> F[Shell Commands] - C --> G[Other Tools...] 
- D --> H[Output with File Changes] - E --> H - F --> H - G --> H - H --> I[Optional --output] -``` - -**Key features:** -- Enables AI to use various tools -- Can modify files directly -- Optional `--output` parameter for modified files -- Not all models support this mode -- Best for: code modifications, contextual tasks, complex file operations - -**Example command:** -```bash -kbot --mode=tools --prompt="Refactor this function to be more efficient" --include=src/utils.js -``` - -### Assistant Mode - -Specialized for document processing, allowing PDF, DOCX and other document formats to be analyzed. - -```mermaid -graph LR - A[Documents] --> B[Assistant Mode] - B --> C[Document Analysis] - C --> D[Text Output] - D --> E[Save to --dst] -``` - -**Key features:** -- Supports document formats (PDF, DOCX, etc.) -- No tool support -- Requires `--dst` to save output -- Best for: document analysis, content extraction - -**Example command:** -```bash -kbot --mode=assistant --prompt="Extract key points" --include=docs/report.pdf --dst=key-points.md -``` - -### Custom Mode - -For specialized use cases with user-defined behavior. - -## The "each" Parameter - -The `each` parameter allows KBot to iterate over multiple items, running the same process for each one. - -```mermaid -graph TD - A[KBot with --each] --> B{Source Type} - B -->|Comma-separated list| C[List of strings] - B -->|File path| D[JSON file] - B -->|GLOB pattern| E[Matching files] - B -->|Array| F[Array of items] - - C --> G[For each item] - D --> G - E --> G - F --> G - - G --> H[Run process] - H --> I[Collect results] - - J[Model names] --> G - G --> K[Set ITEM variable] - K --> L[Can be used in other parameters] - L --> M["--dst=${ITEM}-output.md"] -``` - -### How "each" Works - -1. 
KBot accepts the `--each` parameter with various inputs: - - Comma-separated string (e.g., `--each="gpt-3.5-turbo,gpt-4o"`) - - Path to a JSON file containing an array - - GLOB pattern matching multiple files - - Array of strings - -2. For each item: - - The value is exposed as the `ITEM` variable - - If the item matches a model ID, it automatically sets that as the model - - The item is added to the `include` list for processing - - The process is run with these modified options - - Results are collected into an array - -### Special Case: Model Testing - -A powerful use case is testing multiple AI models with the same prompt: +* **Purpose:** Direct text generation based on the prompt and included context. +* **Tools:** Not supported. +* **Output:** The generated text response. +* **Saving:** Requires the `--dst` parameter to specify a file path for saving the output. +* **Filters:** Output can be processed by filters specified via the `--filters` parameter before saving. +* **Use Cases:** Summarization, translation, question answering, content generation where no external actions are needed. ```bash -kbot --mode=completion --prompt="Solve this problem" --each="gpt-3.5-turbo,gpt-4o,claude-3-opus" --dst="${ITEM}-solution.md" +# Example: Summarize a web page +kbot --mode=completion \ + --include "https://example.com/article" \ + --prompt "Provide a 3-sentence summary of this article" \ + --dst "./summary.txt" ``` -This will: -1. Run the same prompt with three different models -2. Save each result to a separate file named after the model -3. Allow you to compare results across models +### 2. `tools` Mode (`--mode=tools`, Default) -### "each" Parameter Processing Flow +* **Purpose:** Allows the AI to interact with the local environment using available tools (e.g., file system operations, code execution, web searches). +* **Tools:** Supported and enabled by default (can be configured via `--tools`, `--disable`, `--disableTools`). 
+* **Output:** Typically involves actions performed by tools (e.g., files modified, commands run). The final AI response might summarize actions taken. +* **Saving:** File modifications are handled directly by tools. The `--output` parameter can sometimes guide tools on *where* to place results if they create new files relative to a target directory. +* **Use Cases:** Code refactoring, file generation, running tests, automating workflows, interacting with APIs via tools. +* **Note:** Tool support depends on the specific AI model being used. -```mermaid -flowchart TD - Start([Start]) --> ParseEach{Parse --each\nparameter} - - ParseEach -->|is Array| A[Use directly] - ParseEach -->|is JSON file path| B[Read JSON array] - ParseEach -->|is GLOB pattern| C[Get matching files] - ParseEach -->|is single file| D[Use as one-item array] - ParseEach -->|is comma-separated string| E[Split into array] - - A --> ItemsArray[Items Array] - B --> ItemsArray - C --> ItemsArray - D --> ItemsArray - E --> ItemsArray - - ItemsArray --> CheckEmpty{Items\nempty?} - CheckEmpty -->|Yes| LogWarning[Log warning] --> End([End]) - - CheckEmpty -->|No| LogInfo[Log processing info] - LogInfo --> GetAllModels[Get all available models] - - GetAllModels --> ProcessItems[Process first item] - - subgraph ItemProcessing [Item Processing Loop] - ProcessItems --> CreateItemOpts[Create item options:\n- Set ITEM variable\n- Add to variables] - CreateItemOpts --> CheckModel{Item matches\nmodel ID?} - CheckModel -->|Yes| SetModel[Set model to item] - CheckModel -->|No| SkipModelSet[Skip model override] - SetModel --> UpdateIncludes[Add item to includes] - SkipModelSet --> UpdateIncludes - UpdateIncludes --> RunProcess[Run processRun] - RunProcess --> CheckResult{Result defined?} - CheckResult -->|Yes| AddToResults[Add to results array] - CheckResult -->|No| SkipAddResult[Skip] - AddToResults --> NextItem{More items?} - SkipAddResult --> NextItem - NextItem -->|Yes| ProcessNext[Process next item] --> 
CreateItemOpts - end - - NextItem -->|No| ReturnResults[Return results array] - ReturnResults --> End +```bash +# Example: Refactor code in specified files +kbot --mode=tools \ + --include "./src/**/*.js" \ + --prompt "Refactor all functions in these files to use arrow syntax" ``` -This detailed flow shows exactly how KBot processes the `each` parameter, from initial parsing through the execution loop for each item. +### 3. `assistant` Mode (`--mode=assistant`) -### Variable Substitution +* **Purpose:** Specialized for interacting with and analyzing document files (e.g., PDF, DOCX). +* **Tools:** Not supported. Relies on the AI provider's built-in document handling capabilities (if available). +* **Output:** An analysis or response based on the content of the included documents. +* **Saving:** Requires the `--dst` parameter to save the AI's response. +* **Use Cases:** Analyzing reports, extracting information from PDFs, question answering based on uploaded documents. -The `ITEM` variable can be used in other parameters: +```bash +# Example: Extract key points from a PDF report +kbot --mode=assistant \ + --include "./reports/annual_report.pdf" \ + --prompt "Extract the main financial highlights from this report" \ + --dst "./financial_highlights.md" +``` -- `--dst="${ITEM}-output.md"` - Creates unique output files -- Path templating with `${MODEL_NAME}` and `${ROUTER}` -- Accessible in prompts via variable substitution \ No newline at end of file +### 4. `custom` Mode (`--mode=custom`) + +* **Purpose:** Reserved for specific, user-defined operational flows that don't fit the standard modes. +* **Behavior:** Defined by the specific implementation integrating with `kbot`. + +## Iteration with `--each` + +The `--each` parameter enables running a `kbot` task iteratively over multiple items (files, model names, strings). This is powerful for batch processing or comparative testing. + +* It accepts comma-separated strings, GLOB patterns, JSON file paths, or arrays. 
+* The current item is available via the `${ITEM}` variable for use in other parameters (`--dst`, `--prompt`, `--include`). +* If an item matches a known model ID, `--model` is automatically set for that iteration. + +Refer to [Advanced Topics](./advanced.md#processing-multiple-items---each) and [Examples](./examples.md#iterating-with---each) for detailed usage and examples. \ No newline at end of file diff --git a/packages/kbot/docs_/parameters.md b/packages/kbot/docs_/parameters.md index 277a4cc3..d92f29d6 100644 --- a/packages/kbot/docs_/parameters.md +++ b/packages/kbot/docs_/parameters.md @@ -1,49 +1,67 @@ - # Command Line Parameters -This document describes all available command line parameters. +This document describes the command line parameters available for `kbot`. + +**Note:** Many parameters support environment variable substitution (e.g., `${VAR_NAME}`). ## Core Parameters | Parameter | Description | Default | Required | |-----------|-------------|---------|----------| -| `path` | Target directory | `.` | No | -| `prompt` | The prompt. Supports file paths and environment variables | `./prompt.md` | No | -| `output` | Optional output path for modified files (Tool mode only) | - | No | -| `dst` | Optional destination path for the result, will substitute ${MODEL} and ${ROUTER} in the path. | - | No | -| `model` | AI model to use for processing | `anthropic/claude-3.5-sonnet` | No | -| `router` | Router to use: openai or openrouter | `openrouter` | No | -| `mode` | Chat completion mode: "completion" (without tools) or "tools" | `tools` | No | +| `prompt` | The main instruction or question for the AI. Can be a string, a file path (e.g., `file:./my_prompt.md`), or an environment variable. | - | Yes (or implied by context) | +| `model` | AI model ID to use for processing (e.g., `openai/gpt-4o`). See available models via helper functions or router documentation. | Depends on router/config | No | +| `router` | The API provider to use. 
| `openrouter` | No | +| `mode` | The operational mode. See [Modes](./modes.md) for details. | `tools` | No | -## Advanced Parameters +## Input & File Selection | Parameter | Description | Default | Required | |-----------|-------------|---------|----------| -| `each` | Target directory | `.` | No | -| `dry` | Dry run - only write out parameters without making API calls | `false` | No | +| `path` | Target directory for local file operations or context. | `.` | No | +| `include` | Specify input files or content. Accepts comma-separated glob patterns (e.g., `src/**/*.ts`), file paths, directory paths, or **web URLs** (e.g., `https://example.com/page`). | `[]` | No | +| `query` | JSONPath query to extract specific data from input objects (often used with structured input files). | `null` | No | -## File Selection & Tools +## Output & Formatting | Parameter | Description | Default | Required | |-----------|-------------|---------|----------| -| `include` | Glob patterns to match files for processing. Supports multiple patterns, e.g. `--include=src/*.tsx,src/*.ts --include=package.json` | - | No | -| `disable` | Disable tools categories | `[]` | No | -| `disableTools` | List of specific tools to disable | `[]` | No | +| `output` | Output path for modified files (primarily for `tools` mode operations like refactoring). | - | No | +| `dst` | Destination path/filename for the main result (primarily for `completion` or `assistant` mode). Supports `${MODEL_NAME}` and `${ROUTER}` substitutions. | - | No | +| `format` | Defines the desired structure for the AI's output. Can be a Zod schema object, a Zod schema string, a JSON schema string, or a path to a JSON schema file (e.g., `file:./schema.json`). Ensures the output conforms to the specified structure. | - | No | +| `filters` | Post-processing filters applied to the output (primarily `completion` mode with `--dst`). Can be a comma-separated string of filter names (e.g., `unwrapMarkdown,trim`). 
| `''` | No | -## Configuration & Profiles +## Tool Usage | Parameter | Description | Default | Required | |-----------|-------------|---------|----------| -| `profile` | Path to profile for variables. Supports environment variables | `${POLYMECH-ROOT}/profile.json` | No | -| `env` | Environment (in profile) | `default` | No | -| `config` | Path to JSON configuration file (API keys). Supports environment variables | - | No | -| `preferences` | Path to preferences file (location, email, gender, etc). Supports environment variables | `./.kbot/preferences.md` | No | +| `tools` | Comma-separated list of tool names or paths to custom tool files to enable. | (List of default tools) | No | +| `disable` | Comma-separated list of tool *categories* to disable (e.g., `filesystem,git`). | `[]` | No | +| `disableTools` | Comma-separated list of specific tool *names* to disable. | `[]` | No | + +## Iteration & Advanced Control + +| Parameter | Description | Default | Required | +|-----------|-------------|---------|----------| +| `each` | Iterate the task over multiple items. Accepts a GLOB pattern, path to a JSON file (array), or comma-separated strings. The current item is available as the `${ITEM}` variable in other parameters (e.g., `--dst="${ITEM}-output.md"`). Can be used to test different models (e.g., `--each="openai/gpt-3.5-turbo,openai/gpt-4o"`). | - | No | +| `variables` | Define custom key-value variables for use in prompts or other parameters (e.g., `--variables.PROJECT_NAME=MyProject`). Access via `${variableName}`. | `{}` | No | + +## Configuration & Authentication + +| Parameter | Description | Default | Required | +|-----------|-------------|---------|----------| +| `api_key` | Explicit API key for the selected router. Overrides keys from config files. | - | No | +| `baseURL` | Custom base URL for the API endpoint (e.g., for local LLMs via Ollama). Set automatically for known routers or can be specified directly. 
| - | No | +| `config` | Path to a JSON configuration file containing API keys and potentially other settings. | - | No | +| `profile` | Path to a profile file (JSON or .env format) for loading environment-specific variables. | - | No | +| `env` | Specifies the environment section to use within the profile file. | `default` | No | +| `preferences` | Path to a preferences file (e.g., containing user details like location, email). Used to provide context to the AI. | (System-specific default, often `~/.kbot/Preferences`) | No | ## Debugging & Logging | Parameter | Description | Default | Required | |-----------|-------------|---------|----------| -| `logLevel` | Logging level for the application (0-4) | `2` | No | -| `logs` | Logging directory | `./.kbot` | No | -| `dump` | Create a script | - | No | +| `logLevel` | Logging verbosity level (e.g., 0=error, 4=debug). | `4` | No | +| `logs` | Directory to store log files and temporary outputs (like `params.json`). | `./logs` | No | +| `dry` | Perform a dry run: log parameters and configurations without executing the AI request. | `false` | No | +| `dump` | Path to generate a script file representing the current command invocation. | - | No | diff --git a/packages/kbot/docs_/todos.md b/packages/kbot/docs_/todos.md deleted file mode 100644 index babdf0db..00000000 --- a/packages/kbot/docs_/todos.md +++ /dev/null @@ -1,35 +0,0 @@ -## Core - -- history/session -- run mode: streaming -- extensions: gui - -- logging : system, file logger, notification, popup - -- filters: in/out (incl. 
tools) -- input/output formats: md, csv, xls, docx, pdf - -- splitters -- pipes: n8n/nodered/stdio - -- docs: custom help command - -- router: ollama - -- bundle: ESM & Deno - -## Models - -- task->match - -## Prompt - -- evaluation -> refine - -## Tools - -### Web - -- external agents: https://github.com/Skyvern-AI/skyvern -- multiple scrapers: puppeteer / cherio / API -- Rapid API composer diff --git a/packages/kbot/preferences.md b/packages/kbot/preferences.md deleted file mode 100644 index 9edf5cf6..00000000 --- a/packages/kbot/preferences.md +++ /dev/null @@ -1,3 +0,0 @@ -# Preferences - -You are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments. \ No newline at end of file diff --git a/packages/kbot/test_web_urls.js b/packages/kbot/src/examples/test_web_urls.js similarity index 100% rename from packages/kbot/test_web_urls.js rename to packages/kbot/src/examples/test_web_urls.js diff --git a/packages/kbot/tests/test-data/core/md-test-out.json b/packages/kbot/tests/test-data/core/md-test-out.json new file mode 100644 index 00000000..2c5b0e1d --- /dev/null +++ b/packages/kbot/tests/test-data/core/md-test-out.json @@ -0,0 +1,725 @@ +{ + "type": "root", + "children": [ + { + "type": "heading", + "depth": 1, + "children": [ + { + "type": "text", + "value": "Intro Chapter", + "position": { + "start": { + "line": 1, + "column": 3, + "offset": 2 + }, + "end": { + "line": 1, + "column": 26, + "offset": 25 + } + } + } + ], + "position": { + "start": { + "line": 1, + "column": 1, + "offset": 0 + }, + "end": { + "line": 1, + "column": 26, + "offset": 25 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "The document introduces topics related to AI and Markdown processing.", + "position": { + "start": { + "line": 3, + "column": 1, + "offset": 29 + }, + "end": { + "line": 3, + "column": 142, + "offset": 170 + } + } + } + ], + "position": { + 
"start": { + "line": 3, + "column": 1, + "offset": 29 + }, + "end": { + "line": 3, + "column": 142, + "offset": 170 + } + } + }, + { + "type": "heading", + "depth": 2, + "children": [ + { + "type": "text", + "value": "Background Overview", + "position": { + "start": { + "line": 5, + "column": 4, + "offset": 177 + }, + "end": { + "line": 5, + "column": 27, + "offset": 200 + } + } + } + ], + "position": { + "start": { + "line": 5, + "column": 1, + "offset": 174 + }, + "end": { + "line": 5, + "column": 27, + "offset": 200 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "Markdown has an interesting history that we will explore.", + "position": { + "start": { + "line": 7, + "column": 1, + "offset": 204 + }, + "end": { + "line": 7, + "column": 91, + "offset": 294 + } + } + } + ], + "position": { + "start": { + "line": 7, + "column": 1, + "offset": 204 + }, + "end": { + "line": 7, + "column": 91, + "offset": 294 + } + } + }, + { + "type": "heading", + "depth": 3, + "children": [ + { + "type": "text", + "value": "\"Early Days\"", + "position": { + "start": { + "line": 9, + "column": 5, + "offset": 302 + }, + "end": { + "line": 9, + "column": 33, + "offset": 330 + } + } + } + ], + "position": { + "start": { + "line": 9, + "column": 1, + "offset": 298 + }, + "end": { + "line": 9, + "column": 33, + "offset": 330 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "Early days are described in the details provided.", + "position": { + "start": { + "line": 11, + "column": 1, + "offset": 334 + }, + "end": { + "line": 11, + "column": 38, + "offset": 371 + } + } + } + ], + "position": { + "start": { + "line": 11, + "column": 1, + "offset": 334 + }, + "end": { + "line": 11, + "column": 38, + "offset": 371 + } + } + }, + { + "type": "heading", + "depth": 1, + "children": [ + { + "type": "text", + "value": "Methodology Overview", + "position": { + "start": { + "line": 13, + "column": 3, + "offset": 377 + }, 
+ "end": { + "line": 13, + "column": 25, + "offset": 399 + } + } + } + ], + "position": { + "start": { + "line": 13, + "column": 1, + "offset": 375 + }, + "end": { + "line": 13, + "column": 25, + "offset": 399 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "This chapter describes the employed methods.", + "position": { + "start": { + "line": 15, + "column": 1, + "offset": 403 + }, + "end": { + "line": 15, + "column": 40, + "offset": 442 + } + } + } + ], + "position": { + "start": { + "line": 15, + "column": 1, + "offset": 403 + }, + "end": { + "line": 15, + "column": 40, + "offset": 442 + } + } + }, + { + "type": "heading", + "depth": 2, + "children": [ + { + "type": "text", + "value": "\"Markdown Parsing\"", + "position": { + "start": { + "line": 17, + "column": 4, + "offset": 449 + }, + "end": { + "line": 17, + "column": 33, + "offset": 478 + } + } + } + ], + "position": { + "start": { + "line": 17, + "column": 1, + "offset": 446 + }, + "end": { + "line": 17, + "column": 33, + "offset": 478 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "User preferences indicate a focus on concise and direct responses.", + "position": { + "start": { + "line": 19, + "column": 1, + "offset": 482 + }, + "end": { + "line": 19, + "column": 8, + "offset": 489 + } + } + }, + { + "type": "inlineCode", + "value": "unified", + "position": { + "start": { + "line": 19, + "column": 8, + "offset": 489 + }, + "end": { + "line": 19, + "column": 17, + "offset": 498 + } + } + }, + { + "type": "text", + "value": "The paragraph consists only of an empty conjunction and lacks meaningful content.", + "position": { + "start": { + "line": 19, + "column": 17, + "offset": 498 + }, + "end": { + "line": 19, + "column": 22, + "offset": 503 + } + } + }, + { + "type": "inlineCode", + "value": "remark-parse", + "position": { + "start": { + "line": 19, + "column": 22, + "offset": 503 + }, + "end": { + "line": 19, + "column": 36, 
+ "offset": 517 + } + } + }, + { + "type": "text", + "value": "To manage Markdown formatting.", + "position": { + "start": { + "line": 19, + "column": 36, + "offset": 517 + }, + "end": { + "line": 19, + "column": 56, + "offset": 537 + } + } + } + ], + "position": { + "start": { + "line": 19, + "column": 1, + "offset": 482 + }, + "end": { + "line": 19, + "column": 56, + "offset": 537 + } + } + }, + { + "type": "heading", + "depth": 2, + "children": [ + { + "type": "text", + "value": "Content Transformation", + "position": { + "start": { + "line": 21, + "column": 4, + "offset": 544 + }, + "end": { + "line": 21, + "column": 37, + "offset": 577 + } + } + } + ], + "position": { + "start": { + "line": 21, + "column": 1, + "offset": 541 + }, + "end": { + "line": 21, + "column": 37, + "offset": 577 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "The process entails navigating the AST and implementing LLM transformations.", + "position": { + "start": { + "line": 23, + "column": 1, + "offset": 581 + }, + "end": { + "line": 23, + "column": 77, + "offset": 657 + } + } + } + ], + "position": { + "start": { + "line": 23, + "column": 1, + "offset": 581 + }, + "end": { + "line": 23, + "column": 77, + "offset": 657 + } + } + }, + { + "type": "heading", + "depth": 1, + "children": [ + { + "type": "text", + "value": "Data Representation", + "position": { + "start": { + "line": 25, + "column": 3, + "offset": 663 + }, + "end": { + "line": 25, + "column": 33, + "offset": 693 + } + } + } + ], + "position": { + "start": { + "line": 25, + "column": 1, + "offset": 661 + }, + "end": { + "line": 25, + "column": 33, + "offset": 693 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "The table includes three headers and various data cells, highlighting important content in one cell.", + "position": { + "start": { + "line": 27, + "column": 1, + "offset": 697 + }, + "end": { + "line": 31, + "column": 35, + "offset": 
900 + } + } + } + ], + "position": { + "start": { + "line": 27, + "column": 1, + "offset": 697 + }, + "end": { + "line": 31, + "column": 35, + "offset": 900 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "The table displays sample data with transformable cells.", + "position": { + "start": { + "line": 33, + "column": 1, + "offset": 904 + }, + "end": { + "line": 33, + "column": 76, + "offset": 979 + } + } + } + ], + "position": { + "start": { + "line": 33, + "column": 1, + "offset": 904 + }, + "end": { + "line": 33, + "column": 76, + "offset": 979 + } + } + }, + { + "type": "heading", + "depth": 1, + "children": [ + { + "type": "text", + "value": "Structured Output Analysis", + "position": { + "start": { + "line": 35, + "column": 3, + "offset": 985 + }, + "end": { + "line": 35, + "column": 45, + "offset": 1027 + } + } + } + ], + "position": { + "start": { + "line": 35, + "column": 1, + "offset": 983 + }, + "end": { + "line": 35, + "column": 45, + "offset": 1027 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "The text mentions sunny weather, making it ideal for a park walk.", + "position": { + "start": { + "line": 37, + "column": 1, + "offset": 1031 + }, + "end": { + "line": 37, + "column": 163, + "offset": 1193 + } + } + } + ], + "position": { + "start": { + "line": 37, + "column": 1, + "offset": 1031 + }, + "end": { + "line": 37, + "column": 163, + "offset": 1193 + } + }, + "manualAnalysisResult": { + "keywords": [ + "sunny weather", + "ideal", + "park walk" + ], + "sentiment": "positive" + } + }, + { + "type": "heading", + "depth": 1, + "children": [ + { + "type": "text", + "value": "Final Thoughts", + "position": { + "start": { + "line": 39, + "column": 3, + "offset": 1199 + }, + "end": { + "line": 39, + "column": 24, + "offset": 1220 + } + } + } + ], + "position": { + "start": { + "line": 39, + "column": 1, + "offset": 1197 + }, + "end": { + "line": 39, + "column": 24, + 
"offset": 1220 + } + } + }, + { + "type": "paragraph", + "children": [ + { + "type": "text", + "value": "A summary of the key findings is presented in the concluding paragraph.", + "position": { + "start": { + "line": 41, + "column": 1, + "offset": 1224 + }, + "end": { + "line": 41, + "column": 53, + "offset": 1276 + } + } + } + ], + "position": { + "start": { + "line": 41, + "column": 1, + "offset": 1224 + }, + "end": { + "line": 41, + "column": 54, + "offset": 1277 + } + } + } + ], + "position": { + "start": { + "line": 1, + "column": 1, + "offset": 0 + }, + "end": { + "line": 41, + "column": 54, + "offset": 1277 + } + } +} \ No newline at end of file