kbot iterator callbacks

This commit is contained in:
lovebird 2025-04-07 16:01:06 +02:00
parent 9391478181
commit b2ef96e786
11 changed files with 483 additions and 76 deletions

View File

@ -1,7 +1,10 @@
import { IKBotTask } from '@polymech/ai-tools';
export type AsyncTransformer = (input: string, path: string) => Promise<string>;
export type ErrorCallback = (path: string, value: string, error: unknown) => void;
export type FilterCallback = (input: string, path: string) => Promise<boolean>;
export type Filter = (input: string) => Promise<boolean>;
export type OnTransformCallback = (jsonPath: string, value: string, options?: Partial<IKBotTask>) => Promise<string>;
export type OnTransformedCallback = (jsonPath: string, transformedValue: string, options?: Partial<IKBotTask>) => Promise<string>;
export interface INetworkOptions {
throttleDelay?: number;
concurrentTasks?: number;
@ -21,14 +24,21 @@ export interface GlobalOptions {
network?: INetworkOptions;
errorCallback?: ErrorCallback;
filterCallback?: FilterCallback;
onTransform?: OnTransformCallback;
onTransformed?: OnTransformedCallback;
}
export declare const isNumber: Filter;
export declare const isBoolean: Filter;
export declare const isValidString: Filter;
export declare const testFilters: (filters: Filter[]) => FilterCallback;
export declare const defaultFilters: (filters?: Filter[]) => Filter[];
export declare function transformObject(obj: Record<string, any>, transform: AsyncTransformer, path: string, networkOptions: Required<INetworkOptions>, errorCallback: ErrorCallback, testCallback: FilterCallback): Promise<void>;
export declare function transformPath(obj: Record<string, any>, keys: string[], transform: AsyncTransformer, networkOptions: Required<INetworkOptions>, currentPath: string, errorCallback: ErrorCallback, testCallback: FilterCallback): Promise<void>;
export declare function transformObject(obj: Record<string, any>, transform: AsyncTransformer, path: string, networkOptions: Required<INetworkOptions>, errorCallback: ErrorCallback, testCallback: FilterCallback, onTransform: OnTransformCallback, // Pass callbacks down
onTransformed: OnTransformedCallback, // Pass callbacks down
options?: Partial<IKBotTask>): Promise<void>;
export declare function transformPath(obj: Record<string, any>, keys: string[], transform: AsyncTransformer, networkOptions: Required<INetworkOptions>, currentPath: string, // Changed from jsonPointer to represent the logical path
errorCallback: ErrorCallback, testCallback: FilterCallback, onTransform: OnTransformCallback, // Receive callbacks
onTransformed: OnTransformedCallback, // Receive callbacks
options?: Partial<IKBotTask>): Promise<void>;
export declare const defaultError: ErrorCallback;
export interface TransformWithOptionsInput {
jsonPath: string;
@ -36,6 +46,9 @@ export interface TransformWithOptionsInput {
network?: INetworkOptions;
errorCallback?: ErrorCallback;
filterCallback?: FilterCallback;
onTransform?: OnTransformCallback;
onTransformed?: OnTransformedCallback;
kbotOptions?: Partial<IKBotTask>;
}
export declare function transformObjectWithOptions(obj: Record<string, any>, transform: AsyncTransformer, options: TransformWithOptionsInput): Promise<void>;
export declare const defaultOptions: (options?: Partial<TransformOptions>) => TransformOptions;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
import { IKBotTask } from '@polymech/ai-tools';
import { AsyncTransformer, ErrorCallback, FilterCallback, INetworkOptions } from './async-iterator.js';
import { AsyncTransformer, ErrorCallback, FilterCallback, OnTransformCallback, OnTransformedCallback, INetworkOptions } from './async-iterator.js';
/**
* Notes for LLM modifications
*
@ -34,6 +34,8 @@ export interface IOptions {
transformerFactory?: (options: IKBotTask) => AsyncTransformer;
logger?: ILogger;
cacheConfig?: CacheConfig;
onTransform?: OnTransformCallback;
onTransformed?: OnTransformedCallback;
}
export { INetworkOptions };
export declare function createLLMTransformer(options: IKBotTask, logger?: ILogger, cacheConfig?: CacheConfig): AsyncTransformer;

File diff suppressed because one or more lines are too long

View File

@ -137,6 +137,160 @@ To keep the original value and add a transformed version, specify a `targetPath`
This keeps the original `name` and adds a new `marketingName` field.
### Structured Output with Format Option
The `format` option allows you to define a JSON schema that the LLM output should conform to. This is extremely useful for ensuring consistent, structured responses that can be easily parsed and used in your application.
#### Basic Format Usage
To request structured output, add a `format` property to your field mapping options:
```typescript
{
jsonPath: '$.productReview.reviewText',
targetPath: 'analysis',
options: {
prompt: 'Analyze this product review and extract key information',
format: {
type: "object",
properties: {
sentiment: {
type: "string",
enum: ["positive", "neutral", "negative"],
description: "The overall sentiment of the review"
},
pros: {
type: "array",
items: {
type: "string"
},
description: "Positive aspects mentioned in the review",
minItems: 1,
maxItems: 3
},
cons: {
type: "array",
items: {
type: "string"
},
description: "Negative aspects mentioned in the review",
minItems: 0,
maxItems: 3
}
},
required: ["sentiment", "pros", "cons"]
}
}
}
```
#### Processing Structured Responses
Depending on the model and router, the formatted response may arrive as a JSON string rather than a parsed object. Check the type and parse defensively before using it:
```typescript
// After transformation
if (data.productReview && data.productReview.analysis) {
try {
// Parse the JSON string if needed
const analysisJson = typeof data.productReview.analysis === 'string'
? JSON.parse(data.productReview.analysis)
: data.productReview.analysis;
// Now you can work with the structured data
console.log(`Sentiment: ${analysisJson.sentiment}`);
console.log(`Pros: ${analysisJson.pros.join(', ')}`);
console.log(`Cons: ${analysisJson.cons.join(', ')}`);
} catch (e) {
console.error("Error parsing structured output:", e);
}
}
```
#### Best Practices for Formatted Output
1. **Clear Prompt Instructions**: Include explicit instructions in your prompt about the expected format.
2. **Schema Validation**: Use detailed JSON schemas with required fields and appropriate types.
3. **Parsing Handling**: Always include error handling when parsing the output.
4. **Schema Examples**: Consider including examples in your prompt for more complex schemas.
#### Format Option Example
Here's a complete example from the iterator-factory-example.ts file:
```typescript
// Define a field mapping with format option
const fieldMappings = [
{
jsonPath: '$.productReview.reviewText',
targetPath: 'analysis',
options: {
// Clear and explicit prompt that includes the schema format details
prompt: `Analyze this product review and extract key information using EXACTLY the schema specified below.
The review: "Great selection of fruits with good prices and quality. Some items were out of stock."
Your response MUST be a valid JSON object following this exact schema:
{
"sentiment": "positive" | "neutral" | "negative",
"pros": ["string", "string"...], // 1-3 items
"cons": ["string"...] // 0-3 items
}
Do not add any extra fields not in the schema, and make sure to use the exact field names as specified.`,
// Schema validation ensures structured output format
format: {
type: "object",
properties: {
sentiment: {
type: "string",
enum: ["positive", "neutral", "negative"],
description: "The overall sentiment of the review"
},
pros: {
type: "array",
items: {
type: "string"
},
description: "Positive aspects mentioned in the review",
minItems: 1,
maxItems: 3
},
cons: {
type: "array",
items: {
type: "string"
},
description: "Negative aspects mentioned in the review",
minItems: 0,
maxItems: 3
}
},
required: ["sentiment", "pros", "cons"]
}
}
}
]
```
When run, this produces a structured output like:
```json
{
"sentiment": "positive",
"pros": [
"great selection of fruits",
"good prices",
"good quality"
],
"cons": [
"some items were out of stock"
]
}
```
This structured format is much easier to work with programmatically than free-form text responses.
## Filtering
Filters determine which values should be transformed:
@ -441,9 +595,15 @@ async function transformProductsWithCaching() {
8. **Use targetPath for non-destructive transformations**: When generating new content related to existing fields, use targetPath to preserve the original data.
9. **Implement custom cache for production**: For production scenarios, implement a persistent cache solution rather than relying on in-memory caching.
9. **Use format for structured outputs**: When you need consistent, structured data from LLMs, use the format option with clear JSON schemas.
10. **Use appropriate namespaces**: When multiple parts of your application use the same cache implementation, use distinct namespaces to prevent collisions.
10. **Include schema details in prompts**: For complex schemas, include the schema structure in your prompt to guide the LLM.
11. **Handle string parsing**: Always add error handling when parsing structured responses, as they may be returned as a JSON string rather than a parsed object.
12. **Implement custom cache for production**: For production scenarios, implement a persistent cache solution rather than relying on in-memory caching.
13. **Use appropriate namespaces**: When multiple parts of your application use the same cache implementation, use distinct namespaces to prevent collisions.
## API Reference
@ -465,7 +625,14 @@ async function transformProductsWithCaching() {
- `FilterCallback`: Function that determines if a value should be transformed
- `ErrorCallback`: Function that handles transformation errors
- `FieldMapping`: Configuration for a transformation
- `jsonPath`: JSONPath expression to select values
- `targetPath`: Optional field to store transformed value (null for in-place)
- `options`: Configuration for the transformation including:
- `prompt`: The prompt for the LLM
- `format`: Optional JSON schema for structured output
- `TransformOptions`: Options for the transformation process
- `CacheConfig`: Configuration for the caching mechanism
- `INetworkOptions`: Configuration for throttling and concurrency
## Limitations
@ -495,6 +662,9 @@ npm run examples:iterator-factory
# Run with debug logging
npm run examples:async-iterator -- --debug
# Run with caching disabled (forces fresh responses)
npm run examples:iterator-factory -- --no-cache
```
The examples will transform sample JSON data and save the results to the `tests/test-data/core/` directory.

View File

@ -3,7 +3,7 @@
"messages": [
{
"role": "user",
"content": "Analyze this product review and extract key information using EXACTLY the schema specified below.\n\nThe review: \"Great selection of fruits with good prices and quality. Some items were out of stock.\"\n\nYour response MUST be a valid JSON object following this exact schema:\n{\n \"sentiment\": \"positive\" | \"neutral\" | \"negative\",\n \"pros\": [\"string\", \"string\"...], // 1-3 items\n \"cons\": [\"string\"...] // 0-3 items\n}\n\nDo not add any extra fields not in the schema, and make sure to use the exact field names as specified.\n\nText to transform: \"Great selection of fruits with good prices and quality. Some items were out of stock.\""
"content": "Make this description more engaging and detailed, around 10 words\n\nText to transform: \"[PRODUCT INFO] A yellow tropical fruit\""
},
{
"role": "user",

View File

@ -2,11 +2,15 @@ import { JSONPath } from 'jsonpath-plus'
import pThrottle from 'p-throttle'
import pMap from 'p-map'
import { deepClone } from "@polymech/core/objects"
import { IKBotTask } from '@polymech/ai-tools'; // Assuming IKBotTask might be relevant context for callbacks
export type AsyncTransformer = (input: string, path: string) => Promise<string>
export type ErrorCallback = (path: string, value: string, error: unknown) => void
export type FilterCallback = (input: string, path: string) => Promise<boolean>
export type Filter = (input: string) => Promise<boolean>
/**
 * Hook awaited by `transformPath` before a value is handed to the transformer.
 *
 * @param jsonPath - Path of the value being processed, built as `${currentPath}/${lastKey}`.
 * @param value - The current value at that path.
 * @param options - Optional kbot task options forwarded as context.
 * @returns The string that is passed to the transformer in place of `value`.
 *
 * If the hook rejects, `transformPath` logs the error and continues with the original value.
 */
export type OnTransformCallback = (jsonPath: string, value: string, options?: Partial<IKBotTask>) => Promise<string>;
/**
 * Hook awaited by `transformPath` after the transformer has produced a result.
 *
 * @param jsonPath - Path of the value being processed, built as `${currentPath}/${lastKey}`.
 * @param transformedValue - The transformer's output for that path.
 * @param options - Optional kbot task options forwarded as context.
 * @returns The string that is written back to the object.
 *
 * If the hook rejects, `transformPath` logs the error and stores the unmodified transformer output.
 */
export type OnTransformedCallback = (jsonPath: string, transformedValue: string, options?: Partial<IKBotTask>) => Promise<string>;
export interface INetworkOptions {
throttleDelay?: number;
@ -35,6 +39,8 @@ export interface GlobalOptions {
network?: INetworkOptions
errorCallback?: ErrorCallback
filterCallback?: FilterCallback
onTransform?: OnTransformCallback // Add pre-transform callback
onTransformed?: OnTransformedCallback // Add post-transform callback
}
// Sleep utility for retry mechanism
@ -66,7 +72,10 @@ export async function transformObject(
path: string,
networkOptions: Required<INetworkOptions>,
errorCallback: ErrorCallback,
testCallback: FilterCallback
testCallback: FilterCallback,
onTransform: OnTransformCallback, // Pass callbacks down
onTransformed: OnTransformedCallback, // Pass callbacks down
options?: Partial<IKBotTask> // Pass options context if available
): Promise<void> {
const paths = JSONPath({ path, json: obj, resultType: 'pointer' });
await pMap(
@ -80,7 +89,10 @@ export async function transformObject(
networkOptions,
jsonPointer,
errorCallback,
testCallback
testCallback,
onTransform, // Pass callbacks down
onTransformed, // Pass callbacks down
options // Pass options context if available
)
},
{ concurrency: networkOptions.concurrentTasks }
@ -92,9 +104,12 @@ export async function transformPath(
keys: string[],
transform: AsyncTransformer,
networkOptions: Required<INetworkOptions>,
currentPath: string,
currentPath: string, // Changed from jsonPointer to represent the logical path
errorCallback: ErrorCallback,
testCallback: FilterCallback
testCallback: FilterCallback,
onTransform: OnTransformCallback, // Receive callbacks
onTransformed: OnTransformedCallback, // Receive callbacks
options?: Partial<IKBotTask> // Pass options context if available
): Promise<void> {
let current: Record<string, any> = obj
@ -117,10 +132,30 @@ export async function transformPath(
let attempts = 0;
let success = false;
let lastError: unknown;
let valueToTransform = current[lastKey];
const fullJsonPath = `${currentPath}/${lastKey}`; // Construct full path
// Call onTransform before transformation
try {
valueToTransform = await onTransform(fullJsonPath, valueToTransform, options);
} catch (error) {
console.error(`Error in onTransform callback for path ${fullJsonPath}:`, error);
// Best effort: a failing onTransform is logged and the original value is sent to the transformer
}
while (attempts < networkOptions.maxRetries && !success) {
try {
current[lastKey] = await throttle(transform)(current[lastKey], `${currentPath}/${lastKey}`);
let transformedValue = await throttle(transform)(valueToTransform, fullJsonPath);
// Call onTransformed after successful transformation
try {
transformedValue = await onTransformed(fullJsonPath, transformedValue, options);
} catch (error) {
console.error(`Error in onTransformed callback for path ${fullJsonPath}:`, error);
// Best effort: a failing onTransformed is logged and the unmodified transformer output is kept
}
current[lastKey] = transformedValue; // Assign potentially modified transformed value
success = true;
} catch (error) {
lastError = error;
@ -135,17 +170,20 @@ export async function transformPath(
}
if (!success) {
errorCallback(currentPath, lastKey, lastError);
errorCallback(currentPath, lastKey, lastError); // Use currentPath (logical path)
}
}
} else if (typeof current[lastKey] === 'object' && current[lastKey] !== null) {
await transformObject(
current[lastKey] as Record<string, any>,
transform,
'$.*',
'$.*', // Recurse on all properties
networkOptions,
errorCallback,
testCallback
testCallback,
onTransform, // Pass callbacks down
onTransformed, // Pass callbacks down
options // Pass options context down
)
}
}
@ -161,8 +199,15 @@ export interface TransformWithOptionsInput {
network?: INetworkOptions
errorCallback?: ErrorCallback
filterCallback?: FilterCallback
onTransform?: OnTransformCallback // Add to options
onTransformed?: OnTransformedCallback // Add to options
kbotOptions?: Partial<IKBotTask> // Add kbot options context
}
// Identity fallbacks: used when the caller does not supply its own hooks, so the
// value (or the transformer output) flows through unchanged.
const defaultOnTransform: OnTransformCallback = async function (_jsonPath, value) {
  return value;
};
const defaultOnTransformed: OnTransformedCallback = async function (_jsonPath, transformedValue) {
  return transformedValue;
};
export async function transformObjectWithOptions(
obj: Record<string, any>,
transform: AsyncTransformer,
@ -173,7 +218,10 @@ export async function transformObjectWithOptions(
targetPath = null,
network = {},
errorCallback = defaultError,
filterCallback = testFilters(defaultFilters())
filterCallback = testFilters(defaultFilters()),
onTransform = defaultOnTransform, // Use default if not provided
onTransformed = defaultOnTransformed, // Use default if not provided
kbotOptions // Destructure kbot options
} = options;
const networkOptions: Required<INetworkOptions> = {
@ -189,7 +237,10 @@ export async function transformObjectWithOptions(
jsonPath,
networkOptions,
errorCallback,
filterCallback
filterCallback,
onTransform, // Pass down
onTransformed, // Pass down
kbotOptions // Pass down kbot options
);
}
@ -203,7 +254,10 @@ export async function transformObjectWithOptions(
jsonPath,
networkOptions,
errorCallback,
filterCallback
filterCallback,
onTransform, // Pass down
onTransformed, // Pass down
kbotOptions // Pass down kbot options
);
// Get paths from original object

View File

@ -4,7 +4,9 @@ import * as fs from 'fs';
import type { IKBotTask } from '@polymech/ai-tools';
import { E_OPENROUTER_MODEL } from '../../models/cache/openrouter-models.js';
import { E_Mode } from '../../zod_schema.js';
import { FieldMapping, createIterator, createLLMTransformer, CacheConfig, INetworkOptions, transform } from '../../iterator.js';
import { FieldMapping, createIterator, createLLMTransformer, CacheConfig, INetworkOptions, transform, IOptions, removeEmptyObjects } from '../../iterator.js';
import { OnTransformCallback, OnTransformedCallback } from '../../async-iterator.js';
import { rm_cached_object } from '@polymech/cache';
/**
* Notes for LLM modifications
@ -32,6 +34,9 @@ export async function simpleTransformExample() {
model: MODEL,
router: ROUTER,
mode: E_Mode.COMPLETION
}, {
onTransform: simpleOnTransform,
onTransformed: simpleOnTransformed
});
console.log("\nSimplified Transform Result:");
@ -211,6 +216,37 @@ Do not add any extra fields not in the schema, and make sure to use the exact fi
}
];
// Sample pre-transform hook: logs the call, then tags description values with a
// "[PRODUCT INFO]" prefix before they are sent to the LLM.
const exampleOnTransform: OnTransformCallback = async (jsonPath, value, options) => {
  const preview = value.substring(0, 30);
  const model = options?.model;
  console.log(` -> onTransform: Path='${jsonPath}', Original Value='${preview}...', Options Model='${model}'`);
  // Only paths mentioning "description" are rewritten; every other value passes through as-is.
  return jsonPath.includes('description') ? `[PRODUCT INFO] ${value}` : value;
};
// Sample post-transform hook: logs the call, then appends a "[HEALTH FOCUS]"
// marker to LLM output produced for nutrition fields.
const exampleOnTransformed: OnTransformedCallback = async (jsonPath, transformedValue, options) => {
  const preview = transformedValue.substring(0, 30);
  const model = options?.model;
  console.log(` <- onTransformed: Path='${jsonPath}', Transformed Value='${preview}...', Options Model='${model}'`);
  // Only paths mentioning "nutrition" are post-processed; other results are returned unchanged.
  return jsonPath.includes('nutrition') ? `${transformedValue} [HEALTH FOCUS]` : transformedValue;
};
// Minimal pre-transform hook for the second example: traces the path and
// returns the value untouched.
const simpleOnTransform: OnTransformCallback = async function (jsonPath, value) {
  const trace = ` -> simpleOnTransform: Path='${jsonPath}'`;
  console.log(trace);
  return value;
};
// Minimal post-transform hook for the second example: traces the path and
// returns the transformer output untouched.
const simpleOnTransformed: OnTransformedCallback = async function (jsonPath, transformedValue) {
  const trace = ` <- simpleOnTransformed: Path='${jsonPath}'`;
  console.log(trace);
  return transformedValue;
};
// Error handler
const errorCallback = (path: string, value: string, error: any) => {
logger.error(`Error transforming ${path}: ${error.message}`);
@ -247,26 +283,56 @@ export async function factoryExample() {
mode: E_Mode.COMPLETION
};
// --- Cache Clearing Logic ---
// Calculate the expected cache key for the object transformation
const objectCacheKey = removeEmptyObjects({
data: JSON.stringify(exampleData), // Use the initial data state
mappings: fieldMappings.map(m => ({
jsonPath: m.jsonPath,
targetPath: m.targetPath,
options: {
model: globalOptionsMixin.model,
router: globalOptionsMixin.router,
mode: globalOptionsMixin.mode,
prompt: m.options?.prompt
}
}))
});
const objectCacheNamespace = 'transformed-objects';
console.log(`Attempting to clear cache for key in namespace '${objectCacheNamespace}' before first run...`);
try {
await rm_cached_object({ ca_options: objectCacheKey }, objectCacheNamespace);
console.log('Cache cleared successfully (or key did not exist).');
} catch (error) {
console.warn('Failed to clear cache, proceeding anyway:', error);
}
// --- End Cache Clearing Logic ---
// Combine all options, including callbacks, for createIterator.
//
// The cache settings are resolved once and shared: previously the `--no-cache`
// override was applied only to the `cacheConfig` field, while `transformerFactory`
// still closed over the original `cacheConfig`, so transformers built by the
// factory were created with caching enabled even when the flag was passed.
const effectiveCacheConfig = {
  ...cacheConfig,
  enabled: process.argv.includes('--no-cache') ? false : cacheConfig.enabled
};
const iteratorOptions: IOptions = {
  network: networkOptions,
  errorCallback,
  // Accept every value; this example does not filter.
  filterCallback: async () => true,
  transformerFactory: (options) => createLLMTransformer(options, logger, effectiveCacheConfig),
  logger,
  cacheConfig: effectiveCacheConfig,
  onTransform: exampleOnTransform,
  onTransformed: exampleOnTransformed
};
// Create an iterator factory instance
const iterator = createIterator(
data,
globalOptionsMixin,
{
network: networkOptions,
errorCallback,
filterCallback: async () => true,
transformerFactory: (options) => createLLMTransformer(options, logger, cacheConfig),
logger,
cacheConfig: {
...cacheConfig,
// Force a new response for format testing
enabled: process.argv.includes('--no-cache') ? false : cacheConfig.enabled
}
}
iteratorOptions
);
// Use the iterator to transform the data
console.log("First run - should transform and cache results:");
console.log("First run - should transform, run callbacks, and cache results:");
await iterator.transform(fieldMappings);
const outputPath = path.resolve('./tests/test-data/core/iterator-factory-data.json');
@ -309,17 +375,10 @@ export async function factoryExample() {
const iterator2 = createIterator(
data2,
globalOptionsMixin,
{
network: networkOptions,
errorCallback,
filterCallback: async () => true,
transformerFactory: (options) => createLLMTransformer(options, logger, cacheConfig),
logger,
cacheConfig
}
iteratorOptions
);
// Should use cached values
// Should use cached values (callbacks won't run for cached object transform)
await iterator2.transform(fieldMappings);
console.log("\nBefore/After Comparison Example:");

View File

@ -3,6 +3,8 @@ import {
AsyncTransformer,
ErrorCallback,
FilterCallback,
OnTransformCallback,
OnTransformedCallback,
defaultError,
defaultFilters,
testFilters,
@ -75,6 +77,8 @@ export interface IOptions {
transformerFactory?: (options: IKBotTask) => AsyncTransformer;
logger?: ILogger;
cacheConfig?: CacheConfig;
onTransform?: OnTransformCallback;
onTransformed?: OnTransformedCallback;
}
const DEFAULT_CACHE_CONFIG: Required<CacheConfig> = {
@ -159,7 +163,9 @@ export function createIterator(
filterCallback = testFilters(defaultFilters()),
transformerFactory,
logger = dummyLogger,
cacheConfig
cacheConfig,
onTransform,
onTransformed
} = globalOptions;
const networkOptions: Required<INetworkOptions> = {
@ -234,7 +240,10 @@ export function createIterator(
targetPath,
network: networkOptions,
errorCallback,
filterCallback
filterCallback,
onTransform,
onTransformed,
kbotOptions: mergedOptions
}
);
}

View File

@ -4,22 +4,22 @@
{
"id": "f1",
"name": "apple",
"description": "A delightfully crisp and juicy fruit bursting with natural sweetness.",
"description": "A deliciously sweet, crisp, and refreshing fruit packed with flavor.",
"details": {
"color": "red",
"origin": "Worldwide",
"nutrition": "Rich in fiber and vitamin D, this food supports healthy digestion, helps regulate blood sugar levels, and promotes strong bones by enhancing calcium absorption and supporting the immune system."
"nutrition": "Rich in fiber and vitamin D, this food supports healthy digestion, helps regulate blood sugar levels, and promotes strong bones by enhancing calcium absorption and supporting the immune system. [HEALTH FOCUS]"
},
"marketingName": "Sure! Here are a few appealing marketing name options for \"apple\":\n\n1. Crimson Bliss\n2. Orchard Jewel\n3. Scarlet Crunch\n4. Natures Candy\n5. Ruby Crisp\n6. SweetHarvest\n7. Eden Bite\n8. Golden Orchard (if its a yellow variety)\n9. PurePom\n10. FreshMuse\n\nLet me know if you'd like names tailored to a specific apple variety or target audience!"
},
{
"id": "f2",
"name": "banana",
"description": "A sweet, sun-ripened yellow fruit bursting with tropical flavor.",
"description": "Vibrant, sun-kissed yellow tropical fruit bursting with sweet flavor",
"details": {
"color": "yellow",
"origin": "Southeast Asia",
"nutrition": "High in potassium, which helps regulate blood pressure, supports proper muscle function, and maintains fluid balance in the body, contributing to overall cardiovascular and muscular health."
"nutrition": "High in potassium, which helps regulate blood pressure, supports proper muscle function, and maintains fluid balance in the body, contributing to overall cardiovascular and muscular health. [HEALTH FOCUS]"
},
"marketingName": "Golden Delight"
}