diff --git a/packages/kbot/package.json b/packages/kbot/package.json index 0d1570e9..90eaeb65 100644 --- a/packages/kbot/package.json +++ b/packages/kbot/package.json @@ -27,6 +27,7 @@ "test:web": "vitest run tests/unit/web.test.ts", "test:files": "vitest run tests/unit/files.test.ts", "test:research": "vitest run tests/unit/research.test.ts", + "test:core": "vitest run tests/unit/core/async-iterator.test.ts", "test2:watch": "vitest", "test2:coverage": "vitest run --coverage", "webpack": "webpack --config webpack.config.js --stats-error-details", diff --git a/packages/kbot/src/reference/web.ts b/packages/kbot/src/reference/web.ts deleted file mode 100644 index 44ca15a4..00000000 --- a/packages/kbot/src/reference/web.ts +++ /dev/null @@ -1,112 +0,0 @@ -import * as path from 'path' -import { RunnableToolFunction } from 'openai/lib/RunnableFunction' - -import puppeteer from 'puppeteer' -import TurndownService from 'turndown' - -import { toolLogger } from '../../index.js' -import { IKBotTask } from '../../types.js' - -const turndown = new TurndownService() - -export const tools = (target: string, options: IKBotTask): Array => { - const logger = toolLogger('web', options) - return [ - { - type: 'function', - function: { - name: 'browse_page', - description: 'Browse a webpage and return its content as markdown, all links, images and pages main image', - parameters: { - type: 'object', - properties: { - url: { - type: 'string', - description: 'URL of the webpage to browse' - } - }, - required: ['url'] - }, - function: async (params: any) => { - try { - logger.debug(`Tool::BrowsePage Browsing ${params.url}`); - const browser = await puppeteer.launch({ - headless: true, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }) - - try { - const page = await browser.newPage() - logger.debug(`Tool::Web::BrowsePage Opening page ${params.url}`) - await page.goto(params.url, { - waitUntil: 'networkidle2' - }) - - const pageData = await page.evaluate((selector) => { - const elementsToRemove = document.querySelectorAll( - 'script, style, link, meta, noscript, iframe, [style*="display:none"],[style*="display: none"], .hidden' - ) - elementsToRemove.forEach(el => el.remove()) - - const links = Array.from(document.querySelectorAll('a')) - .map(a => ({ - text: a.textContent?.trim() || '', - href: a.href - })) - .filter(link => link.href && link.href.startsWith('http')) - .slice(0, 20) - - const images = Array.from(document.querySelectorAll('img')) - .map(img => ({ - src: img.src, - alt: img.alt || '', - width: img.width, - height: img.height - })) - .filter(img => img.src && img.src.startsWith('http')) - .slice(0, 20) - - const mainImage = document.querySelector('meta[property="og:image"]')?.getAttribute('content') || - document.querySelector('meta[name="og:image"]')?.getAttribute('content') - - let content - const body = document.body - content = body ? body.innerHTML : '' - return { - content, - links, - images, - ogImage: mainImage - } - }, null) - - const markdown = turndown.turndown(pageData.content) - await browser.close() - const ret = { - success: true, - markdown: markdown, - links: pageData.links, - images: pageData.images, - mainImage: pageData.ogImage, - url: params.url - }; - return ret - } catch (error: any) { - logger.debug('Error browsing page:', error.message, error); - await browser.close() - throw error - } - } catch (error: any) { - logger.debug('Error browsing page:', error.message); - return { - success: false, - error: error.message, - url: params.url - }; - } - }, - parse: JSON.parse - } - } as RunnableToolFunction - ] -} \ No newline at end of file diff --git a/packages/kbot/tests/unit/core/async-iterator.test.ts b/packages/kbot/tests/unit/core/async-iterator.test.ts new file mode 100644 index 00000000..6e9912a7 --- /dev/null +++ b/packages/kbot/tests/unit/core/async-iterator.test.ts @@ -0,0 +1,336 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { + transformObject, + defaultOptions, + TransformOptions, + testFilters, + defaultFilters, + AsyncTransformer, + ErrorCallback, + FilterCallback +} from '../../../src/async-iterator' + +// Mock interfaces for testing +interface IOptions { + query: string + keys: string[] + srcLang?: string + dstLang?: string + store?: string + cache?: boolean + api_key?: string + formality?: string +} + +describe('async-iterator', () => { + describe('transformObject', () => { + // Test data setup + const testData = { + items: [ + { id: '1', name: 'apple', description: 'A fruit', type: 'fresh' }, + { id: '2', name: 'banana', description: 'Yellow fruit', type: 'tropical' }, + { id: '3', name: 'orange', description: 'Orange fruit', type: 'citrus' } + ], + metadata: { + totalCount: 3, + category: 'fruits', + description: 'Collection of fruits', + type: 'food' + } + } + + let mockTransform: AsyncTransformer + let mockErrorCallback: ErrorCallback + let mockFilterCallback: FilterCallback + + beforeEach(() => { + // Mock transform function that uppercases strings + mockTransform = vi.fn(async (input: string) => { + return input.toUpperCase() + }) + + // Mock error callback + mockErrorCallback = vi.fn() + + // Mock filter callback that excludes IDs + mockFilterCallback = vi.fn(async (input: string, path: string) => { + return path.includes('name') || path.includes('description') + }) + }) + + it('should transform all matching strings based on JSONPath', async () => { + // Use a simple JSONPath to target item names + const path = '$.items[*].name' + + // Create a deep copy of the test data to prevent mutation between tests + const testObj = JSON.parse(JSON.stringify(testData)) + + await transformObject( + testObj, + mockTransform, + path, + 10, // throttleDelay + 1, // concurrentTasks + mockErrorCallback, + mockFilterCallback + ) + + // Check that the names were transformed to uppercase + expect(testObj.items[0].name).toBe('APPLE') + expect(testObj.items[1].name).toBe('BANANA') + expect(testObj.items[2].name).toBe('ORANGE') + + // Check that descriptions were not transformed + expect(testObj.items[0].description).toBe('A fruit') + expect(testObj.metadata.category).toBe('fruits') + + // Verify mock calls + expect(mockTransform).toHaveBeenCalledTimes(3) + expect(mockErrorCallback).not.toHaveBeenCalled() + }) + + it('should transform all matching strings with complex JSONPath', async () => { + // More complex JSONPath to target both names and descriptions + const path = '$.items[*].*' + + const testObj = JSON.parse(JSON.stringify(testData)) + + await transformObject( + testObj, + mockTransform, + path, + 10, + 1, + mockErrorCallback, + mockFilterCallback + ) + + // Check that both names and descriptions were transformed + expect(testObj.items[0].name).toBe('APPLE') + expect(testObj.items[0].description).toBe('A FRUIT') + expect(testObj.items[1].name).toBe('BANANA') + expect(testObj.items[1].description).toBe('YELLOW FRUIT') + + // But IDs should not be transformed due to filter + expect(testObj.items[0].id).toBe('1') + + // Metadata should remain untouched + expect(testObj.metadata.category).toBe('fruits') + }) + + it('should handle errors during transformation', async () => { + // Create a transform function that throws an error for certain inputs + const errorTransform = vi.fn(async (input: string, path: string) => { + if (input === 'banana') { + throw new Error('Test error') + } + return input.toUpperCase() + }) + + const testObj = JSON.parse(JSON.stringify(testData)) + + await transformObject( + testObj, + errorTransform, + '$.items[*].name', + 10, + 1, + mockErrorCallback, + mockFilterCallback + ) + + // First and third items should be transformed + expect(testObj.items[0].name).toBe('APPLE') + expect(testObj.items[2].name).toBe('ORANGE') + + // Error callback should have been called for 'banana' + expect(mockErrorCallback).toHaveBeenCalledTimes(1) + expect(mockErrorCallback).toHaveBeenCalledWith( + '/items/1/name', + 'name', + expect.any(Error) + ) + }) + + it('should use translateObjectAIT pattern with defaultOptions', async () => { + // Test implementation similar to translateObjectAIT + const options: IOptions = { + query: '$.items[*].name', + keys: ['id'], + srcLang: 'EN', + dstLang: 'DE', + store: 'test-store', + } + + const testObj = JSON.parse(JSON.stringify(testData)) + + // Create default options similar to translateObjectAIT + const opts: TransformOptions = defaultOptions({ + throttleDelay: 100, + concurrentTasks: 1, + path: options.query, + filterCallback: testFilters( + defaultFilters([ + async (input) => options.keys.includes(input) + ]) + ), + transform: mockTransform, + errorCallback: mockErrorCallback + }) + + await transformObject( + testObj, + opts.transform, + opts.path, + opts.throttleDelay, + opts.concurrentTasks, + opts.errorCallback, + opts.filterCallback + ) + + // Names should be transformed + expect(testObj.items[0].name).toBe('APPLE') + expect(testObj.items[1].name).toBe('BANANA') + expect(testObj.items[2].name).toBe('ORANGE') + + // IDs should remain untouched due to filter + expect(testObj.items[0].id).toBe('1') + }) + + it('should handle empty objects and paths', async () => { + // Empty object + const emptyObj = {} + + await transformObject( + emptyObj, + mockTransform, + '$.items[*].name', + 10, + 1, + mockErrorCallback, + mockFilterCallback + ) + + // Should not throw errors and object should remain empty + expect(emptyObj).toEqual({}) + expect(mockTransform).not.toHaveBeenCalled() + + // Empty path + const testObj = JSON.parse(JSON.stringify(testData)) + + await transformObject( + testObj, + mockTransform, + '', + 10, + 1, + mockErrorCallback, + mockFilterCallback + ) + + // Should not transform anything with empty path + expect(testObj).toEqual(testData) + expect(mockTransform).not.toHaveBeenCalled() + }) + + it('should transform only specific fields (description and type)', async () => { + // Create a specific JSONPath that targets only description and type fields + // Using a simpler JSONPath pattern that works reliably + const path = '$..[description,type]' + + const testObj = JSON.parse(JSON.stringify(testData)) + + // Create an async transform function that transforms to uppercase + const asyncTransform = vi.fn(async (input: string) => { + // Simulate async operation + await new Promise(resolve => setTimeout(resolve, 1)) + return input.toUpperCase() + }) + + // No filter is needed as we're already targeting specific fields with the JSONPath + const noFilter = async () => true + + await transformObject( + testObj, + asyncTransform, + path, + 10, + 3, // Run 3 concurrent tasks + mockErrorCallback, + noFilter + ) + + // Verify descriptions were transformed + expect(testObj.items[0].description).toBe('A FRUIT') + expect(testObj.items[1].description).toBe('YELLOW FRUIT') + expect(testObj.items[2].description).toBe('ORANGE FRUIT') + + // Verify types were transformed + expect(testObj.items[0].type).toBe('FRESH') + expect(testObj.items[1].type).toBe('TROPICAL') + expect(testObj.items[2].type).toBe('CITRUS') + + // Verify metadata fields were also transformed + expect(testObj.metadata.description).toBe('COLLECTION OF FRUITS') + expect(testObj.metadata.type).toBe('FOOD') + + // Verify other fields were not transformed + expect(testObj.items[0].name).toBe('apple') + expect(testObj.metadata.category).toBe('fruits') + + // Check that the transform function was called the expected number of times + // (4 descriptions + 4 types = 8 calls) + expect(asyncTransform).toHaveBeenCalledTimes(8) + }) + + it('should transform nested objects with specific fields', async () => { + // Create nested test data + const nestedData = { + products: { + fruits: { + items: [ + { id: '1', name: 'apple', description: 'A fruit', type: 'fresh' }, + { id: '2', name: 'banana', description: 'Yellow fruit', type: 'tropical' } + ], + description: 'Fresh fruits', + type: 'edible' + }, + vegetables: { + items: [ + { id: '3', name: 'carrot', description: 'Orange vegetable', type: 'root' }, + { id: '4', name: 'broccoli', description: 'Green vegetable', type: 'floret' } + ], + description: 'Fresh vegetables', + type: 'edible' + } + } + } + + // Create a filter that only allows 'description' fields + const descriptionFilter = vi.fn(async (input: string, path: string) => { + return path.includes('description') + }) + + await transformObject( + nestedData, + mockTransform, + '$..description', + 10, + 5, + mockErrorCallback, + descriptionFilter + ) + + // Check that only descriptions were transformed + expect(nestedData.products.fruits.description).toBe('FRESH FRUITS') + expect(nestedData.products.vegetables.description).toBe('FRESH VEGETABLES') + expect(nestedData.products.fruits.items[0].description).toBe('A FRUIT') + expect(nestedData.products.vegetables.items[1].description).toBe('GREEN VEGETABLE') + + // Check that other fields were not transformed + expect(nestedData.products.fruits.type).toBe('edible') + expect(nestedData.products.fruits.items[0].name).toBe('apple') + expect(nestedData.products.vegetables.items[0].type).toBe('root') + }) + }) +}) \ No newline at end of file