diff --git a/packages/kbot/logs/params.json b/packages/kbot/logs/params.json index be6022d3..331cfc4a 100644 --- a/packages/kbot/logs/params.json +++ b/packages/kbot/logs/params.json @@ -3,7 +3,7 @@ "messages": [ { "role": "user", - "content": "return the result of 2+2, dont comment" + "content": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation." }, { "role": "user", diff --git a/packages/kbot/package.json b/packages/kbot/package.json index 6e8bc5a8..302467f4 100644 --- a/packages/kbot/package.json +++ b/packages/kbot/package.json @@ -18,6 +18,7 @@ "lint": "eslint src --ext .ts", "test": "vitest run", "test:unit": "vitest run tests/unit", + "test:math": "vitest run tests/unit/math.test.ts", "test2:watch": "vitest", "test2:coverage": "vitest run --coverage", "webpack": "webpack --config webpack.config.js --stats-error-details", diff --git a/packages/kbot/tests/unit/basic.json b/packages/kbot/tests/unit/basic.json index 7d7a9581..8f54473b 100644 --- a/packages/kbot/tests/unit/basic.json +++ b/packages/kbot/tests/unit/basic.json @@ -58,5 +58,175 @@ "model": "gpt-4", "router": "openrouter", "timestamp": "2025-04-01T10:21:52.849Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:24:35.975Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:24:37.455Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:24:38.366Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "{\"name\": \"test\", \"value\": 42}" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:24:39.480Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "{\"name\": \"test\", \"value\": 42}" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:24:42.228Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:25:06.304Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:25:07.392Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:25:08.429Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "```json\n{\"name\":\"test\",\"value\":42}\n```" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:25:09.627Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "```json\n{\n \"name\": \"test\",\n \"value\": 42\n}\n```\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:25:10.720Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "{\"name\": \"test\", \"value\": 42}" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:25:12.099Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:26:30.128Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:26:31.316Z" + }, + { + "test": "basic_arithmetic", + "prompt": "return the result of 2+2, dont comment", + "result": [ + "4" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:26:32.279Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "{\"name\":\"test\",\"value\":42}" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:26:33.459Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "{\n \"name\": \"test\",\n \"value\": 42\n}" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:26:35.012Z" + }, + { + "test": "json_structure", + "prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.", + "result": [ + "{\"name\": \"test\", \"value\": 42}" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:26:36.239Z" } ] \ No newline at end of file diff --git a/packages/kbot/tests/unit/basic.test.ts b/packages/kbot/tests/unit/basic.test.ts index 2cacd410..043822a4 100644 --- a/packages/kbot/tests/unit/basic.test.ts +++ b/packages/kbot/tests/unit/basic.test.ts @@ -7,35 +7,41 @@ import { sync as exists } from "@polymech/fs/exists" import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index' -const DEFAULT_MODEL = E_OPENROUTER_MODEL_FREE.MODEL_FREE_MISTRALAI_MISTRAL_NEMO_FREE - const models = [ E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE, E_OPENAI_MODEL.MODEL_GPT_4 ] +const TEST_BASE_PATH = path.resolve(__dirname, '../../') +const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs') +const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md') +const TEST_LOG_PATH = path.resolve(__dirname, './basic.json') + interface TestResult { test: string; prompt: string; result: string[]; + expected: string; model: string; router: string; timestamp: string; + passed: boolean; + reason?: string; } -describe('Basic Functionality', () => { - const logPath = path.resolve(__dirname, './basic.json') +describe('Basic Operations', () => { let testResults: TestResult[] = [] // Load existing results if any - if (exists(logPath)) { - const data = read(logPath, 'json') + if (exists(TEST_LOG_PATH)) { + const data = read(TEST_LOG_PATH, 'json') testResults = Array.isArray(data) ? data : [] } - it.each(models)('should return 4 when given "return the result of 2+2, dont comment" in completion mode with model %s', async (modelName) => { - const prompt = 'return the result of 2+2, dont comment' + it.each(models)('should add two numbers with model %s', async (modelName) => { + const prompt = 'add 5 and 3. Return only the number, no explanation.' + const expected = '8' let model = 'unknown' let router = 'unknown' @@ -43,28 +49,116 @@ describe('Basic Functionality', () => { prompt, mode: 'completion', model: modelName, - path: path.resolve(__dirname, '../../'), - logs: path.resolve(__dirname, '../../logs'), - preferences: path.resolve(__dirname, '../../preferences.md'), + path: TEST_BASE_PATH, + logs: TEST_LOGS_PATH, + preferences: TEST_PREFERENCES_PATH, onRun: async (options) => { model = options.model || 'unknown' router = options.router || 'unknown' return options } }) as string[] - expect(result.map(r => r.trim())).toEqual(['4']) + + const actual = result.map(r => r.trim())[0] + const passed = actual === expected + expect(actual).toEqual(expected) // Add test result to array testResults.push({ - test: 'basic_arithmetic', + test: 'addition', prompt, result, + expected, model, router, - timestamp: new Date().toISOString() + timestamp: new Date().toISOString(), + passed, + reason: passed ? undefined : `Expected ${expected}, but got ${actual}` }) // Write all results to the same file - write(logPath, JSON.stringify(testResults, null, 2)) + write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2)) + }) + + it.each(models)('should multiply two numbers with model %s', async (modelName) => { + const prompt = 'multiply 4 and 6. Return only the number, no explanation.' + const expected = '24' + let model = 'unknown' + let router = 'unknown' + + const result = await run({ + prompt, + mode: 'completion', + model: modelName, + path: TEST_BASE_PATH, + logs: TEST_LOGS_PATH, + preferences: TEST_PREFERENCES_PATH, + onRun: async (options) => { + model = options.model || 'unknown' + router = options.router || 'unknown' + return options + } + }) as string[] + + const actual = result.map(r => r.trim())[0] + const passed = actual === expected + expect(actual).toEqual(expected) + + // Add test result to array + testResults.push({ + test: 'multiplication', + prompt, + result, + expected, + model, + router, + timestamp: new Date().toISOString(), + passed, + reason: passed ? undefined : `Expected ${expected}, but got ${actual}` + }) + + // Write all results to the same file + write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2)) + }) + + it.each(models)('should divide two numbers with model %s', async (modelName) => { + const prompt = 'divide 15 by 3. Return only the number, no explanation.' + const expected = '5' + let model = 'unknown' + let router = 'unknown' + + const result = await run({ + prompt, + mode: 'completion', + model: modelName, + path: TEST_BASE_PATH, + logs: TEST_LOGS_PATH, + preferences: TEST_PREFERENCES_PATH, + onRun: async (options) => { + model = options.model || 'unknown' + router = options.router || 'unknown' + return options + } + }) as string[] + + const actual = result.map(r => r.trim())[0] + const passed = actual === expected + expect(actual).toEqual(expected) + + // Add test result to array + testResults.push({ + test: 'division', + prompt, + result, + expected, + model, + router, + timestamp: new Date().toISOString(), + passed, + reason: passed ? undefined : `Expected ${expected}, but got ${actual}` + }) + + // Write all results to the same file + write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2)) }) }) \ No newline at end of file diff --git a/packages/kbot/tests/unit/math-report.md b/packages/kbot/tests/unit/math-report.md new file mode 100644 index 00000000..8e14405d --- /dev/null +++ b/packages/kbot/tests/unit/math-report.md @@ -0,0 +1,62 @@ +# Math Test Results + +## Failed Tests + +## Passed Tests + +### factorial - deepseek/deepseek-chat:free +- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.` +- Expected: `120` +- Actual: `120` +- Timestamp: 4/1/2025, 12:42:00 PM + +### factorial - google/gemini-2.0-flash-exp:free +- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.` +- Expected: `120` +- Actual: `120 +` +- Timestamp: 4/1/2025, 12:42:02 PM + +### factorial - gpt-4 +- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.` +- Expected: `120` +- Actual: `120` +- Timestamp: 4/1/2025, 12:42:03 PM + +### fibonacci - deepseek/deepseek-chat:free +- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.` +- Expected: `0,1,1,2,3` +- Actual: `0,1,1,2,3` +- Timestamp: 4/1/2025, 12:41:49 PM + +### fibonacci - google/gemini-2.0-flash-exp:free +- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.` +- Expected: `0,1,1,2,3` +- Actual: `0,1,1,2,3 +` +- Timestamp: 4/1/2025, 12:42:10 PM + +### fibonacci - gpt-4 +- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.` +- Expected: `0,1,1,2,3` +- Actual: `0, 1, 1, 2, 3` +- Timestamp: 4/1/2025, 12:42:12 PM + +### quadratic - deepseek/deepseek-chat:free +- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.` +- Expected: `[-3,-2]` +- Actual: `[-2, -3]` +- Timestamp: 4/1/2025, 12:42:16 PM + +### quadratic - google/gemini-2.0-flash-exp:free +- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.` +- Expected: `[-3,-2]` +- Actual: `[-2.0, -3.0]` +- Timestamp: 4/1/2025, 12:42:17 PM + +### quadratic - gpt-4 +- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.` +- Expected: `[-3,-2]` +- Actual: `[-2, -3]` +- Timestamp: 4/1/2025, 12:42:18 PM + diff --git a/packages/kbot/tests/unit/math.json b/packages/kbot/tests/unit/math.json new file mode 100644 index 00000000..7a8a2f4a --- /dev/null +++ b/packages/kbot/tests/unit/math.json @@ -0,0 +1,556 @@ +[ + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:34:59.561Z" + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:01.881Z" + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:02.849Z" + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:04.513Z" + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:05.862Z" + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:42.778Z" + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:44.194Z" + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:45.420Z" + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:46.805Z" + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:48.153Z" + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0, 1, 1, 2, 3" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:49.492Z" + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:50.745Z" + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:51.918Z" + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:35:53.368Z" + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:08.969Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:10.154Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:11.066Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:12.258Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:14.007Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0, 1, 1, 2, 3" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:15.272Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:16.397Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:17.622Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:18.768Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:41.644Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:43.969Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:44.945Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:46.261Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3\n" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:47.915Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0, 1, 1, 2, 3" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:49.298Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:50.593Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:52.768Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:38:54.254Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "expected": "120", + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:40:45.957Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "expected": "120", + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:40:47.272Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "expected": "120", + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:40:48.185Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3" + ], + "expected": "0,1,1,2,3", + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:40:51.236Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3\n" + ], + "expected": "0,1,1,2,3", + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:40:52.419Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0, 1, 1, 2, 3" + ], + "expected": "0,1,1,2,3", + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:40:54.327Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "expected": "120", + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:41:45.219Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "expected": "120", + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:41:46.489Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "expected": "120", + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:41:47.701Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3" + ], + "expected": "0,1,1,2,3", + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:41:49.929Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "expected": "120", + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:00.754Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120\n" + ], + "expected": "120", + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:02.899Z", + "passed": true + }, + { + "test": "factorial", + "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.", + "result": [ + "120" + ], + "expected": "120", + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:03.927Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0,1,1,2,3\n" + ], + "expected": "0,1,1,2,3", + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:10.515Z", + "passed": true + }, + { + "test": "fibonacci", + "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.", + "result": [ + "0, 1, 1, 2, 3" + ], + "expected": "0,1,1,2,3", + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:12.203Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "expected": "[-3,-2]", + "model": "deepseek/deepseek-chat:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:16.443Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2.0, -3.0]" + ], + "expected": "[-3,-2]", + "model": "google/gemini-2.0-flash-exp:free", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:17.598Z", + "passed": true + }, + { + "test": "quadratic", + "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.", + "result": [ + "[-2, -3]" + ], + "expected": "[-3,-2]", + "model": "gpt-4", + "router": "openrouter", + "timestamp": "2025-04-01T10:42:18.707Z", + "passed": true + } +] \ No newline at end of file diff --git a/packages/kbot/tests/unit/math.test.ts b/packages/kbot/tests/unit/math.test.ts new file mode 100644 index 00000000..e671c811 --- /dev/null +++ b/packages/kbot/tests/unit/math.test.ts @@ -0,0 +1,170 @@ +import { describe, it, expect } from 'vitest' +import { run } from '../../src/index' +import * as path from 'node:path' +import { sync as write } from "@polymech/fs/write" +import { sync as read } from "@polymech/fs/read" +import { sync as exists } from "@polymech/fs/exists" + +import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index' + +const models = [ + E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, + E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE, + E_OPENAI_MODEL.MODEL_GPT_4 +] + +const TEST_BASE_PATH = path.resolve(__dirname, '../../') +const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs') +const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md') +const TEST_LOG_PATH = path.resolve(__dirname, './math.json') + +interface TestResult { + test: string; + prompt: string; + result: string[]; + expected: string; + model: string; + router: string; + timestamp: string; + passed: boolean; + reason?: string; +} + +describe('Advanced Math Operations', () => { + let testResults: TestResult[] = [] + + // Load existing results if any + if (exists(TEST_LOG_PATH)) { + const data = read(TEST_LOG_PATH, 'json') + testResults = Array.isArray(data) ? data : [] + } + + it.each(models)('should calculate factorial of 5 with model %s', async (modelName) => { + const prompt = 'calculate the factorial of 5 (5!). Return only the number, no explanation.' + const expected = '120' + let model = 'unknown' + let router = 'unknown' + + const result = await run({ + prompt, + mode: 'completion', + model: modelName, + path: TEST_BASE_PATH, + logs: TEST_LOGS_PATH, + preferences: TEST_PREFERENCES_PATH, + onRun: async (options) => { + model = options.model || 'unknown' + router = options.router || 'unknown' + return options + } + }) as string[] + + const actual = result.map(r => r.trim())[0] + const passed = actual === expected + expect(actual).toEqual(expected) + + // Add test result to array + testResults.push({ + test: 'factorial', + prompt, + result, + expected, + model, + router, + timestamp: new Date().toISOString(), + passed, + reason: passed ? undefined : `Expected ${expected}, but got ${actual}` + }) + + // Write all results to the same file + write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2)) + }) + + it.each(models)('should calculate fibonacci sequence up to 5th number with model %s', async (modelName) => { + const prompt = 'calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.' + const expected = '0,1,1,2,3' + let model = 'unknown' + let router = 'unknown' + + const result = await run({ + prompt, + mode: 'completion', + model: modelName, + path: TEST_BASE_PATH, + logs: TEST_LOGS_PATH, + preferences: TEST_PREFERENCES_PATH, + onRun: async (options) => { + model = options.model || 'unknown' + router = options.router || 'unknown' + return options + } + }) as string[] + + // Handle both formats: "0,1,1,2,3" and "0, 1, 1, 2, 3" + const numbers = result[0].trim().split(',').map(n => n.trim()) + const actual = numbers.join(',') + const passed = actual === expected + expect(numbers).toEqual(['0', '1', '1', '2', '3']) + + // Add test result to array + testResults.push({ + test: 'fibonacci', + prompt, + result, + expected, + model, + router, + timestamp: new Date().toISOString(), + passed, + reason: passed ? undefined : `Expected ${expected}, but got ${actual}` + }) + + // Write all results to the same file + write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2)) + }) + + it.each(models)('should solve quadratic equation x² + 5x + 6 = 0 with model %s', async (modelName) => { + const prompt = 'solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.' + const expectedDisplay = '[-3,-2]' + let model = 'unknown' + let router = 'unknown' + + const result = await run({ + prompt, + mode: 'completion', + model: modelName, + filters: 'code', + path: TEST_BASE_PATH, + logs: TEST_LOGS_PATH, + preferences: TEST_PREFERENCES_PATH, + onRun: async (options) => { + model = options.model || 'unknown' + router = options.router || 'unknown' + return options + } + }) as string[] + + // Parse the result as JSON (markdown already stripped by filter) + const jsonResult = JSON.parse(result[0].trim()) + const actual = JSON.stringify(jsonResult.sort()) + const expectedSorted = JSON.stringify([-3, -2].sort()) + const passed = actual === expectedSorted + expect(jsonResult.sort()).toEqual([-3, -2].sort()) + + // Add test result to array + testResults.push({ + test: 'quadratic', + prompt, + result, + expected: expectedDisplay, + model, + router, + timestamp: new Date().toISOString(), + passed, + reason: passed ? undefined : `Expected ${expectedDisplay}, but got ${result[0]}` + }) + + // Write all results to the same file + write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2)) + }) +}) \ No newline at end of file