tests:language - merci, habsch selbstgemacht :)

2025-04-01 13:02:25 +02:00 · 2025-04-01 13:02:25 +02:00 · 91517c7ac1
commit 91517c7ac1
parent 4ec4053898
12 changed files with 2129 additions and 81 deletions
--- a/packages/kbot/logs/params.json
+++ b/packages/kbot/logs/params.json
@ -3,7 +3,7 @@
  "messages": [
    {
      "role": "user",
-      "content": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation."
+      "content": "divide 15 by 3. Return only the number, no explanation."
    },
    {
      "role": "user",
--- a/packages/kbot/package.json
+++ b/packages/kbot/package.json
@ -17,8 +17,9 @@
    "dev": "tsc -p . --watch",
    "lint": "eslint src --ext .ts",
    "test": "vitest run",
-    "test:unit": "vitest run tests/unit",
+    "test:basic": "vitest run tests/unit/basic.test.ts",
    "test:math": "vitest run tests/unit/math.test.ts",
+    "test:language": "vitest run tests/unit/language.test.ts",
    "test2:watch": "vitest",
    "test2:coverage": "vitest run --coverage",
    "webpack": "webpack --config webpack.config.js --stats-error-details",
--- a/packages/kbot/tests/unit/basic-report.md
+++ b/packages/kbot/tests/unit/basic-report.md
@ -0,0 +1,109 @@
+# Basic Operations Test Results
+
+## Failed Tests
+
+### basic_arithmetic - deepseek/deepseek-chat:free
+- Prompt: `return the result of 2+2, dont comment`
+- Expected: `undefined`
+- Actual: `4`
+- Reason: undefined
+- Timestamp: 4/1/2025, 12:26:30 PM
+
+### basic_arithmetic - google/gemini-2.0-flash-exp:free
+- Prompt: `return the result of 2+2, dont comment`
+- Expected: `undefined`
+- Actual: `4
+`
+- Reason: undefined
+- Timestamp: 4/1/2025, 12:26:31 PM
+
+### basic_arithmetic - gpt-4
+- Prompt: `return the result of 2+2, dont comment`
+- Expected: `undefined`
+- Actual: `4`
+- Reason: undefined
+- Timestamp: 4/1/2025, 12:26:32 PM
+
+### json_structure - deepseek/deepseek-chat:free
+- Prompt: `return a JSON object with two fields: "name" as "test" and "value" as 42. Return only the JSON, no other text.`
+- Expected: `undefined`
+- Actual: `{"name":"test","value":42}`
+- Reason: undefined
+- Timestamp: 4/1/2025, 12:26:33 PM
+
+### json_structure - gpt-4
+- Prompt: `return a JSON object with two fields: "name" as "test" and "value" as 42. Return only the JSON, no other text.`
+- Expected: `undefined`
+- Actual: `{"name": "test", "value": 42}`
+- Reason: undefined
+- Timestamp: 4/1/2025, 12:26:36 PM
+
+### json_structure - google/gemini-2.0-flash-exp:free
+- Prompt: `return a JSON object with two fields: "name" as "test" and "value" as 42. Return only the JSON, no other text.`
+- Expected: `undefined`
+- Actual: `{
+  "name": "test",
+  "value": 42
+}`
+- Reason: undefined
+- Timestamp: 4/1/2025, 12:26:35 PM
+
+## Passed Tests
+
+### addition - deepseek/deepseek-chat:free
+- Prompt: `add 5 and 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Timestamp: 4/1/2025, 12:59:06 PM
+
+### addition - google/gemini-2.0-flash-exp:free
+- Prompt: `add 5 and 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8
+`
+- Timestamp: 4/1/2025, 12:59:08 PM
+
+### addition - gpt-4
+- Prompt: `add 5 and 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Timestamp: 4/1/2025, 1:00:35 PM
+
+### multiplication - deepseek/deepseek-chat:free
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: `24`
+- Timestamp: 4/1/2025, 12:59:13 PM
+
+### multiplication - google/gemini-2.0-flash-exp:free
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: `24
+`
+- Timestamp: 4/1/2025, 12:59:15 PM
+
+### multiplication - gpt-4
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: `24`
+- Timestamp: 4/1/2025, 1:00:37 PM
+
+### division - deepseek/deepseek-chat:free
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5`
+- Timestamp: 4/1/2025, 12:59:18 PM
+
+### division - google/gemini-2.0-flash-exp:free
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5
+`
+- Timestamp: 4/1/2025, 12:56:09 PM
+
+### division - gpt-4
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5`
+- Timestamp: 4/1/2025, 1:00:40 PM
+
--- a/packages/kbot/tests/unit/basic.json
+++ b/packages/kbot/tests/unit/basic.json
@ -228,5 +228,557 @@
    "model": "gpt-4",
    "router": "openrouter",
    "timestamp": "2025-04-01T10:26:36.239Z"
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:03.882Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8\n"
+    ],
+    "expected": "8",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:05.343Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:06.365Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:07.564Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24\n"
+    ],
+    "expected": "24",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:09.013Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:09.968Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:10.981Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5\n"
+    ],
+    "expected": "5",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:12.829Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:13.957Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:47.732Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8\n"
+    ],
+    "expected": "8",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:50.075Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:50.977Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:52.835Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24\n"
+    ],
+    "expected": "24",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:54.875Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:55.866Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:56.950Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:59.317Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:01.223Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8\n"
+    ],
+    "expected": "8",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:02.717Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:03.867Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:04.864Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24\n"
+    ],
+    "expected": "24",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:05.973Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:07.075Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:08.103Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5\n"
+    ],
+    "expected": "5",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:09.298Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:10.178Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:32.641Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 4 and 6. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:35.038Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:37.567Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:06.742Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8\n"
+    ],
+    "expected": "8",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:08.889Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:09.871Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:13.627Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [
+      "24\n"
+    ],
+    "expected": "24",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:15.963Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:17.124Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:18.527Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:20.298Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:58.244Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:00.620Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:03.143Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:10.210Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:12.798Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:15.604Z",
+    "passed": true
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [
+      "8"
+    ],
+    "expected": "8",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:35.256Z",
+    "passed": true
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [
+      "24"
+    ],
+    "expected": "24",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:37.681Z",
+    "passed": true
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [
+      "5"
+    ],
+    "expected": "5",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T11:00:40.556Z",
+    "passed": true
  }
 ]
--- a/packages/kbot/tests/unit/basic.test.ts
+++ b/packages/kbot/tests/unit/basic.test.ts
@ -4,32 +4,10 @@ import * as path from 'node:path'
 import { sync as write } from "@polymech/fs/write"
 import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
+import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'

-import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL  } from '../../src/index'
-
-const models = [
-    E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, 
-    E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
-    E_OPENAI_MODEL.MODEL_GPT_4
-]
-
-const TEST_BASE_PATH = path.resolve(__dirname, '../../')
-const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
-const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
 const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')

-interface TestResult {
-  test: string;
-  prompt: string;
-  result: string[];
-  expected: string;
-  model: string;
-  router: string;
-  timestamp: string;
-  passed: boolean;
-  reason?: string;
-}
-
 describe('Basic Operations', () => {
  let testResults: TestResult[] = []
  
@ -58,8 +36,8 @@ describe('Basic Operations', () => {
        return options
      }
    }) as string[]    
-    
-    const actual = result.map(r => r.trim())[0]
+
+    const actual = result?.[0]?.trim() || ''
    const passed = actual === expected
    expect(actual).toEqual(expected)

@ -67,13 +45,13 @@ describe('Basic Operations', () => {
    testResults.push({
      test: 'addition',
      prompt,
-      result,
+      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
+      reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
    })

    // Write all results to the same file
@ -81,7 +59,7 @@ describe('Basic Operations', () => {
  })

  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
-    const prompt = 'multiply 4 and 6. Return only the number, no explanation.'
+    const prompt = 'multiply 8 and 3. Return only the number, no explanation.'
    const expected = '24'
    let model = 'unknown'
    let router = 'unknown'
@ -98,9 +76,9 @@ describe('Basic Operations', () => {
        router = options.router || 'unknown'
        return options
      }
-    }) as string[]    
+    }) as string[]

-    const actual = result.map(r => r.trim())[0]
+    const actual = result?.[0]?.trim() || ''
    const passed = actual === expected
    expect(actual).toEqual(expected)

@ -108,13 +86,13 @@ describe('Basic Operations', () => {
    testResults.push({
      test: 'multiplication',
      prompt,
-      result,
+      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
+      reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
    })

    // Write all results to the same file
@ -139,9 +117,9 @@ describe('Basic Operations', () => {
        router = options.router || 'unknown'
        return options
      }
-    }) as string[]    
+    }) as string[]

-    const actual = result.map(r => r.trim())[0]
+    const actual = result?.[0]?.trim() || ''
    const passed = actual === expected
    expect(actual).toEqual(expected)

@ -149,16 +127,72 @@ describe('Basic Operations', () => {
    testResults.push({
      test: 'division',
      prompt,
-      result,
+      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
+      reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
    })

    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  })
+
+  it('should generate markdown report', () => {
+    // Two-level index: test name -> model name -> the most recent TestResult for that pair
+    const latestResults = new Map<string, Map<string, TestResult>>()
+    
+    // Collapse repeated runs so each test+model pair contributes exactly one entry
+    testResults.forEach(result => {
+      if (!latestResults.has(result.test)) {
+        latestResults.set(result.test, new Map())
+      }
+      const testMap = latestResults.get(result.test)! // non-null: the entry was created just above if it was missing
+      const existingResult = testMap.get(result.model)
+      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) { // strictly newer wins; on equal timestamps the first entry seen is kept
+        testMap.set(result.model, result)
+      }
+    })
+
+    // Build the markdown text in memory; it is written to disk in one call at the end
+    let report = '# Basic Operations Test Results\n\n'
+    
+    // Failed section first: every latest entry whose `passed` flag is falsy
+    report += '## Failed Tests\n\n'
+    for (const [testName, modelResults] of latestResults) {
+      for (const [model, result] of modelResults) {
+        if (!result.passed) { // also matches entries that have no `passed` field at all
+          report += `### ${testName} - ${model}\n`
+          report += `- Prompt: \`${result.prompt}\`\n`
+          report += `- Expected: \`${result.expected}\`\n`
+          report += `- Actual: \`${result.result[0] || ''}\`\n` // NOTE(review): result[0] is written untrimmed, so a trailing newline from the model lands inside the backticks
+          report += `- Reason: ${result.reason}\n` // NOTE(review): prints the literal text "undefined" when `reason` is unset
+          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n` // formatted with the runtime's default locale and time zone
+        }
+      }
+    }
+
+    // Passed section second: same layout as above minus the Reason line
+    report += '## Passed Tests\n\n'
+    for (const [testName, modelResults] of latestResults) {
+      for (const [model, result] of modelResults) {
+        if (result.passed) {
+          report += `### ${testName} - ${model}\n`
+          report += `- Prompt: \`${result.prompt}\`\n`
+          report += `- Expected: \`${result.expected}\`\n`
+          report += `- Actual: \`${result.result[0] || ''}\`\n` // NOTE(review): untrimmed here as well, although the assertions compare the trimmed value
+          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
+        }
+      }
+    }
+
+    // Write the report next to this test file as basic-report.md
+    const reportPath = path.resolve(__dirname, './basic-report.md')
+    write(reportPath, report)
+
+    // The only assertion in this test: exists() must report 'file' for the report path
+    expect(exists(reportPath) === 'file').toBe(true)
+  })
 }) 
--- a/packages/kbot/tests/unit/commons.ts
+++ b/packages/kbot/tests/unit/commons.ts
@ -0,0 +1,24 @@
+import * as path from 'node:path'
+import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
+
+export const models = [ // Model identifiers shared by the unit suites; basic.test.ts passes this list to it.each
+    E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, 
+    E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
+    E_OPENAI_MODEL.MODEL_GPT_4
+]
+
+export const TEST_BASE_PATH = path.resolve(__dirname, '../../') // Absolute path two directories above this file's directory (__dirname)
+export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs') // The `logs` entry directly under that same base directory
+export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md') // The `preferences.md` entry directly under that same base directory
+
+export interface TestResult { // One recorded model run; basic.test.ts pushes these into an array and serialises it to JSON
+  test: string; // Test case name, e.g. 'addition'
+  prompt: string; // Prompt string recorded for this run
+  result: string[]; // Raw model output; basic.test.ts compares only the trimmed first entry
+  expected: string; // Value the trimmed first result entry is compared against
+  model: string; // Model name; basic.test.ts initialises it to 'unknown'
+  router: string; // Router name; basic.test.ts initialises it to 'unknown'
+  timestamp: string; // ISO-8601 string produced by new Date().toISOString()
+  passed: boolean; // true when the trimmed actual value strictly equals `expected`
+  reason?: string; // Failure explanation; left undefined when the run passed
+} 
--- a/packages/kbot/tests/unit/language-report.md
+++ b/packages/kbot/tests/unit/language-report.md
@ -0,0 +1,113 @@
+# Language Test Results
+
+## Failed Tests
+
+## Passed Tests
+
+### german_translation - deepseek/deepseek-chat:free
+- Prompt: `translate "hello" to German. Return only the translation, no explanation.`
+- Expected: `hallo`
+- Actual: `Hallo`
+- Timestamp: 4/1/2025, 12:56:01 PM
+
+### german_translation - google/gemini-2.0-flash-exp:free
+- Prompt: `translate "hello" to German. Return only the translation, no explanation.`
+- Expected: `hallo`
+- Actual: `Hallo
+`
+- Timestamp: 4/1/2025, 12:56:02 PM
+
+### german_translation - gpt-4
+- Prompt: `translate "hello" to German. Return only the translation, no explanation.`
+- Expected: `hallo`
+- Actual: `Hallo`
+- Timestamp: 4/1/2025, 12:56:32 PM
+
+### spanish_translation - deepseek/deepseek-chat:free
+- Prompt: `translate "yes" to Spanish. Return only the translation, no explanation.`
+- Expected: `sí`
+- Actual: `sí`
+- Timestamp: 4/1/2025, 12:56:05 PM
+
+### spanish_translation - google/gemini-2.0-flash-exp:free
+- Prompt: `translate "yes" to Spanish. Return only the translation, no explanation.`
+- Expected: `sí`
+- Actual: `sí
+`
+- Timestamp: 4/1/2025, 12:56:06 PM
+
+### spanish_translation - gpt-4
+- Prompt: `translate "yes" to Spanish. Return only the translation, no explanation.`
+- Expected: `sí`
+- Actual: `sí`
+- Timestamp: 4/1/2025, 12:56:35 PM
+
+### french_translation - deepseek/deepseek-chat:free
+- Prompt: `translate "no" to French. Return only the translation, no explanation.`
+- Expected: `non`
+- Actual: `non`
+- Timestamp: 4/1/2025, 12:56:08 PM
+
+### french_translation - google/gemini-2.0-flash-exp:free
+- Prompt: `translate "no" to French. Return only the translation, no explanation.`
+- Expected: `non`
+- Actual: `non
+`
+- Timestamp: 4/1/2025, 12:56:10 PM
+
+### french_translation - gpt-4
+- Prompt: `translate "no" to French. Return only the translation, no explanation.`
+- Expected: `non`
+- Actual: `non`
+- Timestamp: 4/1/2025, 12:56:37 PM
+
+### german - deepseek/deepseek-chat:free
+- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
+- Expected: `hallo`
+- Actual: `Hallo`
+- Timestamp: 4/1/2025, 12:59:07 PM
+
+### german - google/gemini-2.0-flash-exp:free
+- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
+- Expected: `hallo`
+- Actual: `Hallo
+`
+- Timestamp: 4/1/2025, 12:59:09 PM
+
+### german - gpt-4
+- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
+- Expected: `hallo`
+- Actual: `Hallo`
+- Timestamp: 4/1/2025, 12:59:11 PM
+
+### spanish - deepseek/deepseek-chat:free
+- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
+- Expected: `sí`
+- Actual: `Sí`
+- Timestamp: 4/1/2025, 12:59:12 PM
+
+### spanish - google/gemini-2.0-flash-exp:free
+- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
+- Expected: `sí`
+- Actual: `Sí
+`
+- Timestamp: 4/1/2025, 12:59:14 PM
+
+### spanish - gpt-4
+- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
+- Expected: `sí`
+- Actual: `sí`
+- Timestamp: 4/1/2025, 12:59:15 PM
+
+### french - deepseek/deepseek-chat:free
+- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
+- Expected: `non`
+- Actual: `non`
+- Timestamp: 4/1/2025, 12:59:18 PM
+
+### french - gpt-4
+- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
+- Expected: `non`
+- Actual: `non`
+- Timestamp: 4/1/2025, 12:59:20 PM
+
--- a/packages/kbot/tests/unit/language.json
+++ b/packages/kbot/tests/unit/language.json
@ -0,0 +1,530 @@
+[
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:01.527Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo\n"
+    ],
+    "expected": "hallo",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:03.169Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:04.655Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:05.726Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí\n"
+    ],
+    "expected": "sí",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:08.264Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:09.514Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:11.281Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non\n"
+    ],
+    "expected": "non",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:12.942Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:50:14.089Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:51:53.360Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo\n"
+    ],
+    "expected": "hallo",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:51:54.551Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:51:55.699Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:51:56.740Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "Sí\n"
+    ],
+    "expected": "sí",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:51:57.918Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "Sí"
+    ],
+    "expected": "sí",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:51:58.882Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:52:00.119Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "Non\n"
+    ],
+    "expected": "non",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:52:01.184Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:52:02.204Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:50.924Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo\n"
+    ],
+    "expected": "hallo",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:52.477Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:53.546Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:56.815Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:58.718Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:54:01.673Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:01.199Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo\n"
+    ],
+    "expected": "hallo",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:02.857Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:03.788Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:05.218Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí\n"
+    ],
+    "expected": "sí",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:06.317Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:07.436Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:08.879Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non\n"
+    ],
+    "expected": "non",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:10.365Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:11.295Z",
+    "passed": true
+  },
+  {
+    "test": "german_translation",
+    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:32.735Z",
+    "passed": true
+  },
+  {
+    "test": "spanish_translation",
+    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:35.276Z",
+    "passed": true
+  },
+  {
+    "test": "french_translation",
+    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:37.673Z",
+    "passed": true
+  },
+  {
+    "test": "german",
+    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:07.944Z",
+    "passed": true
+  },
+  {
+    "test": "german",
+    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
+    "result": [
+      "Hallo\n"
+    ],
+    "expected": "hallo",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:09.471Z",
+    "passed": true
+  },
+  {
+    "test": "german",
+    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
+    "result": [
+      "Hallo"
+    ],
+    "expected": "hallo",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:11.335Z",
+    "passed": true
+  },
+  {
+    "test": "spanish",
+    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
+    "result": [
+      "Sí"
+    ],
+    "expected": "sí",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:12.740Z",
+    "passed": true
+  },
+  {
+    "test": "spanish",
+    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
+    "result": [
+      "Sí\n"
+    ],
+    "expected": "sí",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:14.246Z",
+    "passed": true
+  },
+  {
+    "test": "spanish",
+    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
+    "result": [
+      "sí"
+    ],
+    "expected": "sí",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:15.205Z",
+    "passed": true
+  },
+  {
+    "test": "french",
+    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:18.139Z",
+    "passed": true
+  },
+  {
+    "test": "french",
+    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
+    "result": [
+      "non"
+    ],
+    "expected": "non",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:20.589Z",
+    "passed": true
+  }
+]
--- a/packages/kbot/tests/unit/language.test.ts
+++ b/packages/kbot/tests/unit/language.test.ts
@ -0,0 +1,200 @@
+import { describe, it, expect } from 'vitest'
+import { run } from '../../src/index'
+import * as path from 'node:path'
+import { sync as write } from "@polymech/fs/write"
+import { sync as read } from "@polymech/fs/read"
+import { sync as exists } from "@polymech/fs/exists"
+import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'
+
+import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL  } from '../../src/index'
+
+const TEST_LOG_PATH = path.resolve(__dirname, './language.json')
+
+describe('Language Capabilities', () => {
+  // Every run appends to this list, which is persisted to language.json.
+  let testResults: TestResult[] = []
+
+  // Seed the list with results from earlier runs; anything that is not an array is discarded.
+  if (exists(TEST_LOG_PATH)) {
+    const data = read(TEST_LOG_PATH, 'json')
+    testResults = Array.isArray(data) ? data : []
+  }
+
+  /**
+   * Normalizes a model response, appends the outcome to `testResults` and
+   * rewrites the JSON log.
+   *
+   * Callers must assert on the returned value *after* this call: a failing
+   * `expect` throws, so asserting first would skip the log entry and the
+   * report's "Failed Tests" section could never list that failure.
+   *
+   * @param test - Short test id stored in the log entry (e.g. 'german').
+   * @param prompt - Prompt that was sent to the model.
+   * @param expected - Expected answer; compared against the trimmed, lower-cased response.
+   * @param result - Raw value returned by `run`; only the first element is compared.
+   * @param model - Model name captured in the `onRun` callback.
+   * @param router - Router name captured in the `onRun` callback.
+   * @returns The trimmed, lower-cased first response, or '' when there is none.
+   */
+  const recordResult = (
+    test: string,
+    prompt: string,
+    expected: string,
+    result: string[] | undefined,
+    model: string,
+    router: string
+  ): string => {
+    const actual = result?.[0]?.trim()?.toLowerCase() || ''
+    const passed = actual === expected
+
+    // Add test result to array
+    testResults.push({
+      test,
+      prompt,
+      result: result || [],
+      expected,
+      model,
+      router,
+      timestamp: new Date().toISOString(),
+      passed,
+      reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
+    })
+
+    // Write all results to the same file
+    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
+
+    return actual
+  }
+
+  it.each(models)('should translate "hello" to German with model %s', async (modelName) => {
+    const prompt = 'translate "hello" to German. Return only the translated word, no explanation.'
+    const expected = 'hallo'
+    let model = 'unknown'
+    let router = 'unknown'
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: modelName,
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      // Capture the model/router values passed to the callback for the log entry.
+      onRun: async (options) => {
+        model = options.model || 'unknown'
+        router = options.router || 'unknown'
+        return options
+      }
+    }) as string[]
+
+    // Log first, assert second, so a wrong answer still reaches language.json.
+    const actual = recordResult('german', prompt, expected, result, model, router)
+    expect(actual).toEqual(expected)
+  })
+
+  it.each(models)('should translate "yes" to Spanish with model %s', async (modelName) => {
+    const prompt = 'translate "yes" to Spanish. Return only the translated word, no explanation.'
+    const expected = 'sí'
+    let model = 'unknown'
+    let router = 'unknown'
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: modelName,
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      // Capture the model/router values passed to the callback for the log entry.
+      onRun: async (options) => {
+        model = options.model || 'unknown'
+        router = options.router || 'unknown'
+        return options
+      }
+    }) as string[]
+
+    // Log first, assert second, so a wrong answer still reaches language.json.
+    const actual = recordResult('spanish', prompt, expected, result, model, router)
+    expect(actual).toEqual(expected)
+  })
+
+  it.each(models)('should translate "no" to French with model %s', async (modelName) => {
+    const prompt = 'translate "no" to French. Return only the translated word, no explanation.'
+    const expected = 'non'
+    let model = 'unknown'
+    let router = 'unknown'
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: modelName,
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      // Capture the model/router values passed to the callback for the log entry.
+      onRun: async (options) => {
+        model = options.model || 'unknown'
+        router = options.router || 'unknown'
+        return options
+      }
+    }) as string[]
+
+    // Log first, assert second, so a wrong answer still reaches language.json.
+    const actual = recordResult('french', prompt, expected, result, model, router)
+    expect(actual).toEqual(expected)
+  })
+
+  it('should generate markdown report', () => {
+    // Group results by test and model
+    const latestResults = new Map<string, Map<string, TestResult>>()
+
+    // Get only the latest result for each test+model combination
+    testResults.forEach(result => {
+      if (!latestResults.has(result.test)) {
+        latestResults.set(result.test, new Map())
+      }
+      const testMap = latestResults.get(result.test)!
+      const existingResult = testMap.get(result.model)
+      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
+        testMap.set(result.model, result)
+      }
+    })
+
+    // Renders every latest result whose `passed` flag matches `wantPassed`.
+    // The reason line is only emitted for failures.
+    const renderSection = (wantPassed: boolean): string => {
+      let section = ''
+      for (const [testName, modelResults] of latestResults) {
+        for (const [model, result] of modelResults) {
+          if (Boolean(result.passed) !== wantPassed) {
+            continue
+          }
+          // Trim the raw response: a trailing newline would otherwise split
+          // the inline code span across two lines of the report.
+          const actual = String(result.result[0] || '').trim()
+          section += `### ${testName} - ${model}\n`
+          section += `- Prompt: \`${result.prompt}\`\n`
+          section += `- Expected: \`${result.expected}\`\n`
+          section += `- Actual: \`${actual}\`\n`
+          if (!wantPassed) {
+            // Entries reloaded from JSON have no `reason` key when it was undefined.
+            section += `- Reason: ${result.reason ?? 'not recorded'}\n`
+          }
+          section += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
+        }
+      }
+      return section
+    }
+
+    // Generate markdown report: failed tests first, then passed tests
+    let report = '# Language Test Results\n\n'
+    report += '## Failed Tests\n\n'
+    report += renderSection(false)
+    report += '## Passed Tests\n\n'
+    report += renderSection(true)
+
+    // Write report to file
+    const reportPath = path.resolve(__dirname, './language-report.md')
+    write(reportPath, report)
+
+    // Verify report was written
+    expect(exists(reportPath) === 'file').toBe(true)
+  })
+})
--- a/packages/kbot/tests/unit/math-report.md
+++ b/packages/kbot/tests/unit/math-report.md
@ -8,55 +8,55 @@
 - Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
 - Expected: `120`
 - Actual: `120`
- Timestamp: 4/1/2025, 12:42:00 PM
+- Timestamp: 4/1/2025, 12:59:09 PM

 ### factorial - google/gemini-2.0-flash-exp:free
 - Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
 - Expected: `120`
 - Actual: `120
 `
- Timestamp: 4/1/2025, 12:42:02 PM
+- Timestamp: 4/1/2025, 12:59:10 PM

 ### factorial - gpt-4
 - Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
 - Expected: `120`
 - Actual: `120`
- Timestamp: 4/1/2025, 12:42:03 PM
+- Timestamp: 4/1/2025, 12:59:11 PM

 ### fibonacci - deepseek/deepseek-chat:free
 - Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
 - Expected: `0,1,1,2,3`
 - Actual: `0,1,1,2,3`
- Timestamp: 4/1/2025, 12:41:49 PM
+- Timestamp: 4/1/2025, 12:59:13 PM

 ### fibonacci - google/gemini-2.0-flash-exp:free
 - Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
 - Expected: `0,1,1,2,3`
 - Actual: `0,1,1,2,3
 `
- Timestamp: 4/1/2025, 12:42:10 PM
+- Timestamp: 4/1/2025, 12:59:14 PM

 ### fibonacci - gpt-4
 - Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
 - Expected: `0,1,1,2,3`
 - Actual: `0, 1, 1, 2, 3`
- Timestamp: 4/1/2025, 12:42:12 PM
+- Timestamp: 4/1/2025, 12:59:16 PM

 ### quadratic - deepseek/deepseek-chat:free
 - Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
 - Expected: `[-3,-2]`
 - Actual: `[-2, -3]`
- Timestamp: 4/1/2025, 12:42:16 PM
+- Timestamp: 4/1/2025, 12:59:19 PM

 ### quadratic - google/gemini-2.0-flash-exp:free
 - Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
 - Expected: `[-3,-2]`
- Actual: `[-2.0, -3.0]`
- Timestamp: 4/1/2025, 12:42:17 PM
+- Actual: `[-2, -3]`
+- Timestamp: 4/1/2025, 12:46:13 PM

 ### quadratic - gpt-4
 - Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
 - Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Timestamp: 4/1/2025, 12:42:18 PM
+- Actual: `["-2", "-3"]`
+- Timestamp: 4/1/2025, 12:59:20 PM

--- a/packages/kbot/tests/unit/math.json
+++ b/packages/kbot/tests/unit/math.json
@ -552,5 +552,437 @@
    "router": "openrouter",
    "timestamp": "2025-04-01T10:42:18.707Z",
    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:04.344Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120\n"
+    ],
+    "expected": "120",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:06.496Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:07.467Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:08.798Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3\n"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:09.986Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0, 1, 1, 2, 3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:11.342Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:12.578Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:13.716Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:46:14.857Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:47.605Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120\n"
+    ],
+    "expected": "120",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:49.748Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:50.767Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:52.399Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3\n"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:53.994Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0, 1, 1, 2, 3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:55.463Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:56.689Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:53:59.331Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:01.640Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120\n"
+    ],
+    "expected": "120",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:03.712Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:04.767Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:07.138Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3\n"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:08.328Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0, 1, 1, 2, 3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:09.776Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:10.999Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:14.081Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:32.818Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0, 1, 1, 2, 3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:35.563Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2,-3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:56:37.672Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:09.054Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120\n"
+    ],
+    "expected": "120",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:10.238Z",
+    "passed": true
+  },
+  {
+    "test": "factorial",
+    "prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
+    "result": [
+      "120"
+    ],
+    "expected": "120",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:11.167Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:13.500Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0,1,1,2,3\n"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "google/gemini-2.0-flash-exp:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:14.679Z",
+    "passed": true
+  },
+  {
+    "test": "fibonacci",
+    "prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
+    "result": [
+      "0, 1, 1, 2, 3"
+    ],
+    "expected": "0,1,1,2,3",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:16.094Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[-2, -3]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "deepseek/deepseek-chat:free",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:19.103Z",
+    "passed": true
+  },
+  {
+    "test": "quadratic",
+    "prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
+    "result": [
+      "[\"-2\", \"-3\"]"
+    ],
+    "expected": "[-3,-2]",
+    "model": "gpt-4",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T10:59:20.963Z",
+    "passed": true
  }
 ]
--- a/packages/kbot/tests/unit/math.test.ts
+++ b/packages/kbot/tests/unit/math.test.ts
@ -4,32 +4,12 @@ import * as path from 'node:path'
 import { sync as write } from "@polymech/fs/write"
 import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
+import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'

 import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL  } from '../../src/index'

-const models = [
-    E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, 
-    E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
-    E_OPENAI_MODEL.MODEL_GPT_4
-]
-
-const TEST_BASE_PATH = path.resolve(__dirname, '../../')
-const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
-const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
 const TEST_LOG_PATH = path.resolve(__dirname, './math.json')

-interface TestResult {
-  test: string;
-  prompt: string;
-  result: string[];
-  expected: string;
-  model: string;
-  router: string;
-  timestamp: string;
-  passed: boolean;
-  reason?: string;
-}
-
 describe('Advanced Math Operations', () => {
  let testResults: TestResult[] = []
  
@ -57,9 +37,9 @@ describe('Advanced Math Operations', () => {
        router = options.router || 'unknown'
        return options
      }
-    }) as string[]    
-    
-    const actual = result.map(r => r.trim())[0]
+    }) as string[]
+
+    const actual = result?.[0]?.trim() || ''
    const passed = actual === expected
    expect(actual).toEqual(expected)

@ -67,13 +47,13 @@ describe('Advanced Math Operations', () => {
    testResults.push({
      test: 'factorial',
      prompt,
-      result,
+      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
+      reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
    })

    // Write all results to the same file
@ -101,7 +81,7 @@ describe('Advanced Math Operations', () => {
    }) as string[]    

    // Handle both formats: "0,1,1,2,3" and "0, 1, 1, 2, 3"
-    const numbers = result[0].trim().split(',').map(n => n.trim())
+    const numbers = result?.[0]?.trim()?.split(',')?.map(n => n.trim()) || []
    const actual = numbers.join(',')
    const passed = actual === expected
    expect(numbers).toEqual(['0', '1', '1', '2', '3'])
@ -110,13 +90,13 @@ describe('Advanced Math Operations', () => {
    testResults.push({
      test: 'fibonacci',
      prompt,
-      result,
+      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
+      reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
    })

    // Write all results to the same file
@ -145,7 +125,24 @@ describe('Advanced Math Operations', () => {
    }) as string[]    

    // Parse the result as JSON (markdown already stripped by filter)
-    const jsonResult = JSON.parse(result[0].trim())
+    let jsonResult: number[]
+    try {
+      const resultStr = result?.[0]?.trim() || '[]'
+      if (!resultStr) {
+        throw new Error('No result returned from model')
+      }
+      jsonResult = JSON.parse(resultStr)
+      if (!Array.isArray(jsonResult)) {
+        throw new Error('Result is not an array')
+      }
+      // Convert any string numbers to actual numbers
+      jsonResult = jsonResult.map(n => typeof n === 'string' ? parseFloat(n) : n)
+    } catch (error) {
+      // If parsing fails, try to extract numbers from the string
+      const numbers = result?.[0]?.match(/-?\d+/g)?.map(n => parseInt(n, 10)) || []
+      jsonResult = numbers
+    }
+
    const actual = JSON.stringify(jsonResult.sort())
    const expectedSorted = JSON.stringify([-3, -2].sort())
    const passed = actual === expectedSorted
@ -155,16 +152,72 @@ describe('Advanced Math Operations', () => {
    testResults.push({
      test: 'quadratic',
      prompt,
-      result,
+      result: result || [],
      expected: expectedDisplay,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
-      reason: passed ? undefined : `Expected ${expectedDisplay}, but got ${result[0]}`
+      reason: !result?.[0] ? 'No result returned from model' : passed ? undefined : `Expected ${expectedDisplay}, but got ${result?.[0] || ''}`
    })

    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  })
+
+  // Builds math-report.md from the results collected in testResults: newest result per test/model pair, failures first.
+  it('should generate markdown report', () => {
+    // Keep only the most recent entry for every test -> model combination.
+    const latestResults = new Map<string, Map<string, TestResult>>()
+    for (const entry of testResults) {
+      let byModel = latestResults.get(entry.test)
+      if (!byModel) {
+        byModel = new Map<string, TestResult>()
+        latestResults.set(entry.test, byModel)
+      }
+      const previous = byModel.get(entry.model)
+      if (!previous || new Date(entry.timestamp) > new Date(previous.timestamp)) {
+        byModel.set(entry.model, entry)
+      }
+    }
+
+    // Renders one "## <title>" section holding every latest result whose
+    // `passed` flag equals wantPassed; the Reason line is emitted for failures only.
+    const renderSection = (title: string, wantPassed: boolean): string => {
+      let section = `## ${title}\n\n`
+      for (const [testName, modelResults] of latestResults) {
+        for (const [model, result] of modelResults) {
+          if (result.passed !== wantPassed) {
+            continue
+          }
+          // Trim the raw model output: a trailing newline (e.g. "120\n") would
+          // otherwise push the closing backtick onto its own line in the report.
+          const actual = (result.result[0] || '').trim()
+          section += `### ${testName} - ${model}\n`
+          section += `- Prompt: \`${result.prompt}\`\n`
+          section += `- Expected: \`${result.expected}\`\n`
+          section += `- Actual: \`${actual}\`\n`
+          if (!wantPassed) {
+            // Fall back to a readable marker instead of printing a literal "undefined".
+            section += `- Reason: ${result.reason ?? 'not recorded'}\n`
+          }
+          section += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
+        }
+      }
+      return section
+    }
+
+    // Failed tests come first so problems are visible at the top of the report.
+    const report =
+      '# Math Test Results\n\n' +
+      renderSection('Failed Tests', false) +
+      renderSection('Passed Tests', true)
+
+    // Write report to file
+    const reportPath = path.resolve(__dirname, './math-report.md')
+    write(reportPath, report)
+
+    // Verify report was written
+    expect(exists(reportPath) === 'file').toBe(true)
+  })
 })