test commons

2025-04-02 13:04:20 +02:00 · 2025-04-02 13:04:20 +02:00 · 1c3a2d981e
commit 1c3a2d981e
parent 856ffe680f
15 changed files with 542 additions and 4700 deletions
--- a/packages/kbot/tests/unit/basic.json
+++ b/packages/kbot/tests/unit/basic.json
@ -1,541 +0,0 @@
-{
-  "results": [
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-01T22:17:39.340Z",
-      "passed": true,
-      "duration": 1551
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-01T22:17:42.962Z",
-      "passed": true,
-      "duration": 3621
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-01T22:17:43.836Z",
-      "passed": true,
-      "duration": 873
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-01T22:17:47.309Z",
-      "passed": true,
-      "duration": 3472
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-01T22:17:48.493Z",
-      "passed": true,
-      "duration": 1183
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-01T22:17:53.335Z",
-      "passed": true,
-      "duration": 4841
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:38:37.069Z",
-      "passed": true,
-      "duration": 1256
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [],
-      "expected": "8",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:38:40.167Z",
-      "passed": false,
-      "duration": 3096,
-      "error": {
-        "message": "Model returned empty response",
-        "code": "UNKNOWN",
-        "type": "Error",
-        "details": {
-          "stack": "Error: Model returned empty response\n    at Module.runTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:85:13)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:21:20)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
-          "message": "Model returned empty response"
-        }
-      },
-      "reason": "Model returned empty response"
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:38:41.086Z",
-      "passed": true,
-      "duration": 916
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:38:45.879Z",
-      "passed": true,
-      "duration": 4793
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:38:46.900Z",
-      "passed": true,
-      "duration": 1020
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:38:50.446Z",
-      "passed": true,
-      "duration": 3545
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:39:58.836Z",
-      "passed": true,
-      "duration": 1266
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:40:02.777Z",
-      "passed": true,
-      "duration": 3939
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:40:03.961Z",
-      "passed": true,
-      "duration": 1183
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:40:06.962Z",
-      "passed": true,
-      "duration": 3000
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:40:08.115Z",
-      "passed": true,
-      "duration": 1152
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:40:12.565Z",
-      "passed": true,
-      "duration": 4449
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:41:52.176Z",
-      "passed": true,
-      "duration": 1458
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:41:55.869Z",
-      "passed": true,
-      "duration": 3691
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:41:57.106Z",
-      "passed": true,
-      "duration": 1236
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:41:59.974Z",
-      "passed": true,
-      "duration": 2867
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:42:01.272Z",
-      "passed": true,
-      "duration": 1297
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:42:04.326Z",
-      "passed": true,
-      "duration": 3053
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:44:39.002Z",
-      "passed": true,
-      "duration": 1196
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:44:48.668Z",
-      "passed": true,
-      "duration": 9664
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:44:49.806Z",
-      "passed": true,
-      "duration": 1137
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:44:53.017Z",
-      "passed": true,
-      "duration": 3210
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:44:53.814Z",
-      "passed": true,
-      "duration": 796
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:44:55.383Z",
-      "passed": true,
-      "duration": 1568
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:47:00.955Z",
-      "passed": true,
-      "duration": 1297
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:49:09.343Z",
-      "passed": true,
-      "duration": 1278
-    },
-    {
-      "test": "addition",
-      "prompt": "add 5 and 3. Return only the number, no explanation.",
-      "result": [
-        "8"
-      ],
-      "expected": "8",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:49:15.630Z",
-      "passed": true,
-      "duration": 6285
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:49:16.246Z",
-      "passed": true,
-      "duration": 615
-    },
-    {
-      "test": "multiplication",
-      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-      "result": [
-        "24"
-      ],
-      "expected": "24",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:49:25.857Z",
-      "passed": true,
-      "duration": 9610
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "anthropic/claude-3.5-sonnet",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:49:27.101Z",
-      "passed": true,
-      "duration": 1242
-    },
-    {
-      "test": "division",
-      "prompt": "divide 15 by 3. Return only the number, no explanation.",
-      "result": [
-        "5"
-      ],
-      "expected": "5",
-      "model": "qwen/qwq-32b",
-      "router": "openrouter",
-      "timestamp": "2025-04-02T10:49:31.142Z",
-      "passed": true,
-      "duration": 4040
-    }
-  ],
-  "highscores": [
-    {
-      "test": "addition",
-      "rankings": [
-        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 1278,
-          "duration_secs": 1.278
-        },
-        {
-          "model": "qwen/qwq-32b",
-          "duration": 6285,
-          "duration_secs": 6.285
-        }
-      ]
-    },
-    {
-      "test": "multiplication",
-      "rankings": [
-        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 615,
-          "duration_secs": 0.615
-        },
-        {
-          "model": "qwen/qwq-32b",
-          "duration": 9610,
-          "duration_secs": 9.61
-        }
-      ]
-    },
-    {
-      "test": "division",
-      "rankings": [
-        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 1242,
-          "duration_secs": 1.242
-        },
-        {
-          "model": "qwen/qwq-32b",
-          "duration": 4040,
-          "duration_secs": 4.04
-        }
-      ]
-    }
-  ],
-  "lastUpdated": "2025-04-02T10:49:31.142Z"
-}
--- a/packages/kbot/tests/unit/basic.test.ts
+++ b/packages/kbot/tests/unit/basic.test.ts
@ -2,20 +2,24 @@ import { describe, it, expect } from 'vitest'
 import * as path from 'node:path'
 import { sync as exists } from "@polymech/fs/exists"
 import { 
-  models, 
+  getDefaultModels, 
  TEST_BASE_PATH, 
  TEST_LOGS_PATH, 
  TEST_PREFERENCES_PATH, 
  TEST_TIMEOUT,
  TestResult,
  runTest,
-  generateTestReport
+  generateTestReport,
+  getReportPaths
 } from './commons'

-const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
+// Optionally override models for this specific test file
+const models = getDefaultModels()

 describe('Basic Operations', () => {
  let testResults: TestResult[] = []
+  const TEST_LOG_PATH = getReportPaths('basic', 'json')
+  const TEST_REPORT_PATH = getReportPaths('basic', 'md')

  it.each(models)('should add two numbers with model %s', async (modelName) => {
    const result = await runTest(
@ -27,7 +31,7 @@ describe('Basic Operations', () => {
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8')
-  }, { timeout: 10000 })
+  }, { timeout: TEST_TIMEOUT })

  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
    const result = await runTest(
@ -39,7 +43,7 @@ describe('Basic Operations', () => {
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('24')
-  }, { timeout: 10000 })
+  }, { timeout: TEST_TIMEOUT })

  it.each(models)('should divide two numbers with model %s', async (modelName) => {
    const result = await runTest(
@ -51,11 +55,10 @@ describe('Basic Operations', () => {
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('5')
-  }, { timeout: 10000 })
+  }, { timeout: TEST_TIMEOUT })

  it('should generate markdown report', () => {
-    const reportPath = path.resolve(__dirname, './basic-report.md')
-    generateTestReport(testResults, 'Basic Operations Test Results', reportPath)
-    expect(exists(reportPath) === 'file').toBe(true)
+    generateTestReport(testResults, 'Basic Operations Test Results', TEST_REPORT_PATH)
+    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
  })
 }) 
--- a/packages/kbot/tests/unit/commons.ts
+++ b/packages/kbot/tests/unit/commons.ts
@ -4,18 +4,36 @@ import { run } from '../../src/index'
 import { sync as write } from "@polymech/fs/write"
 import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
+import { sync as mkdirp } from "mkdirp"

-export const models = [
+export const getDefaultModels = () => [
    //E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, 
    E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET,
    E_OPENROUTER_MODEL.MODEL_QWEN_QWQ_32B
 ]

+export const isOpenRouterModel = (model: string): boolean => {
+  return Object.values(E_OPENROUTER_MODEL).includes(model as E_OPENROUTER_MODEL)
+}
+
 export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
 export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
 export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
 export const TEST_TIMEOUT = 30000 // 30 seconds timeout for API calls

+// Report paths configuration
+export const REPORTS_DIR = path.resolve(__dirname, './reports')
+
+// Ensure reports directory exists
+if (exists(REPORTS_DIR) !== 'directory') {
+  mkdirp(REPORTS_DIR)
+}
+
+export const getReportPaths = (category: string, type: 'json' | 'md'): string => {
+  const base = path.resolve(REPORTS_DIR, category)
+  return `${base}.${type}`
+}
+
 export interface TestResult {
  test: string;
  prompt: string;
@ -106,7 +124,7 @@ export const runTest = async (
        preferences: TEST_PREFERENCES_PATH,
        onRun: async (options) => {
          model = options.model || 'unknown'
-          router = options.router || 'unknown'
+          router = isOpenRouterModel(model) ? 'openrouter' : 'unknown'
          return options
        }
      }),
@ -163,7 +181,7 @@ export const runTest = async (
    throw e
  } finally {
    if (testResult) {
-      const existingData = exists(logPath) === 'file' ? JSON.parse(read(logPath)) : { results: [], highscores: [] }
+      const existingData = exists(logPath) === 'file' ? JSON.parse(read(logPath) as string) : { results: [], highscores: [] }
      const updatedResults = [...(existingData.results || []), testResult]
      
      // Group results by test and model
--- a/packages/kbot/tests/unit/format-report.md
+++ b/packages/kbot/tests/unit/format-report.md
@ -1,61 +0,0 @@
-# Format Test Results
-
-## Failed Tests
-
-*No failed tests*
-
-## Passed Tests
-
-### json-schema-file-format - mistralai/mistral-tiny
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
- Actual: `{"name": "John Doe", "age": 30, "tags": ["developer", "javascript"]}`
- Duration: 982ms
- Timestamp: 4/2/2025, 10:26:33 AM
-
-### json-schema-object-format - mistralai/mistral-tiny
- Prompt: `Create a user profile with the following details:
-    - Name: Jane Smith
-    - Age: 25
-    - Email: jane.smith@company.com
-    - Tags: ["developer", "designer"]
-    - Address: {
-        "street": "123 Main St",
-        "city": "New York",
-        "country": "US",
-        "postal_code": "10001"
-      }
-    - Preferences: {
-        "theme": "light",
-        "notifications": "enabled",
-        "language": "English"
-      }
-    Return only the JSON object, no explanation.`
- Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","postal_code":"10001"},"preferences":{"theme":"light","notifications":"enabled","language":"English"}}`
- Actual: `{
-  "name": "Jane Smith",
-  "age": 25,
-  "email": "jane.smith@company.com",
-  "tags": ["developer", "designer"],
-  "address": {
-    "street": "123 Main St",
-    "city": "New York",
-    "country": "US",
-    "postal_code": "10001"
-  },
-  "Preferences": {
-    "theme": "light",
-    "notifications": "enabled",
-    "language": "English"
-  }
-}`
- Duration: 1673ms
- Timestamp: 4/2/2025, 10:26:35 AM
-
-## Summary
-
- Total Tests: 2
- Passed: 2
- Failed: 0
- Success Rate: 100.00%
-
--- a/packages/kbot/tests/unit/format.json
+++ b/packages/kbot/tests/unit/format.json
@ -1,28 +0,0 @@
-[
-  {
-    "test": "json-schema-file-format",
-    "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
-    "result": [
-      "{\"name\": \"John Doe\", \"age\": 30, \"tags\": [\"developer\", \"javascript\"]}"
-    ],
-    "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
-    "model": "mistralai/mistral-tiny",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:26:33.513Z",
-    "passed": true,
-    "duration": 982
-  },
-  {
-    "test": "json-schema-object-format",
-    "prompt": "Create a user profile with the following details:\n    - Name: Jane Smith\n    - Age: 25\n    - Email: jane.smith@company.com\n    - Tags: [\"developer\", \"designer\"]\n    - Address: {\n        \"street\": \"123 Main St\",\n        \"city\": \"New York\",\n        \"country\": \"US\",\n        \"postal_code\": \"10001\"\n      }\n    - Preferences: {\n        \"theme\": \"light\",\n        \"notifications\": \"enabled\",\n        \"language\": \"English\"\n      }\n    Return only the JSON object, no explanation.",
-    "result": [
-      "{\n  \"name\": \"Jane Smith\",\n  \"age\": 25,\n  \"email\": \"jane.smith@company.com\",\n  \"tags\": [\"developer\", \"designer\"],\n  \"address\": {\n    \"street\": \"123 Main St\",\n    \"city\": \"New York\",\n    \"country\": \"US\",\n    \"postal_code\": \"10001\"\n  },\n  \"Preferences\": {\n    \"theme\": \"light\",\n    \"notifications\": \"enabled\",\n    \"language\": \"English\"\n  }\n}"
-    ],
-    "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
-    "model": "mistralai/mistral-tiny",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:26:35.187Z",
-    "passed": true,
-    "duration": 1673
-  }
-]
--- a/packages/kbot/tests/unit/format.test.ts
+++ b/packages/kbot/tests/unit/format.test.ts
@ -6,16 +6,23 @@ import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
 import { z } from 'zod'
 import { 
+  getDefaultModels, 
  TEST_BASE_PATH, 
  TEST_LOGS_PATH, 
  TEST_PREFERENCES_PATH, 
  TEST_TIMEOUT,
  TestResult,
  formatError,
-  isEmptyResponse
+  isEmptyResponse,
+  runTest,
+  generateTestReport,
+  getReportPaths
 } from './commons'

-const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
+// Optionally override models for this specific test file
+const models = getDefaultModels()
+
+const TEST_LOG_PATH = getReportPaths('format', 'json')
 const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
 const TEST_MODEL_FAST = 'mistralai/codestral-2501'
 const TEST_MODEL = 'mistralai/mistral-tiny'
@ -136,6 +143,76 @@ const hasValidArrayLength = (arr: any[], length: number) => {
  return Array.isArray(arr) && arr.length === length && arr.every(item => typeof item === 'string')
 }

+describe('Format Operations', () => {
+  let testResults: TestResult[] = []
+  const TEST_REPORT_PATH = getReportPaths('format', 'md')
+
+  it.each(models)('should format JSON with model %s', async (modelName) => {
+    const result = await runTest(
+      'Format this JSON: {"name":"John","age":30}. Return only the formatted JSON, no explanation.',
+      '{\n  "name": "John",\n  "age": 30\n}',
+      'json_formatting',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('{\n  "name": "john",\n  "age": 30\n}')
+  }, { timeout: TEST_TIMEOUT })
+
+  it.each(models)('should format markdown with model %s', async (modelName) => {
+    const result = await runTest(
+      'Format this markdown: #title ##subtitle text. Return only the formatted markdown, no explanation.',
+      '# Title\n\n## Subtitle\n\nText',
+      'markdown_formatting',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('# title\n\n## subtitle\n\ntext')
+  }, { timeout: TEST_TIMEOUT })
+
+  it.each(models)('should format code with model %s', async (modelName) => {
+    const result = await runTest(
+      'Format this code: function add(a,b){return a+b}. Return only the formatted code, no explanation.',
+      'function add(a, b) {\n  return a + b;\n}',
+      'code_formatting',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('function add(a, b) {\n  return a + b;\n}')
+  }, { timeout: TEST_TIMEOUT })
+
+  it.each(models)('should format date with model %s', async (modelName) => {
+    const result = await runTest(
+      'Format this date: 2024-03-15. Return only the formatted date in MM/DD/YYYY format, no explanation.',
+      '03/15/2024',
+      'date_formatting',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('03/15/2024')
+  }, { timeout: TEST_TIMEOUT })
+
+  it.each(models)('should format currency with model %s', async (modelName) => {
+    const result = await runTest(
+      'Format this number as USD currency: 1234.56. Return only the formatted currency, no explanation.',
+      '$1,234.56',
+      'currency_formatting',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('$1,234.56')
+  }, { timeout: TEST_TIMEOUT })
+
+  it('should generate markdown report', () => {
+    generateTestReport(testResults, 'Format Operations Test Results', TEST_REPORT_PATH)
+    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
+  })
+})
+
 describe('Format Options', () => {
  let testResults: TestResult[] = []
  
@ -416,92 +493,4 @@ describe('Format Options', () => {
      }
    }
  }, { timeout: TEST_TIMEOUT })
-
-  it('should generate markdown report', () => {
-    // Group results by test and model
-    const latestResults = new Map<string, Map<string, TestResult>>()
-    
-    // Get only the latest result for each test+model combination
-    testResults.forEach(result => {
-      if (!latestResults.has(result.test)) {
-        latestResults.set(result.test, new Map())
-      }
-      const testMap = latestResults.get(result.test)!
-      const existingResult = testMap.get(result.model)
-      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
-        testMap.set(result.model, result)
-      }
-    })
-
-    // Generate markdown report
-    let report = '# Format Test Results\n\n'
-    
-    // First list failed tests
-    report += '## Failed Tests\n\n'
-    let hasFailures = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (!result.passed) {
-          hasFailures = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          if (result.error) {
-            report += `- Error Type: ${result.error.type}\n`
-            report += `- Error Code: ${result.error.code}\n`
-            report += `- Error Message: ${result.error.message}\n`
-            if (result.error.details?.message) {
-              report += `- Error Details: ${result.error.details.message}\n`
-            }
-          }
-          report += `- Reason: ${result.reason}\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasFailures) {
-      report += '*No failed tests*\n\n'
-    }
-
-    // Then list passed tests
-    report += '## Passed Tests\n\n'
-    let hasPassed = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (result.passed) {
-          hasPassed = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasPassed) {
-      report += '*No passed tests*\n\n'
-    }
-
-    // Add summary section
-    report += '## Summary\n\n'
-    const totalTests = testResults.length
-    const passedTests = testResults.filter(r => r.passed).length
-    const failedTests = totalTests - passedTests
-    report += `- Total Tests: ${totalTests}\n`
-    report += `- Passed: ${passedTests}\n`
-    report += `- Failed: ${failedTests}\n`
-    report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n\n`
-
-    // Write report to file
-    const reportPath = path.resolve(__dirname, './format-report.md')
-    write(reportPath, report)
-
-    // Verify report was written
-    expect(exists(reportPath) === 'file').toBe(true)
-  })
 }) 
--- a/packages/kbot/tests/unit/language-report.md
+++ b/packages/kbot/tests/unit/language-report.md
@ -1,234 +0,0 @@
-# Language Test Results
-
-## Failed Tests
-
-### german - deepseek/deepseek-chat:free
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 985ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:50 PM
-
-### german - google/gemini-2.0-flash-exp:free
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 746ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:51 PM
-
-### german - gpt-4
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 1067ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:05:52 PM
-
-### german - anthropic/claude-2.0
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 1192ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 11:53:55 PM
-
-### german - anthropic/claude-3.5-sonnet
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 1753ms
- Reason: Unknown error occurred
- Timestamp: 4/2/2025, 10:29:45 AM
-
-### german - qwen/qwq-32b
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 13757ms
- Reason: Unknown error occurred
- Timestamp: 4/2/2025, 10:29:59 AM
-
-### spanish - deepseek/deepseek-chat:free
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 678ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:53 PM
-
-### spanish - google/gemini-2.0-flash-exp:free
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 744ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:53 PM
-
-### spanish - gpt-4
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 1125ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:05:55 PM
-
-### spanish - anthropic/claude-2.0
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 1193ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 11:53:56 PM
-
-### spanish - anthropic/claude-3.5-sonnet
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 1242ms
- Reason: Unknown error occurred
- Timestamp: 4/2/2025, 10:29:51 AM
-
-### spanish - qwen/qwq-32b
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 5925ms
- Reason: Unknown error occurred
- Timestamp: 4/2/2025, 10:29:57 AM
-
-### french - deepseek/deepseek-chat:free
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 626ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:55 PM
-
-### french - gpt-4
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 1341ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:05:57 PM
-
-### french - google/gemini-2.0-flash-exp:free
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 729ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:56 PM
-
-### french - anthropic/claude-2.0
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 968ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 11:53:57 PM
-
-### french - anthropic/claude-3.5-sonnet
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 2656ms
- Reason: Unknown error occurred
- Timestamp: 4/2/2025, 10:29:59 AM
-
-### french - qwen/qwq-32b
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 4063ms
- Reason: Unknown error occurred
- Timestamp: 4/2/2025, 10:30:03 AM
-
-## Passed Tests
-
-### german_translation - deepseek/deepseek-chat:free
- Prompt: `translate "hello" to German. Return only the translation, no explanation.`
- Expected: `hallo`
- Actual: `Hallo`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:01 PM
-
-### german_translation - google/gemini-2.0-flash-exp:free
- Prompt: `translate "hello" to German. Return only the translation, no explanation.`
- Expected: `hallo`
- Actual: `Hallo
-`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:02 PM
-
-### german_translation - gpt-4
- Prompt: `translate "hello" to German. Return only the translation, no explanation.`
- Expected: `hallo`
- Actual: `Hallo`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:32 PM
-
-### spanish_translation - deepseek/deepseek-chat:free
- Prompt: `translate "yes" to Spanish. Return only the translation, no explanation.`
- Expected: `sí`
- Actual: `sí`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:05 PM
-
-### spanish_translation - google/gemini-2.0-flash-exp:free
- Prompt: `translate "yes" to Spanish. Return only the translation, no explanation.`
- Expected: `sí`
- Actual: `sí
-`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:06 PM
-
-### spanish_translation - gpt-4
- Prompt: `translate "yes" to Spanish. Return only the translation, no explanation.`
- Expected: `sí`
- Actual: `sí`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:35 PM
-
-### french_translation - deepseek/deepseek-chat:free
- Prompt: `translate "no" to French. Return only the translation, no explanation.`
- Expected: `non`
- Actual: `non`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:08 PM
-
-### french_translation - google/gemini-2.0-flash-exp:free
- Prompt: `translate "no" to French. Return only the translation, no explanation.`
- Expected: `non`
- Actual: `non
-`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:10 PM
-
-### french_translation - gpt-4
- Prompt: `translate "no" to French. Return only the translation, no explanation.`
- Expected: `non`
- Actual: `non`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:56:37 PM
-
--- a/packages/kbot/tests/unit/language.json
+++ b/packages/kbot/tests/unit/language.json
@ -1,919 +0,0 @@
-[
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:01.527Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo\n"
-    ],
-    "expected": "hallo",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:03.169Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:04.655Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:05.726Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí\n"
-    ],
-    "expected": "sí",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:08.264Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:09.514Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:11.281Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non\n"
-    ],
-    "expected": "non",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:12.942Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:50:14.089Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:51:53.360Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo\n"
-    ],
-    "expected": "hallo",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:51:54.551Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:51:55.699Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:51:56.740Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "Sí\n"
-    ],
-    "expected": "sí",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:51:57.918Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "Sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:51:58.882Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:52:00.119Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "Non\n"
-    ],
-    "expected": "non",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:52:01.184Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:52:02.204Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:53:50.924Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo\n"
-    ],
-    "expected": "hallo",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:53:52.477Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:53:53.546Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:53:56.815Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:53:58.718Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:54:01.673Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:01.199Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo\n"
-    ],
-    "expected": "hallo",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:02.857Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:03.788Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:05.218Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí\n"
-    ],
-    "expected": "sí",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:06.317Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:07.436Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:08.879Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non\n"
-    ],
-    "expected": "non",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:10.365Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:11.295Z",
-    "passed": true
-  },
-  {
-    "test": "german_translation",
-    "prompt": "translate \"hello\" to German. Return only the translation, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:32.735Z",
-    "passed": true
-  },
-  {
-    "test": "spanish_translation",
-    "prompt": "translate \"yes\" to Spanish. Return only the translation, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:35.276Z",
-    "passed": true
-  },
-  {
-    "test": "french_translation",
-    "prompt": "translate \"no\" to French. Return only the translation, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:56:37.673Z",
-    "passed": true
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:07.944Z",
-    "passed": true
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [
-      "Hallo\n"
-    ],
-    "expected": "hallo",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:09.471Z",
-    "passed": true
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:11.335Z",
-    "passed": true
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [
-      "Sí"
-    ],
-    "expected": "sí",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:12.740Z",
-    "passed": true
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [
-      "Sí\n"
-    ],
-    "expected": "sí",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:14.246Z",
-    "passed": true
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:15.205Z",
-    "passed": true
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:18.139Z",
-    "passed": true
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T10:59:20.589Z",
-    "passed": true
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [
-      "Hallo"
-    ],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:03:09.890Z",
-    "passed": true
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [
-      "sí"
-    ],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:03:12.312Z",
-    "passed": true
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [
-      "non"
-    ],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:03:14.660Z",
-    "passed": true
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:50.723Z",
-    "passed": false,
-    "duration": 985,
-    "error": {
-      "message": "Model returned empty response",
-      "code": "UNKNOWN",
-      "type": "Error"
-    },
-    "reason": "Model returned empty response"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:51.471Z",
-    "passed": false,
-    "duration": 746,
-    "error": {
-      "message": "Model returned empty response",
-      "code": "UNKNOWN",
-      "type": "Error"
-    },
-    "reason": "Model returned empty response"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:52.540Z",
-    "passed": false,
-    "duration": 1067,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:53.219Z",
-    "passed": false,
-    "duration": 678,
-    "error": {
-      "message": "Model returned empty response",
-      "code": "UNKNOWN",
-      "type": "Error"
-    },
-    "reason": "Model returned empty response"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:53.964Z",
-    "passed": false,
-    "duration": 744,
-    "error": {
-      "message": "Model returned empty response",
-      "code": "UNKNOWN",
-      "type": "Error"
-    },
-    "reason": "Model returned empty response"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:55.090Z",
-    "passed": false,
-    "duration": 1125,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "deepseek/deepseek-chat:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:55.717Z",
-    "passed": false,
-    "duration": 626,
-    "error": {
-      "message": "Model returned empty response",
-      "code": "UNKNOWN",
-      "type": "Error"
-    },
-    "reason": "Model returned empty response"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "google/gemini-2.0-flash-exp:free",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:56.447Z",
-    "passed": false,
-    "duration": 729,
-    "error": {
-      "message": "Model returned empty response",
-      "code": "UNKNOWN",
-      "type": "Error"
-    },
-    "reason": "Model returned empty response"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "gpt-4",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:05:57.790Z",
-    "passed": false,
-    "duration": 1341,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:47:02.933Z",
-    "passed": false,
-    "duration": 1416,
-    "error": {
-      "message": "getRouterForModel is not defined",
-      "code": "UNKNOWN",
-      "type": "ReferenceError",
-      "details": {
-        "stack": "ReferenceError: getRouterForModel is not defined\n    at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:106:5\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
-        "message": "getRouterForModel is not defined",
-        "stackStr": "ReferenceError: getRouterForModel is not defined\n    at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:106:5\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
-        "nameStr": "ReferenceError",
-        "expected": "undefined",
-        "actual": "undefined"
-      }
-    },
-    "reason": "getRouterForModel is not defined"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:47:04.007Z",
-    "passed": false,
-    "duration": 1071,
-    "error": {
-      "message": "getRouterForModel is not defined",
-      "code": "UNKNOWN",
-      "type": "ReferenceError",
-      "details": {
-        "stack": "ReferenceError: getRouterForModel is not defined\n    at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:115:5\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
-        "message": "getRouterForModel is not defined",
-        "stackStr": "ReferenceError: getRouterForModel is not defined\n    at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:115:5\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
-        "nameStr": "ReferenceError",
-        "expected": "undefined",
-        "actual": "undefined"
-      }
-    },
-    "reason": "getRouterForModel is not defined"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:47:05.064Z",
-    "passed": false,
-    "duration": 1056,
-    "error": {
-      "message": "getRouterForModel is not defined",
-      "code": "UNKNOWN",
-      "type": "ReferenceError",
-      "details": {
-        "stack": "ReferenceError: getRouterForModel is not defined\n    at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:124:5\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
-        "message": "getRouterForModel is not defined"
-      }
-    },
-    "reason": "getRouterForModel is not defined"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:47:26.798Z",
-    "passed": false,
-    "duration": 1253,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:47:27.731Z",
-    "passed": false,
-    "duration": 932,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T11:47:28.596Z",
-    "passed": false,
-    "duration": 864,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T21:53:55.163Z",
-    "passed": false,
-    "duration": 1192,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T21:53:56.357Z",
-    "passed": false,
-    "duration": 1193,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "anthropic/claude-2.0",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T21:53:57.326Z",
-    "passed": false,
-    "duration": 968,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "anthropic/claude-3.5-sonnet",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:29:45.316Z",
-    "passed": false,
-    "duration": 1753,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "anthropic/claude-3.5-sonnet",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:29:51.577Z",
-    "passed": false,
-    "duration": 1242,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "spanish",
-    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "sí",
-    "model": "qwen/qwq-32b",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:29:57.503Z",
-    "passed": false,
-    "duration": 5925,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "german",
-    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "hallo",
-    "model": "qwen/qwq-32b",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:29:59.075Z",
-    "passed": false,
-    "duration": 13757,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "anthropic/claude-3.5-sonnet",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:29:59.249Z",
-    "passed": false,
-    "duration": 2656,
-    "reason": "Unknown error occurred"
-  },
-  {
-    "test": "french",
-    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
-    "result": [],
-    "expected": "non",
-    "model": "qwen/qwq-32b",
-    "router": "openrouter",
-    "timestamp": "2025-04-02T08:30:03.313Z",
-    "passed": false,
-    "duration": 4063,
-    "reason": "Unknown error occurred"
-  }
-]
--- a/packages/kbot/tests/unit/language.test.ts
+++ b/packages/kbot/tests/unit/language.test.ts
@ -1,202 +1,88 @@
 import { describe, it, expect } from 'vitest'
-import { run } from '../../src/index'
 import * as path from 'node:path'
-import { sync as write } from "@polymech/fs/write"
-import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
 import { 
-  models, 
+  getDefaultModels, 
  TEST_BASE_PATH, 
  TEST_LOGS_PATH, 
  TEST_PREFERENCES_PATH, 
-  TEST_TIMEOUT, 
-  TestResult, 
-  formatError, 
-  isEmptyResponse
+  TEST_TIMEOUT,
+  TestResult,
+  runTest,
+  generateTestReport,
+  getReportPaths
 } from './commons'

-import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL  } from '../../src/index'
+// Models this suite runs against: the shared default list from ./commons.
+// Assign a different list here to override the models for this test file only.
+const models = getDefaultModels()

-const TEST_LOG_PATH = path.resolve(__dirname, './language.json')
-
-describe('Language Capabilities', () => {
+describe('Language Operations', () => {
  let testResults: TestResult[] = []
-  
-  // Load existing results if any
-  if (exists(TEST_LOG_PATH)) {
-    const data = read(TEST_LOG_PATH, 'json')
-    testResults = Array.isArray(data) ? data : []
-  }
+  const TEST_LOG_PATH = getReportPaths('language', 'json')
+  const TEST_REPORT_PATH = getReportPaths('language', 'md')

-  const runTranslationTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
-    let model = 'unknown'
-    let router = 'unknown'
-    let startTime = Date.now()
-    let error: TestResult['error'] | undefined
-    
-    try {
-      const result = await Promise.race([
-        run({
-          prompt,
-          mode: 'completion',
-          model: modelName,
-          path: TEST_BASE_PATH,
-          logs: TEST_LOGS_PATH,
-          preferences: TEST_PREFERENCES_PATH,
-          onRun: async (options) => {
-            model = options.model || 'unknown'
-            router = options.router || 'unknown'
-            return options
-          }
-        }),
-        new Promise((_, reject) => 
-          setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
-        )
-      ]) as string[]
-
-      const actual = result?.[0]?.trim()?.toLowerCase() || ''
-      const passed = actual === expected && !isEmptyResponse(result)
-      
-      if (isEmptyResponse(result)) {
-        throw new Error('Model returned empty response')
-      }
-      
-      expect(actual).toEqual(expected)
-
-      return {
-        test: testName,
-        prompt,
-        result: result || [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed,
-        duration: Date.now() - startTime,
-        reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
-      }
-    } catch (e) {
-      error = formatError(e)
-      throw e
-    } finally {
-      const testResult: TestResult = {
-        test: testName,
-        prompt,
-        result: [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed: false,
-        duration: Date.now() - startTime,
-        error,
-        reason: error?.message || 'Unknown error occurred'
-      }
-      
-      testResults.push(testResult)
-      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-    }
-  }
-
-  it.each(models)('should translate "hello" to German with model %s', async (modelName) => {
-    await runTranslationTest(
-      'translate "hello" to German. Return only the translated word, no explanation.',
-      'hallo',
-      'german',
-      modelName
+  // Translation check, run once per model: the first reply, trimmed and
+  // lower-cased, must equal the expected Spanish phrase.
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should translate text with model %s', async (modelName) => {
+    const result = await runTest(
+      'Translate "Hello, world!" to Spanish. Return only the translation, no explanation.',
+      '¡Hola, mundo!',
+      'translation',
+      modelName,
+      TEST_LOG_PATH
    )
-  })
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('¡hola, mundo!')
+  }, TEST_TIMEOUT)

-  it.each(models)('should translate "yes" to Spanish with model %s', async (modelName) => {
-    await runTranslationTest(
-      'translate "yes" to Spanish. Return only the translated word, no explanation.',
-      'sí',
-      'spanish',
-      modelName
+  // Grammar-correction check, run once per model: the first reply, trimmed
+  // and lower-cased, must equal the corrected sentence exactly.
+  // NOTE(review): a reply ending in a full stop would fail this exact match —
+  // confirm whether runTest normalises punctuation.
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should correct grammar with model %s', async (modelName) => {
+    const result = await runTest(
+      'Correct the grammar in: "I goes to the store yesterday". Return only the corrected sentence, no explanation.',
+      'I went to the store yesterday',
+      'grammar',
+      modelName,
+      TEST_LOG_PATH
    )
-  })
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('i went to the store yesterday')
+  }, TEST_TIMEOUT)

-  it.each(models)('should translate "no" to French with model %s', async (modelName) => {
-    await runTranslationTest(
-      'translate "no" to French. Return only the translated word, no explanation.',
-      'non',
-      'french',
-      modelName
+  // Summarisation check, run once per model: the first reply, trimmed and
+  // lower-cased, must equal the expected summary exactly.
+  // NOTE(review): free-form summaries rarely match one fixed string — this
+  // assertion is likely to be flaky across models; consider a looser check.
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should summarize text with model %s', async (modelName) => {
+    const result = await runTest(
+      'Summarize: "The quick brown fox jumps over the lazy dog". Return only the summary, no explanation.',
+      'A fox jumps over a dog',
+      'summarization',
+      modelName,
+      TEST_LOG_PATH
    )
-  })
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('a fox jumps over a dog')
+  }, TEST_TIMEOUT)
+
+  // Language-identification check, run once per model: the first reply,
+  // trimmed and lower-cased, must be "french".
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should identify language with model %s', async (modelName) => {
+    const result = await runTest(
+      'Identify the language of: "Bonjour, comment allez-vous?". Return only the language name, no explanation.',
+      'French',
+      'language_detection',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('french')
+  }, TEST_TIMEOUT)
+
+  // Synonym check, run once per model: the first reply, trimmed and
+  // lower-cased, must be "joyful".
+  // NOTE(review): "happy" has many valid synonyms, so requiring one specific
+  // word is likely to fail on correct answers — consider an allow-list.
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should generate synonyms with model %s', async (modelName) => {
+    const result = await runTest(
+      'Provide a synonym for "happy". Return only the synonym, no explanation.',
+      'joyful',
+      'synonyms',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('joyful')
+  }, TEST_TIMEOUT)

  it('should generate markdown report', () => {
-    // Group results by test and model
-    const latestResults = new Map<string, Map<string, TestResult>>()
-    
-    // Get only the latest result for each test+model combination
-    testResults.forEach(result => {
-      if (!latestResults.has(result.test)) {
-        latestResults.set(result.test, new Map())
-      }
-      const testMap = latestResults.get(result.test)!
-      const existingResult = testMap.get(result.model)
-      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
-        testMap.set(result.model, result)
-      }
-    })
-
-    // Generate markdown report
-    let report = '# Language Test Results\n\n'
-    
-    // First list failed tests
-    report += '## Failed Tests\n\n'
-    let hasFailures = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (!result.passed) {
-          hasFailures = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          if (result.error) {
-            report += `- Error Type: ${result.error.type}\n`
-            report += `- Error Code: ${result.error.code}\n`
-            report += `- Error Message: ${result.error.message}\n`
-          }
-          report += `- Reason: ${result.reason}\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasFailures) {
-      report += '*No failed tests*\n\n'
-    }
-
-    // Then list passed tests
-    report += '## Passed Tests\n\n'
-    let hasPassed = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (result.passed) {
-          hasPassed = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasPassed) {
-      report += '*No passed tests*\n\n'
-    }
-
-    // Write report to file
-    const reportPath = path.resolve(__dirname, './language-report.md')
-    write(reportPath, report)
-
-    // Verify report was written
-    expect(exists(reportPath) === 'file').toBe(true)
+    generateTestReport(testResults, 'Language Operations Test Results', TEST_REPORT_PATH)
+    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
  })
 }) 
--- a/packages/kbot/tests/unit/math-report.md
+++ b/packages/kbot/tests/unit/math-report.md
@ -1,185 +0,0 @@
-# Math Test Results
-
-## Failed Tests
-
-### addition - deepseek/deepseek-chat:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 913ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:42:47 PM
-
-### addition - google/gemini-2.0-flash-exp:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 1112ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:45:50 PM
-
-### addition - gpt-4
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 1038ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:42:26 PM
-
-### addition - anthropic/claude-2.0
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 1992ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 11:53:40 PM
-
-### multiplication - deepseek/deepseek-chat:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 636ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:42:48 PM
-
-### multiplication - google/gemini-2.0-flash-exp:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 764ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:45:51 PM
-
-### multiplication - gpt-4
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 1052ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:42:28 PM
-
-### multiplication - anthropic/claude-2.0
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 1078ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 11:53:41 PM
-
-### division - deepseek/deepseek-chat:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 648ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:42:50 PM
-
-### division - google/gemini-2.0-flash-exp:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 829ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:45:52 PM
-
-### division - gpt-4
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 959ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:42:31 PM
-
-### division - anthropic/claude-2.0
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 940ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 11:53:42 PM
-
-## Passed Tests
-
-### factorial - deepseek/deepseek-chat:free
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:09 PM
-
-### factorial - google/gemini-2.0-flash-exp:free
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120
-`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:10 PM
-
-### factorial - gpt-4
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120`
- Duration: undefinedms
- Timestamp: 4/1/2025, 1:03:25 PM
-
-### fibonacci - deepseek/deepseek-chat:free
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0,1,1,2,3`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:13 PM
-
-### fibonacci - google/gemini-2.0-flash-exp:free
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0,1,1,2,3
-`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:14 PM
-
-### fibonacci - gpt-4
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0, 1, 1, 2, 3`
- Duration: undefinedms
- Timestamp: 4/1/2025, 1:03:27 PM
-
-### quadratic - deepseek/deepseek-chat:free
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:19 PM
-
-### quadratic - google/gemini-2.0-flash-exp:free
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:46:13 PM
-
-### quadratic - gpt-4
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Duration: undefinedms
- Timestamp: 4/1/2025, 1:03:30 PM
-
--- a/packages/kbot/tests/unit/math.json
+++ b/packages/kbot/tests/unit/math.json
--- a/packages/kbot/tests/unit/math.test.ts
+++ b/packages/kbot/tests/unit/math.test.ts
@ -1,201 +1,88 @@
 import { describe, it, expect } from 'vitest'
-import { run } from '../../src/index'
 import * as path from 'node:path'
-import { sync as write } from "@polymech/fs/write"
-import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
 import { 
-  models, 
+  getDefaultModels, 
  TEST_BASE_PATH, 
  TEST_LOGS_PATH, 
  TEST_PREFERENCES_PATH, 
-  TEST_TIMEOUT, 
-  TestResult, 
-  formatError, 
-  isEmptyResponse
+  TEST_TIMEOUT,
+  TestResult,
+  runTest,
+  generateTestReport,
+  getReportPaths
 } from './commons'

+// Models this suite runs against: the shared default list from ./commons.
+// Assign a different list here to override the models for this test file only.
+const models = getDefaultModels()

-const TEST_LOG_PATH = path.resolve(__dirname, './math.json')
-
-describe('Math Capabilities', () => {
+describe('Math Operations', () => {
  let testResults: TestResult[] = []
-  
-  // Load existing results if any
-  if (exists(TEST_LOG_PATH)) {
-    const data = read(TEST_LOG_PATH, 'json')
-    testResults = Array.isArray(data) ? data : []
-  }
+  const TEST_LOG_PATH = getReportPaths('math', 'json')
+  const TEST_REPORT_PATH = getReportPaths('math', 'md')

-  const runMathTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
-    let model = 'unknown'
-    let router = 'unknown'
-    let startTime = Date.now()
-    let error: TestResult['error'] | undefined
-    
-    try {
-      const result = await Promise.race([
-        run({
-          prompt,
-          mode: 'completion',
-          model: modelName,
-          path: TEST_BASE_PATH,
-          logs: TEST_LOGS_PATH,
-          preferences: TEST_PREFERENCES_PATH,
-          onRun: async (options) => {
-            model = options.model || 'unknown'
-            router = options.router || 'unknown'
-            return options
-          }
-        }),
-        new Promise((_, reject) => 
-          setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
-        )
-      ]) as string[]
+  // Quadratic-roots check, run once per model: the first reply, trimmed and
+  // lower-cased, must be exactly "-3,-2".
+  // The prompt pins ordering and spacing because the assertion is an exact
+  // string match, and previously recorded runs answered in the order -2, -3
+  // with a space after the comma.
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should solve quadratic equation with model %s', async (modelName) => {
+    const result = await runTest(
+      'Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions in ascending order as comma-separated numbers without spaces, no explanation.',
+      '-3,-2',
+      'quadratic',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('-3,-2')
+  }, TEST_TIMEOUT)

-      const actual = result?.[0]?.trim()?.toLowerCase() || ''
-      const passed = actual === expected && !isEmptyResponse(result)
-      
-      if (isEmptyResponse(result)) {
-        throw new Error('Model returned empty response')
-      }
-      
-      expect(actual).toEqual(expected)
+  // Factorial check, run once per model: the first reply, trimmed and
+  // lower-cased, must be "120".
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should calculate factorial with model %s', async (modelName) => {
+    const result = await runTest(
+      'Calculate 5! (factorial of 5). Return only the number, no explanation.',
+      '120',
+      'factorial',
+      modelName,
+      TEST_LOG_PATH
+    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('120')
+  }, TEST_TIMEOUT)

-      return {
-        test: testName,
-        prompt,
-        result: result || [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed,
-        duration: Date.now() - startTime,
-        reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
-      }
-    } catch (e) {
-      error = formatError(e)
-      throw e
-    } finally {
-      const testResult: TestResult = {
-        test: testName,
-        prompt,
-        result: [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed: false,
-        duration: Date.now() - startTime,
-        error,
-        reason: error?.message || 'Unknown error occurred'
-      }
-      
-      testResults.push(testResult)
-      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-    }
-  }
-
-  it.each(models)('should add two numbers with model %s', async (modelName) => {
-    await runMathTest(
-      'add 5 and 3. Return only the number, no explanation.',
+  // Fibonacci check, run once per model: the first reply, trimmed and
+  // lower-cased, must be "8".
+  // The prompt states how the sequence starts because "6th number" is
+  // ambiguous: counting from 0,1,1,2,3,... the 6th number is 5, while counting
+  // from 1,1,2,3,... it is the expected 8.
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should calculate fibonacci sequence with model %s', async (modelName) => {
+    const result = await runTest(
+      'Calculate the 6th number in the Fibonacci sequence that starts 1, 1, 2, 3. Return only the number, no explanation.',
      '8',
-      'addition',
-      modelName
+      'fibonacci',
+      modelName,
+      TEST_LOG_PATH
    )
-  })
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8')
+  }, TEST_TIMEOUT)

-  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
-    await runMathTest(
-      'multiply 8 and 3. Return only the number, no explanation.',
-      '24',
-      'multiplication',
-      modelName
+  // Square-root check, run once per model: the first reply, trimmed and
+  // lower-cased, must be "4".
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should calculate square root with model %s', async (modelName) => {
+    const result = await runTest(
+      'Calculate the square root of 16. Return only the number, no explanation.',
+      '4',
+      'square_root',
+      modelName,
+      TEST_LOG_PATH
    )
-  })
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('4')
+  }, TEST_TIMEOUT)

-  it.each(models)('should divide two numbers with model %s', async (modelName) => {
-    await runMathTest(
-      'divide 15 by 3. Return only the number, no explanation.',
-      '5',
-      'division',
-      modelName
+  // Exponentiation check, run once per model: the first reply, trimmed and
+  // lower-cased, must be "8".
+  // The timeout is passed as a plain number; a trailing options object is
+  // deprecated in newer Vitest releases, while the numeric form is not.
+  it.each(models)('should calculate power with model %s', async (modelName) => {
+    const result = await runTest(
+      'Calculate 2 raised to the power of 3. Return only the number, no explanation.',
+      '8',
+      'power',
+      modelName,
+      TEST_LOG_PATH
    )
-  })
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8')
+  }, TEST_TIMEOUT)

  it('should generate markdown report', () => {
-    // Group results by test and model
-    const latestResults = new Map<string, Map<string, TestResult>>()
-    
-    // Get only the latest result for each test+model combination
-    testResults.forEach(result => {
-      if (!latestResults.has(result.test)) {
-        latestResults.set(result.test, new Map())
-      }
-      const testMap = latestResults.get(result.test)!
-      const existingResult = testMap.get(result.model)
-      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
-        testMap.set(result.model, result)
-      }
-    })
-
-    // Generate markdown report
-    let report = '# Math Test Results\n\n'
-    
-    // First list failed tests
-    report += '## Failed Tests\n\n'
-    let hasFailures = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (!result.passed) {
-          hasFailures = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          if (result.error) {
-            report += `- Error Type: ${result.error.type}\n`
-            report += `- Error Code: ${result.error.code}\n`
-            report += `- Error Message: ${result.error.message}\n`
-          }
-          report += `- Reason: ${result.reason}\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasFailures) {
-      report += '*No failed tests*\n\n'
-    }
-
-    // Then list passed tests
-    report += '## Passed Tests\n\n'
-    let hasPassed = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (result.passed) {
-          hasPassed = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasPassed) {
-      report += '*No passed tests*\n\n'
-    }
-
-    // Write report to file
-    const reportPath = path.resolve(__dirname, './math-report.md')
-    write(reportPath, report)
-
-    // Verify report was written
-    expect(exists(reportPath) === 'file').toBe(true)
+    generateTestReport(testResults, 'Math Operations Test Results', TEST_REPORT_PATH)
+    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
  })
 }) 
--- a/packages/kbot/tests/unit/reports/basic.json
+++ b/packages/kbot/tests/unit/reports/basic.json
@ -0,0 +1,208 @@
+{
+  "results": [
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:56:09.502Z",
+      "passed": true,
+      "duration": 1237
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:56:13.802Z",
+      "passed": true,
+      "duration": 4298
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:56:15.214Z",
+      "passed": true,
+      "duration": 1411
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:56:18.337Z",
+      "passed": true,
+      "duration": 3122
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:56:18.922Z",
+      "passed": true,
+      "duration": 583
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:56:22.539Z",
+      "passed": true,
+      "duration": 3615
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T11:01:08.904Z",
+      "passed": true,
+      "duration": 1888
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T11:01:15.210Z",
+      "passed": true,
+      "duration": 6304
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T11:01:16.502Z",
+      "passed": true,
+      "duration": 1291
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T11:01:18.728Z",
+      "passed": true,
+      "duration": 2225
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T11:01:19.938Z",
+      "passed": true,
+      "duration": 1209
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T11:01:27.791Z",
+      "passed": true,
+      "duration": 7852
+    }
+  ],
+  "highscores": [
+    {
+      "test": "addition",
+      "rankings": [
+        {
+          "model": "anthropic/claude-3.5-sonnet",
+          "duration": 1888,
+          "duration_secs": 1.888
+        },
+        {
+          "model": "qwen/qwq-32b",
+          "duration": 6304,
+          "duration_secs": 6.304
+        }
+      ]
+    },
+    {
+      "test": "multiplication",
+      "rankings": [
+        {
+          "model": "anthropic/claude-3.5-sonnet",
+          "duration": 1291,
+          "duration_secs": 1.291
+        },
+        {
+          "model": "qwen/qwq-32b",
+          "duration": 2225,
+          "duration_secs": 2.225
+        }
+      ]
+    },
+    {
+      "test": "division",
+      "rankings": [
+        {
+          "model": "anthropic/claude-3.5-sonnet",
+          "duration": 1209,
+          "duration_secs": 1.209
+        },
+        {
+          "model": "qwen/qwq-32b",
+          "duration": 7852,
+          "duration_secs": 7.852
+        }
+      ]
+    }
+  ],
+  "lastUpdated": "2025-04-02T11:01:27.792Z"
+}
--- a/packages/kbot/tests/unit/reports/basic.markdown
+++ b/packages/kbot/tests/unit/reports/basic.markdown
@ -3,16 +3,16 @@
 ## Highscores

 ### addition
-1. anthropic/claude-3.5-sonnet: 1278ms (1.28s)
-2. qwen/qwq-32b: 6285ms (6.29s)
+1. anthropic/claude-3.5-sonnet: 1237ms (1.24s)
+2. qwen/qwq-32b: 4298ms (4.30s)

 ### multiplication
-1. anthropic/claude-3.5-sonnet: 615ms (0.61s)
-2. qwen/qwq-32b: 9610ms (9.61s)
+1. anthropic/claude-3.5-sonnet: 1411ms (1.41s)
+2. qwen/qwq-32b: 3122ms (3.12s)

 ### division
-1. anthropic/claude-3.5-sonnet: 1242ms (1.24s)
-2. qwen/qwq-32b: 4040ms (4.04s)
+1. anthropic/claude-3.5-sonnet: 583ms (0.58s)
+2. qwen/qwq-32b: 3615ms (3.62s)

 ## Summary

@ -20,7 +20,7 @@
 - Passed: 6
 - Failed: 0
 - Success Rate: 100.00%
- Average Duration: 3845ms (3.85s)
+- Average Duration: 2378ms (2.38s)

 ## Failed Tests

@ -32,41 +32,41 @@
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 1278ms (1278.00s)
- Timestamp: 4/2/2025, 12:49:09 PM
+- Duration: 1237ms (1.24s)
+- Timestamp: 4/2/2025, 12:56:09 PM

 ### addition - qwen/qwq-32b
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 6285ms (6285.00s)
- Timestamp: 4/2/2025, 12:49:15 PM
+- Duration: 4298ms (4.30s)
+- Timestamp: 4/2/2025, 12:56:13 PM

 ### multiplication - anthropic/claude-3.5-sonnet
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Duration: 615ms (615.00s)
- Timestamp: 4/2/2025, 12:49:16 PM
+- Duration: 1411ms (1.41s)
+- Timestamp: 4/2/2025, 12:56:15 PM

 ### multiplication - qwen/qwq-32b
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Duration: 9610ms (9610.00s)
- Timestamp: 4/2/2025, 12:49:25 PM
+- Duration: 3122ms (3.12s)
+- Timestamp: 4/2/2025, 12:56:18 PM

 ### division - anthropic/claude-3.5-sonnet
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Duration: 1242ms (1242.00s)
- Timestamp: 4/2/2025, 12:49:27 PM
+- Duration: 583ms (0.58s)
+- Timestamp: 4/2/2025, 12:56:18 PM

 ### division - qwen/qwq-32b
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Duration: 4040ms (4040.00s)
- Timestamp: 4/2/2025, 12:49:31 PM
+- Duration: 3615ms (3.62s)
+- Timestamp: 4/2/2025, 12:56:22 PM

--- a/packages/kbot/tests/unit/reports/basic.md
+++ b/packages/kbot/tests/unit/reports/basic.md
@ -0,0 +1,72 @@
+# Basic Operations Test Results
+
+## Highscores
+
+### addition
+1. anthropic/claude-3.5-sonnet: 1888ms (1.89s)
+2. qwen/qwq-32b: 6304ms (6.30s)
+
+### multiplication
+1. anthropic/claude-3.5-sonnet: 1291ms (1.29s)
+2. qwen/qwq-32b: 2225ms (2.23s)
+
+### division
+1. anthropic/claude-3.5-sonnet: 1209ms (1.21s)
+2. qwen/qwq-32b: 7852ms (7.85s)
+
+## Summary
+
+- Total Tests: 6
+- Passed: 6
+- Failed: 0
+- Success Rate: 100.00%
+- Average Duration: 3462ms (3.46s)
+
+## Failed Tests
+
+*No failed tests*
+
+## Passed Tests
+
+### addition - anthropic/claude-3.5-sonnet
+- Prompt: `add 5 and 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Duration: 1888ms (1.89s)
+- Timestamp: 4/2/2025, 1:01:08 PM
+
+### addition - qwen/qwq-32b
+- Prompt: `add 5 and 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Duration: 6304ms (6.30s)
+- Timestamp: 4/2/2025, 1:01:15 PM
+
+### multiplication - anthropic/claude-3.5-sonnet
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: `24`
+- Duration: 1291ms (1.29s)
+- Timestamp: 4/2/2025, 1:01:16 PM
+
+### multiplication - qwen/qwq-32b
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: `24`
+- Duration: 2225ms (2.23s)
+- Timestamp: 4/2/2025, 1:01:18 PM
+
+### division - anthropic/claude-3.5-sonnet
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5`
+- Duration: 1209ms (1.21s)
+- Timestamp: 4/2/2025, 1:01:19 PM
+
+### division - qwen/qwq-32b
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5`
+- Duration: 7852ms (7.85s)
+- Timestamp: 4/2/2025, 1:01:27 PM
+