latest

2025-04-02 12:49:47 +02:00 · 2025-04-02 12:49:47 +02:00 · 856ffe680f
commit 856ffe680f
parent 2afc1a8051
11 changed files with 997 additions and 298 deletions
--- a/packages/kbot/dist-in/zod_schema.js
+++ b/packages/kbot/dist-in/zod_schema.js
--- a/packages/kbot/logs/params.json
+++ b/packages/kbot/logs/params.json
@ -1,9 +1,9 @@
 {
-  "model": "google/gemini-2.0-flash-lite-001",
+  "model": "qwen/qwq-32b",
  "messages": [
    {
      "role": "user",
-      "content": "Generate a random number"
+      "content": "divide 15 by 3. Return only the number, no explanation."
    },
    {
      "role": "user",
--- a/packages/kbot/tests/unit/basic-report.md
+++ b/packages/kbot/tests/unit/basic-report.md
@ -1,5 +1,27 @@
 # Basic Operations Test Results

+## Highscores
+
+### addition
+1. anthropic/claude-3.5-sonnet: 1278ms (1.28s)
+2. qwen/qwq-32b: 6285ms (6.29s)
+
+### multiplication
+1. anthropic/claude-3.5-sonnet: 615ms (0.61s)
+2. qwen/qwq-32b: 9610ms (9.61s)
+
+### division
+1. anthropic/claude-3.5-sonnet: 1242ms (1.24s)
+2. qwen/qwq-32b: 4040ms (4.04s)
+
+## Summary
+
+- Total Tests: 6
+- Passed: 6
+- Failed: 0
+- Success Rate: 100.00%
+- Average Duration: 3845ms (3.85s)
+
 ## Failed Tests

 *No failed tests*
@ -10,48 +32,41 @@
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 1551ms
- Timestamp: 4/2/2025, 12:17:39 AM
+- Duration: 1278ms (1.28s)
+- Timestamp: 4/2/2025, 12:49:09 PM

 ### addition - qwen/qwq-32b
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 3621ms
- Timestamp: 4/2/2025, 12:17:42 AM
+- Duration: 6285ms (6.29s)
+- Timestamp: 4/2/2025, 12:49:15 PM

 ### multiplication - anthropic/claude-3.5-sonnet
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Duration: 873ms
- Timestamp: 4/2/2025, 12:17:43 AM
+- Duration: 615ms (0.61s)
+- Timestamp: 4/2/2025, 12:49:16 PM

 ### multiplication - qwen/qwq-32b
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Duration: 3472ms
- Timestamp: 4/2/2025, 12:17:47 AM
+- Duration: 9610ms (9.61s)
+- Timestamp: 4/2/2025, 12:49:25 PM

 ### division - anthropic/claude-3.5-sonnet
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Duration: 1183ms
- Timestamp: 4/2/2025, 12:17:48 AM
+- Duration: 1242ms (1.24s)
+- Timestamp: 4/2/2025, 12:49:27 PM

 ### division - qwen/qwq-32b
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Duration: 4841ms
- Timestamp: 4/2/2025, 12:17:53 AM
-
-## Summary
-
- Total Tests: 6
- Passed: 6
- Failed: 0
- Success Rate: 100.00%
+- Duration: 4040ms (4.04s)
+- Timestamp: 4/2/2025, 12:49:31 PM

--- a/packages/kbot/tests/unit/basic.json
+++ b/packages/kbot/tests/unit/basic.json
@ -1,80 +1,541 @@
-[
-  {
-    "test": "addition",
-    "prompt": "add 5 and 3. Return only the number, no explanation.",
-    "result": [
-      "8"
-    ],
-    "expected": "8",
-    "model": "anthropic/claude-3.5-sonnet",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T22:17:39.340Z",
-    "passed": true,
-    "duration": 1551
-  },
-  {
-    "test": "addition",
-    "prompt": "add 5 and 3. Return only the number, no explanation.",
-    "result": [
-      "8"
-    ],
-    "expected": "8",
-    "model": "qwen/qwq-32b",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T22:17:42.962Z",
-    "passed": true,
-    "duration": 3621
-  },
-  {
-    "test": "multiplication",
-    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-    "result": [
-      "24"
-    ],
-    "expected": "24",
-    "model": "anthropic/claude-3.5-sonnet",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T22:17:43.836Z",
-    "passed": true,
-    "duration": 873
-  },
-  {
-    "test": "multiplication",
-    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
-    "result": [
-      "24"
-    ],
-    "expected": "24",
-    "model": "qwen/qwq-32b",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T22:17:47.309Z",
-    "passed": true,
-    "duration": 3472
-  },
-  {
-    "test": "division",
-    "prompt": "divide 15 by 3. Return only the number, no explanation.",
-    "result": [
-      "5"
-    ],
-    "expected": "5",
-    "model": "anthropic/claude-3.5-sonnet",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T22:17:48.493Z",
-    "passed": true,
-    "duration": 1183
-  },
-  {
-    "test": "division",
-    "prompt": "divide 15 by 3. Return only the number, no explanation.",
-    "result": [
-      "5"
-    ],
-    "expected": "5",
-    "model": "qwen/qwq-32b",
-    "router": "openrouter",
-    "timestamp": "2025-04-01T22:17:53.335Z",
-    "passed": true,
-    "duration": 4841
-  }
-]
+{
+  "results": [
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-01T22:17:39.340Z",
+      "passed": true,
+      "duration": 1551
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-01T22:17:42.962Z",
+      "passed": true,
+      "duration": 3621
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-01T22:17:43.836Z",
+      "passed": true,
+      "duration": 873
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-01T22:17:47.309Z",
+      "passed": true,
+      "duration": 3472
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-01T22:17:48.493Z",
+      "passed": true,
+      "duration": 1183
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-01T22:17:53.335Z",
+      "passed": true,
+      "duration": 4841
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:38:37.069Z",
+      "passed": true,
+      "duration": 1256
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:38:40.167Z",
+      "passed": false,
+      "duration": 3096,
+      "error": {
+        "message": "Model returned empty response",
+        "code": "UNKNOWN",
+        "type": "Error",
+        "details": {
+          "stack": "Error: Model returned empty response\n    at Module.runTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:85:13)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:21:20)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n    at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n    at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n    at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n    at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
+          "message": "Model returned empty response"
+        }
+      },
+      "reason": "Model returned empty response"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:38:41.086Z",
+      "passed": true,
+      "duration": 916
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:38:45.879Z",
+      "passed": true,
+      "duration": 4793
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:38:46.900Z",
+      "passed": true,
+      "duration": 1020
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:38:50.446Z",
+      "passed": true,
+      "duration": 3545
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:39:58.836Z",
+      "passed": true,
+      "duration": 1266
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:40:02.777Z",
+      "passed": true,
+      "duration": 3939
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:40:03.961Z",
+      "passed": true,
+      "duration": 1183
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:40:06.962Z",
+      "passed": true,
+      "duration": 3000
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:40:08.115Z",
+      "passed": true,
+      "duration": 1152
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:40:12.565Z",
+      "passed": true,
+      "duration": 4449
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:41:52.176Z",
+      "passed": true,
+      "duration": 1458
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:41:55.869Z",
+      "passed": true,
+      "duration": 3691
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:41:57.106Z",
+      "passed": true,
+      "duration": 1236
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:41:59.974Z",
+      "passed": true,
+      "duration": 2867
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:42:01.272Z",
+      "passed": true,
+      "duration": 1297
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:42:04.326Z",
+      "passed": true,
+      "duration": 3053
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:44:39.002Z",
+      "passed": true,
+      "duration": 1196
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:44:48.668Z",
+      "passed": true,
+      "duration": 9664
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:44:49.806Z",
+      "passed": true,
+      "duration": 1137
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:44:53.017Z",
+      "passed": true,
+      "duration": 3210
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:44:53.814Z",
+      "passed": true,
+      "duration": 796
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:44:55.383Z",
+      "passed": true,
+      "duration": 1568
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:47:00.955Z",
+      "passed": true,
+      "duration": 1297
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:49:09.343Z",
+      "passed": true,
+      "duration": 1278
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:49:15.630Z",
+      "passed": true,
+      "duration": 6285
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:49:16.246Z",
+      "passed": true,
+      "duration": 615
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:49:25.857Z",
+      "passed": true,
+      "duration": 9610
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "anthropic/claude-3.5-sonnet",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:49:27.101Z",
+      "passed": true,
+      "duration": 1242
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "qwen/qwq-32b",
+      "router": "openrouter",
+      "timestamp": "2025-04-02T10:49:31.142Z",
+      "passed": true,
+      "duration": 4040
+    }
+  ],
+  "highscores": [
+    {
+      "test": "addition",
+      "rankings": [
+        {
+          "model": "anthropic/claude-3.5-sonnet",
+          "duration": 1278,
+          "duration_secs": 1.278
+        },
+        {
+          "model": "qwen/qwq-32b",
+          "duration": 6285,
+          "duration_secs": 6.285
+        }
+      ]
+    },
+    {
+      "test": "multiplication",
+      "rankings": [
+        {
+          "model": "anthropic/claude-3.5-sonnet",
+          "duration": 615,
+          "duration_secs": 0.615
+        },
+        {
+          "model": "qwen/qwq-32b",
+          "duration": 9610,
+          "duration_secs": 9.61
+        }
+      ]
+    },
+    {
+      "test": "division",
+      "rankings": [
+        {
+          "model": "anthropic/claude-3.5-sonnet",
+          "duration": 1242,
+          "duration_secs": 1.242
+        },
+        {
+          "model": "qwen/qwq-32b",
+          "duration": 4040,
+          "duration_secs": 4.04
+        }
+      ]
+    }
+  ],
+  "lastUpdated": "2025-04-02T10:49:31.142Z"
+}
--- a/packages/kbot/tests/unit/basic.test.ts
+++ b/packages/kbot/tests/unit/basic.test.ts
@ -1,8 +1,5 @@
 import { describe, it, expect } from 'vitest'
-import { run } from '../../src/index'
 import * as path from 'node:path'
-import { sync as write } from "@polymech/fs/write"
-import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
 import { 
  models, 
@ -11,200 +8,54 @@ import {
  TEST_PREFERENCES_PATH, 
  TEST_TIMEOUT,
  TestResult,
-  formatError,
-  isEmptyResponse
+  runTest,
+  generateTestReport
 } from './commons'

 const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')

 describe('Basic Operations', () => {
  let testResults: TestResult[] = []
-  
-
-  const runBasicTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
-    let model = 'unknown'
-    let router = 'unknown'
-    let startTime = Date.now()
-    let error: TestResult['error'] | undefined
-    let testResult: TestResult | undefined
-    
-    try {
-      const result = await Promise.race([
-        run({
-          prompt,
-          mode: 'completion',
-          model: modelName,
-          path: TEST_BASE_PATH,
-          logs: TEST_LOGS_PATH,
-          preferences: TEST_PREFERENCES_PATH,
-          onRun: async (options) => {
-            model = options.model || 'unknown'
-            router = options.router || 'unknown'
-            return options
-          }
-        }),
-        new Promise((_, reject) => 
-          setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
-        )
-      ]) as string[]
-
-      if (isEmptyResponse(result)) {
-        throw new Error('Model returned empty response')
-      }
-
-      const actual = result?.[0]?.trim()?.toLowerCase() || ''
-      const passed = actual === expected
-      
-      expect(actual).toEqual(expected)
-
-      testResult = {
-        test: testName,
-        prompt,
-        result: result || [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed,
-        duration: Date.now() - startTime,
-        reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
-      }
-    } catch (e) {
-      error = formatError(e)
-      testResult = {
-        test: testName,
-        prompt,
-        result: [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed: false,
-        duration: Date.now() - startTime,
-        error,
-        reason: error?.message || 'Unknown error occurred'
-      }
-      throw e
-    } finally {
-      if (testResult) {
-        testResults.push(testResult)
-        write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-      }
-    }
-  }

  it.each(models)('should add two numbers with model %s', async (modelName) => {
-    await runBasicTest(
+    const result = await runTest(
      'add 5 and 3. Return only the number, no explanation.',
      '8',
      'addition',
-      modelName
+      modelName,
+      TEST_LOG_PATH
    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8')
  }, { timeout: 10000 })

  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
-    await runBasicTest(
+    const result = await runTest(
      'multiply 8 and 3. Return only the number, no explanation.',
      '24',
      'multiplication',
-      modelName
+      modelName,
+      TEST_LOG_PATH
    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('24')
  }, { timeout: 10000 })

  it.each(models)('should divide two numbers with model %s', async (modelName) => {
-    await runBasicTest(
+    const result = await runTest(
      'divide 15 by 3. Return only the number, no explanation.',
      '5',
      'division',
-      modelName
+      modelName,
+      TEST_LOG_PATH
    )
+    testResults.push(result)
+    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('5')
  }, { timeout: 10000 })

  it('should generate markdown report', () => {
-    // Group results by test and model
-    const latestResults = new Map<string, Map<string, TestResult>>()
-    
-    // Get only the latest result for each test+model combination
-    testResults.forEach(result => {
-      if (!latestResults.has(result.test)) {
-        latestResults.set(result.test, new Map())
-      }
-      const testMap = latestResults.get(result.test)!
-      const existingResult = testMap.get(result.model)
-      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
-        testMap.set(result.model, result)
-      }
-    })
-
-    // Generate markdown report
-    let report = '# Basic Operations Test Results\n\n'
-    
-    // First list failed tests
-    report += '## Failed Tests\n\n'
-    let hasFailures = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (!result.passed) {
-          hasFailures = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          if (result.error) {
-            report += `- Error Type: ${result.error.type}\n`
-            report += `- Error Code: ${result.error.code}\n`
-            report += `- Error Message: ${result.error.message}\n`
-            if (result.error.details?.message) {
-              report += `- Error Details: ${result.error.details.message}\n`
-            }
-          }
-          report += `- Reason: ${result.reason}\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasFailures) {
-      report += '*No failed tests*\n\n'
-    }
-
-    // Then list passed tests
-    report += '## Passed Tests\n\n'
-    let hasPassed = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (result.passed) {
-          hasPassed = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${result.expected}\`\n`
-          report += `- Actual: \`${result.result[0] || ''}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasPassed) {
-      report += '*No passed tests*\n\n'
-    }
-
-    // Add summary section
-    report += '## Summary\n\n'
-    const totalTests = testResults.length
-    const passedTests = testResults.filter(r => r.passed).length
-    const failedTests = totalTests - passedTests
-    report += `- Total Tests: ${totalTests}\n`
-    report += `- Passed: ${passedTests}\n`
-    report += `- Failed: ${failedTests}\n`
-    report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n\n`
-
-    // Write report to file
    const reportPath = path.resolve(__dirname, './basic-report.md')
-    write(reportPath, report)
-
-    // Verify report was written
+    generateTestReport(testResults, 'Basic Operations Test Results', reportPath)
    expect(exists(reportPath) === 'file').toBe(true)
  })
 }) 
--- a/packages/kbot/tests/unit/commons.ts
+++ b/packages/kbot/tests/unit/commons.ts
@ -1,5 +1,9 @@
 import * as path from 'node:path'
 import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL, E_OPENROUTER_MODEL } from '../../src/index'
+import { run } from '../../src/index'
+import { sync as write } from "@polymech/fs/write"
+import { sync as read } from "@polymech/fs/read"
+import { sync as exists } from "@polymech/fs/exists"

 export const models = [
    //E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, 
@ -31,6 +35,15 @@ export interface TestResult {
  duration?: number
 }

+export interface TestHighscore {
+  test: string;
+  rankings: {
+    model: string;
+    duration: number;
+    duration_secs: number;
+  }[];
+}
+
 export const formatError = (error: any): TestResult['error'] => {
  return {
    message: error?.message || 'Unknown error',
@ -43,3 +56,251 @@ export const formatError = (error: any): TestResult['error'] => {
 export const isEmptyResponse = (result: string[] | null | undefined): boolean => {
  return !result || result.length === 0 || result.every(r => !r || r.trim() === '')
 }
+
+/**
+ * Build the per-test speed ranking from the latest result of every model.
+ *
+ * @param latestResults Map of test name -> (model name -> latest result for that model).
+ * @param limit Maximum number of ranked entries kept per test. Defaults to 2; negative
+ *   values are treated as 0.
+ * @returns One entry per test that has at least one ranked result, with rankings sorted
+ *   by ascending duration.
+ */
+export const generateHighscores = (
+  latestResults: Map<string, Map<string, TestResult>>,
+  limit: number = 2
+): TestHighscore[] => {
+  const highscores: TestHighscore[] = []
+  // Clamp so a negative limit cannot turn into "drop the last N entries" inside slice().
+  const maxEntries = Math.max(0, limit)
+
+  for (const [testName, modelResults] of latestResults) {
+    // A result without a duration is ranked as 0ms, i.e. it sorts first.
+    // NOTE(review): failed results are ranked too; confirm that is intended for a highscore.
+    const rankings = Array.from(modelResults.entries())
+      .map(([model, result]) => {
+        const duration = result.duration || 0
+        return { model, duration, duration_secs: duration / 1000 }
+      })
+      .sort((a, b) => a.duration - b.duration)
+      .slice(0, maxEntries)
+
+    if (rankings.length > 0) {
+      highscores.push({ test: testName, rankings })
+    }
+  }
+
+  return highscores
+}
+
+/**
+ * Run one prompt against a model, compare the first response with `expected`, and
+ * append the outcome to the JSON log at `logPath`.
+ *
+ * The call to `run` is raced against a timer of TEST_TIMEOUT milliseconds. Any error
+ * (including the timeout) is recorded as a failed result and then rethrown.
+ *
+ * @param prompt Prompt text sent to the model.
+ * @param expected Expected answer; compared case-insensitively after trimming.
+ * @param testName Name under which the result is recorded.
+ * @param modelName Model identifier handed to `run`.
+ * @param logPath JSON file that receives the accumulated results and highscores.
+ * @returns The recorded result when the call completes, whether it passed or not.
+ * @throws Rethrows whatever `run` or the timeout rejected with.
+ */
+export const runTest = async (
+  prompt: string,
+  expected: string,
+  testName: string,
+  modelName: string,
+  logPath: string
+): Promise<TestResult> => {
+  let model = 'unknown'
+  let router = 'unknown'
+  const startTime = Date.now()
+  let testResult: TestResult | undefined
+  let timeoutHandle: ReturnType<typeof setTimeout> | undefined
+
+  // Single place that assembles a result. Key order matches the existing log files so
+  // regenerated logs do not produce spurious diffs. model/router are read at call time
+  // because they are only known after `onRun` has fired.
+  const buildResult = (
+    result: string[],
+    passed: boolean,
+    reason?: string,
+    error?: TestResult['error']
+  ): TestResult => ({
+    test: testName,
+    prompt,
+    result,
+    expected,
+    model,
+    router,
+    timestamp: new Date().toISOString(),
+    passed,
+    duration: Date.now() - startTime,
+    ...(error ? { error } : {}),
+    reason
+  })
+
+  try {
+    const result = await Promise.race([
+      run({
+        prompt,
+        mode: 'completion',
+        model: modelName,
+        path: TEST_BASE_PATH,
+        logs: TEST_LOGS_PATH,
+        preferences: TEST_PREFERENCES_PATH,
+        onRun: async (options) => {
+          model = options.model || 'unknown'
+          router = options.router || 'unknown'
+          return options
+        }
+      }),
+      new Promise((_, reject) => {
+        // Keep the handle so the timer can be cancelled once the race is decided.
+        timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
+      })
+    ]) as string[]
+
+    if (isEmptyResponse(result)) {
+      testResult = buildResult([], false, 'Model returned empty response')
+    } else {
+      const actual = result?.[0]?.trim()?.toLowerCase() || ''
+      // Normalize both sides the same way; otherwise an `expected` containing upper-case
+      // letters or surrounding whitespace could never match the lower-cased answer.
+      const passed = actual === expected.trim().toLowerCase()
+      testResult = buildResult(
+        result || [],
+        passed,
+        passed ? undefined : `Expected ${expected}, but got ${actual}`
+      )
+    }
+  } catch (e) {
+    const error = formatError(e)
+    testResult = buildResult([], false, error?.message || 'Unknown error occurred', error)
+    throw e
+  } finally {
+    // Without this the losing timer stays pending for up to TEST_TIMEOUT after every call.
+    clearTimeout(timeoutHandle)
+
+    if (testResult) {
+      const existingData = exists(logPath) === 'file' ? JSON.parse(read(logPath)) : { results: [], highscores: [] }
+      // Accept both log shapes: `{ results: [...] }` and a bare array of results
+      // (the shape of format.json / language.json), so earlier history is not dropped.
+      const previousResults: TestResult[] = Array.isArray(existingData)
+        ? existingData
+        : (existingData?.results || [])
+      const updatedResults = [...previousResults, testResult]
+
+      // Keep only the latest result for each test+model combination
+      const latestResults = new Map<string, Map<string, TestResult>>()
+      updatedResults.forEach(result => {
+        if (!latestResults.has(result.test)) {
+          latestResults.set(result.test, new Map())
+        }
+        const testMap = latestResults.get(result.test)!
+        const existingResult = testMap.get(result.model)
+        if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
+          testMap.set(result.model, result)
+        }
+      })
+
+      // Generate highscores
+      const highscores = generateHighscores(latestResults)
+
+      // Write both results and highscores
+      write(logPath, JSON.stringify({
+        results: updatedResults,
+        highscores,
+        lastUpdated: new Date().toISOString()
+      }, null, 2))
+    }
+  }
+  return testResult
+}
+
+/**
+ * Render a markdown report for a set of test results and write it to `reportPath`.
+ *
+ * Sections, in order: highscores, summary, failed tests, passed tests. The highscore,
+ * failed and passed sections use only the latest result (by timestamp) per test/model
+ * pair; the summary is computed over every entry in `testResults`.
+ *
+ * @param testResults All recorded results, possibly several per test/model pair.
+ * @param reportTitle Text of the top-level heading.
+ * @param reportPath Destination file for the generated markdown.
+ */
+export const generateTestReport = (
+  testResults: TestResult[],
+  reportTitle: string,
+  reportPath: string
+): void => {
+  // Formats `<ms>ms (<seconds>s)`. The division is applied to the defaulted value;
+  // the previous inline form `duration || 0 / 1000` printed milliseconds as seconds.
+  const formatDuration = (durationMs?: number): string => {
+    const ms = durationMs || 0
+    return `${ms}ms (${(ms / 1000).toFixed(2)}s)`
+  }
+
+  // Keep only the latest result for each test+model combination
+  const latestResults = new Map<string, Map<string, TestResult>>()
+  for (const result of testResults) {
+    let testMap = latestResults.get(result.test)
+    if (!testMap) {
+      testMap = new Map<string, TestResult>()
+      latestResults.set(result.test, testMap)
+    }
+    const existingResult = testMap.get(result.model)
+    if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
+      testMap.set(result.model, result)
+    }
+  }
+
+  // Renders the detail entries for either the failed or the passed latest results.
+  // Returns an empty string when no result matches.
+  const renderSection = (wantPassed: boolean): string => {
+    let section = ''
+    for (const [testName, modelResults] of latestResults) {
+      for (const [model, result] of modelResults) {
+        if (Boolean(result.passed) !== wantPassed) {
+          continue
+        }
+        section += `### ${testName} - ${model}\n`
+        section += `- Prompt: \`${result.prompt}\`\n`
+        section += `- Expected: \`${result.expected}\`\n`
+        section += `- Actual: \`${result.result[0] || ''}\`\n`
+        section += `- Duration: ${formatDuration(result.duration)}\n`
+        if (!wantPassed) {
+          if (result.error) {
+            section += `- Error Type: ${result.error.type}\n`
+            section += `- Error Code: ${result.error.code}\n`
+            section += `- Error Message: ${result.error.message}\n`
+            if (result.error.details?.message) {
+              section += `- Error Details: ${result.error.details.message}\n`
+            }
+          }
+          section += `- Reason: ${result.reason}\n`
+        }
+        section += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
+      }
+    }
+    return section
+  }
+
+  // Generate markdown report
+  let report = `# ${reportTitle}\n\n`
+
+  // Highscore section: reuse the shared ranking helper instead of duplicating its logic
+  report += '## Highscores\n\n'
+  for (const { test, rankings } of generateHighscores(latestResults)) {
+    report += `### ${test}\n`
+    rankings.forEach((ranking, index) => {
+      report += `${index + 1}. ${ranking.model}: ${formatDuration(ranking.duration)}\n`
+    })
+    report += '\n'
+  }
+
+  // Summary section
+  report += '## Summary\n\n'
+  const totalTests = testResults.length
+  const passedTests = testResults.filter(r => r.passed).length
+  const failedTests = totalTests - passedTests
+  const totalDuration = testResults.reduce((sum, r) => sum + (r.duration || 0), 0)
+  // Guard the ratios so an empty run reports 0 instead of NaN.
+  const avgDuration = totalTests > 0 ? totalDuration / totalTests : 0
+  const successRate = totalTests > 0 ? (passedTests / totalTests) * 100 : 0
+  report += `- Total Tests: ${totalTests}\n`
+  report += `- Passed: ${passedTests}\n`
+  report += `- Failed: ${failedTests}\n`
+  report += `- Success Rate: ${successRate.toFixed(2)}%\n`
+  report += `- Average Duration: ${avgDuration.toFixed(0)}ms (${(avgDuration / 1000).toFixed(2)}s)\n\n`
+
+  // First list failed tests
+  report += '## Failed Tests\n\n'
+  report += renderSection(false) || '*No failed tests*\n\n'
+
+  // Then list passed tests
+  report += '## Passed Tests\n\n'
+  report += renderSection(true) || '*No passed tests*\n\n'
+
+  // Write report to file
+  write(reportPath, report)
+}
--- a/packages/kbot/tests/unit/format-report.md
+++ b/packages/kbot/tests/unit/format-report.md
@ -6,21 +6,14 @@

 ## Passed Tests

-### json-schema-file-format - google/gemini-2.0-flash-lite-001
+### json-schema-file-format - mistralai/mistral-tiny
 - Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
 - Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
- Actual: ````json
-{
-  "name": "John Doe",
-  "age": 30,
-  "tags": ["developer", "javascript"]
-}
-```
-`
- Duration: 1122ms
- Timestamp: 4/2/2025, 9:03:33 AM
+- Actual: `{"name": "John Doe", "age": 30, "tags": ["developer", "javascript"]}`
+- Duration: 982ms
+- Timestamp: 4/2/2025, 10:26:33 AM

-### json-schema-object-format - google/gemini-2.0-flash-lite-001
+### json-schema-object-format - mistralai/mistral-tiny
 - Prompt: `Create a user profile with the following details:
    - Name: Jane Smith
    - Age: 25
@ -39,8 +32,7 @@
      }
    Return only the JSON object, no explanation.`
 - Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","postal_code":"10001"},"preferences":{"theme":"light","notifications":"enabled","language":"English"}}`
- Actual: ````json
-{
+- Actual: `{
  "name": "Jane Smith",
  "age": 25,
  "email": "jane.smith@company.com",
@ -51,16 +43,14 @@
    "country": "US",
    "postal_code": "10001"
  },
-  "preferences": {
+  "Preferences": {
    "theme": "light",
    "notifications": "enabled",
    "language": "English"
  }
-}
-```
-`
- Duration: 1289ms
- Timestamp: 4/2/2025, 9:03:34 AM
+}`
+- Duration: 1673ms
+- Timestamp: 4/2/2025, 10:26:35 AM

 ## Summary

--- a/packages/kbot/tests/unit/format.json
+++ b/packages/kbot/tests/unit/format.json
@ -3,26 +3,26 @@
    "test": "json-schema-file-format",
    "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
    "result": [
-      "```json\n{\n  \"name\": \"John Doe\",\n  \"age\": 30,\n  \"tags\": [\"developer\", \"javascript\"]\n}\n```\n"
+      "{\"name\": \"John Doe\", \"age\": 30, \"tags\": [\"developer\", \"javascript\"]}"
    ],
    "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
-    "model": "google/gemini-2.0-flash-lite-001",
+    "model": "mistralai/mistral-tiny",
    "router": "openrouter",
-    "timestamp": "2025-04-02T07:03:33.038Z",
+    "timestamp": "2025-04-02T08:26:33.513Z",
    "passed": true,
-    "duration": 1122
+    "duration": 982
  },
  {
    "test": "json-schema-object-format",
    "prompt": "Create a user profile with the following details:\n    - Name: Jane Smith\n    - Age: 25\n    - Email: jane.smith@company.com\n    - Tags: [\"developer\", \"designer\"]\n    - Address: {\n        \"street\": \"123 Main St\",\n        \"city\": \"New York\",\n        \"country\": \"US\",\n        \"postal_code\": \"10001\"\n      }\n    - Preferences: {\n        \"theme\": \"light\",\n        \"notifications\": \"enabled\",\n        \"language\": \"English\"\n      }\n    Return only the JSON object, no explanation.",
    "result": [
-      "```json\n{\n  \"name\": \"Jane Smith\",\n  \"age\": 25,\n  \"email\": \"jane.smith@company.com\",\n  \"tags\": [\"developer\", \"designer\"],\n  \"address\": {\n    \"street\": \"123 Main St\",\n    \"city\": \"New York\",\n    \"country\": \"US\",\n    \"postal_code\": \"10001\"\n  },\n  \"preferences\": {\n    \"theme\": \"light\",\n    \"notifications\": \"enabled\",\n    \"language\": \"English\"\n  }\n}\n```\n"
+      "{\n  \"name\": \"Jane Smith\",\n  \"age\": 25,\n  \"email\": \"jane.smith@company.com\",\n  \"tags\": [\"developer\", \"designer\"],\n  \"address\": {\n    \"street\": \"123 Main St\",\n    \"city\": \"New York\",\n    \"country\": \"US\",\n    \"postal_code\": \"10001\"\n  },\n  \"Preferences\": {\n    \"theme\": \"light\",\n    \"notifications\": \"enabled\",\n    \"language\": \"English\"\n  }\n}"
    ],
    "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
-    "model": "google/gemini-2.0-flash-lite-001",
+    "model": "mistralai/mistral-tiny",
    "router": "openrouter",
-    "timestamp": "2025-04-02T07:03:34.328Z",
+    "timestamp": "2025-04-02T08:26:35.187Z",
    "passed": true,
-    "duration": 1289
+    "duration": 1673
  }
 ]
--- a/packages/kbot/tests/unit/format.test.ts
+++ b/packages/kbot/tests/unit/format.test.ts
@ -17,7 +17,8 @@ import {

 const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
 const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
-const TEST_MODEL = 'google/gemini-2.0-flash-lite-001'
+const TEST_MODEL_FAST = 'mistralai/codestral-2501'
+const TEST_MODEL = 'mistralai/mistral-tiny'
 const TEST_ROUTER = 'openrouter'

 // Sample JSON Schema for testing
--- a/packages/kbot/tests/unit/language-report.md
+++ b/packages/kbot/tests/unit/language-report.md
@ -40,6 +40,22 @@
 - Reason: Unknown error occurred
 - Timestamp: 4/1/2025, 11:53:55 PM

+### german - anthropic/claude-3.5-sonnet
+- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
+- Expected: `hallo`
+- Actual: ``
+- Duration: 1753ms
+- Reason: Unknown error occurred
+- Timestamp: 4/2/2025, 10:29:45 AM
+
+### german - qwen/qwq-32b
+- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
+- Expected: `hallo`
+- Actual: ``
+- Duration: 13757ms
+- Reason: Unknown error occurred
+- Timestamp: 4/2/2025, 10:29:59 AM
+
 ### spanish - deepseek/deepseek-chat:free
 - Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
 - Expected: `sí`
@ -78,6 +94,22 @@
 - Reason: Unknown error occurred
 - Timestamp: 4/1/2025, 11:53:56 PM

+### spanish - anthropic/claude-3.5-sonnet
+- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
+- Expected: `sí`
+- Actual: ``
+- Duration: 1242ms
+- Reason: Unknown error occurred
+- Timestamp: 4/2/2025, 10:29:51 AM
+
+### spanish - qwen/qwq-32b
+- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
+- Expected: `sí`
+- Actual: ``
+- Duration: 5925ms
+- Reason: Unknown error occurred
+- Timestamp: 4/2/2025, 10:29:57 AM
+
 ### french - deepseek/deepseek-chat:free
 - Prompt: `translate "no" to French. Return only the translated word, no explanation.`
 - Expected: `non`
@ -116,6 +148,22 @@
 - Reason: Unknown error occurred
 - Timestamp: 4/1/2025, 11:53:57 PM

+### french - anthropic/claude-3.5-sonnet
+- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
+- Expected: `non`
+- Actual: ``
+- Duration: 2656ms
+- Reason: Unknown error occurred
+- Timestamp: 4/2/2025, 10:29:59 AM
+
+### french - qwen/qwq-32b
+- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
+- Expected: `non`
+- Actual: ``
+- Duration: 4063ms
+- Reason: Unknown error occurred
+- Timestamp: 4/2/2025, 10:30:03 AM
+
 ## Passed Tests

 ### german_translation - deepseek/deepseek-chat:free
--- a/packages/kbot/tests/unit/language.json
+++ b/packages/kbot/tests/unit/language.json
@ -843,5 +843,77 @@
    "passed": false,
    "duration": 968,
    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "german",
+    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "hallo",
+    "model": "anthropic/claude-3.5-sonnet",
+    "router": "openrouter",
+    "timestamp": "2025-04-02T08:29:45.316Z",
+    "passed": false,
+    "duration": 1753,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "spanish",
+    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "sí",
+    "model": "anthropic/claude-3.5-sonnet",
+    "router": "openrouter",
+    "timestamp": "2025-04-02T08:29:51.577Z",
+    "passed": false,
+    "duration": 1242,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "spanish",
+    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "sí",
+    "model": "qwen/qwq-32b",
+    "router": "openrouter",
+    "timestamp": "2025-04-02T08:29:57.503Z",
+    "passed": false,
+    "duration": 5925,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "german",
+    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "hallo",
+    "model": "qwen/qwq-32b",
+    "router": "openrouter",
+    "timestamp": "2025-04-02T08:29:59.075Z",
+    "passed": false,
+    "duration": 13757,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "french",
+    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "non",
+    "model": "anthropic/claude-3.5-sonnet",
+    "router": "openrouter",
+    "timestamp": "2025-04-02T08:29:59.249Z",
+    "passed": false,
+    "duration": 2656,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "french",
+    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "non",
+    "model": "qwen/qwq-32b",
+    "router": "openrouter",
+    "timestamp": "2025-04-02T08:30:03.313Z",
+    "passed": false,
+    "duration": 4063,
+    "reason": "Unknown error occurred"
  }
 ]