tests:response_format

2025-04-02 00:34:47 +02:00 · 2025-04-02 00:34:47 +02:00 · 20cb6861af
commit 20cb6861af
parent 2eac7ecda1
17 changed files with 1865 additions and 4165 deletions
--- a/packages/kbot/dist-in/commands/run-completion.js
+++ b/packages/kbot/dist-in/commands/run-completion.js
@ -35,6 +35,7 @@ export const runCompletion = async (client, params, options) => {
        logger.info('Dry run - skipping API call');
        return false;
    }
+    // await client.beta.chat.completions.parse
    const completion = await client.chat.completions.create({
        model: options.model,
        messages: params.messages,
@ -44,4 +45,4 @@ export const runCompletion = async (client, params, options) => {
    result = await onCompletion(result, options);
    return result;
 };
-//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicnVuLWNvbXBsZXRpb24uanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvY29tbWFuZHMvcnVuLWNvbXBsZXRpb24udHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQ0EsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLFFBQVEsQ0FBQTtBQUMvQixPQUFPLEVBQUUsY0FBYyxFQUFFLE1BQU0saUJBQWlCLENBQUE7QUFDaEQsT0FBTyxLQUFLLElBQUksTUFBTSxXQUFXLENBQUE7QUFDakMsT0FBTyxFQUFFLElBQUksSUFBSSxLQUFLLEVBQUUsTUFBTSxvQkFBb0IsQ0FBQTtBQUNsRCxPQUFPLEVBQUUsT0FBTyxFQUFFLE1BQU0sbUJBQW1CLENBQUE7QUFJM0MsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLGFBQWEsQ0FBQTtBQUNwQyxPQUFPLEVBQUUsWUFBWSxFQUFFLE1BQU0sb0JBQW9CLENBQUE7QUFDakQsT0FBTyxFQUFFLFlBQVksRUFBVSxNQUFNLGVBQWUsQ0FBQTtBQUNwRCxPQUFPLEVBQUUsU0FBUyxFQUFFLE1BQU0saUJBQWlCLENBQUE7QUFHM0MsTUFBTSxDQUFDLE1BQU0sWUFBWSxHQUFHLEtBQUssRUFBRSxTQUFjLEVBQUUsRUFBRSxPQUFrQixFQUFFLEVBQUU7SUFDekUsTUFBTSxHQUFHLFlBQVksQ0FBQyxNQUFNLEVBQUUsT0FBTyxDQUFDLE9BQW1CLElBQUksRUFBRSxDQUFDLENBQUE7SUFDaEUsTUFBTSxJQUFJLEdBQUcsU0FBUyxDQUFDLE9BQU8sQ0FBQyxDQUFBO0lBQy9CLElBQUksT0FBTyxDQUFDLEdBQUcsRUFBRSxDQUFDO1FBQ2hCLE1BQU0sT0FBTyxHQUFHLElBQUksQ0FBQyxPQUFPLENBQUMsT0FBTyxDQUFDLE9BQU8sQ0FBQyxHQUFHLEVBQUUsS0FBSyxFQUFFO1lBQ3ZELEdBQUcsSUFBSTtZQUNQLEtBQUssRUFBRSxJQUFJLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxLQUFLLENBQUMsQ0FBQyxJQUFJO1lBQ3JDLE1BQU0sRUFBRSxPQUFPLENBQUMsTUFBTTtTQUN2QixDQUFDLENBQUMsQ0FBQTtRQUNILEtBQUssQ0FBQyxPQUFPLEVBQUUsTUFBTSxDQUFDLENBQUE7UUFDdEIsTUFBTSxDQUFDLEtBQUssQ0FBQyw4QkFBOEIsT0FBTyxNQUFNLE9BQU8sQ0FBQyxHQUFHLEVBQUUsQ0FBQyxDQUFBO0lBQ3hFLENBQUM7U0FBTSxDQUFDO1FBQ04sTUFBTSxDQUFDLEdBQUcsQ0FBQyxjQUFjLENBQUM7WUFDeEIsS0FBSyxFQUFFLEtBQUs7U0FDYixDQUFDLENBQUMsQ0FBQTtRQUNILE1BQU0sT0FBTyxHQUFXLE1BQU0sQ0FBQyxNQUFNLENBQVcsQ0FBQztRQUNqRCxPQUFPLENBQUMsTUFBTSxDQUFDLEtBQUssQ0FBQyxPQUFPLENBQUMsQ0FBQTtJQUMvQixDQUFDO0lBQ0QsWUFBWSxDQUFDLE9BQU8sQ0FBQyxDQUFBO0lBQ3JCLGtCQUFrQjtJQUNsQixPQUFPLE1BQU0sQ0FBQTtBQUNmLENBQUMsQ0FBQTtBQUVELE1BQU0sQ0FBQyxNQUFNLGFBQWEsR0FBRyxLQUFLLEVBQUUsTUFBYyxFQUFFLE1BQVcsRUFBRSxPQUFrQixFQUFFLEVBQUU7SUFDckYsSUFBSSxPQUFPLENBQUMsR0FBRyxFQUFFLENBQUM7U
UFDaEIsTUFBTSxDQUFDLElBQUksQ0FBQyw2QkFBNkIsQ0FBQyxDQUFBO1FBQzFDLE9BQU8sS0FBSyxDQUFBO0lBQ2QsQ0FBQztJQUNELE1BQU0sVUFBVSxHQUFHLE1BQU0sTUFBTSxDQUFDLElBQUksQ0FBQyxXQUFXLENBQUMsTUFBTSxDQUFDO1FBQ3RELEtBQUssRUFBRSxPQUFPLENBQUMsS0FBSztRQUNwQixRQUFRLEVBQUUsTUFBTSxDQUFDLFFBQVE7UUFDekIsZUFBZSxFQUFFLE9BQU8sQ0FBQyxNQUFhO0tBQ3ZDLENBQUMsQ0FBQTtJQUNGLElBQUksTUFBTSxHQUFHLFVBQVUsQ0FBQyxPQUFPLENBQUMsQ0FBQyxDQUFDLENBQUMsT0FBTyxDQUFDLE9BQU8sQ0FBQTtJQUNsRCxNQUFNLEdBQUcsTUFBTSxZQUFZLENBQUMsTUFBTSxFQUFFLE9BQU8sQ0FBQyxDQUFBO0lBQzVDLE9BQU8sTUFBTSxDQUFBO0FBQ2YsQ0FBQyxDQUFBIn0=
+//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicnVuLWNvbXBsZXRpb24uanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvY29tbWFuZHMvcnVuLWNvbXBsZXRpb24udHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQ0EsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLFFBQVEsQ0FBQTtBQUMvQixPQUFPLEVBQUUsY0FBYyxFQUFFLE1BQU0saUJBQWlCLENBQUE7QUFDaEQsT0FBTyxLQUFLLElBQUksTUFBTSxXQUFXLENBQUE7QUFDakMsT0FBTyxFQUFFLElBQUksSUFBSSxLQUFLLEVBQUUsTUFBTSxvQkFBb0IsQ0FBQTtBQUNsRCxPQUFPLEVBQUUsT0FBTyxFQUFFLE1BQU0sbUJBQW1CLENBQUE7QUFJM0MsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLGFBQWEsQ0FBQTtBQUNwQyxPQUFPLEVBQUUsWUFBWSxFQUFFLE1BQU0sb0JBQW9CLENBQUE7QUFDakQsT0FBTyxFQUFFLFlBQVksRUFBVSxNQUFNLGVBQWUsQ0FBQTtBQUNwRCxPQUFPLEVBQUUsU0FBUyxFQUFFLE1BQU0saUJBQWlCLENBQUE7QUFHM0MsTUFBTSxDQUFDLE1BQU0sWUFBWSxHQUFHLEtBQUssRUFBRSxTQUFjLEVBQUUsRUFBRSxPQUFrQixFQUFFLEVBQUU7SUFDekUsTUFBTSxHQUFHLFlBQVksQ0FBQyxNQUFNLEVBQUUsT0FBTyxDQUFDLE9BQW1CLElBQUksRUFBRSxDQUFDLENBQUE7SUFDaEUsTUFBTSxJQUFJLEdBQUcsU0FBUyxDQUFDLE9BQU8sQ0FBQyxDQUFBO0lBQy9CLElBQUksT0FBTyxDQUFDLEdBQUcsRUFBRSxDQUFDO1FBQ2hCLE1BQU0sT0FBTyxHQUFHLElBQUksQ0FBQyxPQUFPLENBQUMsT0FBTyxDQUFDLE9BQU8sQ0FBQyxHQUFHLEVBQUUsS0FBSyxFQUFFO1lBQ3ZELEdBQUcsSUFBSTtZQUNQLEtBQUssRUFBRSxJQUFJLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxLQUFLLENBQUMsQ0FBQyxJQUFJO1lBQ3JDLE1BQU0sRUFBRSxPQUFPLENBQUMsTUFBTTtTQUN2QixDQUFDLENBQUMsQ0FBQTtRQUNILEtBQUssQ0FBQyxPQUFPLEVBQUUsTUFBTSxDQUFDLENBQUE7UUFDdEIsTUFBTSxDQUFDLEtBQUssQ0FBQyw4QkFBOEIsT0FBTyxNQUFNLE9BQU8sQ0FBQyxHQUFHLEVBQUUsQ0FBQyxDQUFBO0lBQ3hFLENBQUM7U0FBTSxDQUFDO1FBQ04sTUFBTSxDQUFDLEdBQUcsQ0FBQyxjQUFjLENBQUM7WUFDeEIsS0FBSyxFQUFFLEtBQUs7U0FDYixDQUFDLENBQUMsQ0FBQTtRQUNILE1BQU0sT0FBTyxHQUFXLE1BQU0sQ0FBQyxNQUFNLENBQVcsQ0FBQztRQUNqRCxPQUFPLENBQUMsTUFBTSxDQUFDLEtBQUssQ0FBQyxPQUFPLENBQUMsQ0FBQTtJQUMvQixDQUFDO0lBQ0QsWUFBWSxDQUFDLE9BQU8sQ0FBQyxDQUFBO0lBQ3JCLGtCQUFrQjtJQUNsQixPQUFPLE1BQU0sQ0FBQTtBQUNmLENBQUMsQ0FBQTtBQUVELE1BQU0sQ0FBQyxNQUFNLGFBQWEsR0FBRyxLQUFLLEVBQUUsTUFBYyxFQUFFLE1BQVcsRUFBRSxPQUFrQixFQUFFLEVBQUU7SUFDckYsSUFBSSxPQUFPLENBQUMsR0FBRyxFQUFFLENBQUM7U
UFDaEIsTUFBTSxDQUFDLElBQUksQ0FBQyw2QkFBNkIsQ0FBQyxDQUFBO1FBQzFDLE9BQU8sS0FBSyxDQUFBO0lBQ2QsQ0FBQztJQUNELDJDQUEyQztJQUMzQyxNQUFNLFVBQVUsR0FBRyxNQUFNLE1BQU0sQ0FBQyxJQUFJLENBQUMsV0FBVyxDQUFDLE1BQU0sQ0FBQztRQUN0RCxLQUFLLEVBQUUsT0FBTyxDQUFDLEtBQUs7UUFDcEIsUUFBUSxFQUFFLE1BQU0sQ0FBQyxRQUFRO1FBQ3pCLGVBQWUsRUFBRSxPQUFPLENBQUMsTUFBYTtLQUN2QyxDQUFDLENBQUE7SUFDRixJQUFJLE1BQU0sR0FBRyxVQUFVLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLE9BQU8sQ0FBQyxPQUFPLENBQUE7SUFDbEQsTUFBTSxHQUFHLE1BQU0sWUFBWSxDQUFDLE1BQU0sRUFBRSxPQUFPLENBQUMsQ0FBQTtJQUM1QyxPQUFPLE1BQU0sQ0FBQTtBQUNmLENBQUMsQ0FBQSJ9
--- a/packages/kbot/dist-in/models/index.js
+++ b/packages/kbot/dist-in/models/index.js
--- a/packages/kbot/logs/params.json
+++ b/packages/kbot/logs/params.json
@ -1,9 +1,9 @@
 {
-  "model": "anthropic/claude-2.0",
+  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
-      "content": "translate \"no\" to French. Return only the translated word, no explanation."
+      "content": "Generate a random number."
    },
    {
      "role": "user",
--- a/packages/kbot/package.json
+++ b/packages/kbot/package.json
@ -19,6 +19,7 @@
    "test": "vitest run",
    "test:basic": "vitest run tests/unit/basic.test.ts",
    "test:math": "vitest run tests/unit/math.test.ts",
+    "test:format": "vitest run tests/unit/format.test.ts",
    "test:language": "vitest run tests/unit/language.test.ts",
    "test2:watch": "vitest",
    "test2:coverage": "vitest run --coverage",
--- a/packages/kbot/src/models/index.ts
+++ b/packages/kbot/src/models/index.ts
@ -56,8 +56,7 @@ export const models = () => {
  const openRouterPath = path.resolve(OPENROUTER_CACHE_PATH)
  if (!exists(openRouterPath)) {
    fetchOpenRouterModels()
-  }
-  if (exists(openRouterPath)) {
+  }else{
    const modelData: OpenRouterCachedModels = read(openRouterPath, 'json') as OpenRouterCachedModels
    models.push(chalk.magenta.bold('\n OpenRouter models:\n'))
    models.push(...listOpenRouterModelsAsStrings(modelData.models))
@ -85,8 +84,7 @@ export const all = () => {
  const openRouterPath = path.resolve(OPENROUTER_CACHE_PATH)
  if (!exists(openRouterPath)) {
    fetchOpenRouterModels()
-  }
-  if (exists(openRouterPath)) {
+  }else{
    const modelData: OpenRouterCachedModels = read(openRouterPath, 'json') as OpenRouterCachedModels
    models = models.concat(modelData.models)
  }
--- a/packages/kbot/tests/unit/basic-report.md
+++ b/packages/kbot/tests/unit/basic-report.md
@ -2,171 +2,56 @@

 ## Failed Tests

-### basic_arithmetic - deepseek/deepseek-chat:free
- Prompt: `return the result of 2+2, dont comment`
- Expected: `undefined`
- Actual: `4`
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:30 PM
-
-### basic_arithmetic - google/gemini-2.0-flash-exp:free
- Prompt: `return the result of 2+2, dont comment`
- Expected: `undefined`
- Actual: `4
-`
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:31 PM
-
-### basic_arithmetic - gpt-4
- Prompt: `return the result of 2+2, dont comment`
- Expected: `undefined`
- Actual: `4`
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:32 PM
-
-### json_structure - deepseek/deepseek-chat:free
- Prompt: `return a JSON object with two fields: "name" as "test" and "value" as 42. Return only the JSON, no other text.`
- Expected: `undefined`
- Actual: `{"name":"test","value":42}`
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:33 PM
-
-### json_structure - gpt-4
- Prompt: `return a JSON object with two fields: "name" as "test" and "value" as 42. Return only the JSON, no other text.`
- Expected: `undefined`
- Actual: `{"name": "test", "value": 42}`
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:36 PM
-
-### json_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return a JSON object with two fields: "name" as "test" and "value" as 42. Return only the JSON, no other text.`
- Expected: `undefined`
- Actual: `{
-  "name": "test",
-  "value": 42
-}`
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:35 PM
-
-### hello - deepseek/deepseek-chat:free
- Prompt: `say "hello"`
- Expected: `hello`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:37 PM
-
-### hello - google/gemini-2.0-flash-exp:free
- Prompt: `say "hello"`
- Expected: `hello`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:37 PM
-
-### hello - gpt-4
- Prompt: `say "hello"`
- Expected: `hello`
- Actual: ``
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:36:42 PM
-
-### goodbye - deepseek/deepseek-chat:free
- Prompt: `say "goodbye"`
- Expected: `goodbye`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:42 PM
-
-### goodbye - google/gemini-2.0-flash-exp:free
- Prompt: `say "goodbye"`
- Expected: `goodbye`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:43 PM
-
-### goodbye - gpt-4
- Prompt: `say "goodbye"`
- Expected: `goodbye`
- Actual: ``
- Reason: expected 'goodbye.' to deeply equal 'goodbye'
- Timestamp: 4/1/2025, 1:36:44 PM
-
-### yes - deepseek/deepseek-chat:free
- Prompt: `say "yes"`
- Expected: `yes`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:45 PM
-
-### yes - google/gemini-2.0-flash-exp:free
- Prompt: `say "yes"`
- Expected: `yes`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:45 PM
-
-### yes - gpt-4
- Prompt: `say "yes"`
- Expected: `yes`
- Actual: ``
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:36:46 PM
+*No failed tests*

 ## Passed Tests

-### addition - deepseek/deepseek-chat:free
+### addition - anthropic/claude-3.5-sonnet
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Timestamp: 4/1/2025, 12:59:06 PM
+- Duration: 1551ms
+- Timestamp: 4/2/2025, 12:17:39 AM

-### addition - google/gemini-2.0-flash-exp:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8
-`
- Timestamp: 4/1/2025, 12:59:08 PM
-
-### addition - gpt-4
+### addition - qwen/qwq-32b
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Timestamp: 4/1/2025, 1:39:04 PM
+- Duration: 3621ms
+- Timestamp: 4/2/2025, 12:17:42 AM

-### multiplication - deepseek/deepseek-chat:free
+### multiplication - anthropic/claude-3.5-sonnet
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Timestamp: 4/1/2025, 12:59:13 PM
+- Duration: 873ms
+- Timestamp: 4/2/2025, 12:17:43 AM

-### multiplication - google/gemini-2.0-flash-exp:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24
-`
- Timestamp: 4/1/2025, 12:59:15 PM
-
-### multiplication - gpt-4
+### multiplication - qwen/qwq-32b
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Timestamp: 4/1/2025, 1:39:06 PM
+- Duration: 3472ms
+- Timestamp: 4/2/2025, 12:17:47 AM

-### division - deepseek/deepseek-chat:free
+### division - anthropic/claude-3.5-sonnet
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Timestamp: 4/1/2025, 12:59:18 PM
+- Duration: 1183ms
+- Timestamp: 4/2/2025, 12:17:48 AM

-### division - google/gemini-2.0-flash-exp:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5
-`
- Timestamp: 4/1/2025, 12:56:09 PM
-
-### division - gpt-4
+### division - qwen/qwq-32b
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Timestamp: 4/1/2025, 1:39:08 PM
+- Duration: 4841ms
+- Timestamp: 4/2/2025, 12:17:53 AM
+
+## Summary
+
+- Total Tests: 6
+- Passed: 6
+- Failed: 0
+- Success Rate: 100.00%

--- a/packages/kbot/tests/unit/basic.json
+++ b/packages/kbot/tests/unit/basic.json
--- a/packages/kbot/tests/unit/basic.test.ts
+++ b/packages/kbot/tests/unit/basic.test.ts
@ -4,153 +4,121 @@ import * as path from 'node:path'
 import { sync as write } from "@polymech/fs/write"
 import { sync as read } from "@polymech/fs/read"
 import { sync as exists } from "@polymech/fs/exists"
-import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'
+import { 
+  models, 
+  TEST_BASE_PATH, 
+  TEST_LOGS_PATH, 
+  TEST_PREFERENCES_PATH, 
+  TEST_TIMEOUT,
+  TestResult,
+  formatError,
+  isEmptyResponse
+} from './commons'

 const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')

 describe('Basic Operations', () => {
  let testResults: TestResult[] = []
  
-  // Load existing results if any
-  if (exists(TEST_LOG_PATH)) {
-    const data = read(TEST_LOG_PATH, 'json')
-    testResults = Array.isArray(data) ? data : []
+
+  /**
+   * Runs a single completion prompt against `modelName`, asserts the
+   * normalized answer and appends the outcome to `testResults`, which is
+   * then written to TEST_LOG_PATH.
+   *
+   * @param prompt    Prompt passed to `run` in 'completion' mode.
+   * @param expected  Expected answer; compared with the trimmed, lower-cased
+   *                  first entry of the response.
+   * @param testName  Label stored in the `test` field of the logged result.
+   * @param modelName Model identifier passed to `run`.
+   * @throws Rethrows the timeout, empty-response or assertion error after it
+   *         has been logged, so the surrounding test still fails.
+   */
+  const runBasicTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
+    let model = 'unknown'
+    let router = 'unknown'
+    const startTime = Date.now()
+    // Hoisted out of the try block: the catch branch logs whatever the model
+    // answered, and the finally branch cancels the timeout timer.
+    let response: string[] = []
+    let timer: ReturnType<typeof setTimeout> | undefined
+    let testResult: TestResult | undefined
+
+    try {
+      const raced = await Promise.race([
+        run({
+          prompt,
+          mode: 'completion',
+          model: modelName,
+          path: TEST_BASE_PATH,
+          logs: TEST_LOGS_PATH,
+          preferences: TEST_PREFERENCES_PATH,
+          onRun: async (options) => {
+            // Capture the values reported through the onRun hook for the log entry.
+            model = options.model || 'unknown'
+            router = options.router || 'unknown'
+            return options
+          }
+        }),
+        new Promise((_, reject) => {
+          timer = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
+        })
+      ]) as string[]
+
+      if (isEmptyResponse(raced)) {
+        throw new Error('Model returned empty response')
+      }
+      response = raced || []
+
+      const actual = response[0]?.trim()?.toLowerCase() || ''
+
+      // `expect` throws on a mismatch, so everything below runs only for a
+      // passing test; failures are recorded by the catch branch.
+      expect(actual).toEqual(expected)
+
+      testResult = {
+        test: testName,
+        prompt,
+        result: response,
+        expected,
+        model,
+        router,
+        timestamp: new Date().toISOString(),
+        passed: true,
+        duration: Date.now() - startTime,
+        reason: undefined,
+      }
+    } catch (e) {
+      const error = formatError(e)
+      testResult = {
+        test: testName,
+        prompt,
+        // Keep the model's answer (if one arrived) so the report can show the
+        // actual value next to the assertion error instead of an empty string.
+        result: response,
+        expected,
+        model,
+        router,
+        timestamp: new Date().toISOString(),
+        passed: false,
+        duration: Date.now() - startTime,
+        error,
+        reason: error?.message || 'Unknown error occurred'
+      }
+      throw e
+    } finally {
+      // Cancel the pending timeout so it does not outlive the test;
+      // clearTimeout is a no-op when the timer was never created.
+      clearTimeout(timer)
+      if (testResult) {
+        testResults.push(testResult)
+        write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
+      }
+    }
  }

  it.each(models)('should add two numbers with model %s', async (modelName) => {
-    const prompt = 'add 5 and 3. Return only the number, no explanation.'
-    const expected = '8'
-    let model = 'unknown'
-    let router = 'unknown'
-    
-    const result = await run({
-      prompt,
-      mode: 'completion',
-      model: modelName,
-      path: TEST_BASE_PATH,
-      logs: TEST_LOGS_PATH,
-      preferences: TEST_PREFERENCES_PATH,
-      onRun: async (options) => {
-        model = options.model || 'unknown'
-        router = options.router || 'unknown'
-        return options
-      }
-    }) as string[]    
-
-    const actual = result?.[0]?.trim() || ''
-    if (!actual) {
-      console.log(`Skipping test for model ${modelName} - no result returned`)
-      return
-    }
-    const passed = actual === expected
-    expect(actual).toEqual(expected)
-
-    // Add test result to array
-    testResults.push({
-      test: 'addition',
-      prompt,
-      result: result || [],
-      expected,
-      model,
-      router,
-      timestamp: new Date().toISOString(),
-      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
-    })
-
-    // Write all results to the same file
-    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-  })
+    await runBasicTest(
+      'add 5 and 3. Return only the number, no explanation.',
+      '8',
+      'addition',
+      modelName
+    )
+  }, { timeout: 10000 })

  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
-    const prompt = 'multiply 8 and 3. Return only the number, no explanation.'
-    const expected = '24'
-    let model = 'unknown'
-    let router = 'unknown'
-    
-    const result = await run({
-      prompt,
-      mode: 'completion',
-      model: modelName,
-      path: TEST_BASE_PATH,
-      logs: TEST_LOGS_PATH,
-      preferences: TEST_PREFERENCES_PATH,
-      onRun: async (options) => {
-        model = options.model || 'unknown'
-        router = options.router || 'unknown'
-        return options
-      }
-    }) as string[]
-
-    const actual = result?.[0]?.trim() || ''
-    if (!actual) {
-      console.log(`Skipping test for model ${modelName} - no result returned`)
-      return
-    }
-    const passed = actual === expected
-    expect(actual).toEqual(expected)
-
-    // Add test result to array
-    testResults.push({
-      test: 'multiplication',
-      prompt,
-      result: result || [],
-      expected,
-      model,
-      router,
-      timestamp: new Date().toISOString(),
-      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
-    })
-
-    // Write all results to the same file
-    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-  })
+    await runBasicTest(
+      'multiply 8 and 3. Return only the number, no explanation.',
+      '24',
+      'multiplication',
+      modelName
+    )
+  }, { timeout: 10000 })

  it.each(models)('should divide two numbers with model %s', async (modelName) => {
-    const prompt = 'divide 15 by 3. Return only the number, no explanation.'
-    const expected = '5'
-    let model = 'unknown'
-    let router = 'unknown'
-    
-    const result = await run({
-      prompt,
-      mode: 'completion',
-      model: modelName,
-      path: TEST_BASE_PATH,
-      logs: TEST_LOGS_PATH,
-      preferences: TEST_PREFERENCES_PATH,
-      onRun: async (options) => {
-        model = options.model || 'unknown'
-        router = options.router || 'unknown'
-        return options
-      }
-    }) as string[]
-
-    const actual = result?.[0]?.trim() || ''
-    if (!actual) {
-      console.log(`Skipping test for model ${modelName} - no result returned`)
-      return
-    }
-    const passed = actual === expected
-    expect(actual).toEqual(expected)
-
-    // Add test result to array
-    testResults.push({
-      test: 'division',
-      prompt,
-      result: result || [],
-      expected,
-      model,
-      router,
-      timestamp: new Date().toISOString(),
-      passed,
-      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
-    })
-
-    // Write all results to the same file
-    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-  })
+    await runBasicTest(
+      'divide 15 by 3. Return only the number, no explanation.',
+      '5',
+      'division',
+      modelName
+    )
+  }, { timeout: 10000 })

  it('should generate markdown report', () => {
    // Group results by test and model
@ -173,32 +141,64 @@ describe('Basic Operations', () => {
    
    // First list failed tests
    report += '## Failed Tests\n\n'
+    let hasFailures = false
    for (const [testName, modelResults] of latestResults) {
      for (const [model, result] of modelResults) {
        if (!result.passed) {
+          hasFailures = true
          report += `### ${testName} - ${model}\n`
          report += `- Prompt: \`${result.prompt}\`\n`
          report += `- Expected: \`${result.expected}\`\n`
          report += `- Actual: \`${result.result[0] || ''}\`\n`
+          report += `- Duration: ${result.duration}ms\n`
+          if (result.error) {
+            report += `- Error Type: ${result.error.type}\n`
+            report += `- Error Code: ${result.error.code}\n`
+            report += `- Error Message: ${result.error.message}\n`
+            if (result.error.details?.message) {
+              report += `- Error Details: ${result.error.details.message}\n`
+            }
+          }
          report += `- Reason: ${result.reason}\n`
          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
        }
      }
    }
+    
+    if (!hasFailures) {
+      report += '*No failed tests*\n\n'
+    }

    // Then list passed tests
    report += '## Passed Tests\n\n'
+    let hasPassed = false
    for (const [testName, modelResults] of latestResults) {
      for (const [model, result] of modelResults) {
        if (result.passed) {
+          hasPassed = true
          report += `### ${testName} - ${model}\n`
          report += `- Prompt: \`${result.prompt}\`\n`
          report += `- Expected: \`${result.expected}\`\n`
          report += `- Actual: \`${result.result[0] || ''}\`\n`
+          report += `- Duration: ${result.duration}ms\n`
          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
        }
      }
    }
+    
+    if (!hasPassed) {
+      report += '*No passed tests*\n\n'
+    }
+
+    // Add summary section
+    report += '## Summary\n\n'
+    const totalTests = testResults.length
+    const passedTests = testResults.filter(r => r.passed).length
+    const failedTests = totalTests - passedTests
+    report += `- Total Tests: ${totalTests}\n`
+    report += `- Passed: ${passedTests}\n`
+    report += `- Failed: ${failedTests}\n`
+    report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n\n`

    // Write report to file
    const reportPath = path.resolve(__dirname, './basic-report.md')
--- a/packages/kbot/tests/unit/commons.ts
+++ b/packages/kbot/tests/unit/commons.ts
@ -3,7 +3,8 @@ import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL, E_OPENROUTER_MODEL } from '../

 export const models = [
    //E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE, 
-    E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_2_0
+    E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET,
+    E_OPENROUTER_MODEL.MODEL_QWEN_QWQ_32B
 ]

 export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
--- a/packages/kbot/tests/unit/format-report.md
+++ b/packages/kbot/tests/unit/format-report.md
@ -2,546 +2,224 @@

 ## Failed Tests

-### basic_structure - deepseek/deepseek-chat:free
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 885ms
- Error Type: Error
+### json-schema-file-format - anthropic/claude-3.5-sonnet
+- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
+- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
+- Actual: ``
+- Duration: 1690ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:36 PM
+- Error Message: expected '{\n  "name": "John Doe",\n  "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
+- Error Details: expected '{\n  "name": "John Doe",\n  "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
+- Reason: expected '{\n  "name": "John Doe",\n  "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
+- Timestamp: 4/2/2025, 12:23:43 AM

-### basic_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 757ms
- Error Type: Error
+### json-schema-file-format - qwen/qwq-32b
+- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
+- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
+- Actual: ``
+- Duration: 3426ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:36 PM
+- Error Message: expected '"John Doe",\n  "age": 30,\n  "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
+- Error Details: expected '"John Doe",\n  "age": 30,\n  "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
+- Reason: expected '"John Doe",\n  "age": 30,\n  "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
+- Timestamp: 4/2/2025, 12:23:47 AM

-### basic_structure - gpt-4
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 1043ms
- Error Type: Error
+### json-schema-object-format - anthropic/claude-3.5-sonnet
+- Prompt: `Create a user profile with name Jane Smith, age 25, and tags ["designer", "ui"]. Return only the JSON object, no explanation.`
+- Expected: `{"name":"Jane Smith","age":25,"tags":["designer","ui"]}`
+- Actual: ``
+- Duration: 1918ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:37 PM
+- Error Message: expected '{\n  "name": "Jane Smith",\n  "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
+- Error Details: expected '{\n  "name": "Jane Smith",\n  "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
+- Reason: expected '{\n  "name": "Jane Smith",\n  "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
+- Timestamp: 4/2/2025, 12:23:49 AM

-### basic_structure - anthropic/claude-3.7-sonnet
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 1790ms
- Error Type: Error
+### json-schema-object-format - qwen/qwq-32b
+- Prompt: `Create a user profile with name Jane Smith, age 25, and tags ["designer", "ui"]. Return only the JSON object, no explanation.`
+- Expected: `{"name":"Jane Smith","age":25,"tags":["designer","ui"]}`
+- Actual: ``
+- Duration: 9789ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token 'h', "hello 42" is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token 'h', "hello 42" is not valid JSON
- Timestamp: 4/1/2025, 1:23:05 PM
+- Error Message: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
+- Error Details: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
+- Reason: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
+- Timestamp: 4/2/2025, 12:23:58 AM

-### basic_structure - openai/gpt-4
- Prompt: `Return a JSON object with a greeting "hello" and count 42. The response must be valid JSON with exactly these fields: { "greeting": string, "count": number }`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 1258ms
- Error Type: Error
+### json-schema-object-format - gpt-4o
+- Prompt: `Create a user profile with the following details:
+    - Name: Jane Smith
+    - Age: 25
+    - Email: jane.smith@company.com
+    - Tags: ["developer", "designer"]
+    - Address: 123 Main St, New York, US, 10001
+    - Preferences: light theme, notifications enabled, English language
+    Return only the JSON object, no explanation.`
+- Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","zipCode":"10001"},"preferences":{"theme":"light","notifications":true,"language":"en"}}`
+- Actual: ``
+- Duration: 2618ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:43 PM
+- Error Message: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…'
+- Error Details: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…'
+- Reason: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…'
+- Timestamp: 4/2/2025, 12:33:08 AM

-### nested_structure - deepseek/deepseek-chat:free
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 655ms
- Error Type: Error
+### zod-string-format - anthropic/claude-3.5-sonnet
+- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
+- Expected: `john.doe@company.com`
+- Actual: ``
+- Duration: 1347ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:38 PM
+- Error Message: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
+- Error Details: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
+- Reason: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
+- Timestamp: 4/2/2025, 12:24:00 AM

-### nested_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 790ms
- Error Type: Error
+### zod-string-format - qwen/qwq-32b
+- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
+- Expected: `john.doe@company.com`
+- Actual: ``
+- Duration: 13704ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:39 PM
+- Error Message: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
+- Error Details: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
+- Reason: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
+- Timestamp: 4/2/2025, 12:24:13 AM

-### nested_structure - gpt-4
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 717ms
- Error Type: Error
+### zod-string-format - gpt-4o
+- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
+- Expected: `john.doe@company.com`
+- Actual: ``
+- Duration: 1794ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:40 PM
+- Error Message: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
+- Error Details: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
+- Reason: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
+- Timestamp: 4/2/2025, 12:32:20 AM

-### nested_structure - anthropic/claude-3.7-sonnet
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 1189ms
- Error Type: Error
+### zod-number-format - anthropic/claude-3.5-sonnet
+- Prompt: `Generate a random age between 18 and 65. Return only the number, no explanation.`
+- Expected: `25`
+- Actual: ``
+- Duration: 1376ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token '#', "# John's U"... is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token '#', "# John's U"... is not valid JSON
- Timestamp: 4/1/2025, 1:23:06 PM
+- Error Message: expected '42' to deeply equal '25'
+- Error Details: expected '42' to deeply equal '25'
+- Reason: expected '42' to deeply equal '25'
+- Timestamp: 4/2/2025, 12:24:11 AM

-### nested_structure - openai/gpt-4
- Prompt: `Return a JSON object with user John age 30, dark theme and notifications enabled. The response must be valid JSON with this structure: { "user": { "name": string, "age": number }, "settings": { "theme": string, "notifications": boolean } }`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 716ms
- Error Type: Error
+### zod-number-format - gpt-4o
+- Prompt: `Generate a random age between 18 and 65. Return only the number, no explanation.`
+- Expected: `25`
+- Actual: ``
+- Duration: 2399ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:44 PM
+- Error Message: expected '39' to deeply equal '25'
+- Error Details: expected '39' to deeply equal '25'
+- Reason: expected '39' to deeply equal '25'
+- Timestamp: 4/2/2025, 12:32:23 AM

-### array_structure - deepseek/deepseek-chat:free
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 617ms
- Error Type: Error
+### zod-array-format - anthropic/claude-3.5-sonnet
+- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
+- Expected: `["JavaScript","Python","Java"]`
+- Actual: ``
+- Duration: 1009ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:40 PM
+- Error Message: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
+- Error Details: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
+- Reason: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
+- Timestamp: 4/2/2025, 12:24:22 AM

-### array_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 756ms
- Error Type: Error
+### zod-array-format - qwen/qwq-32b
+- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
+- Expected: `["JavaScript","Python","Java"]`
+- Actual: ``
+- Duration: 4147ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:41 PM
+- Error Message: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
+- Error Details: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
+- Reason: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
+- Timestamp: 4/2/2025, 12:24:26 AM

-### array_structure - gpt-4
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
+### zod-array-format - gpt-4o
+- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
+- Expected: `["JavaScript","Python","Java"]`
+- Actual: ``
+- Duration: 693ms
+- Error Type: AssertionError
+- Error Code: UNKNOWN
+- Error Message: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
+- Error Details: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
+- Reason: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
+- Timestamp: 4/2/2025, 12:32:23 AM
+
+### invalid-format - anthropic/claude-3.5-sonnet
+- Prompt: `Generate a random number.`
+- Expected: `Invalid format option`
+- Actual: ``
 - Duration: 1026ms
- Error Type: Error
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:42 PM
+- Error Message: expected '73' to deeply equal 'Invalid format option'
+- Error Details: expected '73' to deeply equal 'Invalid format option'
+- Reason: expected '73' to deeply equal 'Invalid format option'
+- Timestamp: 4/2/2025, 12:24:27 AM

-### array_structure - anthropic/claude-3.7-sonnet
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 1190ms
- Error Type: Error
+### invalid-format - qwen/qwq-32b
+- Prompt: `Generate a random number.`
+- Expected: `Invalid format option`
+- Actual: ``
+- Duration: 7614ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:23:08 PM
+- Error Message: expected '72' to deeply equal 'Invalid format option'
+- Error Details: expected '72' to deeply equal 'Invalid format option'
+- Reason: expected '72' to deeply equal 'Invalid format option'
+- Timestamp: 4/2/2025, 12:24:35 AM

-### array_structure - openai/gpt-4
- Prompt: `Return a JSON object with a list of 2 items. The response must be valid JSON with this structure: { "items": [{ "id": number, "name": string }] }. The first item should have id 1 and name "first", the second item should have id 2 and name "second".`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 703ms
- Error Type: Error
+### invalid-format - gpt-4o
+- Prompt: `Generate a random number.`
+- Expected: `Invalid format option`
+- Actual: ``
+- Duration: 826ms
+- Error Type: AssertionError
 - Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:44 PM
-
-### enum_structure - deepseek/deepseek-chat:free
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 647ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:43 PM
-
-### enum_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 813ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:43 PM
-
-### enum_structure - gpt-4
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 1138ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:45 PM
-
-### enum_structure - anthropic/claude-3.7-sonnet
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 1728ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token '`', "```json
-{
-"... is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token '`', "```json
-{
-"... is not valid JSON
- Timestamp: 4/1/2025, 1:23:09 PM
-
-### enum_structure - openai/gpt-4
- Prompt: `Return a JSON object with status "success" and message "Operation completed". The response must be valid JSON with this structure: { "status": "success" | "error" | "pending", "message": string }`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 688ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:45 PM
-
-### optional_fields - deepseek/deepseek-chat:free
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 676ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:45 PM
-
-### optional_fields - google/gemini-2.0-flash-exp:free
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 884ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:46 PM
-
-### optional_fields - gpt-4
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 669ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Reason: Failed to parse or validate response: [
-  {
-    "code": "invalid_type",
-    "expected": "object",
-    "received": "null",
-    "path": [],
-    "message": "Expected object, received null"
-  }
-]
- Timestamp: 4/1/2025, 1:21:47 PM
-
-### optional_fields - anthropic/claude-3.7-sonnet
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 1576ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token '`', "```json
-{
-"... is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token '`', "```json
-{
-"... is not valid JSON
- Timestamp: 4/1/2025, 1:23:11 PM
-
-### optional_fields - openai/gpt-4
- Prompt: `Return a JSON object with name "John", age 30, and email "john@example.com". The response must be valid JSON with this structure: { "name": string, "age"?: number, "email"?: string }`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 682ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:46 PM
+- Error Message: expected '786984' to deeply equal 'Invalid format option'
+- Error Details: expected '786984' to deeply equal 'Invalid format option'
+- Reason: expected '786984' to deeply equal 'Invalid format option'
+- Timestamp: 4/2/2025, 12:32:24 AM

 ## Passed Tests

-*No passed tests*
+### json-schema-file-format - gpt-4o
+- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
+- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
+- Actual: ````json
+{
+    "name": "John Doe",
+    "age": 30,
+    "tags": ["developer", "javascript"]
+}
+````
+- Duration: 995ms
+- Timestamp: 4/2/2025, 12:34:28 AM
+
+## Summary
+
+- Total Tests: 38
+- Passed: 5
+- Failed: 33
+- Success Rate: 13.16%

--- a/packages/kbot/tests/unit/format.json
+++ b/packages/kbot/tests/unit/format.json
--- a/packages/kbot/tests/unit/format.test.ts
+++ b/packages/kbot/tests/unit/format.test.ts
@ -1,295 +1,508 @@
-import { describe, it, expect } from 'vitest'
-import { run } from '../../src/index'
-import * as path from 'node:path'
-import { sync as write } from "@polymech/fs/write"
-import { sync as read } from "@polymech/fs/read"
-import { sync as exists } from "@polymech/fs/exists"
-import { z } from 'zod'
-
-
-import { 
-  models_premium as models, 
-  TEST_BASE_PATH, 
-  TEST_LOGS_PATH, 
-  TEST_PREFERENCES_PATH, 
-  TEST_TIMEOUT, 
-  TestResult, 
-  formatError, 
-  isEmptyResponse,
-  getRouterForModel,
-  getApiKeyForRouter
-} from './commons'
-
-const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
-
-describe('Structured Output Format', () => {
-  let testResults: TestResult[] = []
-  
-  // Load existing results if any
-  if (exists(TEST_LOG_PATH)) {
-    const data = read(TEST_LOG_PATH, 'json')
-    testResults = Array.isArray(data) ? data : []
-  }
-
-  const runFormatTest = async (prompt: string, format: z.ZodType<any>, expected: any, testName: string, modelName: string) => {
-    let model = 'unknown'
-    let router = 'unknown'
-    let startTime = Date.now()
-    let error: TestResult['error'] | undefined
-    
-    try {
-      const result = await Promise.race([
-        run({
-          prompt,
-          mode: 'completion',
-          model: modelName,
-          path: TEST_BASE_PATH,
-          logs: TEST_LOGS_PATH,
-          preferences: TEST_PREFERENCES_PATH,
-          format,
-          onRun: async (options) => {
-            model = options.model || 'unknown'
-            router = options.router || 'unknown'
-            return options
-          }
-        }),
-        new Promise((_, reject) => 
-          setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
-        )
-      ]) as any[]
-
-      const actual = result?.[0]
-      let parsed: any
-      
-      try {
-        parsed = typeof actual === 'string' ? JSON.parse(actual) : actual
-        // Validate against the format schema
-        parsed = format.parse(parsed)
-      } catch (parseError) {
-        throw new Error(`Failed to parse or validate response: ${parseError.message}`)
-      }
-
-      const passed = JSON.stringify(parsed) === JSON.stringify(expected) && !isEmptyResponse(result)
-      
-      if (isEmptyResponse(result)) {
-        throw new Error('Model returned empty response')
-      }
-      
-      expect(parsed).toEqual(expected)
-
-      return {
-        test: testName,
-        prompt,
-        result: result || [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed,
-        duration: Date.now() - startTime,
-        reason: passed ? undefined : `Expected ${JSON.stringify(expected)}, but got ${JSON.stringify(parsed)}`,
-        config: {
-          router: getRouterForModel(modelName)
-        }
-      }
-    } catch (e) {
-      error = formatError(e)
-      throw e
-    } finally {
-      const testResult: TestResult = {
-        test: testName,
-        prompt,
-        result: [],
-        expected,
-        model,
-        router,
-        timestamp: new Date().toISOString(),
-        passed: false,
-        duration: Date.now() - startTime,
-        error,
-        reason: error?.message || 'Unknown error occurred',
-        config: {
-          router: getRouterForModel(modelName),
-          apiKey: getApiKeyForRouter(getRouterForModel(modelName))
-        }
-      }
-      
-      testResults.push(testResult)
-      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
-    }
-  }
-
-  it.each(models)('should return basic structured output with model %s', async (modelName) => {
-    const format = z.object({
-      greeting: z.string(),
-      count: z.number()
-    })
-
-    await runFormatTest(
-      'return a greeting "hello" with count 42',
-      format,
-      { greeting: 'hello', count: 42 },
-      'basic_structure',
-      modelName
-    )
-  })
-
-  it.each(models)('should handle nested structures with model %s', async (modelName) => {
-    const format = z.object({
-      user: z.object({
-        name: z.string(),
-        age: z.number()
-      }),
-      settings: z.object({
-        theme: z.string(),
-        notifications: z.boolean()
-      })
-    })
-
-    await runFormatTest(
-      'return user John age 30 with dark theme and notifications enabled',
-      format,
-      {
-        user: { name: 'John', age: 30 },
-        settings: { theme: 'dark', notifications: true }
-      },
-      'nested_structure',
-      modelName
-    )
-  })
-
-  it.each(models)('should handle arrays with model %s', async (modelName) => {
-    const format = z.object({
-      items: z.array(z.object({
-        id: z.number(),
-        name: z.string()
-      }))
-    })
-
-    await runFormatTest(
-      'return a list of 2 items with ids 1 and 2, names "first" and "second"',
-      format,
-      {
-        items: [
-          { id: 1, name: 'first' },
-          { id: 2, name: 'second' }
-        ]
-      },
-      'array_structure',
-      modelName
-    )
-  })
-
-  it.each(models)('should handle enums with model %s', async (modelName) => {
-    const format = z.object({
-      status: z.enum(['success', 'error', 'pending']),
-      message: z.string()
-    })
-
-    await runFormatTest(
-      'return status success with message "Operation completed"',
-      format,
-      {
-        status: 'success',
-        message: 'Operation completed'
-      },
-      'enum_structure',
-      modelName
-    )
-  })
-
-  it.each(models)('should handle optional fields with model %s', async (modelName) => {
-    const format = z.object({
-      name: z.string(),
-      age: z.number().optional(),
-      email: z.string().email().optional()
-    })
-
-    await runFormatTest(
-      'return name "John" with age 30 and email "john@example.com"',
-      format,
-      {
-        name: 'John',
-        age: 30,
-        email: 'john@example.com'
-      },
-      'optional_fields',
-      modelName
-    )
-  })
-
-  it('should generate markdown report', () => {
-    // Group results by test and model
-    const latestResults = new Map<string, Map<string, TestResult>>()
-    
-    // Get only the latest result for each test+model combination
-    testResults.forEach(result => {
-      if (!latestResults.has(result.test)) {
-        latestResults.set(result.test, new Map())
-      }
-      const testMap = latestResults.get(result.test)!
-      const existingResult = testMap.get(result.model)
-      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
-        testMap.set(result.model, result)
-      }
-    })
-
-    // Generate markdown report
-    let report = '# Format Test Results\n\n'
-    
-    // First list failed tests
-    report += '## Failed Tests\n\n'
-    let hasFailures = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (!result.passed) {
-          hasFailures = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${JSON.stringify(result.expected)}\`\n`
-          report += `- Actual: \`${JSON.stringify(result.result[0] || '')}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          if (result.error) {
-            report += `- Error Type: ${result.error.type}\n`
-            report += `- Error Code: ${result.error.code}\n`
-            report += `- Error Message: ${result.error.message}\n`
-          }
-          report += `- Reason: ${result.reason}\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasFailures) {
-      report += '*No failed tests*\n\n'
-    }
-
-    // Then list passed tests
-    report += '## Passed Tests\n\n'
-    let hasPassed = false
-    for (const [testName, modelResults] of latestResults) {
-      for (const [model, result] of modelResults) {
-        if (result.passed) {
-          hasPassed = true
-          report += `### ${testName} - ${model}\n`
-          report += `- Prompt: \`${result.prompt}\`\n`
-          report += `- Expected: \`${JSON.stringify(result.expected)}\`\n`
-          report += `- Actual: \`${JSON.stringify(result.result[0] || '')}\`\n`
-          report += `- Duration: ${result.duration}ms\n`
-          report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
-        }
-      }
-    }
-    
-    if (!hasPassed) {
-      report += '*No passed tests*\n\n'
-    }
-
-    // Write report to file
-    const reportPath = path.resolve(__dirname, './format-report.md')
-    write(reportPath, report)
-
-    // Verify report was written
-    expect(exists(reportPath) === 'file').toBe(true)
-  })
+import { describe, it, expect } from 'vitest'
+import { run } from '../../src/index'
+import * as path from 'node:path'
+import { sync as write } from "@polymech/fs/write"
+import { sync as read } from "@polymech/fs/read"
+import { sync as exists } from "@polymech/fs/exists"
+import { z } from 'zod'
+import { 
+  models, 
+  TEST_BASE_PATH, 
+  TEST_LOGS_PATH, 
+  TEST_PREFERENCES_PATH, 
+  TEST_TIMEOUT,
+  TestResult,
+  formatError,
+  isEmptyResponse
+} from './commons'
+
+const TEST_LOG_PATH = path.resolve(__dirname, './format.json') // JSON log of test results: loaded when the suite is defined, rewritten by runFormatTest
+const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json') // on-disk copy of testJsonSchema (written below at module load)
+
+// Sample draft-07 JSON Schema ("User Profile") used as the fixture for the JSON-Schema format tests; it is serialised to TEST_SCHEMA_PATH below.
+const testJsonSchema = {
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://example.com/user-profile.schema.json",
+  "title": "User Profile",
+  "description": "A user profile containing name, age, and tags",
+  "type": "object",
+  "properties": {
+    "name": {
+      "type": "string",
+      "description": "User's full name",
+      "minLength": 1,
+      "pattern": "^[A-Za-z\\s]+$"
+    },
+    "age": {
+      "type": "number",
+      "description": "User's age in years",
+      "minimum": 0,
+      "maximum": 150
+    },
+    "email": {
+      "type": "string",
+      "description": "User's email address",
+      "format": "email"
+    },
+    "tags": {
+      "type": "array",
+      "description": "List of user's tags",
+      "items": {
+        "type": "string",
+        "enum": ["developer", "designer", "manager", "admin", "user"] // NOTE(review): the JSON-Schema-file test expects the tag "javascript", which this enum does not allow
+      },
+      "minItems": 1,
+      "maxItems": 5,
+      "uniqueItems": true
+    },
+    "address": {
+      "type": "object",
+      "description": "User's address",
+      "properties": {
+        "street": {
+          "type": "string",
+          "description": "Street address"
+        },
+        "city": {
+          "type": "string",
+          "description": "City name"
+        },
+        "country": {
+          "type": "string",
+          "description": "Country name",
+          "enum": ["US", "UK", "CA", "AU"]
+        },
+        "zipCode": {
+          "type": "string",
+          "description": "ZIP/Postal code",
+          "pattern": "^[0-9]{5}(-[0-9]{4})?$"
+        }
+      },
+      "required": ["street", "city", "country"] // zipCode stays optional
+    },
+    "preferences": {
+      "type": "object",
+      "description": "User preferences",
+      "properties": {
+        "theme": {
+          "type": "string",
+          "enum": ["light", "dark", "system"],
+          "default": "system"
+        },
+        "notifications": {
+          "type": "boolean",
+          "default": true
+        },
+        "language": {
+          "type": "string",
+          "enum": ["en", "es", "fr", "de", "ja"],
+          "default": "en"
+        }
+      }
+    }
+  },
+  "required": ["name", "age", "email"], // NOTE(review): the JSON-Schema-file test's expected object has no "email" - confirm whether that test should satisfy this schema
+  "additionalProperties": false
+}
+
+// Top-level statement: (re)writes the pretty-printed schema to TEST_SCHEMA_PATH every time this test module is loaded - presumably so tests can pass the schema by file path.
+write(TEST_SCHEMA_PATH, JSON.stringify(testJsonSchema, null, 2))
+
+/**
+ * Canonicalises a JSON string so two payloads can be compared as text.
+ *
+ * Markdown code-fence markers are removed first: an opening fence with an optional
+ * language tag in any letter case (```, ```json, ```JSON, ...) and a closing fence,
+ * with either LF or CRLF line endings. The remainder is parsed and re-serialised
+ * with JSON.stringify, which drops insignificant whitespace.
+ *
+ * @param json - Raw text, e.g. a model response or an expected value.
+ * @returns The compact JSON text, or `json` unchanged when it is not parseable as JSON
+ *          (plain strings such as an e-mail address therefore pass through untouched).
+ */
+const normalizeJson = (json: string) => {
+  try {
+    // First alternative: opening fence line; second alternative: closing fence.
+    const cleanJson = json.replace(/```[A-Za-z]*[ \t]*\r?\n|\r?\n[ \t]*```/g, '').trim()
+    return JSON.stringify(JSON.parse(cleanJson))
+  } catch {
+    // Not JSON (or not even a string): fall back to the caller's original value.
+    return json
+  }
+}
+
+/**
+ * Loose e-mail shape check: local part, a single "@", then a domain containing a dot.
+ * None of the parts may contain whitespace or a further "@".
+ */
+const isValidEmail = (email: string) => {
+  const emailShape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
+  return emailShape.test(email)
+}
+
+/** True when `num` lies in the closed interval [min, max]; NaN is never in range. */
+const isNumberInRange = (num: number, min: number, max: number) => min <= num && max >= num
+
+/**
+ * True when `arr` is an array of exactly `length` elements and every element is a string.
+ * Despite the name, the element type is checked as well as the length.
+ */
+const hasValidArrayLength = (arr: any[], length: number) => {
+  if (!Array.isArray(arr) || arr.length !== length) {
+    return false
+  }
+  return arr.every(entry => typeof entry === 'string')
+}
+
+describe('Format Options', () => {
+  let testResults: TestResult[] = []
+  
+  // Load existing results if any
+  if (exists(TEST_LOG_PATH)) {
+    const data = read(TEST_LOG_PATH, 'json')
+    testResults = Array.isArray(data) ? data : []
+  }
+
+  /**
+   * Runs one completion with the format option under test, compares the
+   * normalised reply with the expected JSON and appends the outcome to the
+   * persisted test log (also on failure).
+   *
+   * @param prompt    Prompt sent to the model.
+   * @param expected  Expected reply; both sides go through normalizeJson before comparing.
+   * @param testName  Identifier stored in the log entry.
+   * @param modelName Model requested for the run; a `model` key in `options` still overrides it.
+   * @param options   Extra options spread into the `run` call (e.g. `format`).
+   * @throws Rethrows any failure (timeout, empty reply, mismatch) after it has been logged.
+   */
+  const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
+    let model = modelName
+    let router = 'openai'
+    const startTime = Date.now()
+    let response: string[] = []
+    let testResult: TestResult | undefined
+    let timeoutHandle: ReturnType<typeof setTimeout> | undefined
+
+    try {
+      const result = await Promise.race([
+        run({
+          prompt,
+          mode: 'completion',
+          model: modelName,
+          router: 'openai',
+          path: TEST_BASE_PATH,
+          logs: TEST_LOGS_PATH,
+          preferences: TEST_PREFERENCES_PATH,
+          ...options,
+          // Remember the model/router reported by the onRun hook for the log entry.
+          onRun: async (runOptions) => {
+            model = runOptions.model || 'unknown'
+            router = runOptions.router || 'unknown'
+            return runOptions
+          }
+        }),
+        new Promise((_, reject) => {
+          timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
+        })
+      ]) as string[]
+
+      if (isEmptyResponse(result)) {
+        throw new Error('Model returned empty response')
+      }
+      response = result || []
+
+      const actual = result?.[0]?.trim() || ''
+      const normalizedActual = normalizeJson(actual)
+      const normalizedExpected = normalizeJson(expected)
+      const passed = normalizedActual === normalizedExpected
+
+      // Record the outcome BEFORE asserting: a failing expect() throws, and the
+      // log entry should still carry the actual reply and the mismatch reason.
+      testResult = {
+        test: testName,
+        prompt,
+        result: response,
+        expected,
+        model,
+        router,
+        timestamp: new Date().toISOString(),
+        passed,
+        duration: Date.now() - startTime,
+        reason: passed ? undefined : `Expected ${normalizedExpected}, but got ${normalizedActual}`,
+      }
+
+      expect(normalizedActual).toEqual(normalizedExpected)
+    } catch (e) {
+      const error = formatError(e)
+      testResult = {
+        test: testName,
+        prompt,
+        // Empty unless the reply arrived and only the comparison failed.
+        result: response,
+        expected,
+        model,
+        router,
+        timestamp: new Date().toISOString(),
+        passed: false,
+        duration: Date.now() - startTime,
+        error,
+        // Prefer the specific mismatch text when the comparison already ran.
+        reason: testResult?.reason || error?.message || 'Unknown error occurred'
+      }
+      throw e
+    } finally {
+      // Without this the losing timer stays armed after the race has settled.
+      clearTimeout(timeoutHandle)
+      if (testResult) {
+        testResults.push(testResult)
+        write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
+      }
+    }
+  }
+
+  // Test JSON Schema format using file path.
+  // The prompt and the expectation have to satisfy the schema themselves: it
+  // requires `email` and forbids extra properties, and (per
+  // tests/unit/test-schema.json) tags come from an enum that has no "javascript".
+  it('should format response according to JSON Schema file', async () => {
+    const prompt = 'Create a user profile with name John Doe, age 30, email john.doe@example.com, and tags ["developer", "designer"]. Return only the JSON object, no explanation.'
+    // Key order matters: the comparison is on the serialised string.
+    const expected = JSON.stringify({
+      name: "John Doe",
+      age: 30,
+      email: "john.doe@example.com",
+      tags: ["developer", "designer"]
+    })
+
+    await runFormatTest(
+      prompt,
+      expected,
+      'json-schema-file-format',
+      'gpt-4o',
+      {
+        format: TEST_SCHEMA_PATH
+      }
+    )
+  }, { timeout: 10000 })
+
+  // Same schema as the file-based test, but handed to `run` as an in-memory object
+  it('should format response according to JSON Schema object', async () => {
+    const prompt = `Create a user profile with the following details:
+    - Name: Jane Smith
+    - Age: 25
+    - Email: jane.smith@company.com
+    - Tags: ["developer", "designer"]
+    - Address: 123 Main St, New York, US, 10001
+    - Preferences: light theme, notifications enabled, English language
+    Return only the JSON object, no explanation.`
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: 'gpt-4o',
+      router: 'openai',
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      format: testJsonSchema
+    }) as string[]
+
+    // An absent reply is treated as an empty object so the assertions below report the gap.
+    const firstReply = result?.[0] || '{}'
+    const response = JSON.parse(normalizeJson(firstReply))
+
+    // Required scalar fields
+    expect(response.name).toBe('Jane Smith')
+    expect(response.age).toBe(25)
+    expect(response.email).toBe('jane.smith@company.com')
+
+    // Tags: an array holding both requested values
+    const tagsAreArray = Array.isArray(response.tags)
+    expect(tagsAreArray).toBe(true)
+    expect(response.tags).toContain('developer')
+    expect(response.tags).toContain('designer')
+
+    // Address: either key name is accepted for the postal code
+    expect(response.address.street).toBe('123 Main St')
+    expect(response.address.city).toBe('New York')
+    expect(response.address.country).toBe('US')
+    const postalCode = response.address.zipCode || response.address.postal_code
+    expect(postalCode).toMatch(/^[0-9]{5}$/)
+
+    // Preferences: several spellings are tolerated for the flag and the language
+    expect(response.preferences.theme).toBe('light')
+    const notificationsAccepted = ['true', true, 'enabled'].includes(response.preferences.notifications)
+    expect(notificationsAccepted).toBe(true)
+    const languageAccepted = ['en', 'English'].includes(response.preferences.language)
+    expect(languageAccepted).toBe(true)
+  }, { timeout: 10000 })
+
+  // String field constrained by an inline JSON Schema object.
+  // (The test title says "Zod", but the `format` passed below is plain JSON Schema.)
+  it('should format response according to Zod string schema', async () => {
+    const prompt = 'Generate a valid email address for a business domain. Return only the email, no explanation.'
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: 'gpt-4o',
+      router: 'openai',
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      format: {
+        type: "object",
+        properties: {
+          email: {
+            type: "string",
+            format: "email"
+          }
+        },
+        required: ["email"]
+      }
+    }) as string[]
+
+    // The schema describes an object, so a compliant reply is {"email": "..."}.
+    // Validate that field; fall back to the raw text for a bare-address reply.
+    const raw = result?.[0]?.trim() || ''
+    let email = raw
+    try {
+      const parsed = JSON.parse(normalizeJson(raw))
+      if (typeof parsed?.email === 'string') {
+        email = parsed.email.trim()
+      }
+    } catch {
+      // Reply is not JSON: keep validating the raw text.
+    }
+
+    expect(isValidEmail(email)).toBe(true)
+  }, { timeout: 10000 })
+
+  // Numeric field constrained by an inline JSON Schema object.
+  // (The test title says "Zod", but the `format` passed below is plain JSON Schema.)
+  it('should format response according to Zod number schema', async () => {
+    const prompt = 'Generate a random age between 18 and 65. Return only the number, no explanation.'
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: 'gpt-4o',
+      router: 'openai',
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      format: {
+        type: "object",
+        properties: {
+          age: {
+            type: "number",
+            minimum: 18,
+            maximum: 65
+          }
+        },
+        required: ["age"]
+      }
+    }) as string[]
+
+    // A compliant reply is {"age": N}; parseInt on that text yields NaN, so read
+    // the field when the reply is such an object and only otherwise parse the text.
+    const raw = result?.[0]?.trim() || '0'
+    let age = Number.parseInt(raw, 10)
+    try {
+      const parsed = JSON.parse(normalizeJson(raw))
+      if (typeof parsed?.age === 'number') {
+        age = parsed.age
+      }
+    } catch {
+      // Reply is not JSON: keep the parseInt result.
+    }
+
+    expect(isNumberInRange(age, 18, 65)).toBe(true)
+  }, { timeout: 10000 })
+
+  // Array field constrained by an inline JSON Schema object.
+  // (The test title says "Zod", but the `format` passed below is plain JSON Schema.)
+  it('should format response according to Zod array schema', async () => {
+    const prompt = 'Generate a list of 3 programming languages. Return only the array, no explanation.'
+
+    const result = await run({
+      prompt,
+      mode: 'completion',
+      model: 'gpt-4o',
+      router: 'openai',
+      path: TEST_BASE_PATH,
+      logs: TEST_LOGS_PATH,
+      preferences: TEST_PREFERENCES_PATH,
+      format: {
+        type: "object",
+        properties: {
+          languages: {
+            type: "array",
+            items: {
+              type: "string"
+            },
+            minItems: 3,
+            maxItems: 3
+          }
+        },
+        required: ["languages"]
+      }
+    }) as string[]
+
+    // normalizeJson strips a Markdown fence before parsing. A compliant reply is
+    // {"languages": [...]}, but a bare array is accepted as well.
+    const parsed = JSON.parse(normalizeJson(result?.[0]?.trim() || '[]'))
+    const languages = Array.isArray(parsed) ? parsed : parsed?.languages
+
+    expect(hasValidArrayLength(languages, 3)).toBe(true)
+  }, { timeout: 10000 })
+
+  // Test invalid format option: `run` must reject, and the rejection must be
+  // about the format/schema rather than some unrelated failure.
+  it('should handle invalid format option', async () => {
+    const prompt = 'Generate a random number.'
+
+    let rejected = false
+    let rejection: unknown
+    try {
+      await run({
+        prompt,
+        mode: 'completion',
+        model: 'gpt-4o',
+        router: 'openai',
+        path: TEST_BASE_PATH,
+        logs: TEST_LOGS_PATH,
+        preferences: TEST_PREFERENCES_PATH,
+        format: {
+          type: "invalid",
+          properties: {}
+        }
+      })
+    } catch (e) {
+      rejected = true
+      rejection = e
+    }
+
+    // Checked outside the try block so this failure cannot be caught and
+    // re-interpreted by the handler above.
+    if (!rejected) {
+      throw new Error('Expected format validation to fail')
+    }
+
+    // Tolerate rejections that are not Error instances.
+    const message = String((rejection as any)?.message ?? rejection)
+    if (!/invalid|Invalid|schema|Schema/.test(message)) {
+      throw new Error(`Unexpected error: ${message}`)
+    }
+  }, { timeout: 10000 })
+
+  // Renders every logged result into format-report.md next to this test file.
+  it('should generate markdown report', () => {
+    // Keep only the newest entry for each test name + model combination.
+    const latestResults = new Map<string, Map<string, TestResult>>()
+    for (const result of testResults) {
+      let perModel = latestResults.get(result.test)
+      if (!perModel) {
+        perModel = new Map<string, TestResult>()
+        latestResults.set(result.test, perModel)
+      }
+      const existingResult = perModel.get(result.model)
+      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
+        perModel.set(result.model, result)
+      }
+    }
+
+    // Renders all latest entries whose outcome matches `wantPassed`.
+    // Failed entries additionally list the error details and the reason.
+    // Returns '' when nothing matches.
+    const renderSection = (wantPassed: boolean) => {
+      let section = ''
+      for (const [testName, modelResults] of latestResults) {
+        for (const [model, result] of modelResults) {
+          if (Boolean(result.passed) !== wantPassed) {
+            continue
+          }
+          section += `### ${testName} - ${model}\n`
+          section += `- Prompt: \`${result.prompt}\`\n`
+          section += `- Expected: \`${result.expected}\`\n`
+          section += `- Actual: \`${result.result[0] || ''}\`\n`
+          section += `- Duration: ${result.duration}ms\n`
+          if (!wantPassed) {
+            if (result.error) {
+              section += `- Error Type: ${result.error.type}\n`
+              section += `- Error Code: ${result.error.code}\n`
+              section += `- Error Message: ${result.error.message}\n`
+              if (result.error.details?.message) {
+                section += `- Error Details: ${result.error.details.message}\n`
+              }
+            }
+            section += `- Reason: ${result.reason}\n`
+          }
+          section += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
+        }
+      }
+      return section
+    }
+
+    // Failed tests come first, then passed ones.
+    let report = '# Format Test Results\n\n'
+    report += '## Failed Tests\n\n'
+    report += renderSection(false) || '*No failed tests*\n\n'
+    report += '## Passed Tests\n\n'
+    report += renderSection(true) || '*No passed tests*\n\n'
+
+    // Summary counts every logged run, not just the latest per test/model.
+    report += '## Summary\n\n'
+    const totalTests = testResults.length
+    const passedTests = testResults.filter(r => r.passed).length
+    const failedTests = totalTests - passedTests
+    // Guard the empty log: 0 / 0 would otherwise print "NaN%".
+    const successRate = totalTests === 0 ? 0 : (passedTests / totalTests) * 100
+    report += `- Total Tests: ${totalTests}\n`
+    report += `- Passed: ${passedTests}\n`
+    report += `- Failed: ${failedTests}\n`
+    report += `- Success Rate: ${successRate.toFixed(2)}%\n\n`
+
+    // Write report to file
+    const reportPath = path.resolve(__dirname, './format-report.md')
+    write(reportPath, report)
+
+    // Verify report was written
+    expect(exists(reportPath) === 'file').toBe(true)
+  })
 }) 
--- a/packages/kbot/tests/unit/language-report.md
+++ b/packages/kbot/tests/unit/language-report.md
@ -36,9 +36,9 @@
 - Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
 - Expected: `hallo`
 - Actual: ``
- Duration: 1253ms
+- Duration: 1192ms
 - Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:47:26 PM
+- Timestamp: 4/1/2025, 11:53:55 PM

 ### spanish - deepseek/deepseek-chat:free
 - Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
@ -74,9 +74,9 @@
 - Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
 - Expected: `sí`
 - Actual: ``
- Duration: 932ms
+- Duration: 1193ms
 - Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:47:27 PM
+- Timestamp: 4/1/2025, 11:53:56 PM

 ### french - deepseek/deepseek-chat:free
 - Prompt: `translate "no" to French. Return only the translated word, no explanation.`
@ -112,9 +112,9 @@
 - Prompt: `translate "no" to French. Return only the translated word, no explanation.`
 - Expected: `non`
 - Actual: ``
- Duration: 864ms
+- Duration: 968ms
 - Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:47:28 PM
+- Timestamp: 4/1/2025, 11:53:57 PM

 ## Passed Tests

--- a/packages/kbot/tests/unit/language.json
+++ b/packages/kbot/tests/unit/language.json
@ -807,5 +807,41 @@
    "passed": false,
    "duration": 864,
    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "german",
+    "prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "hallo",
+    "model": "anthropic/claude-2.0",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T21:53:55.163Z",
+    "passed": false,
+    "duration": 1192,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "spanish",
+    "prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "sí",
+    "model": "anthropic/claude-2.0",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T21:53:56.357Z",
+    "passed": false,
+    "duration": 1193,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "french",
+    "prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
+    "result": [],
+    "expected": "non",
+    "model": "anthropic/claude-2.0",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T21:53:57.326Z",
+    "passed": false,
+    "duration": 968,
+    "reason": "Unknown error occurred"
  }
 ]
--- a/packages/kbot/tests/unit/math-report.md
+++ b/packages/kbot/tests/unit/math-report.md
@ -36,9 +36,9 @@
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: ``
- Duration: 1218ms
+- Duration: 1992ms
 - Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:46:27 PM
+- Timestamp: 4/1/2025, 11:53:40 PM

 ### multiplication - deepseek/deepseek-chat:free
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
@ -74,9 +74,9 @@
 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: ``
- Duration: 911ms
+- Duration: 1078ms
 - Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:46:27 PM
+- Timestamp: 4/1/2025, 11:53:41 PM

 ### division - deepseek/deepseek-chat:free
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
@ -112,9 +112,9 @@
 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: ``
- Duration: 1485ms
+- Duration: 940ms
 - Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:46:29 PM
+- Timestamp: 4/1/2025, 11:53:42 PM

 ## Passed Tests

--- a/packages/kbot/tests/unit/math.json
+++ b/packages/kbot/tests/unit/math.json
@ -2213,5 +2213,41 @@
    "passed": false,
    "duration": 1485,
    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "addition",
+    "prompt": "add 5 and 3. Return only the number, no explanation.",
+    "result": [],
+    "expected": "8",
+    "model": "anthropic/claude-2.0",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T21:53:40.351Z",
+    "passed": false,
+    "duration": 1992,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "multiplication",
+    "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+    "result": [],
+    "expected": "24",
+    "model": "anthropic/claude-2.0",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T21:53:41.431Z",
+    "passed": false,
+    "duration": 1078,
+    "reason": "Unknown error occurred"
+  },
+  {
+    "test": "division",
+    "prompt": "divide 15 by 3. Return only the number, no explanation.",
+    "result": [],
+    "expected": "5",
+    "model": "anthropic/claude-2.0",
+    "router": "openrouter",
+    "timestamp": "2025-04-01T21:53:42.372Z",
+    "passed": false,
+    "duration": 940,
+    "reason": "Unknown error occurred"
  }
 ]
--- a/packages/kbot/tests/unit/test-schema.json
+++ b/packages/kbot/tests/unit/test-schema.json
@ -0,0 +1,113 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://example.com/user-profile.schema.json",
+  "title": "User Profile",
+  "description": "A user profile containing name, age, and tags",
+  "type": "object",
+  "properties": {
+    "name": {
+      "type": "string",
+      "description": "User's full name",
+      "minLength": 1,
+      "pattern": "^[A-Za-z\\s]+$"
+    },
+    "age": {
+      "type": "number",
+      "description": "User's age in years",
+      "minimum": 0,
+      "maximum": 150
+    },
+    "email": {
+      "type": "string",
+      "description": "User's email address",
+      "format": "email"
+    },
+    "tags": {
+      "type": "array",
+      "description": "List of user's tags",
+      "items": {
+        "type": "string",
+        "enum": [
+          "developer",
+          "designer",
+          "manager",
+          "admin",
+          "user"
+        ]
+      },
+      "minItems": 1,
+      "maxItems": 5,
+      "uniqueItems": true
+    },
+    "address": {
+      "type": "object",
+      "description": "User's address",
+      "properties": {
+        "street": {
+          "type": "string",
+          "description": "Street address"
+        },
+        "city": {
+          "type": "string",
+          "description": "City name"
+        },
+        "country": {
+          "type": "string",
+          "description": "Country name",
+          "enum": [
+            "US",
+            "UK",
+            "CA",
+            "AU"
+          ]
+        },
+        "zipCode": {
+          "type": "string",
+          "description": "ZIP/Postal code",
+          "pattern": "^[0-9]{5}(-[0-9]{4})?$"
+        }
+      },
+      "required": [
+        "street",
+        "city",
+        "country"
+      ]
+    },
+    "preferences": {
+      "type": "object",
+      "description": "User preferences",
+      "properties": {
+        "theme": {
+          "type": "string",
+          "enum": [
+            "light",
+            "dark",
+            "system"
+          ],
+          "default": "system"
+        },
+        "notifications": {
+          "type": "boolean",
+          "default": true
+        },
+        "language": {
+          "type": "string",
+          "enum": [
+            "en",
+            "es",
+            "fr",
+            "de",
+            "ja"
+          ],
+          "default": "en"
+        }
+      }
+    }
+  },
+  "required": [
+    "name",
+    "age",
+    "email"
+  ],
+  "additionalProperties": false
+}