tests: quasar - alpha

2025-04-04 14:44:04 +02:00 · 2025-04-04 14:44:04 +02:00 · ddf8dbce1f
commit ddf8dbce1f
parent 214eed04f5
10 changed files with 1393 additions and 238 deletions
--- a/packages/kbot/dist/0c550cfc34328e29d9df.js
+++ b/packages/kbot/dist/0c550cfc34328e29d9df.js
--- a/packages/kbot/logs/params.json
+++ b/packages/kbot/logs/params.json
@ -1,9 +1,9 @@
 {
-  "model": "openai/gpt-4o-mini",
+  "model": "openrouter/quasar-alpha",
  "messages": [
    {
      "role": "user",
-      "content": "divide 15 by 3. Return only the number, no explanation."
+      "content": "Calculate 2 raised to the power of 3. Return only the number, no explanation."
    },
    {
      "role": "user",
--- a/packages/kbot/package.json
+++ b/packages/kbot/package.json
@ -23,6 +23,7 @@
    "test:seo": "vitest run tests/unit/seo.test.ts",
    "test:language": "vitest run tests/unit/language.test.ts",
    "test:tools": "vitest run tests/unit/tools.test.ts",
+    "test:coding": "vitest run tests/unit/coding.test.ts",
    "test2:watch": "vitest",
    "test2:coverage": "vitest run --coverage",
    "webpack": "webpack --config webpack.config.js --stats-error-details",
@ -49,6 +50,7 @@
    "p-map": "7.0.3",
    "ts-retry": "6.0.0",
    "tslog": "^4.9.3",
+    "vm2": "^3.9.19",
    "yargs": "17.7.2",
    "zod": "3.24.2"
  },
--- a/packages/kbot/tests/unit/coding.test.ts
+++ b/packages/kbot/tests/unit/coding.test.ts
@ -0,0 +1,122 @@
+import { describe, it, expect } from 'vitest'
+import * as path from 'node:path'
+import { sync as exists } from "@polymech/fs/exists"
+import { sync as read } from "@polymech/fs/read"
+import { sync as write } from "@polymech/fs/write"
+import { sync as mkdirp } from "mkdirp"
+import { VM } from 'vm2'
+
+import { 
+  getDefaultModels, 
+  TEST_BASE_PATH, 
+  TEST_LOGS_PATH, 
+  TEST_PREFERENCES_PATH, 
+  TEST_TIMEOUT,
+  TestResult,
+  runTest,
+  generateTestReport,
+  getReportPaths
+} from './commons'
+
+// Optionally override models for this specific test file
+const models = getDefaultModels()
+
+// Ensure test-data/code directory exists
+const TEST_CODE_DIR = path.resolve(__dirname, '../test-data/code')
+if (exists(TEST_CODE_DIR) !== 'directory') {
+  mkdirp(TEST_CODE_DIR)
+}
+
+describe('Coding Capabilities', () => {
+  let testResults: TestResult[] = []
+  const TEST_LOG_PATH = getReportPaths('coding', 'json')
+  const TEST_REPORT_PATH = getReportPaths('coding', 'md')
+
+  const executeCode = (code: string, context: any = {}): any => {
+    const vm = new VM({
+      timeout: 1000,
+      sandbox: context
+    })
+    return vm.run(code)
+  }
+
+  it.each(models)('should generate and execute a simple function with model %s', async (modelName) => {
+    const prompt = `Generate a JavaScript function that adds two numbers and returns the result. 
+    The function should be named 'add' and take two parameters 'a' and 'b'.
+    Return only the function code, no explanation.`
+    
+    const result = await runTest(
+      prompt,
+      'function add(a, b) { return a + b; }',
+      'simple_function',
+      modelName,
+      TEST_LOG_PATH
+    )
+    
+    testResults.push(result)
+    
+    // Save the code to a file
+    const codePath = path.resolve(TEST_CODE_DIR, 'add.js')
+    write(codePath, result.result[0])
+    
+    // Execute the code
+    const context = {}
+    const addFunction = executeCode(result.result[0], context)
+    expect(addFunction(5, 3)).toBe(8)
+  }, { timeout: TEST_TIMEOUT })
+
+  it.each(models)('should generate and execute a factorial function with model %s', async (modelName) => {
+    const prompt = `Generate a JavaScript function that calculates the factorial of a number.
+    The function should be named 'factorial' and take one parameter 'n'.
+    Return only the function code, no explanation.`
+    
+    const result = await runTest(
+      prompt,
+      'function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }',
+      'factorial_function',
+      modelName,
+      TEST_LOG_PATH
+    )
+    
+    testResults.push(result)
+    
+    // Save the code to a file
+    const codePath = path.resolve(TEST_CODE_DIR, 'factorial.js')
+    write(codePath, result.result[0])
+    
+    // Execute the code
+    const context = {}
+    const factorialFunction = executeCode(result.result[0], context)
+    expect(factorialFunction(5)).toBe(120)
+  }, { timeout: TEST_TIMEOUT })
+
+  it.each(models)('should generate and execute a fibonacci function with model %s', async (modelName) => {
+    const prompt = `Generate a JavaScript function that calculates the nth Fibonacci number.
+    The function should be named 'fibonacci' and take one parameter 'n'.
+    Return only the function code, no explanation.`
+    
+    const result = await runTest(
+      prompt,
+      'function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }',
+      'fibonacci_function',
+      modelName,
+      TEST_LOG_PATH
+    )
+    
+    testResults.push(result)
+    
+    // Save the code to a file
+    const codePath = path.resolve(TEST_CODE_DIR, 'fibonacci.js')
+    write(codePath, result.result[0])
+    
+    // Execute the code
+    const context = {}
+    const fibonacciFunction = executeCode(result.result[0], context)
+    expect(fibonacciFunction(6)).toBe(8)
+  }, { timeout: TEST_TIMEOUT })
+
+  it('should generate markdown report', () => {
+    generateTestReport(testResults, 'Coding Capabilities Test Results', TEST_REPORT_PATH)
+    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
+  })
+}) 
--- a/packages/kbot/tests/unit/commons.ts
+++ b/packages/kbot/tests/unit/commons.ts
@ -17,7 +17,8 @@ export const getFastModels = (): string[] => {
  return [
    E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_3_5_TURBO,
    E_OPENROUTER_MODEL.MODEL_DEEPSEEK_DEEPSEEK_R1_DISTILL_QWEN_14B_FREE,
-    E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI
+    E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI,
+    E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA
  ]
 }

--- a/packages/kbot/tests/unit/reports/all.json
+++ b/packages/kbot/tests/unit/reports/all.json
@ -7176,6 +7176,457 @@
      "passed": true,
      "duration": 954,
      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:36:55.754Z",
+      "passed": true,
+      "duration": 1505,
+      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:36:59.232Z",
+      "passed": true,
+      "duration": 3470,
+      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:37:00.080Z",
+      "passed": true,
+      "duration": 842,
+      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:37:00.897Z",
+      "passed": true,
+      "duration": 811,
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:37:01.784Z",
+      "passed": true,
+      "duration": 881,
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [],
+      "expected": "24",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:37:03.117Z",
+      "passed": false,
+      "duration": 1327,
+      "reason": "Model returned empty response",
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:37:04.222Z",
+      "passed": true,
+      "duration": 1096,
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:37:05.008Z",
+      "passed": true,
+      "duration": 780,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:37:05.799Z",
+      "passed": true,
+      "duration": 784,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:37:10.272Z",
+      "passed": true,
+      "duration": 4467,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:37:11.255Z",
+      "passed": true,
+      "duration": 975,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:37:11.993Z",
+      "passed": true,
+      "duration": 731,
+      "category": "basic"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "-2,-3"
+      ],
+      "expected": "-2,-3",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:12.580Z",
+      "passed": true,
+      "duration": 1229,
+      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "The solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.\n\nAnswer: -3, -2"
+      ],
+      "expected": "-2,-3",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:38:24.221Z",
+      "passed": false,
+      "duration": 11633,
+      "reason": "Expected -2,-3, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.\n\nanswer: -3, -2",
+      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "-2, -3"
+      ],
+      "expected": "-2,-3",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:38:25.175Z",
+      "passed": false,
+      "duration": 943,
+      "reason": "Expected -2,-3, but got -2, -3",
+      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "-2,-3"
+      ],
+      "expected": "-2,-3",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:38:26.290Z",
+      "passed": true,
+      "duration": 1105,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:27.138Z",
+      "passed": true,
+      "duration": 838,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:38:34.971Z",
+      "passed": true,
+      "duration": 7825,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:38:35.899Z",
+      "passed": true,
+      "duration": 920,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:38:36.748Z",
+      "passed": true,
+      "duration": 840,
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:37.951Z",
+      "passed": true,
+      "duration": 1195,
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "8",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:38:49.318Z",
+      "passed": false,
+      "duration": 11358,
+      "reason": "Expected 8, but got 5",
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:38:50.264Z",
+      "passed": false,
+      "duration": 935,
+      "reason": "Expected 8, but got 5",
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:38:50.973Z",
+      "passed": true,
+      "duration": 701,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:51.774Z",
+      "passed": true,
+      "duration": 793,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:39:08.114Z",
+      "passed": true,
+      "duration": 16332,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:39:09.133Z",
+      "passed": true,
+      "duration": 1012,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:39:10.677Z",
+      "passed": true,
+      "duration": 1535,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:39:11.607Z",
+      "passed": true,
+      "duration": 922,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:39:18.707Z",
+      "passed": true,
+      "duration": 7091,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:39:19.719Z",
+      "passed": true,
+      "duration": 1004,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:39:21.294Z",
+      "passed": true,
+      "duration": 1567,
+      "category": "math"
    }
  ],
  "highscores": [
@ -7184,13 +7635,13 @@
      "rankings": [
        {
          "model": "openai/gpt-4o-mini",
-          "duration": 935,
-          "duration_secs": 0.935
+          "duration": 943,
+          "duration_secs": 0.943
        },
        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 1435,
-          "duration_secs": 1.435
+          "model": "openrouter/quasar-alpha",
+          "duration": 1105,
+          "duration_secs": 1.105
        }
      ]
    },
@ -7198,14 +7649,14 @@
      "test": "factorial",
      "rankings": [
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 861,
-          "duration_secs": 0.861
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 838,
+          "duration_secs": 0.838
        },
        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 1454,
-          "duration_secs": 1.454
+          "model": "openrouter/quasar-alpha",
+          "duration": 840,
+          "duration_secs": 0.84
        }
      ]
    },
@ -7213,29 +7664,29 @@
      "test": "fibonacci",
      "rankings": [
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 756,
-          "duration_secs": 0.756
+          "model": "openrouter/quasar-alpha",
+          "duration": 701,
+          "duration_secs": 0.701
        },
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 792,
-          "duration_secs": 0.792
+          "model": "openai/gpt-4o-mini",
+          "duration": 935,
+          "duration_secs": 0.935
        }
      ]
    },
    {
      "test": "square_root",
      "rankings": [
+        {
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 793,
+          "duration_secs": 0.793
+        },
        {
          "model": "anthropic/claude-3.5-sonnet",
          "duration": 819,
          "duration_secs": 0.819
-        },
-        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 828,
-          "duration_secs": 0.828
        }
      ]
    },
@ -7244,13 +7695,13 @@
      "rankings": [
        {
          "model": "openai/gpt-3.5-turbo",
-          "duration": 795,
-          "duration_secs": 0.795
+          "duration": 922,
+          "duration_secs": 0.922
        },
        {
          "model": "openai/gpt-4o-mini",
-          "duration": 966,
-          "duration_secs": 0.966
+          "duration": 1004,
+          "duration_secs": 1.004
        }
      ]
    },
@ -7258,14 +7709,14 @@
      "test": "addition",
      "rankings": [
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 910,
-          "duration_secs": 0.91
+          "model": "openrouter/quasar-alpha",
+          "duration": 811,
+          "duration_secs": 0.811
        },
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 1484,
-          "duration_secs": 1.484
+          "model": "openai/gpt-4o-mini",
+          "duration": 842,
+          "duration_secs": 0.842
        }
      ]
    },
@ -7273,14 +7724,14 @@
      "test": "multiplication",
      "rankings": [
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 955,
-          "duration_secs": 0.955
+          "model": "openrouter/quasar-alpha",
+          "duration": 780,
+          "duration_secs": 0.78
        },
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 1095,
-          "duration_secs": 1.095
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 881,
+          "duration_secs": 0.881
        }
      ]
    },
@ -7288,14 +7739,14 @@
      "test": "division",
      "rankings": [
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 816,
-          "duration_secs": 0.816
+          "model": "openrouter/quasar-alpha",
+          "duration": 731,
+          "duration_secs": 0.731
        },
        {
-          "model": "qwen/qwq-32b",
-          "duration": 917,
-          "duration_secs": 0.917
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 784,
+          "duration_secs": 0.784
        }
      ]
    },
@ -7360,5 +7811,5 @@
      ]
    }
  ],
-  "lastUpdated": "2025-04-03T17:14:41.955Z"
+  "lastUpdated": "2025-04-04T12:39:21.300Z"
 }
--- a/packages/kbot/tests/unit/reports/basic.json
+++ b/packages/kbot/tests/unit/reports/basic.json
@ -1251,6 +1251,173 @@
      "passed": true,
      "duration": 954,
      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:36:55.754Z",
+      "passed": true,
+      "duration": 1505,
+      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:36:59.232Z",
+      "passed": true,
+      "duration": 3470,
+      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:37:00.080Z",
+      "passed": true,
+      "duration": 842,
+      "category": "basic"
+    },
+    {
+      "test": "addition",
+      "prompt": "add 5 and 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:37:00.897Z",
+      "passed": true,
+      "duration": 811,
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:37:01.784Z",
+      "passed": true,
+      "duration": 881,
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [],
+      "expected": "24",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:37:03.117Z",
+      "passed": false,
+      "duration": 1327,
+      "reason": "Model returned empty response",
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:37:04.222Z",
+      "passed": true,
+      "duration": 1096,
+      "category": "basic"
+    },
+    {
+      "test": "multiplication",
+      "prompt": "multiply 8 and 3. Return only the number, no explanation.",
+      "result": [
+        "24"
+      ],
+      "expected": "24",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:37:05.008Z",
+      "passed": true,
+      "duration": 780,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:37:05.799Z",
+      "passed": true,
+      "duration": 784,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:37:10.272Z",
+      "passed": true,
+      "duration": 4467,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:37:11.255Z",
+      "passed": true,
+      "duration": 975,
+      "category": "basic"
+    },
+    {
+      "test": "division",
+      "prompt": "divide 15 by 3. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "5",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:37:11.993Z",
+      "passed": true,
+      "duration": 731,
+      "category": "basic"
    }
  ],
  "highscores": [
@ -1258,14 +1425,14 @@
      "test": "addition",
      "rankings": [
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 910,
-          "duration_secs": 0.91
+          "model": "openrouter/quasar-alpha",
+          "duration": 811,
+          "duration_secs": 0.811
        },
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 1484,
-          "duration_secs": 1.484
+          "model": "openai/gpt-4o-mini",
+          "duration": 842,
+          "duration_secs": 0.842
        }
      ]
    },
@ -1273,14 +1440,14 @@
      "test": "multiplication",
      "rankings": [
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 955,
-          "duration_secs": 0.955
+          "model": "openrouter/quasar-alpha",
+          "duration": 780,
+          "duration_secs": 0.78
        },
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 1095,
-          "duration_secs": 1.095
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 881,
+          "duration_secs": 0.881
        }
      ]
    },
@ -1288,17 +1455,17 @@
      "test": "division",
      "rankings": [
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 816,
-          "duration_secs": 0.816
+          "model": "openrouter/quasar-alpha",
+          "duration": 731,
+          "duration_secs": 0.731
        },
        {
-          "model": "qwen/qwq-32b",
-          "duration": 917,
-          "duration_secs": 0.917
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 784,
+          "duration_secs": 0.784
        }
      ]
    }
  ],
-  "lastUpdated": "2025-04-03T17:14:41.951Z"
+  "lastUpdated": "2025-04-04T12:37:11.994Z"
 }
--- a/packages/kbot/tests/unit/reports/basic.md
+++ b/packages/kbot/tests/unit/reports/basic.md
@ -6,38 +6,37 @@

 | Test | Model | Duration (ms) | Duration (s) |
 |------|-------|--------------|--------------|
-| addition | openai/gpt-4o-mini | 910 | 0.91 |
-| addition | openai/gpt-3.5-turbo | 1484 | 1.48 |
-| addition | deepseek/deepseek-r1-distill-qwen-14b:free | 8460 | 8.46 |
-| multiplication | openai/gpt-3.5-turbo | 955 | 0.95 |
-| multiplication | openai/gpt-4o-mini | 1095 | 1.09 |
-| multiplication | deepseek/deepseek-r1-distill-qwen-14b:free | 7653 | 7.65 |
-| division | openai/gpt-3.5-turbo | 816 | 0.82 |
-| division | openai/gpt-4o-mini | 954 | 0.95 |
-| division | deepseek/deepseek-r1-distill-qwen-14b:free | 16655 | 16.66 |
+| addition | openrouter/quasar-alpha | 811 | 0.81 |
+| addition | openai/gpt-4o-mini | 842 | 0.84 |
+| addition | openai/gpt-3.5-turbo | 1505 | 1.50 |
+| addition | deepseek/deepseek-r1-distill-qwen-14b:free | 3470 | 3.47 |
+| multiplication | openrouter/quasar-alpha | 780 | 0.78 |
+| multiplication | openai/gpt-3.5-turbo | 881 | 0.88 |
+| multiplication | openai/gpt-4o-mini | 1096 | 1.10 |
+| multiplication | deepseek/deepseek-r1-distill-qwen-14b:free | 1327 | 1.33 |
+| division | openrouter/quasar-alpha | 731 | 0.73 |
+| division | openai/gpt-3.5-turbo | 784 | 0.78 |
+| division | openai/gpt-4o-mini | 975 | 0.97 |
+| division | deepseek/deepseek-r1-distill-qwen-14b:free | 4467 | 4.47 |

 ## Summary

- Total Tests: 9
- Passed: 8
+- Total Tests: 12
+- Passed: 11
 - Failed: 1
- Success Rate: 88.89%
- Average Duration: 4331ms (4.33s)
+- Success Rate: 91.67%
+- Average Duration: 1472ms (1.47s)

 ## Failed Tests

-### division - deepseek/deepseek-r1-distill-qwen-14b:free
+### multiplication - deepseek/deepseek-r1-distill-qwen-14b:free

- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `15 divided by 3 is 5. 
-
-Answer: 5`
- Duration: 16655ms (16.66s)
- Reason: Expected 5, but got 15 divided by 3 is 5. 
-
-answer: 5
- Timestamp: 4/3/2025, 7:14:40 PM
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: ``
+- Duration: 1327ms (1.33s)
+- Reason: Model returned empty response
+- Timestamp: 4/4/2025, 2:37:03 PM

 ## Passed Tests

@ -46,62 +45,86 @@ answer: 5
 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 1484ms (1.48s)
- Timestamp: 4/3/2025, 7:14:04 PM
+- Duration: 1505ms (1.50s)
+- Timestamp: 4/4/2025, 2:36:55 PM

 ### addition - deepseek/deepseek-r1-distill-qwen-14b:free

 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 8460ms (8.46s)
- Timestamp: 4/3/2025, 7:14:12 PM
+- Duration: 3470ms (3.47s)
+- Timestamp: 4/4/2025, 2:36:59 PM

 ### addition - openai/gpt-4o-mini

 - Prompt: `add 5 and 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 910ms (0.91s)
- Timestamp: 4/3/2025, 7:14:13 PM
+- Duration: 842ms (0.84s)
+- Timestamp: 4/4/2025, 2:37:00 PM
+
+### addition - openrouter/quasar-alpha
+
+- Prompt: `add 5 and 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Duration: 811ms (0.81s)
+- Timestamp: 4/4/2025, 2:37:00 PM

 ### multiplication - openai/gpt-3.5-turbo

 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Duration: 955ms (0.95s)
- Timestamp: 4/3/2025, 7:14:14 PM
-
-### multiplication - deepseek/deepseek-r1-distill-qwen-14b:free
-
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Duration: 7653ms (7.65s)
- Timestamp: 4/3/2025, 7:14:22 PM
+- Duration: 881ms (0.88s)
+- Timestamp: 4/4/2025, 2:37:01 PM

 ### multiplication - openai/gpt-4o-mini

 - Prompt: `multiply 8 and 3. Return only the number, no explanation.`
 - Expected: `24`
 - Actual: `24`
- Duration: 1095ms (1.09s)
- Timestamp: 4/3/2025, 7:14:23 PM
+- Duration: 1096ms (1.10s)
+- Timestamp: 4/4/2025, 2:37:04 PM
+
+### multiplication - openrouter/quasar-alpha
+
+- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
+- Expected: `24`
+- Actual: `24`
+- Duration: 780ms (0.78s)
+- Timestamp: 4/4/2025, 2:37:05 PM

 ### division - openai/gpt-3.5-turbo

 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Duration: 816ms (0.82s)
- Timestamp: 4/3/2025, 7:14:24 PM
+- Duration: 784ms (0.78s)
+- Timestamp: 4/4/2025, 2:37:05 PM
+
+### division - deepseek/deepseek-r1-distill-qwen-14b:free
+
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5`
+- Duration: 4467ms (4.47s)
+- Timestamp: 4/4/2025, 2:37:10 PM

 ### division - openai/gpt-4o-mini

 - Prompt: `divide 15 by 3. Return only the number, no explanation.`
 - Expected: `5`
 - Actual: `5`
- Duration: 954ms (0.95s)
- Timestamp: 4/3/2025, 7:14:41 PM
+- Duration: 975ms (0.97s)
+- Timestamp: 4/4/2025, 2:37:11 PM
+
+### division - openrouter/quasar-alpha
+
+- Prompt: `divide 15 by 3. Return only the number, no explanation.`
+- Expected: `5`
+- Actual: `5`
+- Duration: 731ms (0.73s)
+- Timestamp: 4/4/2025, 2:37:11 PM

--- a/packages/kbot/tests/unit/reports/math.json
+++ b/packages/kbot/tests/unit/reports/math.json
@ -2828,6 +2828,290 @@
      "passed": true,
      "duration": 966,
      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "-2,-3"
+      ],
+      "expected": "-2,-3",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:12.580Z",
+      "passed": true,
+      "duration": 1229,
+      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "The solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.\n\nAnswer: -3, -2"
+      ],
+      "expected": "-2,-3",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:38:24.221Z",
+      "passed": false,
+      "duration": 11633,
+      "reason": "Expected -2,-3, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.\n\nanswer: -3, -2",
+      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "-2, -3"
+      ],
+      "expected": "-2,-3",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:38:25.175Z",
+      "passed": false,
+      "duration": 943,
+      "reason": "Expected -2,-3, but got -2, -3",
+      "category": "math"
+    },
+    {
+      "test": "quadratic",
+      "prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
+      "result": [
+        "-2,-3"
+      ],
+      "expected": "-2,-3",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:38:26.290Z",
+      "passed": true,
+      "duration": 1105,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:27.138Z",
+      "passed": true,
+      "duration": 838,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:38:34.971Z",
+      "passed": true,
+      "duration": 7825,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:38:35.899Z",
+      "passed": true,
+      "duration": 920,
+      "category": "math"
+    },
+    {
+      "test": "factorial",
+      "prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
+      "result": [
+        "120"
+      ],
+      "expected": "120",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:38:36.748Z",
+      "passed": true,
+      "duration": 840,
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:37.951Z",
+      "passed": true,
+      "duration": 1195,
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "8",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:38:49.318Z",
+      "passed": false,
+      "duration": 11358,
+      "reason": "Expected 8, but got 5",
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "5"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:38:50.264Z",
+      "passed": false,
+      "duration": 935,
+      "reason": "Expected 8, but got 5",
+      "category": "math"
+    },
+    {
+      "test": "fibonacci",
+      "prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:38:50.973Z",
+      "passed": true,
+      "duration": 701,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:38:51.774Z",
+      "passed": true,
+      "duration": 793,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:39:08.114Z",
+      "passed": true,
+      "duration": 16332,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:39:09.133Z",
+      "passed": true,
+      "duration": 1012,
+      "category": "math"
+    },
+    {
+      "test": "square_root",
+      "prompt": "Calculate the square root of 16. Return only the number, no explanation.",
+      "result": [
+        "4"
+      ],
+      "expected": "4",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:39:10.677Z",
+      "passed": true,
+      "duration": 1535,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-3.5-turbo",
+      "router": "openai/gpt-3.5-turbo",
+      "timestamp": "2025-04-04T12:39:11.607Z",
+      "passed": true,
+      "duration": 922,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
+      "timestamp": "2025-04-04T12:39:18.707Z",
+      "passed": true,
+      "duration": 7091,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openai/gpt-4o-mini",
+      "router": "openai/gpt-4o-mini",
+      "timestamp": "2025-04-04T12:39:19.719Z",
+      "passed": true,
+      "duration": 1004,
+      "category": "math"
+    },
+    {
+      "test": "power",
+      "prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
+      "result": [
+        "8"
+      ],
+      "expected": "8",
+      "model": "openrouter/quasar-alpha",
+      "router": "openrouter/quasar-alpha",
+      "timestamp": "2025-04-04T12:39:21.294Z",
+      "passed": true,
+      "duration": 1567,
+      "category": "math"
    }
  ],
  "highscores": [
@ -2836,13 +3120,13 @@
      "rankings": [
        {
          "model": "openai/gpt-4o-mini",
-          "duration": 935,
-          "duration_secs": 0.935
+          "duration": 943,
+          "duration_secs": 0.943
        },
        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 1435,
-          "duration_secs": 1.435
+          "model": "openrouter/quasar-alpha",
+          "duration": 1105,
+          "duration_secs": 1.105
        }
      ]
    },
@ -2850,14 +3134,14 @@
      "test": "factorial",
      "rankings": [
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 861,
-          "duration_secs": 0.861
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 838,
+          "duration_secs": 0.838
        },
        {
-          "model": "anthropic/claude-3.5-sonnet",
-          "duration": 1454,
-          "duration_secs": 1.454
+          "model": "openrouter/quasar-alpha",
+          "duration": 840,
+          "duration_secs": 0.84
        }
      ]
    },
@ -2865,29 +3149,29 @@
      "test": "fibonacci",
      "rankings": [
        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 756,
-          "duration_secs": 0.756
+          "model": "openrouter/quasar-alpha",
+          "duration": 701,
+          "duration_secs": 0.701
        },
        {
-          "model": "openai/gpt-3.5-turbo",
-          "duration": 792,
-          "duration_secs": 0.792
+          "model": "openai/gpt-4o-mini",
+          "duration": 935,
+          "duration_secs": 0.935
        }
      ]
    },
    {
      "test": "square_root",
      "rankings": [
+        {
+          "model": "openai/gpt-3.5-turbo",
+          "duration": 793,
+          "duration_secs": 0.793
+        },
        {
          "model": "anthropic/claude-3.5-sonnet",
          "duration": 819,
          "duration_secs": 0.819
-        },
-        {
-          "model": "openai/gpt-4o-mini",
-          "duration": 828,
-          "duration_secs": 0.828
        }
      ]
    },
@ -2896,16 +3180,16 @@
      "rankings": [
        {
          "model": "openai/gpt-3.5-turbo",
-          "duration": 795,
-          "duration_secs": 0.795
+          "duration": 922,
+          "duration_secs": 0.922
        },
        {
          "model": "openai/gpt-4o-mini",
-          "duration": 966,
-          "duration_secs": 0.966
+          "duration": 1004,
+          "duration_secs": 1.004
        }
      ]
    }
  ],
-  "lastUpdated": "2025-04-03T17:10:26.897Z"
+  "lastUpdated": "2025-04-04T12:39:21.296Z"
 }
--- a/packages/kbot/tests/unit/reports/math.md
+++ b/packages/kbot/tests/unit/reports/math.md
@ -6,191 +6,204 @@

 | Test | Model | Duration (ms) | Duration (s) |
 |------|-------|--------------|--------------|
-| quadratic | openai/gpt-4o-mini | 935 | 0.94 |
-| quadratic | openai/gpt-3.5-turbo | 1685 | 1.69 |
-| quadratic | deepseek/deepseek-r1-distill-qwen-14b:free | 10827 | 10.83 |
-| factorial | openai/gpt-4o-mini | 861 | 0.86 |
-| factorial | openai/gpt-3.5-turbo | 3991 | 3.99 |
-| factorial | deepseek/deepseek-r1-distill-qwen-14b:free | 9116 | 9.12 |
-| fibonacci | openai/gpt-4o-mini | 756 | 0.76 |
-| fibonacci | openai/gpt-3.5-turbo | 792 | 0.79 |
-| fibonacci | deepseek/deepseek-r1-distill-qwen-14b:free | 8292 | 8.29 |
-| square_root | openai/gpt-4o-mini | 828 | 0.83 |
-| square_root | openai/gpt-3.5-turbo | 892 | 0.89 |
-| square_root | deepseek/deepseek-r1-distill-qwen-14b:free | 1755 | 1.75 |
-| power | openai/gpt-3.5-turbo | 795 | 0.80 |
-| power | openai/gpt-4o-mini | 966 | 0.97 |
-| power | deepseek/deepseek-r1-distill-qwen-14b:free | 7263 | 7.26 |
+| quadratic | openai/gpt-4o-mini | 943 | 0.94 |
+| quadratic | openrouter/quasar-alpha | 1105 | 1.10 |
+| quadratic | openai/gpt-3.5-turbo | 1229 | 1.23 |
+| quadratic | deepseek/deepseek-r1-distill-qwen-14b:free | 11633 | 11.63 |
+| factorial | openai/gpt-3.5-turbo | 838 | 0.84 |
+| factorial | openrouter/quasar-alpha | 840 | 0.84 |
+| factorial | openai/gpt-4o-mini | 920 | 0.92 |
+| factorial | deepseek/deepseek-r1-distill-qwen-14b:free | 7825 | 7.83 |
+| fibonacci | openrouter/quasar-alpha | 701 | 0.70 |
+| fibonacci | openai/gpt-4o-mini | 935 | 0.94 |
+| fibonacci | openai/gpt-3.5-turbo | 1195 | 1.20 |
+| fibonacci | deepseek/deepseek-r1-distill-qwen-14b:free | 11358 | 11.36 |
+| square_root | openai/gpt-3.5-turbo | 793 | 0.79 |
+| square_root | openai/gpt-4o-mini | 1012 | 1.01 |
+| square_root | openrouter/quasar-alpha | 1535 | 1.53 |
+| square_root | deepseek/deepseek-r1-distill-qwen-14b:free | 16332 | 16.33 |
+| power | openai/gpt-3.5-turbo | 922 | 0.92 |
+| power | openai/gpt-4o-mini | 1004 | 1.00 |
+| power | openrouter/quasar-alpha | 1567 | 1.57 |
+| power | deepseek/deepseek-r1-distill-qwen-14b:free | 7091 | 7.09 |

 ## Summary

- Total Tests: 15
- Passed: 9
- Failed: 6
- Success Rate: 60.00%
- Average Duration: 3317ms (3.32s)
+- Total Tests: 20
+- Passed: 16
+- Failed: 4
+- Success Rate: 80.00%
+- Average Duration: 3489ms (3.49s)

 ## Failed Tests

-### quadratic - openai/gpt-3.5-turbo
-
- Prompt: `Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.`
- Expected: `-2,-3`
- Actual: `-3, -2`
- Duration: 1685ms (1.69s)
- Reason: Expected -2,-3, but got -3, -2
- Timestamp: 4/3/2025, 7:09:38 PM
-
 ### quadratic - deepseek/deepseek-r1-distill-qwen-14b:free

 - Prompt: `Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.`
 - Expected: `-2,-3`
- Actual: `The solutions to the quadratic equation x² + 5x + 6 = 0 are -2 and -3.
+- Actual: `The solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.

-Specific steps:
+Answer: -3, -2`
+- Duration: 11633ms (11.63s)
+- Reason: Expected -2,-3, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.

-1. **Identify coefficients**: a = 1, b = 5, c = 6.
-2. **Calculate discriminant**: b² - 4ac = 25 - 24 = 1.
-3. **Apply quadratic formula**: x = (-5 ± √1)/2.
-4. **Compute solutions**: x = (-5 + 1)/2 = -2 and x = (-5 - 1)/2 = -3.
-
-2, -3
-
-Answer: -2,-3`
- Duration: 10827ms (10.83s)
- Reason: Expected -2,-3, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -2 and -3.
-
-specific steps:
-
-1. **identify coefficients**: a = 1, b = 5, c = 6.
-2. **calculate discriminant**: b² - 4ac = 25 - 24 = 1.
-3. **apply quadratic formula**: x = (-5 ± √1)/2.
-4. **compute solutions**: x = (-5 + 1)/2 = -2 and x = (-5 - 1)/2 = -3.
-
-2, -3
-
-answer: -2,-3
- Timestamp: 4/3/2025, 7:09:49 PM
+answer: -3, -2
+- Timestamp: 4/4/2025, 2:38:24 PM

 ### quadratic - openai/gpt-4o-mini

 - Prompt: `Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.`
 - Expected: `-2,-3`
 - Actual: `-2, -3`
- Duration: 935ms (0.94s)
+- Duration: 943ms (0.94s)
 - Reason: Expected -2,-3, but got -2, -3
- Timestamp: 4/3/2025, 7:09:50 PM
-
-### factorial - deepseek/deepseek-r1-distill-qwen-14b:free
-
- Prompt: `Calculate 5! (factorial of 5). Return only the number, no explanation.`
- Expected: `120`
- Actual: `5! = 120
-
-The factorial of 5 is calculated as:
-
-5 × 4 × 3 × 2 × 1 = 120
-
-**Answer:** 120`
- Duration: 9116ms (9.12s)
- Reason: Expected 120, but got 5! = 120
-
-the factorial of 5 is calculated as:
-
-5 × 4 × 3 × 2 × 1 = 120
-
-**answer:** 120
- Timestamp: 4/3/2025, 7:10:03 PM
+- Timestamp: 4/4/2025, 2:38:25 PM

 ### fibonacci - deepseek/deepseek-r1-distill-qwen-14b:free

 - Prompt: `Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `5`
- Duration: 8292ms (8.29s)
+- Duration: 11358ms (11.36s)
 - Reason: Expected 8, but got 5
- Timestamp: 4/3/2025, 7:10:13 PM
+- Timestamp: 4/4/2025, 2:38:49 PM

 ### fibonacci - openai/gpt-4o-mini

 - Prompt: `Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `5`
- Duration: 756ms (0.76s)
+- Duration: 935ms (0.94s)
 - Reason: Expected 8, but got 5
- Timestamp: 4/3/2025, 7:10:14 PM
+- Timestamp: 4/4/2025, 2:38:50 PM

 ## Passed Tests

+### quadratic - openai/gpt-3.5-turbo
+
+- Prompt: `Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.`
+- Expected: `-2,-3`
+- Actual: `-2,-3`
+- Duration: 1229ms (1.23s)
+- Timestamp: 4/4/2025, 2:38:12 PM
+
+### quadratic - openrouter/quasar-alpha
+
+- Prompt: `Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.`
+- Expected: `-2,-3`
+- Actual: `-2,-3`
+- Duration: 1105ms (1.10s)
+- Timestamp: 4/4/2025, 2:38:26 PM
+
 ### factorial - openai/gpt-3.5-turbo

 - Prompt: `Calculate 5! (factorial of 5). Return only the number, no explanation.`
 - Expected: `120`
 - Actual: `120`
- Duration: 3991ms (3.99s)
- Timestamp: 4/3/2025, 7:09:54 PM
+- Duration: 838ms (0.84s)
+- Timestamp: 4/4/2025, 2:38:27 PM
+
+### factorial - deepseek/deepseek-r1-distill-qwen-14b:free
+
+- Prompt: `Calculate 5! (factorial of 5). Return only the number, no explanation.`
+- Expected: `120`
+- Actual: `120`
+- Duration: 7825ms (7.83s)
+- Timestamp: 4/4/2025, 2:38:34 PM

 ### factorial - openai/gpt-4o-mini

 - Prompt: `Calculate 5! (factorial of 5). Return only the number, no explanation.`
 - Expected: `120`
 - Actual: `120`
- Duration: 861ms (0.86s)
- Timestamp: 4/3/2025, 7:10:04 PM
+- Duration: 920ms (0.92s)
+- Timestamp: 4/4/2025, 2:38:35 PM
+
+### factorial - openrouter/quasar-alpha
+
+- Prompt: `Calculate 5! (factorial of 5). Return only the number, no explanation.`
+- Expected: `120`
+- Actual: `120`
+- Duration: 840ms (0.84s)
+- Timestamp: 4/4/2025, 2:38:36 PM

 ### fibonacci - openai/gpt-3.5-turbo

 - Prompt: `Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 792ms (0.79s)
- Timestamp: 4/3/2025, 7:10:05 PM
+- Duration: 1195ms (1.20s)
+- Timestamp: 4/4/2025, 2:38:37 PM
+
+### fibonacci - openrouter/quasar-alpha
+
+- Prompt: `Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Duration: 701ms (0.70s)
+- Timestamp: 4/4/2025, 2:38:50 PM

 ### square_root - openai/gpt-3.5-turbo

 - Prompt: `Calculate the square root of 16. Return only the number, no explanation.`
 - Expected: `4`
 - Actual: `4`
- Duration: 892ms (0.89s)
- Timestamp: 4/3/2025, 7:10:15 PM
+- Duration: 793ms (0.79s)
+- Timestamp: 4/4/2025, 2:38:51 PM

 ### square_root - deepseek/deepseek-r1-distill-qwen-14b:free

 - Prompt: `Calculate the square root of 16. Return only the number, no explanation.`
 - Expected: `4`
 - Actual: `4`
- Duration: 1755ms (1.75s)
- Timestamp: 4/3/2025, 7:10:17 PM
+- Duration: 16332ms (16.33s)
+- Timestamp: 4/4/2025, 2:39:08 PM

 ### square_root - openai/gpt-4o-mini

 - Prompt: `Calculate the square root of 16. Return only the number, no explanation.`
 - Expected: `4`
 - Actual: `4`
- Duration: 828ms (0.83s)
- Timestamp: 4/3/2025, 7:10:17 PM
+- Duration: 1012ms (1.01s)
+- Timestamp: 4/4/2025, 2:39:09 PM
+
+### square_root - openrouter/quasar-alpha
+
+- Prompt: `Calculate the square root of 16. Return only the number, no explanation.`
+- Expected: `4`
+- Actual: `4`
+- Duration: 1535ms (1.53s)
+- Timestamp: 4/4/2025, 2:39:10 PM

 ### power - openai/gpt-3.5-turbo

 - Prompt: `Calculate 2 raised to the power of 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 795ms (0.80s)
- Timestamp: 4/3/2025, 7:10:18 PM
+- Duration: 922ms (0.92s)
+- Timestamp: 4/4/2025, 2:39:11 PM

 ### power - deepseek/deepseek-r1-distill-qwen-14b:free

 - Prompt: `Calculate 2 raised to the power of 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 7263ms (7.26s)
- Timestamp: 4/3/2025, 7:10:25 PM
+- Duration: 7091ms (7.09s)
+- Timestamp: 4/4/2025, 2:39:18 PM

 ### power - openai/gpt-4o-mini

 - Prompt: `Calculate 2 raised to the power of 3. Return only the number, no explanation.`
 - Expected: `8`
 - Actual: `8`
- Duration: 966ms (0.97s)
- Timestamp: 4/3/2025, 7:10:26 PM
+- Duration: 1004ms (1.00s)
+- Timestamp: 4/4/2025, 2:39:19 PM
+
+### power - openrouter/quasar-alpha
+
+- Prompt: `Calculate 2 raised to the power of 3. Return only the number, no explanation.`
+- Expected: `8`
+- Actual: `8`
+- Duration: 1567ms (1.57s)
+- Timestamp: 4/4/2025, 2:39:21 PM