diff --git a/packages/kbot/logs/params.json b/packages/kbot/logs/params.json index 01754106..62559c71 100644 --- a/packages/kbot/logs/params.json +++ b/packages/kbot/logs/params.json @@ -3,7 +3,7 @@ "messages": [ { "role": "user", - "content": "Calculate 2 raised to the power of 3. Return only the number, no explanation." + "content": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation." }, { "role": "user", diff --git a/packages/kbot/package-lock.json b/packages/kbot/package-lock.json index fc8e1f9d..4520864c 100644 --- a/packages/kbot/package-lock.json +++ b/packages/kbot/package-lock.json @@ -27,6 +27,7 @@ "p-map": "7.0.3", "ts-retry": "6.0.0", "tslog": "^4.9.3", + "vm2": "^3.9.19", "yargs": "17.7.2", "zod": "3.24.2" }, @@ -2292,7 +2293,6 @@ "version": "8.14.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", - "dev": true, "license": "MIT", "bin": { "acorn": "bin/acorn" @@ -2311,6 +2311,18 @@ "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/acorn-walk": { + "version": "8.3.4", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", + "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/agentkeepalive": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", @@ -7203,6 +7215,23 @@ } } }, + "node_modules/vm2": { + "version": "3.9.19", + "resolved": "https://registry.npmjs.org/vm2/-/vm2-3.9.19.tgz", + "integrity": "sha512-J637XF0DHDMV57R6JyVsTak7nIL8gy5KH4r1HiwWLf/4GBbb5MKL5y7LpmF4A8E2nR6XmzpmMFQ7V7ppPTmUQg==", + "deprecated": "The library contains critical security issues and should not be used for production! The maintenance of the project has been discontinued. Consider migrating your code to isolated-vm.", + "license": "MIT", + "dependencies": { + "acorn": "^8.7.0", + "acorn-walk": "^8.2.0" + }, + "bin": { + "vm2": "bin/vm2" + }, + "engines": { + "node": ">=6.0" + } + }, "node_modules/watchpack": { "version": "2.4.2", "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.2.tgz", diff --git a/packages/kbot/package.json b/packages/kbot/package.json index 13766ff5..18eae99f 100644 --- a/packages/kbot/package.json +++ b/packages/kbot/package.json @@ -50,7 +50,6 @@ "p-map": "7.0.3", "ts-retry": "6.0.0", "tslog": "^4.9.3", - "vm2": "^3.9.19", "yargs": "17.7.2", "zod": "3.24.2" }, diff --git a/packages/kbot/tests/test-data/code/add.js b/packages/kbot/tests/test-data/code/add.js new file mode 100644 index 00000000..8e2d16f1 --- /dev/null +++ b/packages/kbot/tests/test-data/code/add.js @@ -0,0 +1,3 @@ +function add(a, b) { + return a + b; +} \ No newline at end of file diff --git a/packages/kbot/tests/test-data/code/factorial.js b/packages/kbot/tests/test-data/code/factorial.js new file mode 100644 index 00000000..3c2fc5b8 --- /dev/null +++ b/packages/kbot/tests/test-data/code/factorial.js @@ -0,0 +1,6 @@ +function factorial(n) { + if (n === 0 || n === 1) { + return 1; + } + return n * factorial(n - 1); +} \ No newline at end of file diff --git a/packages/kbot/tests/test-data/code/fibonacci.js b/packages/kbot/tests/test-data/code/fibonacci.js new file mode 100644 index 00000000..2e661451 --- /dev/null +++ b/packages/kbot/tests/test-data/code/fibonacci.js @@ -0,0 +1,11 @@ +function fibonacci(n) { + if (n <= 0) return 0; + if (n === 1) return 1; + let a = 0, b = 1; + for (let i = 2; i <= n; i++) { + let temp = a + b; + a = b; + b = temp; + } + return b; +} \ No newline at end of file diff --git a/packages/kbot/tests/unit/coding.test.ts b/packages/kbot/tests/unit/coding.test.ts index c48cf25a..dc04c3de 100644 --- a/packages/kbot/tests/unit/coding.test.ts +++ b/packages/kbot/tests/unit/coding.test.ts @@ -4,7 +4,7 @@ import { sync as exists } from "@polymech/fs/exists" import { sync as read } from "@polymech/fs/read" import { sync as write } from "@polymech/fs/write" import { sync as mkdirp } from "mkdirp" -import { VM } from 'vm2' +import { createContext, runInContext } from 'node:vm' import { getDefaultModels, @@ -15,11 +15,12 @@ import { TestResult, runTest, generateTestReport, - getReportPaths + getReportPaths, + ModelCategory } from './commons' // Optionally override models for this specific test file -const models = getDefaultModels() +const models = getDefaultModels(ModelCategory.CODING) // Ensure test-data/code directory exists const TEST_CODE_DIR = path.resolve(__dirname, '../test-data/code') @@ -32,12 +33,16 @@ describe('Coding Capabilities', () => { const TEST_LOG_PATH = getReportPaths('coding', 'json') const TEST_REPORT_PATH = getReportPaths('coding', 'md') - const executeCode = (code: string, context: any = {}): any => { - const vm = new VM({ - timeout: 1000, - sandbox: context - }) - return vm.run(code) + const executeCode = (code: string, functionName: string): any => { + const vmContext = createContext({}) + // Wrap the code in a module pattern to avoid global scope pollution + const wrappedCode = ` + (function() { + ${code} + return ${functionName}; + })() + ` + return runInContext(wrappedCode, vmContext, { timeout: 1000 }) } it.each(models)('should generate and execute a simple function with model %s', async (modelName) => { @@ -60,8 +65,7 @@ describe('Coding Capabilities', () => { write(codePath, result.result[0]) // Execute the code - const context = {} - const addFunction = executeCode(result.result[0], context) + const addFunction = executeCode(result.result[0], 'add') expect(addFunction(5, 3)).toBe(8) }, { timeout: TEST_TIMEOUT }) @@ -85,8 +89,7 @@ describe('Coding Capabilities', () => { write(codePath, result.result[0]) // Execute the code - const context = {} - const factorialFunction = executeCode(result.result[0], context) + const factorialFunction = executeCode(result.result[0], 'factorial') expect(factorialFunction(5)).toBe(120) }, { timeout: TEST_TIMEOUT }) @@ -110,8 +113,7 @@ describe('Coding Capabilities', () => { write(codePath, result.result[0]) // Execute the code - const context = {} - const fibonacciFunction = executeCode(result.result[0], context) + const fibonacciFunction = executeCode(result.result[0], 'fibonacci') expect(fibonacciFunction(6)).toBe(8) }, { timeout: TEST_TIMEOUT }) diff --git a/packages/kbot/tests/unit/commons.ts b/packages/kbot/tests/unit/commons.ts index 18e4e1c1..51dab179 100644 --- a/packages/kbot/tests/unit/commons.ts +++ b/packages/kbot/tests/unit/commons.ts @@ -10,7 +10,8 @@ export enum ModelCategory { FAST = 'fast', LANGUAGE = 'language', TOOL = 'tool', - ALL = 'all' + ALL = 'all', + CODING = 'coding' } export const getFastModels = (): string[] => { @@ -22,6 +23,12 @@ export const getFastModels = (): string[] => { ] } +export const getCodingModels = (): string[] => { + return [ + E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA + ] +} + export const getLanguageModels = (): string[] => { return [ E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET, @@ -45,6 +52,8 @@ export const getDefaultModels = (category: ModelCategory = ModelCategory.FAST): return getLanguageModels() case ModelCategory.TOOL: return getToolModels() + case ModelCategory.CODING: + return getCodingModels() case ModelCategory.ALL: default: return [ @@ -124,7 +133,7 @@ export const isEmptyResponse = (result: string[] | null | undefined): boolean => export const generateHighscores = (latestResults: Map>): TestHighscore[] => { const highscores: TestHighscore[] = [] - + for (const [testName, modelResults] of latestResults) { // Convert model results to array and sort by duration const sortedResults = Array.from(modelResults.entries()) @@ -143,14 +152,14 @@ export const generateHighscores = (latestResults: Map { model = options.model || 'unknown' router = options.model as string return options } }), - new Promise((_, reject) => + new Promise((_, reject) => setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT) ) ]) as string[] @@ -200,7 +211,7 @@ export const runTest = async ( } else { const actual = result?.[0]?.trim()?.toLowerCase() || '' const passed = actual === expected - + testResult = { test: testName, prompt, @@ -234,14 +245,14 @@ export const runTest = async ( if (testResult) { // Extract category from logPath (e.g., 'reports/basic.json' -> 'basic') const category = path.basename(logPath, path.extname(logPath)) - + // Add category to test result testResult.category = category // Update category-specific log const existingData = exists(logPath) === 'file' ? JSON.parse(read(logPath) as string) : { results: [], highscores: [] } const updatedResults = [...(existingData.results || []), testResult] - + // Group results by test and model const latestResults = new Map>() updatedResults.forEach(result => { @@ -269,7 +280,7 @@ export const runTest = async ( const allLogPath = path.resolve(REPORTS_DIR, 'all.json') const allExistingData = exists(allLogPath) === 'file' ? JSON.parse(read(allLogPath) as string) : { results: [], highscores: [] } const allUpdatedResults = [...(allExistingData.results || []), testResult] - + // Group all results by test and model const allLatestResults = new Map>() allUpdatedResults.forEach(result => { @@ -304,7 +315,7 @@ export const generateTestReport = ( ): void => { // Group results by test and model const latestResults = new Map>() - + // Get only the latest result for each test+model combination testResults.forEach(result => { if (!latestResults.has(result.test)) { @@ -322,12 +333,12 @@ export const generateTestReport = ( // Add highscore section report += '## Highscores\n\n' - + // Add regular test rankings report += '### Performance Rankings (Duration)\n\n' report += '| Test | Model | Duration (ms) | Duration (s) |\n' report += '|------|-------|--------------|--------------|\n' - + Array.from(latestResults.entries()).forEach(([testName, modelResults]) => { const sortedResults = Array.from(modelResults.entries()) .map(([model, result]) => ({ @@ -353,7 +364,7 @@ export const generateTestReport = ( report += `- Failed: ${failedTests}\n` report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n` report += `- Average Duration: ${avgDuration.toFixed(0)}ms (${(avgDuration / 1000).toFixed(2)}s)\n\n` - + // First list failed tests report += '## Failed Tests\n\n' let hasFailures = false @@ -379,7 +390,7 @@ export const generateTestReport = ( } } } - + if (!hasFailures) { report += '*No failed tests*\n\n' } @@ -400,7 +411,7 @@ export const generateTestReport = ( } } } - + if (!hasPassed) { report += '*No passed tests*\n\n' } diff --git a/packages/kbot/tests/unit/reports/all.json b/packages/kbot/tests/unit/reports/all.json index 836988a1..118e91f4 100644 --- a/packages/kbot/tests/unit/reports/all.json +++ b/packages/kbot/tests/unit/reports/all.json @@ -7627,6 +7627,591 @@ "passed": true, "duration": 1567, "category": "math" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:44:45.218Z", + "passed": false, + "duration": 1435, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:44:49.883Z", + "passed": false, + "duration": 4655, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:44:51.127Z", + "passed": false, + "duration": 1237, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:44:52.054Z", + "passed": false, + "duration": 917, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:44:53.187Z", + "passed": false, + "duration": 1125, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:45:02.361Z", + "passed": false, + "duration": 9166, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:45:04.060Z", + "passed": false, + "duration": 1690, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:45:05.043Z", + "passed": false, + "duration": 975, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:45:06.250Z", + "passed": false, + "duration": 1200, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:45:22.955Z", + "passed": false, + "duration": 16697, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:45:25.030Z", + "passed": false, + "duration": 2068, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:45:27.584Z", + "passed": false, + "duration": 2546, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:45:51.588Z", + "passed": false, + "duration": 1992, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:45:55.658Z", + "passed": false, + "duration": 4061, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:45:56.953Z", + "passed": false, + "duration": 1287, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:45:57.801Z", + "passed": false, + "duration": 840, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:45:59.452Z", + "passed": false, + "duration": 1644, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:46:05.931Z", + "passed": false, + "duration": 6471, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:46:07.867Z", + "passed": false, + "duration": 1927, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:46:09.006Z", + "passed": false, + "duration": 1131, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:46:10.511Z", + "passed": false, + "duration": 1496, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:46:28.567Z", + "passed": false, + "duration": 18048, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:46:31.014Z", + "passed": false, + "duration": 2439, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:46:32.933Z", + "passed": false, + "duration": 1911, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:50:37.864Z", + "passed": false, + "duration": 1441, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:50:42.826Z", + "passed": false, + "duration": 4953, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:50:44.254Z", + "passed": false, + "duration": 1420, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:50:45.251Z", + "passed": false, + "duration": 989, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:50:46.448Z", + "passed": false, + "duration": 1187, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:50:52.751Z", + "passed": false, + "duration": 6295, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:50:54.086Z", + "passed": false, + "duration": 1326, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:50:55.060Z", + "passed": false, + "duration": 965, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:50:56.266Z", + "passed": false, + "duration": 1198, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:51:06.690Z", + "passed": false, + "duration": 10417, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:51:09.116Z", + "passed": false, + "duration": 2418, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:51:11.294Z", + "passed": false, + "duration": 2169, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:52:19.974Z", + "passed": false, + "duration": 1425, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:52:20.932Z", + "passed": false, + "duration": 948, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:52:22.228Z", + "passed": false, + "duration": 1287, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}", + "category": "coding" } ], "highscores": [ @@ -7809,7 +8394,52 @@ "duration_secs": 2.274 } ] + }, + { + "test": "simple_function", + "rankings": [ + { + "model": "openai/gpt-4o-mini", + "duration": 1420, + "duration_secs": 1.42 + }, + { + "model": "openrouter/quasar-alpha", + "duration": 1425, + "duration_secs": 1.425 + } + ] + }, + { + "test": "factorial_function", + "rankings": [ + { + "model": "openrouter/quasar-alpha", + "duration": 948, + "duration_secs": 0.948 + }, + { + "model": "openai/gpt-3.5-turbo", + "duration": 1187, + "duration_secs": 1.187 + } + ] + }, + { + "test": "fibonacci_function", + "rankings": [ + { + "model": "openai/gpt-3.5-turbo", + "duration": 1198, + "duration_secs": 1.198 + }, + { + "model": "openrouter/quasar-alpha", + "duration": 1287, + "duration_secs": 1.287 + } + ] } ], - "lastUpdated": "2025-04-04T12:39:21.300Z" + "lastUpdated": "2025-04-04T12:52:22.232Z" } \ No newline at end of file diff --git a/packages/kbot/tests/unit/reports/coding.json b/packages/kbot/tests/unit/reports/coding.json new file mode 100644 index 00000000..1434fa01 --- /dev/null +++ b/packages/kbot/tests/unit/reports/coding.json @@ -0,0 +1,637 @@ +{ + "results": [ + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:44:45.218Z", + "passed": false, + "duration": 1435, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:44:49.883Z", + "passed": false, + "duration": 4655, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:44:51.127Z", + "passed": false, + "duration": 1237, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:44:52.054Z", + "passed": false, + "duration": 917, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:44:53.187Z", + "passed": false, + "duration": 1125, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:45:02.361Z", + "passed": false, + "duration": 9166, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:45:04.060Z", + "passed": false, + "duration": 1690, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:45:05.043Z", + "passed": false, + "duration": 975, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:45:06.250Z", + "passed": false, + "duration": 1200, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:45:22.955Z", + "passed": false, + "duration": 16697, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:45:25.030Z", + "passed": false, + "duration": 2068, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:45:27.584Z", + "passed": false, + "duration": 2546, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:45:51.588Z", + "passed": false, + "duration": 1992, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:45:55.658Z", + "passed": false, + "duration": 4061, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:45:56.953Z", + "passed": false, + "duration": 1287, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction add(a, b) {\n return a + b;\n}\n```" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:45:57.801Z", + "passed": false, + "duration": 840, + "reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:45:59.452Z", + "passed": false, + "duration": 1644, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:46:05.931Z", + "passed": false, + "duration": 6471, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:46:07.867Z", + "passed": false, + "duration": 1927, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:46:09.006Z", + "passed": false, + "duration": 1131, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:46:10.511Z", + "passed": false, + "duration": 1496, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:46:28.567Z", + "passed": false, + "duration": 18048, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:46:31.014Z", + "passed": false, + "duration": 2439, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:46:32.933Z", + "passed": false, + "duration": 1911, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:50:37.864Z", + "passed": false, + "duration": 1441, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:50:42.826Z", + "passed": false, + "duration": 4953, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:50:44.254Z", + "passed": false, + "duration": 1420, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:50:45.251Z", + "passed": false, + "duration": 989, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:50:46.448Z", + "passed": false, + "duration": 1187, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:50:52.751Z", + "passed": false, + "duration": 6295, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:50:54.086Z", + "passed": false, + "duration": 1326, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:50:55.060Z", + "passed": false, + "duration": 965, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-04T12:50:56.266Z", + "passed": false, + "duration": 1198, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-04T12:51:06.690Z", + "passed": false, + "duration": 10417, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-04T12:51:09.116Z", + "passed": false, + "duration": 2418, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:51:11.294Z", + "passed": false, + "duration": 2169, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}", + "category": "coding" + }, + { + "test": "simple_function", + "prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.", + "result": [ + "function add(a, b) {\n return a + b;\n}" + ], + "expected": "function add(a, b) { return a + b; }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:52:19.974Z", + "passed": false, + "duration": 1425, + "reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}", + "category": "coding" + }, + { + "test": "factorial_function", + "prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}" + ], + "expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:52:20.932Z", + "passed": false, + "duration": 948, + "reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}", + "category": "coding" + }, + { + "test": "fibonacci_function", + "prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.", + "result": [ + "function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}" + ], + "expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }", + "model": "openrouter/quasar-alpha", + "router": "openrouter/quasar-alpha", + "timestamp": "2025-04-04T12:52:22.228Z", + "passed": false, + "duration": 1287, + "reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}", + "category": "coding" + } + ], + "highscores": [ + { + "test": "simple_function", + "rankings": [ + { + "model": "openai/gpt-4o-mini", + "duration": 1420, + "duration_secs": 1.42 + }, + { + "model": "openrouter/quasar-alpha", + "duration": 1425, + "duration_secs": 1.425 + } + ] + }, + { + "test": "factorial_function", + "rankings": [ + { + "model": "openrouter/quasar-alpha", + "duration": 948, + "duration_secs": 0.948 + }, + { + "model": "openai/gpt-3.5-turbo", + "duration": 1187, + "duration_secs": 1.187 + } + ] + }, + { + "test": "fibonacci_function", + "rankings": [ + { + "model": "openai/gpt-3.5-turbo", + "duration": 1198, + "duration_secs": 1.198 + }, + { + "model": "openrouter/quasar-alpha", + "duration": 1287, + "duration_secs": 1.287 + } + ] + } + ], + "lastUpdated": "2025-04-04T12:52:22.229Z" +} \ No newline at end of file diff --git a/packages/kbot/tests/unit/reports/coding.md b/packages/kbot/tests/unit/reports/coding.md new file mode 100644 index 00000000..f4c152d2 --- /dev/null +++ b/packages/kbot/tests/unit/reports/coding.md @@ -0,0 +1,93 @@ +# Coding Capabilities Test Results + +## Highscores + +### Performance Rankings (Duration) + +| Test | Model | Duration (ms) | Duration (s) | +|------|-------|--------------|--------------| +| simple_function | openrouter/quasar-alpha | 1425 | 1.43 | +| factorial_function | openrouter/quasar-alpha | 948 | 0.95 | +| fibonacci_function | openrouter/quasar-alpha | 1287 | 1.29 | + +## Summary + +- Total Tests: 3 +- Passed: 0 +- Failed: 3 +- Success Rate: 0.00% +- Average Duration: 1220ms (1.22s) + +## Failed Tests + +### simple_function - openrouter/quasar-alpha + +- Prompt: `Generate a JavaScript function that adds two numbers and returns the result. + The function should be named 'add' and take two parameters 'a' and 'b'. + Return only the function code, no explanation.` +- Expected: `function add(a, b) { return a + b; }` +- Actual: `function add(a, b) { + return a + b; +}` +- Duration: 1425ms (1.43s) +- Reason: Expected function add(a, b) { return a + b; }, but got function add(a, b) { + return a + b; +} +- Timestamp: 4/4/2025, 2:52:19 PM + +### factorial_function - openrouter/quasar-alpha + +- Prompt: `Generate a JavaScript function that calculates the factorial of a number. + The function should be named 'factorial' and take one parameter 'n'. + Return only the function code, no explanation.` +- Expected: `function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }` +- Actual: `function factorial(n) { + if (n === 0 || n === 1) { + return 1; + } + return n * factorial(n - 1); +}` +- Duration: 948ms (0.95s) +- Reason: Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) { + if (n === 0 || n === 1) { + return 1; + } + return n * factorial(n - 1); +} +- Timestamp: 4/4/2025, 2:52:20 PM + +### fibonacci_function - openrouter/quasar-alpha + +- Prompt: `Generate a JavaScript function that calculates the nth Fibonacci number. + The function should be named 'fibonacci' and take one parameter 'n'. + Return only the function code, no explanation.` +- Expected: `function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }` +- Actual: `function fibonacci(n) { + if (n <= 0) return 0; + if (n === 1) return 1; + let a = 0, b = 1; + for (let i = 2; i <= n; i++) { + let temp = a + b; + a = b; + b = temp; + } + return b; +}` +- Duration: 1287ms (1.29s) +- Reason: Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) { + if (n <= 0) return 0; + if (n === 1) return 1; + let a = 0, b = 1; + for (let i = 2; i <= n; i++) { + let temp = a + b; + a = b; + b = temp; + } + return b; +} +- Timestamp: 4/4/2025, 2:52:22 PM + +## Passed Tests + +*No passed tests* +