the coding challenge
This commit is contained in:
parent
ddf8dbce1f
commit
fc53739b40
@ -3,7 +3,7 @@
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Calculate 2 raised to the power of 3. Return only the number, no explanation."
|
||||
"content": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
|
||||
31
packages/kbot/package-lock.json
generated
31
packages/kbot/package-lock.json
generated
@ -27,6 +27,7 @@
|
||||
"p-map": "7.0.3",
|
||||
"ts-retry": "6.0.0",
|
||||
"tslog": "^4.9.3",
|
||||
"vm2": "^3.9.19",
|
||||
"yargs": "17.7.2",
|
||||
"zod": "3.24.2"
|
||||
},
|
||||
@ -2292,7 +2293,6 @@
|
||||
"version": "8.14.0",
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz",
|
||||
"integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
@ -2311,6 +2311,18 @@
|
||||
"acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/acorn-walk": {
|
||||
"version": "8.3.4",
|
||||
"resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz",
|
||||
"integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"acorn": "^8.11.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/agentkeepalive": {
|
||||
"version": "4.6.0",
|
||||
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",
|
||||
@ -7203,6 +7215,23 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/vm2": {
|
||||
"version": "3.9.19",
|
||||
"resolved": "https://registry.npmjs.org/vm2/-/vm2-3.9.19.tgz",
|
||||
"integrity": "sha512-J637XF0DHDMV57R6JyVsTak7nIL8gy5KH4r1HiwWLf/4GBbb5MKL5y7LpmF4A8E2nR6XmzpmMFQ7V7ppPTmUQg==",
|
||||
"deprecated": "The library contains critical security issues and should not be used for production! The maintenance of the project has been discontinued. Consider migrating your code to isolated-vm.",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"acorn": "^8.7.0",
|
||||
"acorn-walk": "^8.2.0"
|
||||
},
|
||||
"bin": {
|
||||
"vm2": "bin/vm2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/watchpack": {
|
||||
"version": "2.4.2",
|
||||
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.2.tgz",
|
||||
|
||||
@ -50,7 +50,6 @@
|
||||
"p-map": "7.0.3",
|
||||
"ts-retry": "6.0.0",
|
||||
"tslog": "^4.9.3",
|
||||
"vm2": "^3.9.19",
|
||||
"yargs": "17.7.2",
|
||||
"zod": "3.24.2"
|
||||
},
|
||||
|
||||
3
packages/kbot/tests/test-data/code/add.js
Normal file
3
packages/kbot/tests/test-data/code/add.js
Normal file
@ -0,0 +1,3 @@
|
||||
function add(a, b) {
|
||||
return a + b;
|
||||
}
|
||||
6
packages/kbot/tests/test-data/code/factorial.js
Normal file
6
packages/kbot/tests/test-data/code/factorial.js
Normal file
@ -0,0 +1,6 @@
|
||||
function factorial(n) {
|
||||
if (n === 0 || n === 1) {
|
||||
return 1;
|
||||
}
|
||||
return n * factorial(n - 1);
|
||||
}
|
||||
11
packages/kbot/tests/test-data/code/fibonacci.js
Normal file
11
packages/kbot/tests/test-data/code/fibonacci.js
Normal file
@ -0,0 +1,11 @@
|
||||
function fibonacci(n) {
|
||||
if (n <= 0) return 0;
|
||||
if (n === 1) return 1;
|
||||
let a = 0, b = 1;
|
||||
for (let i = 2; i <= n; i++) {
|
||||
let temp = a + b;
|
||||
a = b;
|
||||
b = temp;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
@ -4,7 +4,7 @@ import { sync as exists } from "@polymech/fs/exists"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
import { sync as mkdirp } from "mkdirp"
|
||||
import { VM } from 'vm2'
|
||||
import { createContext, runInContext } from 'node:vm'
|
||||
|
||||
import {
|
||||
getDefaultModels,
|
||||
@ -15,11 +15,12 @@ import {
|
||||
TestResult,
|
||||
runTest,
|
||||
generateTestReport,
|
||||
getReportPaths
|
||||
getReportPaths,
|
||||
ModelCategory
|
||||
} from './commons'
|
||||
|
||||
// Optionally override models for this specific test file
|
||||
const models = getDefaultModels()
|
||||
const models = getDefaultModels(ModelCategory.CODING)
|
||||
|
||||
// Ensure test-data/code directory exists
|
||||
const TEST_CODE_DIR = path.resolve(__dirname, '../test-data/code')
|
||||
@ -32,12 +33,16 @@ describe('Coding Capabilities', () => {
|
||||
const TEST_LOG_PATH = getReportPaths('coding', 'json')
|
||||
const TEST_REPORT_PATH = getReportPaths('coding', 'md')
|
||||
|
||||
const executeCode = (code: string, context: any = {}): any => {
|
||||
const vm = new VM({
|
||||
timeout: 1000,
|
||||
sandbox: context
|
||||
})
|
||||
return vm.run(code)
|
||||
const executeCode = (code: string, functionName: string): any => {
|
||||
const vmContext = createContext({})
|
||||
// Wrap the code in a module pattern to avoid global scope pollution
|
||||
const wrappedCode = `
|
||||
(function() {
|
||||
${code}
|
||||
return ${functionName};
|
||||
})()
|
||||
`
|
||||
return runInContext(wrappedCode, vmContext, { timeout: 1000 })
|
||||
}
|
||||
|
||||
it.each(models)('should generate and execute a simple function with model %s', async (modelName) => {
|
||||
@ -60,8 +65,7 @@ describe('Coding Capabilities', () => {
|
||||
write(codePath, result.result[0])
|
||||
|
||||
// Execute the code
|
||||
const context = {}
|
||||
const addFunction = executeCode(result.result[0], context)
|
||||
const addFunction = executeCode(result.result[0], 'add')
|
||||
expect(addFunction(5, 3)).toBe(8)
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
@ -85,8 +89,7 @@ describe('Coding Capabilities', () => {
|
||||
write(codePath, result.result[0])
|
||||
|
||||
// Execute the code
|
||||
const context = {}
|
||||
const factorialFunction = executeCode(result.result[0], context)
|
||||
const factorialFunction = executeCode(result.result[0], 'factorial')
|
||||
expect(factorialFunction(5)).toBe(120)
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
@ -110,8 +113,7 @@ describe('Coding Capabilities', () => {
|
||||
write(codePath, result.result[0])
|
||||
|
||||
// Execute the code
|
||||
const context = {}
|
||||
const fibonacciFunction = executeCode(result.result[0], context)
|
||||
const fibonacciFunction = executeCode(result.result[0], 'fibonacci')
|
||||
expect(fibonacciFunction(6)).toBe(8)
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
|
||||
@ -10,7 +10,8 @@ export enum ModelCategory {
|
||||
FAST = 'fast',
|
||||
LANGUAGE = 'language',
|
||||
TOOL = 'tool',
|
||||
ALL = 'all'
|
||||
ALL = 'all',
|
||||
CODING = 'coding'
|
||||
}
|
||||
|
||||
export const getFastModels = (): string[] => {
|
||||
@ -22,6 +23,12 @@ export const getFastModels = (): string[] => {
|
||||
]
|
||||
}
|
||||
|
||||
export const getCodingModels = (): string[] => {
|
||||
return [
|
||||
E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA
|
||||
]
|
||||
}
|
||||
|
||||
export const getLanguageModels = (): string[] => {
|
||||
return [
|
||||
E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET,
|
||||
@ -45,6 +52,8 @@ export const getDefaultModels = (category: ModelCategory = ModelCategory.FAST):
|
||||
return getLanguageModels()
|
||||
case ModelCategory.TOOL:
|
||||
return getToolModels()
|
||||
case ModelCategory.CODING:
|
||||
return getCodingModels()
|
||||
case ModelCategory.ALL:
|
||||
default:
|
||||
return [
|
||||
@ -124,7 +133,7 @@ export const isEmptyResponse = (result: string[] | null | undefined): boolean =>
|
||||
|
||||
export const generateHighscores = (latestResults: Map<string, Map<string, TestResult>>): TestHighscore[] => {
|
||||
const highscores: TestHighscore[] = []
|
||||
|
||||
|
||||
for (const [testName, modelResults] of latestResults) {
|
||||
// Convert model results to array and sort by duration
|
||||
const sortedResults = Array.from(modelResults.entries())
|
||||
@ -143,14 +152,14 @@ export const generateHighscores = (latestResults: Map<string, Map<string, TestRe
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return highscores
|
||||
}
|
||||
|
||||
export const runTest = async (
|
||||
prompt: string,
|
||||
expected: string,
|
||||
testName: string,
|
||||
prompt: string,
|
||||
expected: string,
|
||||
testName: string,
|
||||
modelName: string,
|
||||
logPath: string,
|
||||
mode: "completion" | "tools" | "assistant" | "custom" = "completion",
|
||||
@ -161,7 +170,9 @@ export const runTest = async (
|
||||
let startTime = Date.now()
|
||||
let error: TestResult['error'] | undefined
|
||||
let testResult: TestResult | undefined
|
||||
|
||||
let defaultOptions = {
|
||||
filters: 'code'
|
||||
}
|
||||
try {
|
||||
const result = await Promise.race([
|
||||
run({
|
||||
@ -172,14 +183,14 @@ export const runTest = async (
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
logLevel: 2,
|
||||
...options,
|
||||
...{ ...defaultOptions, ...options },
|
||||
onRun: async (options) => {
|
||||
model = options.model || 'unknown'
|
||||
router = options.model as string
|
||||
return options
|
||||
}
|
||||
}),
|
||||
new Promise((_, reject) =>
|
||||
new Promise((_, reject) =>
|
||||
setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
|
||||
)
|
||||
]) as string[]
|
||||
@ -200,7 +211,7 @@ export const runTest = async (
|
||||
} else {
|
||||
const actual = result?.[0]?.trim()?.toLowerCase() || ''
|
||||
const passed = actual === expected
|
||||
|
||||
|
||||
testResult = {
|
||||
test: testName,
|
||||
prompt,
|
||||
@ -234,14 +245,14 @@ export const runTest = async (
|
||||
if (testResult) {
|
||||
// Extract category from logPath (e.g., 'reports/basic.json' -> 'basic')
|
||||
const category = path.basename(logPath, path.extname(logPath))
|
||||
|
||||
|
||||
// Add category to test result
|
||||
testResult.category = category
|
||||
|
||||
// Update category-specific log
|
||||
const existingData = exists(logPath) === 'file' ? JSON.parse(read(logPath) as string) : { results: [], highscores: [] }
|
||||
const updatedResults = [...(existingData.results || []), testResult]
|
||||
|
||||
|
||||
// Group results by test and model
|
||||
const latestResults = new Map<string, Map<string, TestResult>>()
|
||||
updatedResults.forEach(result => {
|
||||
@ -269,7 +280,7 @@ export const runTest = async (
|
||||
const allLogPath = path.resolve(REPORTS_DIR, 'all.json')
|
||||
const allExistingData = exists(allLogPath) === 'file' ? JSON.parse(read(allLogPath) as string) : { results: [], highscores: [] }
|
||||
const allUpdatedResults = [...(allExistingData.results || []), testResult]
|
||||
|
||||
|
||||
// Group all results by test and model
|
||||
const allLatestResults = new Map<string, Map<string, TestResult>>()
|
||||
allUpdatedResults.forEach(result => {
|
||||
@ -304,7 +315,7 @@ export const generateTestReport = (
|
||||
): void => {
|
||||
// Group results by test and model
|
||||
const latestResults = new Map<string, Map<string, TestResult>>()
|
||||
|
||||
|
||||
// Get only the latest result for each test+model combination
|
||||
testResults.forEach(result => {
|
||||
if (!latestResults.has(result.test)) {
|
||||
@ -322,12 +333,12 @@ export const generateTestReport = (
|
||||
|
||||
// Add highscore section
|
||||
report += '## Highscores\n\n'
|
||||
|
||||
|
||||
// Add regular test rankings
|
||||
report += '### Performance Rankings (Duration)\n\n'
|
||||
report += '| Test | Model | Duration (ms) | Duration (s) |\n'
|
||||
report += '|------|-------|--------------|--------------|\n'
|
||||
|
||||
|
||||
Array.from(latestResults.entries()).forEach(([testName, modelResults]) => {
|
||||
const sortedResults = Array.from(modelResults.entries())
|
||||
.map(([model, result]) => ({
|
||||
@ -353,7 +364,7 @@ export const generateTestReport = (
|
||||
report += `- Failed: ${failedTests}\n`
|
||||
report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n`
|
||||
report += `- Average Duration: ${avgDuration.toFixed(0)}ms (${(avgDuration / 1000).toFixed(2)}s)\n\n`
|
||||
|
||||
|
||||
// First list failed tests
|
||||
report += '## Failed Tests\n\n'
|
||||
let hasFailures = false
|
||||
@ -379,7 +390,7 @@ export const generateTestReport = (
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!hasFailures) {
|
||||
report += '*No failed tests*\n\n'
|
||||
}
|
||||
@ -400,7 +411,7 @@ export const generateTestReport = (
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!hasPassed) {
|
||||
report += '*No passed tests*\n\n'
|
||||
}
|
||||
|
||||
@ -7627,6 +7627,591 @@
|
||||
"passed": true,
|
||||
"duration": 1567,
|
||||
"category": "math"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:44:45.218Z",
|
||||
"passed": false,
|
||||
"duration": 1435,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:44:49.883Z",
|
||||
"passed": false,
|
||||
"duration": 4655,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:44:51.127Z",
|
||||
"passed": false,
|
||||
"duration": 1237,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:44:52.054Z",
|
||||
"passed": false,
|
||||
"duration": 917,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:44:53.187Z",
|
||||
"passed": false,
|
||||
"duration": 1125,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:45:02.361Z",
|
||||
"passed": false,
|
||||
"duration": 9166,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:45:04.060Z",
|
||||
"passed": false,
|
||||
"duration": 1690,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:45:05.043Z",
|
||||
"passed": false,
|
||||
"duration": 975,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:45:06.250Z",
|
||||
"passed": false,
|
||||
"duration": 1200,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:45:22.955Z",
|
||||
"passed": false,
|
||||
"duration": 16697,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:45:25.030Z",
|
||||
"passed": false,
|
||||
"duration": 2068,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:45:27.584Z",
|
||||
"passed": false,
|
||||
"duration": 2546,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:45:51.588Z",
|
||||
"passed": false,
|
||||
"duration": 1992,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:45:55.658Z",
|
||||
"passed": false,
|
||||
"duration": 4061,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:45:56.953Z",
|
||||
"passed": false,
|
||||
"duration": 1287,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:45:57.801Z",
|
||||
"passed": false,
|
||||
"duration": 840,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:45:59.452Z",
|
||||
"passed": false,
|
||||
"duration": 1644,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:46:05.931Z",
|
||||
"passed": false,
|
||||
"duration": 6471,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:46:07.867Z",
|
||||
"passed": false,
|
||||
"duration": 1927,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:46:09.006Z",
|
||||
"passed": false,
|
||||
"duration": 1131,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:46:10.511Z",
|
||||
"passed": false,
|
||||
"duration": 1496,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:46:28.567Z",
|
||||
"passed": false,
|
||||
"duration": 18048,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:46:31.014Z",
|
||||
"passed": false,
|
||||
"duration": 2439,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:46:32.933Z",
|
||||
"passed": false,
|
||||
"duration": 1911,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:50:37.864Z",
|
||||
"passed": false,
|
||||
"duration": 1441,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:50:42.826Z",
|
||||
"passed": false,
|
||||
"duration": 4953,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:50:44.254Z",
|
||||
"passed": false,
|
||||
"duration": 1420,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:50:45.251Z",
|
||||
"passed": false,
|
||||
"duration": 989,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:50:46.448Z",
|
||||
"passed": false,
|
||||
"duration": 1187,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:50:52.751Z",
|
||||
"passed": false,
|
||||
"duration": 6295,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:50:54.086Z",
|
||||
"passed": false,
|
||||
"duration": 1326,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:50:55.060Z",
|
||||
"passed": false,
|
||||
"duration": 965,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:50:56.266Z",
|
||||
"passed": false,
|
||||
"duration": 1198,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:51:06.690Z",
|
||||
"passed": false,
|
||||
"duration": 10417,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:51:09.116Z",
|
||||
"passed": false,
|
||||
"duration": 2418,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:51:11.294Z",
|
||||
"passed": false,
|
||||
"duration": 2169,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:52:19.974Z",
|
||||
"passed": false,
|
||||
"duration": 1425,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:52:20.932Z",
|
||||
"passed": false,
|
||||
"duration": 948,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:52:22.228Z",
|
||||
"passed": false,
|
||||
"duration": 1287,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
}
|
||||
],
|
||||
"highscores": [
|
||||
@ -7809,7 +8394,52 @@
|
||||
"duration_secs": 2.274
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"rankings": [
|
||||
{
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"duration": 1420,
|
||||
"duration_secs": 1.42
|
||||
},
|
||||
{
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"duration": 1425,
|
||||
"duration_secs": 1.425
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"rankings": [
|
||||
{
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"duration": 948,
|
||||
"duration_secs": 0.948
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"duration": 1187,
|
||||
"duration_secs": 1.187
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"rankings": [
|
||||
{
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"duration": 1198,
|
||||
"duration_secs": 1.198
|
||||
},
|
||||
{
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"duration": 1287,
|
||||
"duration_secs": 1.287
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"lastUpdated": "2025-04-04T12:39:21.300Z"
|
||||
"lastUpdated": "2025-04-04T12:52:22.232Z"
|
||||
}
|
||||
637
packages/kbot/tests/unit/reports/coding.json
Normal file
637
packages/kbot/tests/unit/reports/coding.json
Normal file
@ -0,0 +1,637 @@
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:44:45.218Z",
|
||||
"passed": false,
|
||||
"duration": 1435,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:44:49.883Z",
|
||||
"passed": false,
|
||||
"duration": 4655,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:44:51.127Z",
|
||||
"passed": false,
|
||||
"duration": 1237,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:44:52.054Z",
|
||||
"passed": false,
|
||||
"duration": 917,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:44:53.187Z",
|
||||
"passed": false,
|
||||
"duration": 1125,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:45:02.361Z",
|
||||
"passed": false,
|
||||
"duration": 9166,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:45:04.060Z",
|
||||
"passed": false,
|
||||
"duration": 1690,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:45:05.043Z",
|
||||
"passed": false,
|
||||
"duration": 975,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:45:06.250Z",
|
||||
"passed": false,
|
||||
"duration": 1200,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:45:22.955Z",
|
||||
"passed": false,
|
||||
"duration": 16697,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:45:25.030Z",
|
||||
"passed": false,
|
||||
"duration": 2068,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:45:27.584Z",
|
||||
"passed": false,
|
||||
"duration": 2546,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:45:51.588Z",
|
||||
"passed": false,
|
||||
"duration": 1992,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:45:55.658Z",
|
||||
"passed": false,
|
||||
"duration": 4061,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:45:56.953Z",
|
||||
"passed": false,
|
||||
"duration": 1287,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:45:57.801Z",
|
||||
"passed": false,
|
||||
"duration": 840,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:45:59.452Z",
|
||||
"passed": false,
|
||||
"duration": 1644,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:46:05.931Z",
|
||||
"passed": false,
|
||||
"duration": 6471,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:46:07.867Z",
|
||||
"passed": false,
|
||||
"duration": 1927,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:46:09.006Z",
|
||||
"passed": false,
|
||||
"duration": 1131,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:46:10.511Z",
|
||||
"passed": false,
|
||||
"duration": 1496,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:46:28.567Z",
|
||||
"passed": false,
|
||||
"duration": 18048,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:46:31.014Z",
|
||||
"passed": false,
|
||||
"duration": 2439,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:46:32.933Z",
|
||||
"passed": false,
|
||||
"duration": 1911,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:50:37.864Z",
|
||||
"passed": false,
|
||||
"duration": 1441,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:50:42.826Z",
|
||||
"passed": false,
|
||||
"duration": 4953,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:50:44.254Z",
|
||||
"passed": false,
|
||||
"duration": 1420,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:50:45.251Z",
|
||||
"passed": false,
|
||||
"duration": 989,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:50:46.448Z",
|
||||
"passed": false,
|
||||
"duration": 1187,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:50:52.751Z",
|
||||
"passed": false,
|
||||
"duration": 6295,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:50:54.086Z",
|
||||
"passed": false,
|
||||
"duration": 1326,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:50:55.060Z",
|
||||
"passed": false,
|
||||
"duration": 965,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"router": "openai/gpt-3.5-turbo",
|
||||
"timestamp": "2025-04-04T12:50:56.266Z",
|
||||
"passed": false,
|
||||
"duration": 1198,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||||
"timestamp": "2025-04-04T12:51:06.690Z",
|
||||
"passed": false,
|
||||
"duration": 10417,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"router": "openai/gpt-4o-mini",
|
||||
"timestamp": "2025-04-04T12:51:09.116Z",
|
||||
"passed": false,
|
||||
"duration": 2418,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:51:11.294Z",
|
||||
"passed": false,
|
||||
"duration": 2169,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "simple_function",
|
||||
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function add(a, b) {\n return a + b;\n}"
|
||||
],
|
||||
"expected": "function add(a, b) { return a + b; }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:52:19.974Z",
|
||||
"passed": false,
|
||||
"duration": 1425,
|
||||
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}"
|
||||
],
|
||||
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:52:20.932Z",
|
||||
"passed": false,
|
||||
"duration": 948,
|
||||
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}",
|
||||
"category": "coding"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
|
||||
"result": [
|
||||
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}"
|
||||
],
|
||||
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"router": "openrouter/quasar-alpha",
|
||||
"timestamp": "2025-04-04T12:52:22.228Z",
|
||||
"passed": false,
|
||||
"duration": 1287,
|
||||
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}",
|
||||
"category": "coding"
|
||||
}
|
||||
],
|
||||
"highscores": [
|
||||
{
|
||||
"test": "simple_function",
|
||||
"rankings": [
|
||||
{
|
||||
"model": "openai/gpt-4o-mini",
|
||||
"duration": 1420,
|
||||
"duration_secs": 1.42
|
||||
},
|
||||
{
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"duration": 1425,
|
||||
"duration_secs": 1.425
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"test": "factorial_function",
|
||||
"rankings": [
|
||||
{
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"duration": 948,
|
||||
"duration_secs": 0.948
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"duration": 1187,
|
||||
"duration_secs": 1.187
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"test": "fibonacci_function",
|
||||
"rankings": [
|
||||
{
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
"duration": 1198,
|
||||
"duration_secs": 1.198
|
||||
},
|
||||
{
|
||||
"model": "openrouter/quasar-alpha",
|
||||
"duration": 1287,
|
||||
"duration_secs": 1.287
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"lastUpdated": "2025-04-04T12:52:22.229Z"
|
||||
}
|
||||
93
packages/kbot/tests/unit/reports/coding.md
Normal file
93
packages/kbot/tests/unit/reports/coding.md
Normal file
@ -0,0 +1,93 @@
|
||||
# Coding Capabilities Test Results
|
||||
|
||||
## Highscores
|
||||
|
||||
### Performance Rankings (Duration)
|
||||
|
||||
| Test | Model | Duration (ms) | Duration (s) |
|
||||
|------|-------|--------------|--------------|
|
||||
| simple_function | openrouter/quasar-alpha | 1425 | 1.43 |
|
||||
| factorial_function | openrouter/quasar-alpha | 948 | 0.95 |
|
||||
| fibonacci_function | openrouter/quasar-alpha | 1287 | 1.29 |
|
||||
|
||||
## Summary
|
||||
|
||||
- Total Tests: 3
|
||||
- Passed: 0
|
||||
- Failed: 3
|
||||
- Success Rate: 0.00%
|
||||
- Average Duration: 1220ms (1.22s)
|
||||
|
||||
## Failed Tests
|
||||
|
||||
### simple_function - openrouter/quasar-alpha
|
||||
|
||||
- Prompt: `Generate a JavaScript function that adds two numbers and returns the result.
|
||||
The function should be named 'add' and take two parameters 'a' and 'b'.
|
||||
Return only the function code, no explanation.`
|
||||
- Expected: `function add(a, b) { return a + b; }`
|
||||
- Actual: `function add(a, b) {
|
||||
return a + b;
|
||||
}`
|
||||
- Duration: 1425ms (1.43s)
|
||||
- Reason: Expected function add(a, b) { return a + b; }, but got function add(a, b) {
|
||||
return a + b;
|
||||
}
|
||||
- Timestamp: 4/4/2025, 2:52:19 PM
|
||||
|
||||
### factorial_function - openrouter/quasar-alpha
|
||||
|
||||
- Prompt: `Generate a JavaScript function that calculates the factorial of a number.
|
||||
The function should be named 'factorial' and take one parameter 'n'.
|
||||
Return only the function code, no explanation.`
|
||||
- Expected: `function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }`
|
||||
- Actual: `function factorial(n) {
|
||||
if (n === 0 || n === 1) {
|
||||
return 1;
|
||||
}
|
||||
return n * factorial(n - 1);
|
||||
}`
|
||||
- Duration: 948ms (0.95s)
|
||||
- Reason: Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {
|
||||
if (n === 0 || n === 1) {
|
||||
return 1;
|
||||
}
|
||||
return n * factorial(n - 1);
|
||||
}
|
||||
- Timestamp: 4/4/2025, 2:52:20 PM
|
||||
|
||||
### fibonacci_function - openrouter/quasar-alpha
|
||||
|
||||
- Prompt: `Generate a JavaScript function that calculates the nth Fibonacci number.
|
||||
The function should be named 'fibonacci' and take one parameter 'n'.
|
||||
Return only the function code, no explanation.`
|
||||
- Expected: `function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }`
|
||||
- Actual: `function fibonacci(n) {
|
||||
if (n <= 0) return 0;
|
||||
if (n === 1) return 1;
|
||||
let a = 0, b = 1;
|
||||
for (let i = 2; i <= n; i++) {
|
||||
let temp = a + b;
|
||||
a = b;
|
||||
b = temp;
|
||||
}
|
||||
return b;
|
||||
}`
|
||||
- Duration: 1287ms (1.29s)
|
||||
- Reason: Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {
|
||||
if (n <= 0) return 0;
|
||||
if (n === 1) return 1;
|
||||
let a = 0, b = 1;
|
||||
for (let i = 2; i <= n; i++) {
|
||||
let temp = a + b;
|
||||
a = b;
|
||||
b = temp;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
- Timestamp: 4/4/2025, 2:52:22 PM
|
||||
|
||||
## Passed Tests
|
||||
|
||||
*No passed tests*
|
||||
|
||||
Loading…
Reference in New Issue
Block a user