the coding challenge

This commit is contained in:
lovebird 2025-04-04 14:52:37 +02:00
parent ddf8dbce1f
commit fc53739b40
11 changed files with 1459 additions and 38 deletions

View File

@ -3,7 +3,7 @@
"messages": [
{
"role": "user",
"content": "Calculate 2 raised to the power of 3. Return only the number, no explanation."
"content": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation."
},
{
"role": "user",

View File

@ -27,6 +27,7 @@
"p-map": "7.0.3",
"ts-retry": "6.0.0",
"tslog": "^4.9.3",
"vm2": "^3.9.19",
"yargs": "17.7.2",
"zod": "3.24.2"
},
@ -2292,7 +2293,6 @@
"version": "8.14.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz",
"integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==",
"dev": true,
"license": "MIT",
"bin": {
"acorn": "bin/acorn"
@ -2311,6 +2311,18 @@
"acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
}
},
"node_modules/acorn-walk": {
"version": "8.3.4",
"resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz",
"integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==",
"license": "MIT",
"dependencies": {
"acorn": "^8.11.0"
},
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/agentkeepalive": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",
@ -7203,6 +7215,23 @@
}
}
},
"node_modules/vm2": {
"version": "3.9.19",
"resolved": "https://registry.npmjs.org/vm2/-/vm2-3.9.19.tgz",
"integrity": "sha512-J637XF0DHDMV57R6JyVsTak7nIL8gy5KH4r1HiwWLf/4GBbb5MKL5y7LpmF4A8E2nR6XmzpmMFQ7V7ppPTmUQg==",
"deprecated": "The library contains critical security issues and should not be used for production! The maintenance of the project has been discontinued. Consider migrating your code to isolated-vm.",
"license": "MIT",
"dependencies": {
"acorn": "^8.7.0",
"acorn-walk": "^8.2.0"
},
"bin": {
"vm2": "bin/vm2"
},
"engines": {
"node": ">=6.0"
}
},
"node_modules/watchpack": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.2.tgz",

View File

@ -50,7 +50,6 @@
"p-map": "7.0.3",
"ts-retry": "6.0.0",
"tslog": "^4.9.3",
"vm2": "^3.9.19",
"yargs": "17.7.2",
"zod": "3.24.2"
},

View File

@ -0,0 +1,3 @@
/**
 * Combines two operands with the `+` operator.
 *
 * For numbers this is arithmetic addition; for any other operand types the
 * usual JavaScript `+` coercion rules apply (e.g. strings concatenate).
 *
 * @param a Left-hand operand.
 * @param b Right-hand operand.
 * @returns The value of `a + b`.
 */
function add(a, b) {
  const sum = a + b;
  return sum;
}

View File

@ -0,0 +1,6 @@
/**
 * Computes the factorial `n!` of a non-negative integer.
 *
 * The product is accumulated iteratively so that large inputs cannot exhaust
 * the call stack; results beyond the range of a double overflow to `Infinity`.
 *
 * @param n Non-negative integer whose factorial is wanted.
 * @returns `1` for `n` of 0 or 1, otherwise `2 * 3 * ... * n`.
 * @throws {RangeError} If `n` is negative or not an integer. The recursive
 *   formulation never reached its base case for such inputs and ended in a
 *   stack overflow instead of a meaningful error.
 */
function factorial(n) {
  if (!Number.isInteger(n) || n < 0) {
    throw new RangeError('factorial is only defined for non-negative integers');
  }
  let product = 1;
  // Starting at 2 makes 0! and 1! fall out naturally as the initial value 1.
  for (let factor = 2; factor <= n; factor++) {
    product *= factor;
  }
  return product;
}

View File

@ -0,0 +1,11 @@
/**
 * Returns the n-th Fibonacci number (0-indexed: F(0) = 0, F(1) = 1).
 *
 * Any `n` less than or equal to zero yields 0. The sequence is advanced
 * iteratively, keeping only the two most recent terms.
 *
 * @param n Index into the Fibonacci sequence.
 * @returns The Fibonacci number at position `n`.
 */
function fibonacci(n) {
  if (n <= 0) return 0;
  let prev = 0;
  let curr = 1;
  // For n === 1 the loop body never runs and the seed value 1 is returned.
  for (let step = 2; step <= n; step += 1) {
    [prev, curr] = [curr, prev + curr];
  }
  return curr;
}

View File

@ -4,7 +4,7 @@ import { sync as exists } from "@polymech/fs/exists"
import { sync as read } from "@polymech/fs/read"
import { sync as write } from "@polymech/fs/write"
import { sync as mkdirp } from "mkdirp"
import { VM } from 'vm2'
import { createContext, runInContext } from 'node:vm'
import {
getDefaultModels,
@ -15,11 +15,12 @@ import {
TestResult,
runTest,
generateTestReport,
getReportPaths
getReportPaths,
ModelCategory
} from './commons'
// Optionally override models for this specific test file
const models = getDefaultModels()
const models = getDefaultModels(ModelCategory.CODING)
// Ensure test-data/code directory exists
const TEST_CODE_DIR = path.resolve(__dirname, '../test-data/code')
@ -32,12 +33,16 @@ describe('Coding Capabilities', () => {
const TEST_LOG_PATH = getReportPaths('coding', 'json')
const TEST_REPORT_PATH = getReportPaths('coding', 'md')
const executeCode = (code: string, context: any = {}): any => {
const vm = new VM({
timeout: 1000,
sandbox: context
})
return vm.run(code)
/**
 * Evaluates model-generated source in a fresh `node:vm` context and returns
 * the function it declares under `functionName`.
 *
 * The source is wrapped in an IIFE so its declarations stay local to the
 * wrapper and only the requested binding is handed back to the caller.
 *
 * NOTE: `node:vm` is not a security boundary, and the `timeout` option only
 * bounds evaluation of the wrapper itself; calls made later on the returned
 * function run without that limit.
 *
 * @param code JavaScript source expected to declare `functionName`.
 * @param functionName Plain identifier of the function to extract.
 * @returns The function object created inside the VM context.
 * @throws {TypeError} If `functionName` is not a plain identifier, or if the
 *   binding it names is not a function.
 */
const executeCode = (code: string, functionName: string): any => {
  // functionName is spliced into the evaluated source, so reject anything
  // that is not a simple identifier before building the wrapper.
  if (!/^[A-Za-z_$][\w$]*$/.test(functionName)) {
    throw new TypeError(`Invalid function name: ${JSON.stringify(functionName)}`)
  }
  const vmContext = createContext({})
  // Wrap the code in a module pattern to avoid global scope pollution
  const wrappedCode = `
(function() {
${code}
return ${functionName};
})()
`
  const exported = runInContext(wrappedCode, vmContext, { timeout: 1000 })
  // Fail here with a clear message rather than with "x is not a function"
  // at the call site in the test.
  if (typeof exported !== 'function') {
    throw new TypeError(`Expected '${functionName}' to be a function, got ${typeof exported}`)
  }
  return exported
}
it.each(models)('should generate and execute a simple function with model %s', async (modelName) => {
@ -60,8 +65,7 @@ describe('Coding Capabilities', () => {
write(codePath, result.result[0])
// Execute the code
const context = {}
const addFunction = executeCode(result.result[0], context)
const addFunction = executeCode(result.result[0], 'add')
expect(addFunction(5, 3)).toBe(8)
}, { timeout: TEST_TIMEOUT })
@ -85,8 +89,7 @@ describe('Coding Capabilities', () => {
write(codePath, result.result[0])
// Execute the code
const context = {}
const factorialFunction = executeCode(result.result[0], context)
const factorialFunction = executeCode(result.result[0], 'factorial')
expect(factorialFunction(5)).toBe(120)
}, { timeout: TEST_TIMEOUT })
@ -110,8 +113,7 @@ describe('Coding Capabilities', () => {
write(codePath, result.result[0])
// Execute the code
const context = {}
const fibonacciFunction = executeCode(result.result[0], context)
const fibonacciFunction = executeCode(result.result[0], 'fibonacci')
expect(fibonacciFunction(6)).toBe(8)
}, { timeout: TEST_TIMEOUT })

View File

@ -10,7 +10,8 @@ export enum ModelCategory {
FAST = 'fast',
LANGUAGE = 'language',
TOOL = 'tool',
ALL = 'all'
ALL = 'all',
CODING = 'coding'
}
export const getFastModels = (): string[] => {
@ -22,6 +23,12 @@ export const getFastModels = (): string[] => {
]
}
/**
 * Lists the model identifiers selected for the coding category
 * (`ModelCategory.CODING`).
 *
 * @returns A new array containing the coding model identifiers.
 */
export const getCodingModels = (): string[] => [
  E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA
]
export const getLanguageModels = (): string[] => {
return [
E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET,
@ -45,6 +52,8 @@ export const getDefaultModels = (category: ModelCategory = ModelCategory.FAST):
return getLanguageModels()
case ModelCategory.TOOL:
return getToolModels()
case ModelCategory.CODING:
return getCodingModels()
case ModelCategory.ALL:
default:
return [
@ -124,7 +133,7 @@ export const isEmptyResponse = (result: string[] | null | undefined): boolean =>
export const generateHighscores = (latestResults: Map<string, Map<string, TestResult>>): TestHighscore[] => {
const highscores: TestHighscore[] = []
for (const [testName, modelResults] of latestResults) {
// Convert model results to array and sort by duration
const sortedResults = Array.from(modelResults.entries())
@ -143,14 +152,14 @@ export const generateHighscores = (latestResults: Map<string, Map<string, TestRe
})
}
}
return highscores
}
export const runTest = async (
prompt: string,
expected: string,
testName: string,
prompt: string,
expected: string,
testName: string,
modelName: string,
logPath: string,
mode: "completion" | "tools" | "assistant" | "custom" = "completion",
@ -161,7 +170,9 @@ export const runTest = async (
let startTime = Date.now()
let error: TestResult['error'] | undefined
let testResult: TestResult | undefined
let defaultOptions = {
filters: 'code'
}
try {
const result = await Promise.race([
run({
@ -172,14 +183,14 @@ export const runTest = async (
logs: TEST_LOGS_PATH,
preferences: TEST_PREFERENCES_PATH,
logLevel: 2,
...options,
...{ ...defaultOptions, ...options },
onRun: async (options) => {
model = options.model || 'unknown'
router = options.model as string
return options
}
}),
new Promise((_, reject) =>
new Promise((_, reject) =>
setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
)
]) as string[]
@ -200,7 +211,7 @@ export const runTest = async (
} else {
const actual = result?.[0]?.trim()?.toLowerCase() || ''
const passed = actual === expected
testResult = {
test: testName,
prompt,
@ -234,14 +245,14 @@ export const runTest = async (
if (testResult) {
// Extract category from logPath (e.g., 'reports/basic.json' -> 'basic')
const category = path.basename(logPath, path.extname(logPath))
// Add category to test result
testResult.category = category
// Update category-specific log
const existingData = exists(logPath) === 'file' ? JSON.parse(read(logPath) as string) : { results: [], highscores: [] }
const updatedResults = [...(existingData.results || []), testResult]
// Group results by test and model
const latestResults = new Map<string, Map<string, TestResult>>()
updatedResults.forEach(result => {
@ -269,7 +280,7 @@ export const runTest = async (
const allLogPath = path.resolve(REPORTS_DIR, 'all.json')
const allExistingData = exists(allLogPath) === 'file' ? JSON.parse(read(allLogPath) as string) : { results: [], highscores: [] }
const allUpdatedResults = [...(allExistingData.results || []), testResult]
// Group all results by test and model
const allLatestResults = new Map<string, Map<string, TestResult>>()
allUpdatedResults.forEach(result => {
@ -304,7 +315,7 @@ export const generateTestReport = (
): void => {
// Group results by test and model
const latestResults = new Map<string, Map<string, TestResult>>()
// Get only the latest result for each test+model combination
testResults.forEach(result => {
if (!latestResults.has(result.test)) {
@ -322,12 +333,12 @@ export const generateTestReport = (
// Add highscore section
report += '## Highscores\n\n'
// Add regular test rankings
report += '### Performance Rankings (Duration)\n\n'
report += '| Test | Model | Duration (ms) | Duration (s) |\n'
report += '|------|-------|--------------|--------------|\n'
Array.from(latestResults.entries()).forEach(([testName, modelResults]) => {
const sortedResults = Array.from(modelResults.entries())
.map(([model, result]) => ({
@ -353,7 +364,7 @@ export const generateTestReport = (
report += `- Failed: ${failedTests}\n`
report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n`
report += `- Average Duration: ${avgDuration.toFixed(0)}ms (${(avgDuration / 1000).toFixed(2)}s)\n\n`
// First list failed tests
report += '## Failed Tests\n\n'
let hasFailures = false
@ -379,7 +390,7 @@ export const generateTestReport = (
}
}
}
if (!hasFailures) {
report += '*No failed tests*\n\n'
}
@ -400,7 +411,7 @@ export const generateTestReport = (
}
}
}
if (!hasPassed) {
report += '*No passed tests*\n\n'
}

View File

@ -7627,6 +7627,591 @@
"passed": true,
"duration": 1567,
"category": "math"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:44:45.218Z",
"passed": false,
"duration": 1435,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:44:49.883Z",
"passed": false,
"duration": 4655,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:44:51.127Z",
"passed": false,
"duration": 1237,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:44:52.054Z",
"passed": false,
"duration": 917,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:44:53.187Z",
"passed": false,
"duration": 1125,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:45:02.361Z",
"passed": false,
"duration": 9166,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:45:04.060Z",
"passed": false,
"duration": 1690,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:45:05.043Z",
"passed": false,
"duration": 975,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:45:06.250Z",
"passed": false,
"duration": 1200,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:45:22.955Z",
"passed": false,
"duration": 16697,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:45:25.030Z",
"passed": false,
"duration": 2068,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:45:27.584Z",
"passed": false,
"duration": 2546,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:45:51.588Z",
"passed": false,
"duration": 1992,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:45:55.658Z",
"passed": false,
"duration": 4061,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:45:56.953Z",
"passed": false,
"duration": 1287,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:45:57.801Z",
"passed": false,
"duration": 840,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:45:59.452Z",
"passed": false,
"duration": 1644,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:46:05.931Z",
"passed": false,
"duration": 6471,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:46:07.867Z",
"passed": false,
"duration": 1927,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:46:09.006Z",
"passed": false,
"duration": 1131,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:46:10.511Z",
"passed": false,
"duration": 1496,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:46:28.567Z",
"passed": false,
"duration": 18048,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:46:31.014Z",
"passed": false,
"duration": 2439,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:46:32.933Z",
"passed": false,
"duration": 1911,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:50:37.864Z",
"passed": false,
"duration": 1441,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:50:42.826Z",
"passed": false,
"duration": 4953,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:50:44.254Z",
"passed": false,
"duration": 1420,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:50:45.251Z",
"passed": false,
"duration": 989,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:50:46.448Z",
"passed": false,
"duration": 1187,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:50:52.751Z",
"passed": false,
"duration": 6295,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:50:54.086Z",
"passed": false,
"duration": 1326,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:50:55.060Z",
"passed": false,
"duration": 965,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:50:56.266Z",
"passed": false,
"duration": 1198,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:51:06.690Z",
"passed": false,
"duration": 10417,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:51:09.116Z",
"passed": false,
"duration": 2418,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:51:11.294Z",
"passed": false,
"duration": 2169,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:52:19.974Z",
"passed": false,
"duration": 1425,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:52:20.932Z",
"passed": false,
"duration": 948,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:52:22.228Z",
"passed": false,
"duration": 1287,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}",
"category": "coding"
}
],
"highscores": [
@ -7809,7 +8394,52 @@
"duration_secs": 2.274
}
]
},
{
"test": "simple_function",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 1420,
"duration_secs": 1.42
},
{
"model": "openrouter/quasar-alpha",
"duration": 1425,
"duration_secs": 1.425
}
]
},
{
"test": "factorial_function",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 948,
"duration_secs": 0.948
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 1187,
"duration_secs": 1.187
}
]
},
{
"test": "fibonacci_function",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 1198,
"duration_secs": 1.198
},
{
"model": "openrouter/quasar-alpha",
"duration": 1287,
"duration_secs": 1.287
}
]
}
],
"lastUpdated": "2025-04-04T12:39:21.300Z"
"lastUpdated": "2025-04-04T12:52:22.232Z"
}

View File

@ -0,0 +1,637 @@
{
"results": [
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:44:45.218Z",
"passed": false,
"duration": 1435,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:44:49.883Z",
"passed": false,
"duration": 4655,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:44:51.127Z",
"passed": false,
"duration": 1237,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:44:52.054Z",
"passed": false,
"duration": 917,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:44:53.187Z",
"passed": false,
"duration": 1125,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:45:02.361Z",
"passed": false,
"duration": 9166,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n <= 1) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:45:04.060Z",
"passed": false,
"duration": 1690,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:45:05.043Z",
"passed": false,
"duration": 975,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:45:06.250Z",
"passed": false,
"duration": 1200,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:45:22.955Z",
"passed": false,
"duration": 16697,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a;\n a = b;\n b = temp + b;\n }\n return b;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:45:25.030Z",
"passed": false,
"duration": 2068,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:45:27.584Z",
"passed": false,
"duration": 2546,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:45:51.588Z",
"passed": false,
"duration": 1992,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:45:55.658Z",
"passed": false,
"duration": 4061,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:45:56.953Z",
"passed": false,
"duration": 1287,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction add(a, b) {\n return a + b;\n}\n```"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:45:57.801Z",
"passed": false,
"duration": 840,
"reason": "Expected function add(a, b) { return a + b; }, but got ```javascript\nfunction add(a, b) {\n return a + b;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:45:59.452Z",
"passed": false,
"duration": 1644,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:46:05.931Z",
"passed": false,
"duration": 6471,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 2) return 1;\n let result = 1;\n for (let i = 2; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:46:07.867Z",
"passed": false,
"duration": 1927,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got ```javascript\nfunction factorial(n) {\n if (n < 0) return undefined;\n if (n === 0) return 1;\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}\n```",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:46:09.006Z",
"passed": false,
"duration": 1131,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n < 0) return undefined;\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:46:10.511Z",
"passed": false,
"duration": 1496,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) {\n return n;\n } else {\n return fibonacci(n - 1) + fibonacci(n - 2);\n }\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:46:28.567Z",
"passed": false,
"duration": 18048,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n let a = 0, b = 1;\n if (n === 0) return a;\n for (let i = 2; i <= n; i++) {\n let next = a + b;\n a = b;\n b = next;\n }\n return b;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:46:31.014Z",
"passed": false,
"duration": 2439,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, temp;\n for (let i = 2; i <= n; i++) {\n temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:46:32.933Z",
"passed": false,
"duration": 1911,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got ```javascript\nfunction fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}\n```",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:50:37.864Z",
"passed": false,
"duration": 1441,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:50:42.826Z",
"passed": false,
"duration": 4953,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:50:44.254Z",
"passed": false,
"duration": 1420,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:50:45.251Z",
"passed": false,
"duration": 989,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:50:46.448Z",
"passed": false,
"duration": 1187,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0) {\n return 1;\n } else {\n return n * factorial(n - 1);\n }\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:50:52.751Z",
"passed": false,
"duration": 6295,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n let result = 1;\n for (let i = 1; i <= n; i++) {\n result *= i;\n }\n return result;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:50:54.086Z",
"passed": false,
"duration": 1326,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:50:55.060Z",
"passed": false,
"duration": 965,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) return 1;\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:50:56.266Z",
"passed": false,
"duration": 1198,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) {\n return n;\n }\n\n return fibonacci(n - 1) + fibonacci(n - 2);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:51:06.690Z",
"passed": false,
"duration": 10417,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n === 0) return 0;\n if (n === 1 || n === 2) return 1;\n let a = 1, b = 1, temp;\n for (let i = 3; i <= n; i++) {\n temp = a;\n a = a + b;\n b = temp;\n }\n return a;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:51:09.116Z",
"passed": false,
"duration": 2418,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 1) return n;\n let a = 0, b = 1, c;\n for (let i = 2; i <= n; i++) {\n c = a + b;\n a = b;\n b = c;\n }\n return b;\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:51:11.294Z",
"passed": false,
"duration": 2169,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n [a, b] = [b, a + b];\n }\n return b;\n}",
"category": "coding"
},
{
"test": "simple_function",
"prompt": "Generate a JavaScript function that adds two numbers and returns the result. \n The function should be named 'add' and take two parameters 'a' and 'b'.\n Return only the function code, no explanation.",
"result": [
"function add(a, b) {\n return a + b;\n}"
],
"expected": "function add(a, b) { return a + b; }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:52:19.974Z",
"passed": false,
"duration": 1425,
"reason": "Expected function add(a, b) { return a + b; }, but got function add(a, b) {\n return a + b;\n}",
"category": "coding"
},
{
"test": "factorial_function",
"prompt": "Generate a JavaScript function that calculates the factorial of a number.\n The function should be named 'factorial' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}"
],
"expected": "function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:52:20.932Z",
"passed": false,
"duration": 948,
"reason": "Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {\n if (n === 0 || n === 1) {\n return 1;\n }\n return n * factorial(n - 1);\n}",
"category": "coding"
},
{
"test": "fibonacci_function",
"prompt": "Generate a JavaScript function that calculates the nth Fibonacci number.\n The function should be named 'fibonacci' and take one parameter 'n'.\n Return only the function code, no explanation.",
"result": [
"function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}"
],
"expected": "function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:52:22.228Z",
"passed": false,
"duration": 1287,
"reason": "Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {\n if (n <= 0) return 0;\n if (n === 1) return 1;\n let a = 0, b = 1;\n for (let i = 2; i <= n; i++) {\n let temp = a + b;\n a = b;\n b = temp;\n }\n return b;\n}",
"category": "coding"
}
],
"highscores": [
{
"test": "simple_function",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 1420,
"duration_secs": 1.42
},
{
"model": "openrouter/quasar-alpha",
"duration": 1425,
"duration_secs": 1.425
}
]
},
{
"test": "factorial_function",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 948,
"duration_secs": 0.948
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 1187,
"duration_secs": 1.187
}
]
},
{
"test": "fibonacci_function",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 1198,
"duration_secs": 1.198
},
{
"model": "openrouter/quasar-alpha",
"duration": 1287,
"duration_secs": 1.287
}
]
}
],
"lastUpdated": "2025-04-04T12:52:22.229Z"
}

View File

@ -0,0 +1,93 @@
# Coding Capabilities Test Results
## Highscores
### Performance Rankings (Duration)
| Test | Model | Duration (ms) | Duration (s) |
|------|-------|--------------|--------------|
| simple_function | openrouter/quasar-alpha | 1425 | 1.43 |
| factorial_function | openrouter/quasar-alpha | 948 | 0.95 |
| fibonacci_function | openrouter/quasar-alpha | 1287 | 1.29 |
## Summary
- Total Tests: 3
- Passed: 0
- Failed: 3
- Success Rate: 0.00%
- Average Duration: 1220ms (1.22s)
## Failed Tests
### simple_function - openrouter/quasar-alpha
- Prompt: `Generate a JavaScript function that adds two numbers and returns the result.
The function should be named 'add' and take two parameters 'a' and 'b'.
Return only the function code, no explanation.`
- Expected: `function add(a, b) { return a + b; }`
- Actual: `function add(a, b) {
return a + b;
}`
- Duration: 1425ms (1.43s)
- Reason: Expected function add(a, b) { return a + b; }, but got function add(a, b) {
return a + b;
}
- Timestamp: 4/4/2025, 2:52:19 PM
### factorial_function - openrouter/quasar-alpha
- Prompt: `Generate a JavaScript function that calculates the factorial of a number.
The function should be named 'factorial' and take one parameter 'n'.
Return only the function code, no explanation.`
- Expected: `function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }`
- Actual: `function factorial(n) {
if (n === 0 || n === 1) {
return 1;
}
return n * factorial(n - 1);
}`
- Duration: 948ms (0.95s)
- Reason: Expected function factorial(n) { return n <= 1 ? 1 : n * factorial(n - 1); }, but got function factorial(n) {
if (n === 0 || n === 1) {
return 1;
}
return n * factorial(n - 1);
}
- Timestamp: 4/4/2025, 2:52:20 PM
### fibonacci_function - openrouter/quasar-alpha
- Prompt: `Generate a JavaScript function that calculates the nth Fibonacci number.
The function should be named 'fibonacci' and take one parameter 'n'.
Return only the function code, no explanation.`
- Expected: `function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }`
- Actual: `function fibonacci(n) {
if (n <= 0) return 0;
if (n === 1) return 1;
let a = 0, b = 1;
for (let i = 2; i <= n; i++) {
let temp = a + b;
a = b;
b = temp;
}
return b;
}`
- Duration: 1287ms (1.29s)
- Reason: Expected function fibonacci(n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); }, but got function fibonacci(n) {
if (n <= 0) return 0;
if (n === 1) return 1;
let a = 0, b = 1;
for (let i = 2; i <= n; i++) {
let temp = a + b;
a = b;
b = temp;
}
return b;
}
- Timestamp: 4/4/2025, 2:52:22 PM
## Passed Tests
*No passed tests*