mono/packages/kbot/tests/unit/basic.json
2025-04-02 12:49:47 +02:00

541 lines
15 KiB
JSON

{
"results": [
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-01T22:17:39.340Z",
"passed": true,
"duration": 1551
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-01T22:17:42.962Z",
"passed": true,
"duration": 3621
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-01T22:17:43.836Z",
"passed": true,
"duration": 873
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-01T22:17:47.309Z",
"passed": true,
"duration": 3472
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-01T22:17:48.493Z",
"passed": true,
"duration": 1183
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-01T22:17:53.335Z",
"passed": true,
"duration": 4841
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:38:37.069Z",
"passed": true,
"duration": 1256
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:38:40.167Z",
"passed": false,
"duration": 3096,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at Module.runTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:85:13)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:21:20)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response"
}
},
"reason": "Model returned empty response"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:38:41.086Z",
"passed": true,
"duration": 916
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:38:45.879Z",
"passed": true,
"duration": 4793
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:38:46.900Z",
"passed": true,
"duration": 1020
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:38:50.446Z",
"passed": true,
"duration": 3545
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:39:58.836Z",
"passed": true,
"duration": 1266
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:40:02.777Z",
"passed": true,
"duration": 3939
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:40:03.961Z",
"passed": true,
"duration": 1183
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:40:06.962Z",
"passed": true,
"duration": 3000
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:40:08.115Z",
"passed": true,
"duration": 1152
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:40:12.565Z",
"passed": true,
"duration": 4449
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:41:52.176Z",
"passed": true,
"duration": 1458
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:41:55.869Z",
"passed": true,
"duration": 3691
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:41:57.106Z",
"passed": true,
"duration": 1236
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:41:59.974Z",
"passed": true,
"duration": 2867
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:42:01.272Z",
"passed": true,
"duration": 1297
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:42:04.326Z",
"passed": true,
"duration": 3053
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:44:39.002Z",
"passed": true,
"duration": 1196
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:44:48.668Z",
"passed": true,
"duration": 9664
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:44:49.806Z",
"passed": true,
"duration": 1137
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:44:53.017Z",
"passed": true,
"duration": 3210
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:44:53.814Z",
"passed": true,
"duration": 796
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:44:55.383Z",
"passed": true,
"duration": 1568
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:47:00.955Z",
"passed": true,
"duration": 1297
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:49:09.343Z",
"passed": true,
"duration": 1278
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:49:15.630Z",
"passed": true,
"duration": 6285
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:49:16.246Z",
"passed": true,
"duration": 615
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:49:25.857Z",
"passed": true,
"duration": 9610
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:49:27.101Z",
"passed": true,
"duration": 1242
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:49:31.142Z",
"passed": true,
"duration": 4040
}
],
"highscores": [
{
"test": "addition",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 1278,
"duration_secs": 1.278
},
{
"model": "qwen/qwq-32b",
"duration": 6285,
"duration_secs": 6.285
}
]
},
{
"test": "multiplication",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 615,
"duration_secs": 0.615
},
{
"model": "qwen/qwq-32b",
"duration": 9610,
"duration_secs": 9.61
}
]
},
{
"test": "division",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 1242,
"duration_secs": 1.242
},
{
"model": "qwen/qwq-32b",
"duration": 4040,
"duration_secs": 4.04
}
]
}
],
"lastUpdated": "2025-04-02T10:49:31.142Z"
}