541 lines
15 KiB
JSON
541 lines
15 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:39.340Z",
|
|
"passed": true,
|
|
"duration": 1551
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:42.962Z",
|
|
"passed": true,
|
|
"duration": 3621
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:43.836Z",
|
|
"passed": true,
|
|
"duration": 873
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:47.309Z",
|
|
"passed": true,
|
|
"duration": 3472
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:48.493Z",
|
|
"passed": true,
|
|
"duration": 1183
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:53.335Z",
|
|
"passed": true,
|
|
"duration": 4841
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:38:37.069Z",
|
|
"passed": true,
|
|
"duration": 1256
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:38:40.167Z",
|
|
"passed": false,
|
|
"duration": 3096,
|
|
"error": {
|
|
"message": "Model returned empty response",
|
|
"code": "UNKNOWN",
|
|
"type": "Error",
|
|
"details": {
|
|
"stack": "Error: Model returned empty response\n at Module.runTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:85:13)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:21:20)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
|
"message": "Model returned empty response"
|
|
}
|
|
},
|
|
"reason": "Model returned empty response"
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:38:41.086Z",
|
|
"passed": true,
|
|
"duration": 916
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:38:45.879Z",
|
|
"passed": true,
|
|
"duration": 4793
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:38:46.900Z",
|
|
"passed": true,
|
|
"duration": 1020
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:38:50.446Z",
|
|
"passed": true,
|
|
"duration": 3545
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:39:58.836Z",
|
|
"passed": true,
|
|
"duration": 1266
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:40:02.777Z",
|
|
"passed": true,
|
|
"duration": 3939
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:40:03.961Z",
|
|
"passed": true,
|
|
"duration": 1183
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:40:06.962Z",
|
|
"passed": true,
|
|
"duration": 3000
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:40:08.115Z",
|
|
"passed": true,
|
|
"duration": 1152
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:40:12.565Z",
|
|
"passed": true,
|
|
"duration": 4449
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:41:52.176Z",
|
|
"passed": true,
|
|
"duration": 1458
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:41:55.869Z",
|
|
"passed": true,
|
|
"duration": 3691
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:41:57.106Z",
|
|
"passed": true,
|
|
"duration": 1236
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:41:59.974Z",
|
|
"passed": true,
|
|
"duration": 2867
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:42:01.272Z",
|
|
"passed": true,
|
|
"duration": 1297
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:42:04.326Z",
|
|
"passed": true,
|
|
"duration": 3053
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:44:39.002Z",
|
|
"passed": true,
|
|
"duration": 1196
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:44:48.668Z",
|
|
"passed": true,
|
|
"duration": 9664
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:44:49.806Z",
|
|
"passed": true,
|
|
"duration": 1137
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:44:53.017Z",
|
|
"passed": true,
|
|
"duration": 3210
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:44:53.814Z",
|
|
"passed": true,
|
|
"duration": 796
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:44:55.383Z",
|
|
"passed": true,
|
|
"duration": 1568
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:47:00.955Z",
|
|
"passed": true,
|
|
"duration": 1297
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:49:09.343Z",
|
|
"passed": true,
|
|
"duration": 1278
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:49:15.630Z",
|
|
"passed": true,
|
|
"duration": 6285
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:49:16.246Z",
|
|
"passed": true,
|
|
"duration": 615
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:49:25.857Z",
|
|
"passed": true,
|
|
"duration": 9610
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:49:27.101Z",
|
|
"passed": true,
|
|
"duration": 1242
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:49:31.142Z",
|
|
"passed": true,
|
|
"duration": 4040
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "addition",
|
|
"rankings": [
|
|
{
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"duration": 1278,
|
|
"duration_secs": 1.278
|
|
},
|
|
{
|
|
"model": "qwen/qwq-32b",
|
|
"duration": 6285,
|
|
"duration_secs": 6.285
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"rankings": [
|
|
{
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"duration": 615,
|
|
"duration_secs": 0.615
|
|
},
|
|
{
|
|
"model": "qwen/qwq-32b",
|
|
"duration": 9610,
|
|
"duration_secs": 9.61
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "division",
|
|
"rankings": [
|
|
{
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"duration": 1242,
|
|
"duration_secs": 1.242
|
|
},
|
|
{
|
|
"model": "qwen/qwq-32b",
|
|
"duration": 4040,
|
|
"duration_secs": 4.04
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-04-02T10:49:31.142Z"
|
|
} |