mono/packages/kbot/tests/unit/reports/basic.json
2025-04-04 14:44:04 +02:00

1471 lines
39 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"results": [
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:09.502Z",
"passed": true,
"duration": 1237
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:13.802Z",
"passed": true,
"duration": 4298
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:15.214Z",
"passed": true,
"duration": 1411
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:18.337Z",
"passed": true,
"duration": 3122
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:18.922Z",
"passed": true,
"duration": 583
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:22.539Z",
"passed": true,
"duration": 3615
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:08.904Z",
"passed": true,
"duration": 1888
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:15.210Z",
"passed": true,
"duration": 6304
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:16.502Z",
"passed": true,
"duration": 1291
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:18.728Z",
"passed": true,
"duration": 2225
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:19.938Z",
"passed": true,
"duration": 1209
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:27.791Z",
"passed": true,
"duration": 7852
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:21.370Z",
"passed": true,
"duration": 1213,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:24.898Z",
"passed": true,
"duration": 3524,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:25.624Z",
"passed": true,
"duration": 724,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:26.630Z",
"passed": true,
"duration": 1005,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:27.812Z",
"passed": true,
"duration": 1178,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:31.317Z",
"passed": true,
"duration": 3503,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:32.288Z",
"passed": true,
"duration": 969,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:33.147Z",
"passed": true,
"duration": 858,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:33.724Z",
"passed": true,
"duration": 576,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:34.841Z",
"passed": true,
"duration": 1115,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:35.673Z",
"passed": true,
"duration": 831,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:16:36.762Z",
"passed": true,
"duration": 1087,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:25.749Z",
"passed": true,
"duration": 1644,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:31.261Z",
"passed": true,
"duration": 5507,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:32.131Z",
"passed": true,
"duration": 869,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:33.306Z",
"passed": true,
"duration": 1173,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:34.323Z",
"passed": true,
"duration": 1016,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:38.976Z",
"passed": true,
"duration": 4651,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:39.914Z",
"passed": true,
"duration": 937,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:41.053Z",
"passed": true,
"duration": 1137,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:42.918Z",
"passed": true,
"duration": 1863,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:47.234Z",
"passed": true,
"duration": 4314,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:47.966Z",
"passed": true,
"duration": 730,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:20:48.941Z",
"passed": true,
"duration": 973,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:15.745Z",
"passed": true,
"duration": 1951,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:19.476Z",
"passed": true,
"duration": 3726,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:20.854Z",
"passed": true,
"duration": 1376,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:28.044Z",
"passed": true,
"duration": 7188,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:43.203Z",
"passed": true,
"duration": 15157,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:50.736Z",
"passed": true,
"duration": 7531,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:51.834Z",
"passed": true,
"duration": 1096,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:55.428Z",
"passed": true,
"duration": 3592,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:56.874Z",
"passed": true,
"duration": 1444,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:25:57.746Z",
"passed": true,
"duration": 870,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "deepseek/deepseek-r1",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:08.731Z",
"passed": true,
"duration": 10983,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:14.379Z",
"passed": true,
"duration": 5646,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:15.658Z",
"passed": true,
"duration": 1276,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:21.428Z",
"passed": true,
"duration": 5768,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:22.358Z",
"passed": true,
"duration": 929,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:23.155Z",
"passed": true,
"duration": 794,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:38.566Z",
"passed": true,
"duration": 15409,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "openrouter",
"timestamp": "2025-04-02T11:26:40.358Z",
"passed": true,
"duration": 1790,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:44:06.429Z",
"passed": true,
"duration": 1689,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:44:10.240Z",
"passed": true,
"duration": 3807,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:44:11.128Z",
"passed": true,
"duration": 885,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:44:21.587Z",
"passed": true,
"duration": 10455,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:44:33.654Z",
"passed": true,
"duration": 12064,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"The sum of 5 and 3 is 8. Therefore, the result is \\boxed{8}."
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:44:40.062Z",
"passed": false,
"duration": 6405,
"reason": "Expected 8, but got the sum of 5 and 3 is 8. therefore, the result is \\boxed{8}.",
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:44:41.261Z",
"passed": true,
"duration": 1190,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:44:46.272Z",
"passed": true,
"duration": 5008,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:44:47.386Z",
"passed": true,
"duration": 1111,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:44:48.372Z",
"passed": true,
"duration": 984,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24\n\n24\n\nThe result is 24.\n\n24\n\nHere's the answer: 24\n\nThe answer will be 24.\n\n24\n\n24\n\nThe product of 8 and 3 is 24.\n\n24\n\nThe answer is 24.\n\n24\n\n24\n\n24\n\nThe result is 24.\n\n24\n\nHere's the numerical result: 24\n\nThe answer is 24.\n\n24\n\n24\n\nThe answer is 24."
],
"expected": "24",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:44:53.633Z",
"passed": false,
"duration": 5258,
"reason": "Expected 24, but got 24\n\n24\n\nthe result is 24.\n\n24\n\nhere's the answer: 24\n\nthe answer will be 24.\n\n24\n\n24\n\nthe product of 8 and 3 is 24.\n\n24\n\nthe answer is 24.\n\n24\n\n24\n\n24\n\nthe result is 24.\n\n24\n\nhere's the numerical result: 24\n\nthe answer is 24.\n\n24\n\n24\n\nthe answer is 24.",
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:44:55.196Z",
"passed": true,
"duration": 1558,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:44:56.604Z",
"passed": true,
"duration": 1405,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:44:57.523Z",
"passed": true,
"duration": 917,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:44:58.630Z",
"passed": true,
"duration": 1104,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:44:59.523Z",
"passed": true,
"duration": 889,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:45:06.658Z",
"passed": true,
"duration": 7130,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:45:10.307Z",
"passed": true,
"duration": 3646,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:38.904Z",
"passed": true,
"duration": 2263,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:41.138Z",
"passed": true,
"duration": 2228,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:41.934Z",
"passed": true,
"duration": 791,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:43.239Z",
"passed": true,
"duration": 1300,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"8 × 3 = 24"
],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:46.025Z",
"passed": false,
"duration": 2782,
"reason": "Expected 24, but got 8 × 3 = 24",
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:47.239Z",
"passed": true,
"duration": 1206,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:48.026Z",
"passed": true,
"duration": 783,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:51.012Z",
"passed": true,
"duration": 2982,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:51.777Z",
"passed": true,
"duration": 760,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-03T17:14:04.393Z",
"passed": true,
"duration": 1484,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-03T17:14:12.861Z",
"passed": true,
"duration": 8460,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-03T17:14:13.779Z",
"passed": true,
"duration": 910,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-03T17:14:14.740Z",
"passed": true,
"duration": 955,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-03T17:14:22.399Z",
"passed": true,
"duration": 7653,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-03T17:14:23.502Z",
"passed": true,
"duration": 1095,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-03T17:14:24.325Z",
"passed": true,
"duration": 816,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"15 divided by 3 is 5. \n\nAnswer: 5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-03T17:14:40.987Z",
"passed": false,
"duration": 16655,
"reason": "Expected 5, but got 15 divided by 3 is 5. \n\nanswer: 5",
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-03T17:14:41.951Z",
"passed": true,
"duration": 954,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:36:55.754Z",
"passed": true,
"duration": 1505,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:36:59.232Z",
"passed": true,
"duration": 3470,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:37:00.080Z",
"passed": true,
"duration": 842,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:37:00.897Z",
"passed": true,
"duration": 811,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:37:01.784Z",
"passed": true,
"duration": 881,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:37:03.117Z",
"passed": false,
"duration": 1327,
"reason": "Model returned empty response",
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:37:04.222Z",
"passed": true,
"duration": 1096,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:37:05.008Z",
"passed": true,
"duration": 780,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-04T12:37:05.799Z",
"passed": true,
"duration": 784,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-04T12:37:10.272Z",
"passed": true,
"duration": 4467,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-04T12:37:11.255Z",
"passed": true,
"duration": 975,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-04T12:37:11.993Z",
"passed": true,
"duration": 731,
"category": "basic"
}
],
"highscores": [
{
"test": "addition",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 811,
"duration_secs": 0.811
},
{
"model": "openai/gpt-4o-mini",
"duration": 842,
"duration_secs": 0.842
}
]
},
{
"test": "multiplication",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 780,
"duration_secs": 0.78
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 881,
"duration_secs": 0.881
}
]
},
{
"test": "division",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 731,
"duration_secs": 0.731
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 784,
"duration_secs": 0.784
}
]
}
],
"lastUpdated": "2025-04-04T12:37:11.994Z"
}