2381 lines
64 KiB
JSON
2381 lines
64 KiB
JSON
{
|
||
"results": [
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T10:56:09.502Z",
|
||
"passed": true,
|
||
"duration": 1237
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T10:56:13.802Z",
|
||
"passed": true,
|
||
"duration": 4298
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T10:56:15.214Z",
|
||
"passed": true,
|
||
"duration": 1411
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T10:56:18.337Z",
|
||
"passed": true,
|
||
"duration": 3122
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T10:56:18.922Z",
|
||
"passed": true,
|
||
"duration": 583
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T10:56:22.539Z",
|
||
"passed": true,
|
||
"duration": 3615
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:01:08.904Z",
|
||
"passed": true,
|
||
"duration": 1888
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:01:15.210Z",
|
||
"passed": true,
|
||
"duration": 6304
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:01:16.502Z",
|
||
"passed": true,
|
||
"duration": 1291
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:01:18.728Z",
|
||
"passed": true,
|
||
"duration": 2225
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:01:19.938Z",
|
||
"passed": true,
|
||
"duration": 1209
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:01:27.791Z",
|
||
"passed": true,
|
||
"duration": 7852
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:21.370Z",
|
||
"passed": true,
|
||
"duration": 1213,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:24.898Z",
|
||
"passed": true,
|
||
"duration": 3524,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:25.624Z",
|
||
"passed": true,
|
||
"duration": 724,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:26.630Z",
|
||
"passed": true,
|
||
"duration": 1005,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:27.812Z",
|
||
"passed": true,
|
||
"duration": 1178,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:31.317Z",
|
||
"passed": true,
|
||
"duration": 3503,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:32.288Z",
|
||
"passed": true,
|
||
"duration": 969,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:33.147Z",
|
||
"passed": true,
|
||
"duration": 858,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:33.724Z",
|
||
"passed": true,
|
||
"duration": 576,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:34.841Z",
|
||
"passed": true,
|
||
"duration": 1115,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:35.673Z",
|
||
"passed": true,
|
||
"duration": 831,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:16:36.762Z",
|
||
"passed": true,
|
||
"duration": 1087,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:25.749Z",
|
||
"passed": true,
|
||
"duration": 1644,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:31.261Z",
|
||
"passed": true,
|
||
"duration": 5507,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:32.131Z",
|
||
"passed": true,
|
||
"duration": 869,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:33.306Z",
|
||
"passed": true,
|
||
"duration": 1173,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:34.323Z",
|
||
"passed": true,
|
||
"duration": 1016,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:38.976Z",
|
||
"passed": true,
|
||
"duration": 4651,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:39.914Z",
|
||
"passed": true,
|
||
"duration": 937,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:41.053Z",
|
||
"passed": true,
|
||
"duration": 1137,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:42.918Z",
|
||
"passed": true,
|
||
"duration": 1863,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:47.234Z",
|
||
"passed": true,
|
||
"duration": 4314,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:47.966Z",
|
||
"passed": true,
|
||
"duration": 730,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:20:48.941Z",
|
||
"passed": true,
|
||
"duration": 973,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:15.745Z",
|
||
"passed": true,
|
||
"duration": 1951,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:19.476Z",
|
||
"passed": true,
|
||
"duration": 3726,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:20.854Z",
|
||
"passed": true,
|
||
"duration": 1376,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:28.044Z",
|
||
"passed": true,
|
||
"duration": 7188,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:43.203Z",
|
||
"passed": true,
|
||
"duration": 15157,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:50.736Z",
|
||
"passed": true,
|
||
"duration": 7531,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:51.834Z",
|
||
"passed": true,
|
||
"duration": 1096,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:55.428Z",
|
||
"passed": true,
|
||
"duration": 3592,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:56.874Z",
|
||
"passed": true,
|
||
"duration": 1444,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:25:57.746Z",
|
||
"passed": true,
|
||
"duration": 870,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:08.731Z",
|
||
"passed": true,
|
||
"duration": 10983,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:14.379Z",
|
||
"passed": true,
|
||
"duration": 5646,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:15.658Z",
|
||
"passed": true,
|
||
"duration": 1276,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:21.428Z",
|
||
"passed": true,
|
||
"duration": 5768,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:22.358Z",
|
||
"passed": true,
|
||
"duration": 929,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:23.155Z",
|
||
"passed": true,
|
||
"duration": 794,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:38.566Z",
|
||
"passed": true,
|
||
"duration": 15409,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:26:40.358Z",
|
||
"passed": true,
|
||
"duration": 1790,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:44:06.429Z",
|
||
"passed": true,
|
||
"duration": 1689,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:44:10.240Z",
|
||
"passed": true,
|
||
"duration": 3807,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:44:11.128Z",
|
||
"passed": true,
|
||
"duration": 885,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:44:21.587Z",
|
||
"passed": true,
|
||
"duration": 10455,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:44:33.654Z",
|
||
"passed": true,
|
||
"duration": 12064,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"The sum of 5 and 3 is 8. Therefore, the result is \\boxed{8}."
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:44:40.062Z",
|
||
"passed": false,
|
||
"duration": 6405,
|
||
"reason": "Expected 8, but got the sum of 5 and 3 is 8. therefore, the result is \\boxed{8}.",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:44:41.261Z",
|
||
"passed": true,
|
||
"duration": 1190,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:44:46.272Z",
|
||
"passed": true,
|
||
"duration": 5008,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:44:47.386Z",
|
||
"passed": true,
|
||
"duration": 1111,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:44:48.372Z",
|
||
"passed": true,
|
||
"duration": 984,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24\n\n24\n\nThe result is 24.\n\n24\n\nHere's the answer: 24\n\nThe answer will be 24.\n\n24\n\n24\n\nThe product of 8 and 3 is 24.\n\n24\n\nThe answer is 24.\n\n24\n\n24\n\n24\n\nThe result is 24.\n\n24\n\nHere's the numerical result: 24\n\nThe answer is 24.\n\n24\n\n24\n\nThe answer is 24."
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:44:53.633Z",
|
||
"passed": false,
|
||
"duration": 5258,
|
||
"reason": "Expected 24, but got 24\n\n24\n\nthe result is 24.\n\n24\n\nhere's the answer: 24\n\nthe answer will be 24.\n\n24\n\n24\n\nthe product of 8 and 3 is 24.\n\n24\n\nthe answer is 24.\n\n24\n\n24\n\n24\n\nthe result is 24.\n\n24\n\nhere's the numerical result: 24\n\nthe answer is 24.\n\n24\n\n24\n\nthe answer is 24.",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:44:55.196Z",
|
||
"passed": true,
|
||
"duration": 1558,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:44:56.604Z",
|
||
"passed": true,
|
||
"duration": 1405,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:44:57.523Z",
|
||
"passed": true,
|
||
"duration": 917,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:44:58.630Z",
|
||
"passed": true,
|
||
"duration": 1104,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:44:59.523Z",
|
||
"passed": true,
|
||
"duration": 889,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:45:06.658Z",
|
||
"passed": true,
|
||
"duration": 7130,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:45:10.307Z",
|
||
"passed": true,
|
||
"duration": 3646,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:06:38.904Z",
|
||
"passed": true,
|
||
"duration": 2263,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:06:41.138Z",
|
||
"passed": true,
|
||
"duration": 2228,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:06:41.934Z",
|
||
"passed": true,
|
||
"duration": 791,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:06:43.239Z",
|
||
"passed": true,
|
||
"duration": 1300,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8 × 3 = 24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:06:46.025Z",
|
||
"passed": false,
|
||
"duration": 2782,
|
||
"reason": "Expected 24, but got 8 × 3 = 24",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:06:47.239Z",
|
||
"passed": true,
|
||
"duration": 1206,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:06:48.026Z",
|
||
"passed": true,
|
||
"duration": 783,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:06:51.012Z",
|
||
"passed": true,
|
||
"duration": 2982,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:06:51.777Z",
|
||
"passed": true,
|
||
"duration": 760,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:14:04.393Z",
|
||
"passed": true,
|
||
"duration": 1484,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:14:12.861Z",
|
||
"passed": true,
|
||
"duration": 8460,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:14:13.779Z",
|
||
"passed": true,
|
||
"duration": 910,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:14:14.740Z",
|
||
"passed": true,
|
||
"duration": 955,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:14:22.399Z",
|
||
"passed": true,
|
||
"duration": 7653,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:14:23.502Z",
|
||
"passed": true,
|
||
"duration": 1095,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:14:24.325Z",
|
||
"passed": true,
|
||
"duration": 816,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"15 divided by 3 is 5. \n\nAnswer: 5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:14:40.987Z",
|
||
"passed": false,
|
||
"duration": 16655,
|
||
"reason": "Expected 5, but got 15 divided by 3 is 5. \n\nanswer: 5",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:14:41.951Z",
|
||
"passed": true,
|
||
"duration": 954,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:36:55.754Z",
|
||
"passed": true,
|
||
"duration": 1505,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:36:59.232Z",
|
||
"passed": true,
|
||
"duration": 3470,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:37:00.080Z",
|
||
"passed": true,
|
||
"duration": 842,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:37:00.897Z",
|
||
"passed": true,
|
||
"duration": 811,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:37:01.784Z",
|
||
"passed": true,
|
||
"duration": 881,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:37:03.117Z",
|
||
"passed": false,
|
||
"duration": 1327,
|
||
"reason": "Model returned empty response",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:37:04.222Z",
|
||
"passed": true,
|
||
"duration": 1096,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:37:05.008Z",
|
||
"passed": true,
|
||
"duration": 780,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:37:05.799Z",
|
||
"passed": true,
|
||
"duration": 784,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:37:10.272Z",
|
||
"passed": true,
|
||
"duration": 4467,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:37:11.255Z",
|
||
"passed": true,
|
||
"duration": 975,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:37:11.993Z",
|
||
"passed": true,
|
||
"duration": 731,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:13:32.997Z",
|
||
"passed": true,
|
||
"duration": 1246,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:13:40.019Z",
|
||
"passed": true,
|
||
"duration": 7011,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:13:40.950Z",
|
||
"passed": true,
|
||
"duration": 922,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:13:41.833Z",
|
||
"passed": true,
|
||
"duration": 874,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:13:42.682Z",
|
||
"passed": true,
|
||
"duration": 840,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:13:51.918Z",
|
||
"passed": true,
|
||
"duration": 9227,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:13:52.915Z",
|
||
"passed": true,
|
||
"duration": 987,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:13:53.795Z",
|
||
"passed": true,
|
||
"duration": 871,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:13:55.036Z",
|
||
"passed": true,
|
||
"duration": 1229,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:13:59.410Z",
|
||
"passed": true,
|
||
"duration": 4364,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:14:00.446Z",
|
||
"passed": true,
|
||
"duration": 1028,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:14:01.478Z",
|
||
"passed": true,
|
||
"duration": 1023,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:18:33.556Z",
|
||
"passed": true,
|
||
"duration": 1293,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:18:36.782Z",
|
||
"passed": true,
|
||
"duration": 3215,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:18:37.574Z",
|
||
"passed": true,
|
||
"duration": 783,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:18:38.342Z",
|
||
"passed": true,
|
||
"duration": 760,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:18:39.174Z",
|
||
"passed": true,
|
||
"duration": 823,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:18:44.118Z",
|
||
"passed": true,
|
||
"duration": 4936,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:18:45.161Z",
|
||
"passed": true,
|
||
"duration": 1035,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:18:45.996Z",
|
||
"passed": true,
|
||
"duration": 827,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:18:50.222Z",
|
||
"passed": true,
|
||
"duration": 4216,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"The result of dividing 15 by 3 is 5.\n\n5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:18:58.164Z",
|
||
"passed": false,
|
||
"duration": 7931,
|
||
"reason": "Expected 5, but got the result of dividing 15 by 3 is 5.\n\n5",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:19:04.862Z",
|
||
"passed": true,
|
||
"duration": 6684,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:19:05.910Z",
|
||
"passed": true,
|
||
"duration": 1038,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:20:15.545Z",
|
||
"passed": true,
|
||
"duration": 1197,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:20:27.125Z",
|
||
"passed": true,
|
||
"duration": 11570,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:20:28.062Z",
|
||
"passed": true,
|
||
"duration": 927,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:20:28.801Z",
|
||
"passed": true,
|
||
"duration": 729,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:20:29.675Z",
|
||
"passed": true,
|
||
"duration": 863,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:20:45.996Z",
|
||
"passed": true,
|
||
"duration": 16310,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:20:47.110Z",
|
||
"passed": true,
|
||
"duration": 1105,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:20:48.079Z",
|
||
"passed": true,
|
||
"duration": 960,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T14:20:48.991Z",
|
||
"passed": true,
|
||
"duration": 901,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"15 ÷ 3 equals 5.\n\n5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T14:21:00.413Z",
|
||
"passed": false,
|
||
"duration": 11412,
|
||
"reason": "Expected 5, but got 15 ÷ 3 equals 5.\n\n5",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T14:21:01.284Z",
|
||
"passed": true,
|
||
"duration": 856,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T14:21:02.046Z",
|
||
"passed": true,
|
||
"duration": 749,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:30:34.409Z",
|
||
"passed": true,
|
||
"duration": 1182,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"The result of adding 5 and 3 is:\n\n\\[\n5 + 3 = \\boxed{8}\n\\]"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:30:36.894Z",
|
||
"passed": false,
|
||
"duration": 2473,
|
||
"reason": "Expected 8, but got the result of adding 5 and 3 is:\n\n\\[\n5 + 3 = \\boxed{8}\n\\]",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:30:37.616Z",
|
||
"passed": true,
|
||
"duration": 709,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:30:38.429Z",
|
||
"passed": true,
|
||
"duration": 803,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:30:39.355Z",
|
||
"passed": true,
|
||
"duration": 916,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:30:45.589Z",
|
||
"passed": true,
|
||
"duration": 6224,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:30:46.831Z",
|
||
"passed": true,
|
||
"duration": 1232,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:30:52.602Z",
|
||
"passed": true,
|
||
"duration": 5757,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:30:53.506Z",
|
||
"passed": true,
|
||
"duration": 893,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:30:55.129Z",
|
||
"passed": true,
|
||
"duration": 1612,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:30:55.819Z",
|
||
"passed": true,
|
||
"duration": 680,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:30:56.860Z",
|
||
"passed": true,
|
||
"duration": 1031,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:42:04.567Z",
|
||
"passed": true,
|
||
"duration": 1157,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:42:08.794Z",
|
||
"passed": true,
|
||
"duration": 4214,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:42:09.531Z",
|
||
"passed": true,
|
||
"duration": 726,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "addition",
|
||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:42:10.356Z",
|
||
"passed": true,
|
||
"duration": 814,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:42:11.193Z",
|
||
"passed": true,
|
||
"duration": 826,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:42:17.389Z",
|
||
"passed": true,
|
||
"duration": 6184,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:42:18.260Z",
|
||
"passed": true,
|
||
"duration": 856,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"24"
|
||
],
|
||
"expected": "24",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:42:18.956Z",
|
||
"passed": true,
|
||
"duration": 684,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:42:19.758Z",
|
||
"passed": true,
|
||
"duration": 790,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:42:21.273Z",
|
||
"passed": true,
|
||
"duration": 1502,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:42:22.107Z",
|
||
"passed": true,
|
||
"duration": 823,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "division",
|
||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "5",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:42:22.974Z",
|
||
"passed": true,
|
||
"duration": 855,
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "web_content",
|
||
"prompt": "Check if the content contains a section about Human prehistory. Reply with \"yes\" if it does, \"no\" if it does not.",
|
||
"result": [],
|
||
"expected": "yes",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-06T15:42:26.297Z",
|
||
"passed": false,
|
||
"duration": 3311,
|
||
"reason": "Model returned empty response",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "web_content",
|
||
"prompt": "Check if the content contains a section about Human prehistory. Reply with \"yes\" if it does, \"no\" if it does not.",
|
||
"result": [],
|
||
"expected": "yes",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-06T15:42:26.573Z",
|
||
"passed": false,
|
||
"duration": 263,
|
||
"reason": "Model returned empty response",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "web_content",
|
||
"prompt": "Check if the content contains a section about Human prehistory. Reply with \"yes\" if it does, \"no\" if it does not.",
|
||
"result": [],
|
||
"expected": "yes",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-06T15:42:36.633Z",
|
||
"passed": false,
|
||
"duration": 10048,
|
||
"reason": "Model returned empty response",
|
||
"category": "basic"
|
||
},
|
||
{
|
||
"test": "web_content",
|
||
"prompt": "Check if the content contains a section about Human prehistory. Reply with \"yes\" if it does, \"no\" if it does not.",
|
||
"result": [],
|
||
"expected": "yes",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-06T15:42:44.951Z",
|
||
"passed": false,
|
||
"duration": 8305,
|
||
"reason": "Model returned empty response",
|
||
"category": "basic"
|
||
}
|
||
],
|
||
"highscores": [
|
||
{
|
||
"test": "addition",
|
||
"rankings": [
|
||
{
|
||
"model": "openai/gpt-4o-mini",
|
||
"duration": 726,
|
||
"duration_secs": 0.726
|
||
},
|
||
{
|
||
"model": "openrouter/quasar-alpha",
|
||
"duration": 814,
|
||
"duration_secs": 0.814
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "multiplication",
|
||
"rankings": [
|
||
{
|
||
"model": "openrouter/quasar-alpha",
|
||
"duration": 684,
|
||
"duration_secs": 0.684
|
||
},
|
||
{
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"duration": 826,
|
||
"duration_secs": 0.826
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "division",
|
||
"rankings": [
|
||
{
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"duration": 790,
|
||
"duration_secs": 0.79
|
||
},
|
||
{
|
||
"model": "openai/gpt-4o-mini",
|
||
"duration": 823,
|
||
"duration_secs": 0.823
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "web_content",
|
||
"rankings": [
|
||
{
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"duration": 263,
|
||
"duration_secs": 0.263
|
||
},
|
||
{
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"duration": 3311,
|
||
"duration_secs": 3.311
|
||
}
|
||
]
|
||
}
|
||
],
|
||
"lastUpdated": "2025-04-06T15:42:44.952Z"
|
||
} |