mono/packages/kbot/tests/unit/reports/basic.json
2025-04-02 13:04:20 +02:00

208 lines
5.0 KiB
JSON

{
"results": [
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:09.502Z",
"passed": true,
"duration": 1237
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:13.802Z",
"passed": true,
"duration": 4298
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:15.214Z",
"passed": true,
"duration": 1411
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:18.337Z",
"passed": true,
"duration": 3122
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:18.922Z",
"passed": true,
"duration": 583
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T10:56:22.539Z",
"passed": true,
"duration": 3615
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:08.904Z",
"passed": true,
"duration": 1888
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:15.210Z",
"passed": true,
"duration": 6304
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:16.502Z",
"passed": true,
"duration": 1291
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:18.728Z",
"passed": true,
"duration": 2225
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:19.938Z",
"passed": true,
"duration": 1209
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:01:27.791Z",
"passed": true,
"duration": 7852
}
],
"highscores": [
{
"test": "addition",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 1888,
"duration_secs": 1.888
},
{
"model": "qwen/qwq-32b",
"duration": 6304,
"duration_secs": 6.304
}
]
},
{
"test": "multiplication",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 1291,
"duration_secs": 1.291
},
{
"model": "qwen/qwq-32b",
"duration": 2225,
"duration_secs": 2.225
}
]
},
{
"test": "division",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 1209,
"duration_secs": 1.209
},
{
"model": "qwen/qwq-32b",
"duration": 7852,
"duration_secs": 7.852
}
]
}
],
"lastUpdated": "2025-04-02T11:01:27.792Z"
}