80 lines
1.8 KiB
JSON
80 lines
1.8 KiB
JSON
[
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:39.340Z",
|
|
"passed": true,
|
|
"duration": 1551
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:42.962Z",
|
|
"passed": true,
|
|
"duration": 3621
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:43.836Z",
|
|
"passed": true,
|
|
"duration": 873
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:47.309Z",
|
|
"passed": true,
|
|
"duration": 3472
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:48.493Z",
|
|
"passed": true,
|
|
"duration": 1183
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-01T22:17:53.335Z",
|
|
"passed": true,
|
|
"duration": 4841
|
|
}
|
|
] |