208 lines
5.0 KiB
JSON
208 lines
5.0 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:56:09.502Z",
|
|
"passed": true,
|
|
"duration": 1237
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:56:13.802Z",
|
|
"passed": true,
|
|
"duration": 4298
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:56:15.214Z",
|
|
"passed": true,
|
|
"duration": 1411
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:56:18.337Z",
|
|
"passed": true,
|
|
"duration": 3122
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:56:18.922Z",
|
|
"passed": true,
|
|
"duration": 583
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T10:56:22.539Z",
|
|
"passed": true,
|
|
"duration": 3615
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T11:01:08.904Z",
|
|
"passed": true,
|
|
"duration": 1888
|
|
},
|
|
{
|
|
"test": "addition",
|
|
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"8"
|
|
],
|
|
"expected": "8",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T11:01:15.210Z",
|
|
"passed": true,
|
|
"duration": 6304
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T11:01:16.502Z",
|
|
"passed": true,
|
|
"duration": 1291
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"24"
|
|
],
|
|
"expected": "24",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T11:01:18.728Z",
|
|
"passed": true,
|
|
"duration": 2225
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T11:01:19.938Z",
|
|
"passed": true,
|
|
"duration": 1209
|
|
},
|
|
{
|
|
"test": "division",
|
|
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
|
"result": [
|
|
"5"
|
|
],
|
|
"expected": "5",
|
|
"model": "qwen/qwq-32b",
|
|
"router": "openrouter",
|
|
"timestamp": "2025-04-02T11:01:27.791Z",
|
|
"passed": true,
|
|
"duration": 7852
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "addition",
|
|
"rankings": [
|
|
{
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"duration": 1888,
|
|
"duration_secs": 1.888
|
|
},
|
|
{
|
|
"model": "qwen/qwq-32b",
|
|
"duration": 6304,
|
|
"duration_secs": 6.304
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "multiplication",
|
|
"rankings": [
|
|
{
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"duration": 1291,
|
|
"duration_secs": 1.291
|
|
},
|
|
{
|
|
"model": "qwen/qwq-32b",
|
|
"duration": 2225,
|
|
"duration_secs": 2.225
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "division",
|
|
"rankings": [
|
|
{
|
|
"model": "anthropic/claude-3.5-sonnet",
|
|
"duration": 1209,
|
|
"duration_secs": 1.209
|
|
},
|
|
{
|
|
"model": "qwen/qwq-32b",
|
|
"duration": 7852,
|
|
"duration_secs": 7.852
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-04-02T11:01:27.792Z"
|
|
} |