mono/packages/kbot/tests/unit/reports/math.json
2025-04-02 16:26:00 +02:00

2265 lines
68 KiB
JSON

{
"results": [
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:09:36.865Z",
"passed": false,
"duration": 1944,
"reason": "Expected -3,-2, but got -2,-3"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3,-2"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:09:47.481Z",
"passed": true,
"duration": 10608
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:09:49.153Z",
"passed": true,
"duration": 1671
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [],
"expected": "120",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:03.043Z",
"passed": false,
"duration": 13889,
"reason": "Model returned empty response"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:03.988Z",
"passed": true,
"duration": 943
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:05.723Z",
"passed": false,
"duration": 1734,
"reason": "Expected 8, but got 5"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:07.465Z",
"passed": true,
"duration": 1739
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:13.671Z",
"passed": true,
"duration": 6205
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:14.967Z",
"passed": true,
"duration": 1295
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:10:20.932Z",
"passed": true,
"duration": 5964
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:13:10.276Z",
"passed": false,
"duration": 1242,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:13:31.650Z",
"passed": false,
"duration": 21368,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:13:34.699Z",
"passed": true,
"duration": 3046,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:13:45.957Z",
"passed": true,
"duration": 11256,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:13:47.935Z",
"passed": true,
"duration": 1976,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:14:07.714Z",
"passed": false,
"duration": 19778,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:14:08.883Z",
"passed": true,
"duration": 1167,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:14:12.225Z",
"passed": true,
"duration": 3341,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:14:12.889Z",
"passed": true,
"duration": 663,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:14:32.527Z",
"passed": true,
"duration": 19636,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:10.419Z",
"passed": false,
"duration": 1650,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3,-2"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:20.647Z",
"passed": true,
"duration": 10222,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:21.643Z",
"passed": false,
"duration": 994,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:22.524Z",
"passed": false,
"duration": 878,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:23.496Z",
"passed": true,
"duration": 970,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:28.452Z",
"passed": true,
"duration": 4954,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:29.325Z",
"passed": true,
"duration": 872,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:30.109Z",
"passed": true,
"duration": 782,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:33.902Z",
"passed": true,
"duration": 3791,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:46.225Z",
"passed": false,
"duration": 12322,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:47.202Z",
"passed": false,
"duration": 974,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:48.005Z",
"passed": true,
"duration": 800,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:48.763Z",
"passed": true,
"duration": 756,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:55.510Z",
"passed": true,
"duration": 6745,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:56.297Z",
"passed": true,
"duration": 785,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:57.051Z",
"passed": true,
"duration": 751,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:22:58.294Z",
"passed": true,
"duration": 1241,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:23:04.551Z",
"passed": false,
"duration": 6255,
"reason": "Model returned empty response",
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:23:05.297Z",
"passed": true,
"duration": 743,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:23:06.018Z",
"passed": true,
"duration": 719,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "openrouter",
"timestamp": "2025-04-02T11:24:32.237Z",
"passed": false,
"duration": 1533,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3, -2"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "openrouter",
"timestamp": "2025-04-02T11:24:50.178Z",
"passed": false,
"duration": 17934,
"reason": "Expected -3,-2, but got -3, -2",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openrouter",
"timestamp": "2025-04-02T11:24:51.040Z",
"passed": false,
"duration": 859,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openrouter",
"timestamp": "2025-04-02T11:24:51.938Z",
"passed": false,
"duration": 895,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:22:28.224Z",
"passed": false,
"duration": 1311,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:22:58.238Z",
"passed": false,
"duration": 30008,
"error": {
"message": "API call timed out",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
"message": "API call timed out"
}
},
"reason": "API call timed out",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:22:59.263Z",
"passed": false,
"duration": 1022,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3, -2"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:23:00.561Z",
"passed": false,
"duration": 1294,
"reason": "Expected -3,-2, but got -3, -2",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:23:17.772Z",
"passed": false,
"duration": 17208,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:23:28.819Z",
"passed": false,
"duration": 11043,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:23:30.914Z",
"passed": true,
"duration": 2093,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:23:36.265Z",
"passed": true,
"duration": 5349,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:23:37.084Z",
"passed": true,
"duration": 816,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:23:38.020Z",
"passed": true,
"duration": 934,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:23:49.147Z",
"passed": true,
"duration": 11125,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:23:52.904Z",
"passed": true,
"duration": 3755,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:23:54.223Z",
"passed": true,
"duration": 1316,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:24:24.234Z",
"passed": false,
"duration": 30009,
"error": {
"message": "API call timed out",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
"message": "API call timed out"
}
},
"reason": "API call timed out",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:24:25.494Z",
"passed": false,
"duration": 1257,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:24:26.272Z",
"passed": true,
"duration": 776,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:24:56.284Z",
"passed": false,
"duration": 30009,
"error": {
"message": "API call timed out",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
"message": "API call timed out"
}
},
"reason": "API call timed out",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"The 6th number in the Fibonacci sequence is 5."
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:25:06.273Z",
"passed": false,
"duration": 9986,
"reason": "Expected 8, but got the 6th number in the fibonacci sequence is 5.",
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:25:07.072Z",
"passed": true,
"duration": 795,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:25:12.207Z",
"passed": true,
"duration": 5133,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:25:13.308Z",
"passed": true,
"duration": 1099,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:25:14.724Z",
"passed": true,
"duration": 1414,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:25:24.633Z",
"passed": true,
"duration": 9907,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:25:28.285Z",
"passed": true,
"duration": 3650,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:25:31.067Z",
"passed": true,
"duration": 2779,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:25:35.904Z",
"passed": true,
"duration": 4834,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:25:36.993Z",
"passed": true,
"duration": 1087,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:25:37.796Z",
"passed": true,
"duration": 800,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:25:48.323Z",
"passed": true,
"duration": 10524,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"The result of 2 raised to the power of 3 is 8.\n\nAnswer: 8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:26:02.730Z",
"passed": false,
"duration": 14405,
"reason": "Expected 8, but got the result of 2 raised to the power of 3 is 8.\n\nanswer: 8",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:30:35.457Z",
"passed": false,
"duration": 3064,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3,-2"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:30:47.981Z",
"passed": true,
"duration": 12517,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:30:49.084Z",
"passed": false,
"duration": 1100,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:30:50.009Z",
"passed": false,
"duration": 921,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:31:24.046Z",
"passed": false,
"duration": 2341,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:32:31.578Z",
"passed": false,
"duration": 3137,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:32:51.448Z",
"passed": false,
"duration": 1892,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3,-2"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:32:58.250Z",
"passed": true,
"duration": 6795,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:32:59.105Z",
"passed": false,
"duration": 853,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:32:59.942Z",
"passed": false,
"duration": 832,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:33:19.796Z",
"passed": false,
"duration": 19850,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"The solutions to the quadratic equation x² + 5x + 6 = 0 are x = -2 and x = -3.\n\n-2,-3"
],
"expected": "-3,-2",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:33:35.611Z",
"passed": false,
"duration": 15811,
"reason": "Expected -3,-2, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are x = -2 and x = -3.\n\n-2,-3",
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:33:37.469Z",
"passed": true,
"duration": 1853,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:33:44.364Z",
"passed": true,
"duration": 6892,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:33:45.323Z",
"passed": true,
"duration": 956,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:33:46.153Z",
"passed": true,
"duration": 827,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:33:57.349Z",
"passed": true,
"duration": 11193,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:34:02.166Z",
"passed": true,
"duration": 4814,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:34:04.174Z",
"passed": true,
"duration": 2004,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:34:05.686Z",
"passed": false,
"duration": 1509,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:34:07.363Z",
"passed": true,
"duration": 1673,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:34:08.909Z",
"passed": true,
"duration": 1543,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:34:38.921Z",
"passed": false,
"duration": 30009,
"error": {
"message": "API call timed out",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
"message": "API call timed out"
}
},
"reason": "API call timed out",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:34:44.095Z",
"passed": false,
"duration": 5171,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:34:46.111Z",
"passed": true,
"duration": 2012,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:34:52.001Z",
"passed": true,
"duration": 5888,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:34:52.968Z",
"passed": true,
"duration": 964,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:34:54.051Z",
"passed": true,
"duration": 1080,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:35:04.364Z",
"passed": true,
"duration": 10309,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:35:10.480Z",
"passed": true,
"duration": 6114,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:35:11.619Z",
"passed": true,
"duration": 1136,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:35:19.194Z",
"passed": true,
"duration": 7572,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:35:20.455Z",
"passed": true,
"duration": 1259,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:35:21.956Z",
"passed": true,
"duration": 1498,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:35:27.372Z",
"passed": true,
"duration": 5414,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:35:37.266Z",
"passed": true,
"duration": 9891,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:36:22.644Z",
"passed": false,
"duration": 1358,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:36:31.987Z",
"passed": false,
"duration": 1375,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:37:00.757Z",
"passed": false,
"duration": 1589,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:37:38.502Z",
"passed": false,
"duration": 2344,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:37:51.538Z",
"passed": false,
"duration": 2010,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:37:59.511Z",
"passed": false,
"duration": 1399,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:39:33.082Z",
"passed": false,
"duration": 1792,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:40:21.065Z",
"passed": false,
"duration": 1221,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:40:47.110Z",
"passed": false,
"duration": 2367,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3, -2"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:40:56.377Z",
"passed": false,
"duration": 9259,
"reason": "Expected -3,-2, but got -3, -2",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:40:57.391Z",
"passed": false,
"duration": 1010,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:40:58.209Z",
"passed": false,
"duration": 815,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:41:02.148Z",
"passed": false,
"duration": 3934,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"The solutions to the equation x² + 5x + 6 = 0 are -2 and -3.\n\n-2, -3"
],
"expected": "-3,-2",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:41:10.265Z",
"passed": false,
"duration": 8112,
"reason": "Expected -3,-2, but got the solutions to the equation x² + 5x + 6 = 0 are -2 and -3.\n\n-2, -3",
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:41:11.723Z",
"passed": true,
"duration": 1454,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:41:21.969Z",
"passed": true,
"duration": 10242,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:41:22.848Z",
"passed": true,
"duration": 876,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:41:23.636Z",
"passed": true,
"duration": 785,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:41:30.847Z",
"passed": true,
"duration": 7208,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
"result": [
"5! is equal to 120. \n\nAnswer: 120"
],
"expected": "120",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:41:34.615Z",
"passed": false,
"duration": 3765,
"reason": "Expected 120, but got 5! is equal to 120. \n\nanswer: 120",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:41:36.384Z",
"passed": true,
"duration": 1765,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:41:56.369Z",
"passed": false,
"duration": 19981,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:41:57.522Z",
"passed": true,
"duration": 1149,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:41:58.423Z",
"passed": true,
"duration": 898,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [],
"expected": "8",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:42:28.442Z",
"passed": false,
"duration": 30014,
"error": {
"message": "API call timed out",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:138:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
"message": "API call timed out"
}
},
"reason": "API call timed out",
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:42:33.805Z",
"passed": false,
"duration": 5358,
"reason": "Expected 8, but got 5",
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:42:34.628Z",
"passed": true,
"duration": 819,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:42:35.522Z",
"passed": true,
"duration": 890,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:42:36.791Z",
"passed": true,
"duration": 1266,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:42:37.938Z",
"passed": true,
"duration": 1144,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "deepseek/deepseek-r1",
"router": "deepseek/deepseek-r1",
"timestamp": "2025-04-02T13:42:42.967Z",
"passed": true,
"duration": 5026,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
"result": [
"4"
],
"expected": "4",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T13:42:51.135Z",
"passed": true,
"duration": 8165,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:42:52.650Z",
"passed": true,
"duration": 1512,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:42:57.896Z",
"passed": true,
"duration": 5243,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:42:58.774Z",
"passed": true,
"duration": 875,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:42:59.643Z",
"passed": true,
"duration": 866,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-3.5-sonnet",
"router": "anthropic/claude-3.5-sonnet",
"timestamp": "2025-04-02T13:43:13.962Z",
"passed": false,
"duration": 1435,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "qwen/qwq-32b",
"router": "qwen/qwq-32b",
"timestamp": "2025-04-02T13:43:39.174Z",
"passed": false,
"duration": 25202,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-2, -3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T13:43:40.474Z",
"passed": false,
"duration": 1295,
"reason": "Expected -3,-2, but got -2, -3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
"result": [
"-3, -2"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T13:43:41.957Z",
"passed": false,
"duration": 1478,
"reason": "Expected -3,-2, but got -3, -2",
"category": "math"
}
],
"highscores": [
{
"test": "quadratic",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 1295,
"duration_secs": 1.295
},
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 1435,
"duration_secs": 1.435
}
]
},
{
"test": "factorial",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 785,
"duration_secs": 0.785
},
{
"model": "openai/gpt-4o-mini",
"duration": 876,
"duration_secs": 0.876
}
]
},
{
"test": "fibonacci",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 898,
"duration_secs": 0.898
},
{
"model": "openai/gpt-4o-mini",
"duration": 1149,
"duration_secs": 1.149
}
]
},
{
"test": "square_root",
"rankings": [
{
"model": "anthropic/claude-3.5-sonnet",
"duration": 819,
"duration_secs": 0.819
},
{
"model": "qwen/qwq-32b",
"duration": 890,
"duration_secs": 0.89
}
]
},
{
"test": "power",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 866,
"duration_secs": 0.866
},
{
"model": "openai/gpt-4o-mini",
"duration": 875,
"duration_secs": 0.875
}
]
}
],
"lastUpdated": "2025-04-02T13:43:41.958Z"
}