3195 lines
96 KiB
JSON
3195 lines
96 KiB
JSON
{
|
||
"results": [
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:09:36.865Z",
|
||
"passed": false,
|
||
"duration": 1944,
|
||
"reason": "Expected -3,-2, but got -2,-3"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3,-2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:09:47.481Z",
|
||
"passed": true,
|
||
"duration": 10608
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:09:49.153Z",
|
||
"passed": true,
|
||
"duration": 1671
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "120",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:03.043Z",
|
||
"passed": false,
|
||
"duration": 13889,
|
||
"reason": "Model returned empty response"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:03.988Z",
|
||
"passed": true,
|
||
"duration": 943
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:05.723Z",
|
||
"passed": false,
|
||
"duration": 1734,
|
||
"reason": "Expected 8, but got 5"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:07.465Z",
|
||
"passed": true,
|
||
"duration": 1739
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:13.671Z",
|
||
"passed": true,
|
||
"duration": 6205
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:14.967Z",
|
||
"passed": true,
|
||
"duration": 1295
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:10:20.932Z",
|
||
"passed": true,
|
||
"duration": 5964
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:13:10.276Z",
|
||
"passed": false,
|
||
"duration": 1242,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:13:31.650Z",
|
||
"passed": false,
|
||
"duration": 21368,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:13:34.699Z",
|
||
"passed": true,
|
||
"duration": 3046,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:13:45.957Z",
|
||
"passed": true,
|
||
"duration": 11256,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:13:47.935Z",
|
||
"passed": true,
|
||
"duration": 1976,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:14:07.714Z",
|
||
"passed": false,
|
||
"duration": 19778,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:14:08.883Z",
|
||
"passed": true,
|
||
"duration": 1167,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:14:12.225Z",
|
||
"passed": true,
|
||
"duration": 3341,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:14:12.889Z",
|
||
"passed": true,
|
||
"duration": 663,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:14:32.527Z",
|
||
"passed": true,
|
||
"duration": 19636,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:10.419Z",
|
||
"passed": false,
|
||
"duration": 1650,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3,-2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:20.647Z",
|
||
"passed": true,
|
||
"duration": 10222,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:21.643Z",
|
||
"passed": false,
|
||
"duration": 994,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:22.524Z",
|
||
"passed": false,
|
||
"duration": 878,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:23.496Z",
|
||
"passed": true,
|
||
"duration": 970,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:28.452Z",
|
||
"passed": true,
|
||
"duration": 4954,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:29.325Z",
|
||
"passed": true,
|
||
"duration": 872,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:30.109Z",
|
||
"passed": true,
|
||
"duration": 782,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:33.902Z",
|
||
"passed": true,
|
||
"duration": 3791,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:46.225Z",
|
||
"passed": false,
|
||
"duration": 12322,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:47.202Z",
|
||
"passed": false,
|
||
"duration": 974,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:48.005Z",
|
||
"passed": true,
|
||
"duration": 800,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:48.763Z",
|
||
"passed": true,
|
||
"duration": 756,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:55.510Z",
|
||
"passed": true,
|
||
"duration": 6745,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:56.297Z",
|
||
"passed": true,
|
||
"duration": 785,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:57.051Z",
|
||
"passed": true,
|
||
"duration": 751,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:22:58.294Z",
|
||
"passed": true,
|
||
"duration": 1241,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:23:04.551Z",
|
||
"passed": false,
|
||
"duration": 6255,
|
||
"reason": "Model returned empty response",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:23:05.297Z",
|
||
"passed": true,
|
||
"duration": 743,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:23:06.018Z",
|
||
"passed": true,
|
||
"duration": 719,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:24:32.237Z",
|
||
"passed": false,
|
||
"duration": 1533,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3, -2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:24:50.178Z",
|
||
"passed": false,
|
||
"duration": 17934,
|
||
"reason": "Expected -3,-2, but got -3, -2",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:24:51.040Z",
|
||
"passed": false,
|
||
"duration": 859,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openrouter",
|
||
"timestamp": "2025-04-02T11:24:51.938Z",
|
||
"passed": false,
|
||
"duration": 895,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:22:28.224Z",
|
||
"passed": false,
|
||
"duration": 1311,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:22:58.238Z",
|
||
"passed": false,
|
||
"duration": 30008,
|
||
"error": {
|
||
"message": "API call timed out",
|
||
"code": "UNKNOWN",
|
||
"type": "Error",
|
||
"details": {
|
||
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
|
||
"message": "API call timed out"
|
||
}
|
||
},
|
||
"reason": "API call timed out",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:22:59.263Z",
|
||
"passed": false,
|
||
"duration": 1022,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3, -2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:23:00.561Z",
|
||
"passed": false,
|
||
"duration": 1294,
|
||
"reason": "Expected -3,-2, but got -3, -2",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:23:17.772Z",
|
||
"passed": false,
|
||
"duration": 17208,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:23:28.819Z",
|
||
"passed": false,
|
||
"duration": 11043,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:23:30.914Z",
|
||
"passed": true,
|
||
"duration": 2093,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:23:36.265Z",
|
||
"passed": true,
|
||
"duration": 5349,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:23:37.084Z",
|
||
"passed": true,
|
||
"duration": 816,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:23:38.020Z",
|
||
"passed": true,
|
||
"duration": 934,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:23:49.147Z",
|
||
"passed": true,
|
||
"duration": 11125,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:23:52.904Z",
|
||
"passed": true,
|
||
"duration": 3755,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:23:54.223Z",
|
||
"passed": true,
|
||
"duration": 1316,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:24:24.234Z",
|
||
"passed": false,
|
||
"duration": 30009,
|
||
"error": {
|
||
"message": "API call timed out",
|
||
"code": "UNKNOWN",
|
||
"type": "Error",
|
||
"details": {
|
||
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
|
||
"message": "API call timed out"
|
||
}
|
||
},
|
||
"reason": "API call timed out",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:24:25.494Z",
|
||
"passed": false,
|
||
"duration": 1257,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:24:26.272Z",
|
||
"passed": true,
|
||
"duration": 776,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:24:56.284Z",
|
||
"passed": false,
|
||
"duration": 30009,
|
||
"error": {
|
||
"message": "API call timed out",
|
||
"code": "UNKNOWN",
|
||
"type": "Error",
|
||
"details": {
|
||
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
|
||
"message": "API call timed out"
|
||
}
|
||
},
|
||
"reason": "API call timed out",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"The 6th number in the Fibonacci sequence is 5."
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:25:06.273Z",
|
||
"passed": false,
|
||
"duration": 9986,
|
||
"reason": "Expected 8, but got the 6th number in the fibonacci sequence is 5.",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:25:07.072Z",
|
||
"passed": true,
|
||
"duration": 795,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:25:12.207Z",
|
||
"passed": true,
|
||
"duration": 5133,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:25:13.308Z",
|
||
"passed": true,
|
||
"duration": 1099,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:25:14.724Z",
|
||
"passed": true,
|
||
"duration": 1414,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:25:24.633Z",
|
||
"passed": true,
|
||
"duration": 9907,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:25:28.285Z",
|
||
"passed": true,
|
||
"duration": 3650,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:25:31.067Z",
|
||
"passed": true,
|
||
"duration": 2779,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:25:35.904Z",
|
||
"passed": true,
|
||
"duration": 4834,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:25:36.993Z",
|
||
"passed": true,
|
||
"duration": 1087,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:25:37.796Z",
|
||
"passed": true,
|
||
"duration": 800,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:25:48.323Z",
|
||
"passed": true,
|
||
"duration": 10524,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"The result of 2 raised to the power of 3 is 8.\n\nAnswer: 8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:26:02.730Z",
|
||
"passed": false,
|
||
"duration": 14405,
|
||
"reason": "Expected 8, but got the result of 2 raised to the power of 3 is 8.\n\nanswer: 8",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:30:35.457Z",
|
||
"passed": false,
|
||
"duration": 3064,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3,-2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:30:47.981Z",
|
||
"passed": true,
|
||
"duration": 12517,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:30:49.084Z",
|
||
"passed": false,
|
||
"duration": 1100,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:30:50.009Z",
|
||
"passed": false,
|
||
"duration": 921,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:31:24.046Z",
|
||
"passed": false,
|
||
"duration": 2341,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:32:31.578Z",
|
||
"passed": false,
|
||
"duration": 3137,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:32:51.448Z",
|
||
"passed": false,
|
||
"duration": 1892,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3,-2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:32:58.250Z",
|
||
"passed": true,
|
||
"duration": 6795,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:32:59.105Z",
|
||
"passed": false,
|
||
"duration": 853,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:32:59.942Z",
|
||
"passed": false,
|
||
"duration": 832,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:33:19.796Z",
|
||
"passed": false,
|
||
"duration": 19850,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"The solutions to the quadratic equation x² + 5x + 6 = 0 are x = -2 and x = -3.\n\n-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:33:35.611Z",
|
||
"passed": false,
|
||
"duration": 15811,
|
||
"reason": "Expected -3,-2, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are x = -2 and x = -3.\n\n-2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:33:37.469Z",
|
||
"passed": true,
|
||
"duration": 1853,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:33:44.364Z",
|
||
"passed": true,
|
||
"duration": 6892,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:33:45.323Z",
|
||
"passed": true,
|
||
"duration": 956,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:33:46.153Z",
|
||
"passed": true,
|
||
"duration": 827,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:33:57.349Z",
|
||
"passed": true,
|
||
"duration": 11193,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:34:02.166Z",
|
||
"passed": true,
|
||
"duration": 4814,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:34:04.174Z",
|
||
"passed": true,
|
||
"duration": 2004,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:34:05.686Z",
|
||
"passed": false,
|
||
"duration": 1509,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:34:07.363Z",
|
||
"passed": true,
|
||
"duration": 1673,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:34:08.909Z",
|
||
"passed": true,
|
||
"duration": 1543,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:34:38.921Z",
|
||
"passed": false,
|
||
"duration": 30009,
|
||
"error": {
|
||
"message": "API call timed out",
|
||
"code": "UNKNOWN",
|
||
"type": "Error",
|
||
"details": {
|
||
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:137:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
|
||
"message": "API call timed out"
|
||
}
|
||
},
|
||
"reason": "API call timed out",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:34:44.095Z",
|
||
"passed": false,
|
||
"duration": 5171,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:34:46.111Z",
|
||
"passed": true,
|
||
"duration": 2012,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:34:52.001Z",
|
||
"passed": true,
|
||
"duration": 5888,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:34:52.968Z",
|
||
"passed": true,
|
||
"duration": 964,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:34:54.051Z",
|
||
"passed": true,
|
||
"duration": 1080,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:35:04.364Z",
|
||
"passed": true,
|
||
"duration": 10309,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:35:10.480Z",
|
||
"passed": true,
|
||
"duration": 6114,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:35:11.619Z",
|
||
"passed": true,
|
||
"duration": 1136,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:35:19.194Z",
|
||
"passed": true,
|
||
"duration": 7572,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:35:20.455Z",
|
||
"passed": true,
|
||
"duration": 1259,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:35:21.956Z",
|
||
"passed": true,
|
||
"duration": 1498,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:35:27.372Z",
|
||
"passed": true,
|
||
"duration": 5414,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:35:37.266Z",
|
||
"passed": true,
|
||
"duration": 9891,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:36:22.644Z",
|
||
"passed": false,
|
||
"duration": 1358,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:36:31.987Z",
|
||
"passed": false,
|
||
"duration": 1375,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:37:00.757Z",
|
||
"passed": false,
|
||
"duration": 1589,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:37:38.502Z",
|
||
"passed": false,
|
||
"duration": 2344,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:37:51.538Z",
|
||
"passed": false,
|
||
"duration": 2010,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:37:59.511Z",
|
||
"passed": false,
|
||
"duration": 1399,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:39:33.082Z",
|
||
"passed": false,
|
||
"duration": 1792,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:40:21.065Z",
|
||
"passed": false,
|
||
"duration": 1221,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:40:47.110Z",
|
||
"passed": false,
|
||
"duration": 2367,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3, -2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:40:56.377Z",
|
||
"passed": false,
|
||
"duration": 9259,
|
||
"reason": "Expected -3,-2, but got -3, -2",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:40:57.391Z",
|
||
"passed": false,
|
||
"duration": 1010,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:40:58.209Z",
|
||
"passed": false,
|
||
"duration": 815,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:41:02.148Z",
|
||
"passed": false,
|
||
"duration": 3934,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"The solutions to the equation x² + 5x + 6 = 0 are -2 and -3.\n\n-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:41:10.265Z",
|
||
"passed": false,
|
||
"duration": 8112,
|
||
"reason": "Expected -3,-2, but got the solutions to the equation x² + 5x + 6 = 0 are -2 and -3.\n\n-2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:41:11.723Z",
|
||
"passed": true,
|
||
"duration": 1454,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:41:21.969Z",
|
||
"passed": true,
|
||
"duration": 10242,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:41:22.848Z",
|
||
"passed": true,
|
||
"duration": 876,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:41:23.636Z",
|
||
"passed": true,
|
||
"duration": 785,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:41:30.847Z",
|
||
"passed": true,
|
||
"duration": 7208,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"5! is equal to 120. \n\nAnswer: 120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:41:34.615Z",
|
||
"passed": false,
|
||
"duration": 3765,
|
||
"reason": "Expected 120, but got 5! is equal to 120. \n\nanswer: 120",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:41:36.384Z",
|
||
"passed": true,
|
||
"duration": 1765,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:41:56.369Z",
|
||
"passed": false,
|
||
"duration": 19981,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:41:57.522Z",
|
||
"passed": true,
|
||
"duration": 1149,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:41:58.423Z",
|
||
"passed": true,
|
||
"duration": 898,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:42:28.442Z",
|
||
"passed": false,
|
||
"duration": 30014,
|
||
"error": {
|
||
"message": "API call timed out",
|
||
"code": "UNKNOWN",
|
||
"type": "Error",
|
||
"details": {
|
||
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:138:33)\n at listOnTimeout (node:internal/timers:594:17)\n at processTimers (node:internal/timers:529:7)",
|
||
"message": "API call timed out"
|
||
}
|
||
},
|
||
"reason": "API call timed out",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:42:33.805Z",
|
||
"passed": false,
|
||
"duration": 5358,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:42:34.628Z",
|
||
"passed": true,
|
||
"duration": 819,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:42:35.522Z",
|
||
"passed": true,
|
||
"duration": 890,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:42:36.791Z",
|
||
"passed": true,
|
||
"duration": 1266,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:42:37.938Z",
|
||
"passed": true,
|
||
"duration": 1144,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1",
|
||
"router": "deepseek/deepseek-r1",
|
||
"timestamp": "2025-04-02T13:42:42.967Z",
|
||
"passed": true,
|
||
"duration": 5026,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T13:42:51.135Z",
|
||
"passed": true,
|
||
"duration": 8165,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:42:52.650Z",
|
||
"passed": true,
|
||
"duration": 1512,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:42:57.896Z",
|
||
"passed": true,
|
||
"duration": 5243,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:42:58.774Z",
|
||
"passed": true,
|
||
"duration": 875,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:42:59.643Z",
|
||
"passed": true,
|
||
"duration": 866,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"router": "anthropic/claude-3.5-sonnet",
|
||
"timestamp": "2025-04-02T13:43:13.962Z",
|
||
"passed": false,
|
||
"duration": 1435,
|
||
"reason": "Expected -3,-2, but got -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "qwen/qwq-32b",
|
||
"router": "qwen/qwq-32b",
|
||
"timestamp": "2025-04-02T13:43:39.174Z",
|
||
"passed": false,
|
||
"duration": 25202,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T13:43:40.474Z",
|
||
"passed": false,
|
||
"duration": 1295,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3, -2"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T13:43:41.957Z",
|
||
"passed": false,
|
||
"duration": 1478,
|
||
"reason": "Expected -3,-2, but got -3, -2",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:08:37.779Z",
|
||
"passed": false,
|
||
"duration": 1244,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"The solutions to the quadratic equation x² + 5x + 6 = 0 are -2 and -3.\n\n\\boxed{-2}, \\boxed{-3}"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:08:48.803Z",
|
||
"passed": false,
|
||
"duration": 11013,
|
||
"reason": "Expected -3,-2, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -2 and -3.\n\n\\boxed{-2}, \\boxed{-3}",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-3,-2",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:08:50.291Z",
|
||
"passed": false,
|
||
"duration": 1482,
|
||
"reason": "Expected -3,-2, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:08:51.033Z",
|
||
"passed": true,
|
||
"duration": 736,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:08:55.877Z",
|
||
"passed": true,
|
||
"duration": 4838,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:08:56.613Z",
|
||
"passed": true,
|
||
"duration": 730,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:08:57.421Z",
|
||
"passed": true,
|
||
"duration": 802,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"The 6th number in the Fibonacci sequence is 5. \n\n5"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:09:04.449Z",
|
||
"passed": false,
|
||
"duration": 7023,
|
||
"reason": "Expected 8, but got the 6th number in the fibonacci sequence is 5. \n\n5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:09:05.286Z",
|
||
"passed": false,
|
||
"duration": 831,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:09:06.233Z",
|
||
"passed": true,
|
||
"duration": 940,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:09:08.662Z",
|
||
"passed": true,
|
||
"duration": 2423,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:09:09.441Z",
|
||
"passed": true,
|
||
"duration": 773,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:09:10.298Z",
|
||
"passed": true,
|
||
"duration": 851,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:09:11.870Z",
|
||
"passed": true,
|
||
"duration": 1566,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:09:12.628Z",
|
||
"passed": true,
|
||
"duration": 753,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:11:07.251Z",
|
||
"passed": false,
|
||
"duration": 1329,
|
||
"reason": "Expected -2,-3, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:11:14.807Z",
|
||
"passed": false,
|
||
"duration": 7547,
|
||
"reason": "Expected -2,-3, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:11:15.551Z",
|
||
"passed": false,
|
||
"duration": 738,
|
||
"reason": "Expected -2,-3, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:11:16.305Z",
|
||
"passed": true,
|
||
"duration": 749,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:11:21.413Z",
|
||
"passed": true,
|
||
"duration": 5102,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:11:22.690Z",
|
||
"passed": true,
|
||
"duration": 1271,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:11:23.404Z",
|
||
"passed": true,
|
||
"duration": 707,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:11:32.032Z",
|
||
"passed": false,
|
||
"duration": 8621,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:11:33.022Z",
|
||
"passed": false,
|
||
"duration": 983,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:11:33.875Z",
|
||
"passed": true,
|
||
"duration": 845,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:11:37.488Z",
|
||
"passed": true,
|
||
"duration": 3608,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:11:38.246Z",
|
||
"passed": true,
|
||
"duration": 753,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-02T22:11:39.169Z",
|
||
"passed": true,
|
||
"duration": 918,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-02T22:11:43.117Z",
|
||
"passed": true,
|
||
"duration": 3943,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-02T22:11:44.077Z",
|
||
"passed": true,
|
||
"duration": 954,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-3, -2"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:09:38.709Z",
|
||
"passed": false,
|
||
"duration": 1685,
|
||
"reason": "Expected -2,-3, but got -3, -2",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"The solutions to the quadratic equation x² + 5x + 6 = 0 are -2 and -3.\n\nSpecific steps:\n\n1. **Identify coefficients**: a = 1, b = 5, c = 6.\n2. **Calculate discriminant**: b² - 4ac = 25 - 24 = 1.\n3. **Apply quadratic formula**: x = (-5 ± √1)/2.\n4. **Compute solutions**: x = (-5 + 1)/2 = -2 and x = (-5 - 1)/2 = -3.\n\n-2, -3\n\nAnswer: -2,-3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:09:49.547Z",
|
||
"passed": false,
|
||
"duration": 10827,
|
||
"reason": "Expected -2,-3, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -2 and -3.\n\nspecific steps:\n\n1. **identify coefficients**: a = 1, b = 5, c = 6.\n2. **calculate discriminant**: b² - 4ac = 25 - 24 = 1.\n3. **apply quadratic formula**: x = (-5 ± √1)/2.\n4. **compute solutions**: x = (-5 + 1)/2 = -2 and x = (-5 - 1)/2 = -3.\n\n-2, -3\n\nanswer: -2,-3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:09:50.489Z",
|
||
"passed": false,
|
||
"duration": 935,
|
||
"reason": "Expected -2,-3, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:09:54.489Z",
|
||
"passed": true,
|
||
"duration": 3991,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"5! = 120\n\nThe factorial of 5 is calculated as:\n\n5 × 4 × 3 × 2 × 1 = 120\n\n**Answer:** 120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:10:03.614Z",
|
||
"passed": false,
|
||
"duration": 9116,
|
||
"reason": "Expected 120, but got 5! = 120\n\nthe factorial of 5 is calculated as:\n\n5 × 4 × 3 × 2 × 1 = 120\n\n**answer:** 120",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:10:04.483Z",
|
||
"passed": true,
|
||
"duration": 861,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:10:05.284Z",
|
||
"passed": true,
|
||
"duration": 792,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:10:13.584Z",
|
||
"passed": false,
|
||
"duration": 8292,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:10:14.350Z",
|
||
"passed": false,
|
||
"duration": 756,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:10:15.250Z",
|
||
"passed": true,
|
||
"duration": 892,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:10:17.013Z",
|
||
"passed": true,
|
||
"duration": 1755,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:10:17.849Z",
|
||
"passed": true,
|
||
"duration": 828,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-03T17:10:18.651Z",
|
||
"passed": true,
|
||
"duration": 795,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-03T17:10:25.922Z",
|
||
"passed": true,
|
||
"duration": 7263,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-03T17:10:26.895Z",
|
||
"passed": true,
|
||
"duration": 966,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:38:12.580Z",
|
||
"passed": true,
|
||
"duration": 1229,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"The solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.\n\nAnswer: -3, -2"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:38:24.221Z",
|
||
"passed": false,
|
||
"duration": 11633,
|
||
"reason": "Expected -2,-3, but got the solutions to the quadratic equation x² + 5x + 6 = 0 are -3, -2.\n\nanswer: -3, -2",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2, -3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:38:25.175Z",
|
||
"passed": false,
|
||
"duration": 943,
|
||
"reason": "Expected -2,-3, but got -2, -3",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "quadratic",
|
||
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Return only the solutions as comma-separated numbers, no explanation.",
|
||
"result": [
|
||
"-2,-3"
|
||
],
|
||
"expected": "-2,-3",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:38:26.290Z",
|
||
"passed": true,
|
||
"duration": 1105,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:38:27.138Z",
|
||
"passed": true,
|
||
"duration": 838,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:38:34.971Z",
|
||
"passed": true,
|
||
"duration": 7825,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:38:35.899Z",
|
||
"passed": true,
|
||
"duration": 920,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"prompt": "Calculate 5! (factorial of 5). Return only the number, no explanation.",
|
||
"result": [
|
||
"120"
|
||
],
|
||
"expected": "120",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:38:36.748Z",
|
||
"passed": true,
|
||
"duration": 840,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:38:37.951Z",
|
||
"passed": true,
|
||
"duration": 1195,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:38:49.318Z",
|
||
"passed": false,
|
||
"duration": 11358,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"5"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:38:50.264Z",
|
||
"passed": false,
|
||
"duration": 935,
|
||
"reason": "Expected 8, but got 5",
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"prompt": "Calculate the 6th number in the Fibonacci sequence. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:38:50.973Z",
|
||
"passed": true,
|
||
"duration": 701,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:38:51.774Z",
|
||
"passed": true,
|
||
"duration": 793,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:39:08.114Z",
|
||
"passed": true,
|
||
"duration": 16332,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:39:09.133Z",
|
||
"passed": true,
|
||
"duration": 1012,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"prompt": "Calculate the square root of 16. Return only the number, no explanation.",
|
||
"result": [
|
||
"4"
|
||
],
|
||
"expected": "4",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:39:10.677Z",
|
||
"passed": true,
|
||
"duration": 1535,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"router": "openai/gpt-3.5-turbo",
|
||
"timestamp": "2025-04-04T12:39:11.607Z",
|
||
"passed": true,
|
||
"duration": 922,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
||
"timestamp": "2025-04-04T12:39:18.707Z",
|
||
"passed": true,
|
||
"duration": 7091,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openai/gpt-4o-mini",
|
||
"router": "openai/gpt-4o-mini",
|
||
"timestamp": "2025-04-04T12:39:19.719Z",
|
||
"passed": true,
|
||
"duration": 1004,
|
||
"category": "math"
|
||
},
|
||
{
|
||
"test": "power",
|
||
"prompt": "Calculate 2 raised to the power of 3. Return only the number, no explanation.",
|
||
"result": [
|
||
"8"
|
||
],
|
||
"expected": "8",
|
||
"model": "openrouter/quasar-alpha",
|
||
"router": "openrouter/quasar-alpha",
|
||
"timestamp": "2025-04-04T12:39:21.294Z",
|
||
"passed": true,
|
||
"duration": 1567,
|
||
"category": "math"
|
||
}
|
||
],
|
||
"highscores": [
|
||
{
|
||
"test": "quadratic",
|
||
"rankings": [
|
||
{
|
||
"model": "openai/gpt-4o-mini",
|
||
"duration": 943,
|
||
"duration_secs": 0.943
|
||
},
|
||
{
|
||
"model": "openrouter/quasar-alpha",
|
||
"duration": 1105,
|
||
"duration_secs": 1.105
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "factorial",
|
||
"rankings": [
|
||
{
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"duration": 838,
|
||
"duration_secs": 0.838
|
||
},
|
||
{
|
||
"model": "openrouter/quasar-alpha",
|
||
"duration": 840,
|
||
"duration_secs": 0.84
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "fibonacci",
|
||
"rankings": [
|
||
{
|
||
"model": "openrouter/quasar-alpha",
|
||
"duration": 701,
|
||
"duration_secs": 0.701
|
||
},
|
||
{
|
||
"model": "openai/gpt-4o-mini",
|
||
"duration": 935,
|
||
"duration_secs": 0.935
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "square_root",
|
||
"rankings": [
|
||
{
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"duration": 793,
|
||
"duration_secs": 0.793
|
||
},
|
||
{
|
||
"model": "anthropic/claude-3.5-sonnet",
|
||
"duration": 819,
|
||
"duration_secs": 0.819
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"test": "power",
|
||
"rankings": [
|
||
{
|
||
"model": "openai/gpt-3.5-turbo",
|
||
"duration": 922,
|
||
"duration_secs": 0.922
|
||
},
|
||
{
|
||
"model": "openai/gpt-4o-mini",
|
||
"duration": 1004,
|
||
"duration_secs": 1.004
|
||
}
|
||
]
|
||
}
|
||
],
|
||
"lastUpdated": "2025-04-04T12:39:21.296Z"
|
||
} |