mono/packages/kbot/tests/unit/reports/math.json
2026-03-19 17:40:06 +01:00

436 lines
12 KiB
JSON

{
"results": [
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Respond ONLY with the solutions as comma-separated numbers (e.g., -3,-2). No other text.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:07.920Z",
"passed": false,
"duration": 1202,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Respond ONLY with the solutions as comma-separated numbers (e.g., -3,-2). No other text.",
"result": [
"-3,-2"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:09.012Z",
"passed": true,
"duration": 1088,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Respond ONLY with the final numerical answer. No explanation, no other text.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:09.517Z",
"passed": true,
"duration": 503,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Respond ONLY with the final numerical answer. No explanation, no other text.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:10.000Z",
"passed": true,
"duration": 481,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence (assuming F(1)=1, F(2)=1). Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:10.505Z",
"passed": true,
"duration": 503,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence (assuming F(1)=1, F(2)=1). Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:11.109Z",
"passed": true,
"duration": 601,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Respond ONLY with the final numerical answer. No other text.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:11.849Z",
"passed": true,
"duration": 738,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Respond ONLY with the final numerical answer. No other text.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:12.390Z",
"passed": true,
"duration": 539,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:12.985Z",
"passed": true,
"duration": 592,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:14.091Z",
"passed": true,
"duration": 1103,
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Respond ONLY with the solutions as comma-separated numbers (e.g., -3,-2). No other text.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:38:59.106Z",
"passed": false,
"duration": 1401,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Respond ONLY with the solutions as comma-separated numbers (e.g., -3,-2). No other text.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2026-03-19T15:38:59.626Z",
"passed": false,
"duration": 514,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "quadratic",
"prompt": "Solve the quadratic equation x² + 5x + 6 = 0. Respond ONLY with the solutions as comma-separated numbers (e.g., -3,-2). No other text.",
"result": [
"-2,-3"
],
"expected": "-3,-2",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:00.750Z",
"passed": false,
"duration": 1120,
"reason": "Expected -3,-2, but got -2,-3",
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Respond ONLY with the final numerical answer. No explanation, no other text.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:01.739Z",
"passed": true,
"duration": 985,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Respond ONLY with the final numerical answer. No explanation, no other text.",
"result": [
"120"
],
"expected": "120",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2026-03-19T15:39:02.255Z",
"passed": true,
"duration": 512,
"category": "math"
},
{
"test": "factorial",
"prompt": "Calculate 5! (factorial of 5). Respond ONLY with the final numerical answer. No explanation, no other text.",
"result": [
"120"
],
"expected": "120",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:03.136Z",
"passed": true,
"duration": 877,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence (assuming F(1)=1, F(2)=1). Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:04.243Z",
"passed": true,
"duration": 1103,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence (assuming F(1)=1, F(2)=1). Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2026-03-19T15:39:04.740Z",
"passed": true,
"duration": 494,
"category": "math"
},
{
"test": "fibonacci",
"prompt": "Calculate the 6th number in the Fibonacci sequence (assuming F(1)=1, F(2)=1). Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:08.836Z",
"passed": true,
"duration": 4093,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Respond ONLY with the final numerical answer. No other text.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:10.351Z",
"passed": true,
"duration": 1512,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Respond ONLY with the final numerical answer. No other text.",
"result": [
"4"
],
"expected": "4",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2026-03-19T15:39:10.838Z",
"passed": true,
"duration": 483,
"category": "math"
},
{
"test": "square_root",
"prompt": "Calculate the square root of 16. Respond ONLY with the final numerical answer. No other text.",
"result": [
"4"
],
"expected": "4",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:11.810Z",
"passed": true,
"duration": 969,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:13.154Z",
"passed": true,
"duration": 1340,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2026-03-19T15:39:14.466Z",
"passed": true,
"duration": 1308,
"category": "math"
},
{
"test": "power",
"prompt": "Calculate 2 raised to the power of 3. Respond ONLY with the final numerical answer. No other text.",
"result": [
"8"
],
"expected": "8",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2026-03-19T15:39:15.600Z",
"passed": true,
"duration": 1129,
"category": "math"
}
],
"highscores": [
{
"test": "quadratic",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 514,
"duration_secs": 0.514
},
{
"model": "anthropic/claude-sonnet-4",
"duration": 1120,
"duration_secs": 1.12
}
]
},
{
"test": "factorial",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 503,
"duration_secs": 0.503
},
{
"model": "openai/gpt-4o-mini",
"duration": 512,
"duration_secs": 0.512
}
]
},
{
"test": "fibonacci",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 494,
"duration_secs": 0.494
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 503,
"duration_secs": 0.503
}
]
},
{
"test": "square_root",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 483,
"duration_secs": 0.483
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 738,
"duration_secs": 0.738
}
]
},
{
"test": "power",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 592,
"duration_secs": 0.592
},
{
"model": "anthropic/claude-sonnet-4",
"duration": 1129,
"duration_secs": 1.129
}
]
}
],
"lastUpdated": "2026-03-19T15:39:15.600Z"
}