mono/packages/kbot/tests/unit/reports/files.json

258 lines
8.3 KiB
JSON

{
"results": [
{
"test": "file-inclusion",
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
"result": [
"Bubble Sort"
],
"expected": "bubble sort",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:16:00.645Z",
"passed": true,
"duration": 868,
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
"result": [
"Bubble Sort\n"
],
"expected": "bubble sort",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T05:16:02.979Z",
"passed": true,
"duration": 2331,
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "List all algorithms implemented in these files, as JSON array.",
"result": [
"[\"factorial\", \"bubbleSort\"]"
],
"expected": "[\"bubble sort\",\"factorial\"]",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:16:03.804Z",
"passed": false,
"duration": 823,
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "List all algorithms implemented in these files, as JSON array.",
"result": [
"[\n \"factorial\",\n \"bubbleSort\"\n]"
],
"expected": "[\"bubble sort\",\"factorial\"]",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T05:16:06.043Z",
"passed": false,
"duration": 2236,
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the title of the product in data.json? Return only the title.",
"result": [
"Injection Barrel"
],
"expected": "Injection Barrel",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:16:06.739Z",
"passed": false,
"duration": 694,
"reason": "Expected Injection Barrel, but got injection barrel",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the title of the product in data.json? Return only the title.",
"result": [
"Injection Barrel\n"
],
"expected": "Injection Barrel",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T05:16:08.946Z",
"passed": false,
"duration": 2205,
"reason": "Expected Injection Barrel, but got injection barrel",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What animals are shown in these images? Return as JSON array.",
"result": [
"[\"cat\", \"fox\"]"
],
"expected": "[\"cat\",\"fox\"]",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:16:11.187Z",
"passed": false,
"duration": 2239,
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What animals are shown in these images? Return as JSON array.",
"result": [
"[\"cat\", \"fox\"]"
],
"expected": "[\"cat\",\"fox\"]",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T05:16:13.553Z",
"passed": false,
"duration": 2364,
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
"result": [
"bubble sort"
],
"expected": "bubble sort",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:08.292Z",
"passed": true,
"duration": 1569,
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
"result": [
"Bubble Sort\n"
],
"expected": "bubble sort",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T18:46:09.917Z",
"passed": true,
"duration": 1621,
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "List all algorithms implemented in these files, as JSON array.",
"result": [
"[\n \"factorial\",\n \"bubbleSort\"\n]"
],
"expected": "[\"bubble sort\",\"factorial\"]",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:10.702Z",
"passed": false,
"duration": 781,
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "List all algorithms implemented in these files, as JSON array.",
"result": [
"[\n \"factorial\",\n \"bubbleSort\"\n]"
],
"expected": "[\"bubble sort\",\"factorial\"]",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T18:46:12.980Z",
"passed": false,
"duration": 2274,
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the title of the product in data.json? Return only the title.",
"result": [
"Injection Barrel"
],
"expected": "Injection Barrel",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:13.800Z",
"passed": false,
"duration": 818,
"reason": "Expected Injection Barrel, but got injection barrel",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What is the title of the product in data.json? Return only the title.",
"result": [
"Injection Barrel\n"
],
"expected": "Injection Barrel",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T18:46:15.477Z",
"passed": false,
"duration": 1674,
"reason": "Expected Injection Barrel, but got injection barrel",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What animals are shown in these images? Return as JSON array.",
"result": [
"[\"cat\", \"fox\"]"
],
"expected": "[\"cat\",\"fox\"]",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:17.703Z",
"passed": false,
"duration": 2223,
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
"category": "files"
},
{
"test": "file-inclusion",
"prompt": "What animals are shown in these images? Return as JSON array.",
"result": [
"[\n \"cat\",\n \"fox\"\n]"
],
"expected": "[\"cat\",\"fox\"]",
"model": "google/gemini-2.0-flash-exp:free",
"router": "google/gemini-2.0-flash-exp:free",
"timestamp": "2025-06-05T18:46:20.109Z",
"passed": false,
"duration": 2404,
"reason": "Expected [\"cat\",\"fox\"], but got [\n \"cat\",\n \"fox\"\n]",
"category": "files"
}
],
"highscores": [
{
"test": "file-inclusion",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 2223,
"duration_secs": 2.223
},
{
"model": "google/gemini-2.0-flash-exp:free",
"duration": 2404,
"duration_secs": 2.404
}
]
}
],
"lastUpdated": "2025-06-05T18:46:20.109Z"
}