258 lines
8.3 KiB
JSON
258 lines
8.3 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:00.645Z",
|
|
"passed": true,
|
|
"duration": 868,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:02.979Z",
|
|
"passed": true,
|
|
"duration": 2331,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:03.804Z",
|
|
"passed": false,
|
|
"duration": 823,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:06.043Z",
|
|
"passed": false,
|
|
"duration": 2236,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:06.739Z",
|
|
"passed": false,
|
|
"duration": 694,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:08.946Z",
|
|
"passed": false,
|
|
"duration": 2205,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:11.187Z",
|
|
"passed": false,
|
|
"duration": 2239,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:13.553Z",
|
|
"passed": false,
|
|
"duration": 2364,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:08.292Z",
|
|
"passed": true,
|
|
"duration": 1569,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:09.917Z",
|
|
"passed": true,
|
|
"duration": 1621,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:10.702Z",
|
|
"passed": false,
|
|
"duration": 781,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:12.980Z",
|
|
"passed": false,
|
|
"duration": 2274,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:13.800Z",
|
|
"passed": false,
|
|
"duration": 818,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:15.477Z",
|
|
"passed": false,
|
|
"duration": 1674,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:17.703Z",
|
|
"passed": false,
|
|
"duration": 2223,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:20.109Z",
|
|
"passed": false,
|
|
"duration": 2404,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\n \"cat\",\n \"fox\"\n]",
|
|
"category": "files"
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "file-inclusion",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 2223,
|
|
"duration_secs": 2.223
|
|
},
|
|
{
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"duration": 2404,
|
|
"duration_secs": 2.404
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-06-05T18:46:20.109Z"
|
|
} |