2076 lines
66 KiB
JSON
2076 lines
66 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:00.645Z",
|
|
"passed": true,
|
|
"duration": 868,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:02.979Z",
|
|
"passed": true,
|
|
"duration": 2331,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:03.804Z",
|
|
"passed": false,
|
|
"duration": 823,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:06.043Z",
|
|
"passed": false,
|
|
"duration": 2236,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:06.739Z",
|
|
"passed": false,
|
|
"duration": 694,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:08.946Z",
|
|
"passed": false,
|
|
"duration": 2205,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:16:11.187Z",
|
|
"passed": false,
|
|
"duration": 2239,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T05:16:13.553Z",
|
|
"passed": false,
|
|
"duration": 2364,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:08.292Z",
|
|
"passed": true,
|
|
"duration": 1569,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:09.917Z",
|
|
"passed": true,
|
|
"duration": 1621,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:10.702Z",
|
|
"passed": false,
|
|
"duration": 781,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:12.980Z",
|
|
"passed": false,
|
|
"duration": 2274,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:13.800Z",
|
|
"passed": false,
|
|
"duration": 818,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:15.477Z",
|
|
"passed": false,
|
|
"duration": 1674,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:17.703Z",
|
|
"passed": false,
|
|
"duration": 2223,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T18:46:20.109Z",
|
|
"passed": false,
|
|
"duration": 2404,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\n \"cat\",\n \"fox\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:25:46.078Z",
|
|
"passed": true,
|
|
"duration": 824,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:25:53.366Z",
|
|
"passed": true,
|
|
"duration": 7284,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:25:54.218Z",
|
|
"passed": false,
|
|
"duration": 849,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubbleSort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:25:59.456Z",
|
|
"passed": false,
|
|
"duration": 5231,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:26:00.076Z",
|
|
"passed": true,
|
|
"duration": 616,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:26:02.225Z",
|
|
"passed": true,
|
|
"duration": 2146,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:26:05.871Z",
|
|
"passed": true,
|
|
"duration": 3643,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:26:09.058Z",
|
|
"passed": true,
|
|
"duration": 3183,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:26:42.944Z",
|
|
"passed": true,
|
|
"duration": 772,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:26:46.369Z",
|
|
"passed": true,
|
|
"duration": 3421,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:26:47.073Z",
|
|
"passed": false,
|
|
"duration": 700,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubbleSort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:26:48.594Z",
|
|
"passed": false,
|
|
"duration": 1514,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubbleSort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:26:49.375Z",
|
|
"passed": true,
|
|
"duration": 779,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:26:55.618Z",
|
|
"passed": true,
|
|
"duration": 6239,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"wildcat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:26:57.688Z",
|
|
"passed": false,
|
|
"duration": 2067,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\n \"wildcat\",\n \"fox\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:27:00.508Z",
|
|
"passed": true,
|
|
"duration": 2815,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:27:38.292Z",
|
|
"passed": true,
|
|
"duration": 1023,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:27:42.531Z",
|
|
"passed": true,
|
|
"duration": 4235,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:27:43.285Z",
|
|
"passed": false,
|
|
"duration": 751,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubbleSort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:27:48.461Z",
|
|
"passed": false,
|
|
"duration": 5168,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:27:49.024Z",
|
|
"passed": true,
|
|
"duration": 559,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:27:54.942Z",
|
|
"passed": false,
|
|
"duration": 5915,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:27:56.766Z",
|
|
"passed": true,
|
|
"duration": 1819,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:27:59.278Z",
|
|
"passed": true,
|
|
"duration": 2508,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:28:17.298Z",
|
|
"passed": true,
|
|
"duration": 834,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:28:18.833Z",
|
|
"passed": true,
|
|
"duration": 1530,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:28:19.525Z",
|
|
"passed": false,
|
|
"duration": 688,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubbleSort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:28:23.761Z",
|
|
"passed": false,
|
|
"duration": 4229,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubbleSort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:28:24.280Z",
|
|
"passed": true,
|
|
"duration": 515,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:28:26.274Z",
|
|
"passed": true,
|
|
"duration": 1990,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:28:29.111Z",
|
|
"passed": true,
|
|
"duration": 2834,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:28:35.340Z",
|
|
"passed": false,
|
|
"duration": 6225,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:30:09.177Z",
|
|
"passed": true,
|
|
"duration": 1035,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:30:11.447Z",
|
|
"passed": true,
|
|
"duration": 2266,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:30:12.403Z",
|
|
"passed": false,
|
|
"duration": 952,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubbleSort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:30:18.660Z",
|
|
"passed": false,
|
|
"duration": 6250,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:30:19.412Z",
|
|
"passed": true,
|
|
"duration": 748,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:30:24.575Z",
|
|
"passed": false,
|
|
"duration": 5159,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:30:26.812Z",
|
|
"passed": true,
|
|
"duration": 2232,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:30:33.768Z",
|
|
"passed": false,
|
|
"duration": 6951,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:31:32.809Z",
|
|
"passed": true,
|
|
"duration": 941,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:31:35.174Z",
|
|
"passed": true,
|
|
"duration": 2360,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:31:49.546Z",
|
|
"passed": false,
|
|
"duration": 759,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:32:00.755Z",
|
|
"passed": false,
|
|
"duration": 7224,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:32:01.351Z",
|
|
"passed": false,
|
|
"duration": 592,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:32:02.883Z",
|
|
"passed": false,
|
|
"duration": 1528,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:32:05.172Z",
|
|
"passed": false,
|
|
"duration": 2283,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:32:07.065Z",
|
|
"passed": false,
|
|
"duration": 1887,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:32:59.145Z",
|
|
"passed": true,
|
|
"duration": 883,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:33:01.231Z",
|
|
"passed": true,
|
|
"duration": 2081,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:33:01.922Z",
|
|
"passed": false,
|
|
"duration": 686,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:33:04.999Z",
|
|
"passed": false,
|
|
"duration": 3070,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:33:07.789Z",
|
|
"passed": false,
|
|
"duration": 2785,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:33:09.512Z",
|
|
"passed": false,
|
|
"duration": 1718,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:33:14.818Z",
|
|
"passed": false,
|
|
"duration": 5303,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:33:17.979Z",
|
|
"passed": false,
|
|
"duration": 3156,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\n \"cat\",\n \"fox\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:34:43.846Z",
|
|
"passed": true,
|
|
"duration": 1036,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:34:48.463Z",
|
|
"passed": true,
|
|
"duration": 4612,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:34:49.231Z",
|
|
"passed": false,
|
|
"duration": 763,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:34:55.136Z",
|
|
"passed": false,
|
|
"duration": 5897,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:34:55.630Z",
|
|
"passed": false,
|
|
"duration": 489,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:35:00.574Z",
|
|
"passed": false,
|
|
"duration": 4939,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:35:03.038Z",
|
|
"passed": false,
|
|
"duration": 2459,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:35:05.480Z",
|
|
"passed": false,
|
|
"duration": 2438,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\n \"cat\",\n \"fox\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:36:08.730Z",
|
|
"passed": true,
|
|
"duration": 1322,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:36:12.605Z",
|
|
"passed": true,
|
|
"duration": 3870,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:36:13.472Z",
|
|
"passed": false,
|
|
"duration": 862,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:36:58.567Z",
|
|
"passed": true,
|
|
"duration": 840,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:37:00.081Z",
|
|
"passed": true,
|
|
"duration": 1509,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:37:00.976Z",
|
|
"passed": false,
|
|
"duration": 891,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:37:02.031Z",
|
|
"passed": false,
|
|
"duration": 1048,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\n \"factorial\",\n \"bubblesort\"\n]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:37:02.889Z",
|
|
"passed": false,
|
|
"duration": 854,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:37:04.623Z",
|
|
"passed": false,
|
|
"duration": 1730,
|
|
"reason": "Expected Injection Barrel, but got injection barrel",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:37:07.559Z",
|
|
"passed": false,
|
|
"duration": 2933,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:37:10.561Z",
|
|
"passed": false,
|
|
"duration": 2998,
|
|
"reason": "Expected [\"cat\",\"fox\"], but got [\"cat\", \"fox\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:38:15.039Z",
|
|
"passed": true,
|
|
"duration": 804,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:38:17.121Z",
|
|
"passed": true,
|
|
"duration": 2077,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:38:25.035Z",
|
|
"passed": false,
|
|
"duration": 797,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:39:30.971Z",
|
|
"passed": false,
|
|
"duration": 1721,
|
|
"reason": "Expected [\"bubble sort\",\"factorial\"], but got [\"factorial\", \"bubblesort\"]",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:53:07.100Z",
|
|
"passed": true,
|
|
"duration": 51668,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:53:13.381Z",
|
|
"passed": true,
|
|
"duration": 2388,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:53:14.200Z",
|
|
"passed": true,
|
|
"duration": 814,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:53:23.416Z",
|
|
"passed": true,
|
|
"duration": 2210,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:53:51.472Z",
|
|
"passed": true,
|
|
"duration": 8947,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:53:57.721Z",
|
|
"passed": false,
|
|
"duration": 6245,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:54:00.301Z",
|
|
"passed": true,
|
|
"duration": 2573,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:54:02.900Z",
|
|
"passed": true,
|
|
"duration": 2594,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:54:12.068Z",
|
|
"passed": true,
|
|
"duration": 792,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:54:13.596Z",
|
|
"passed": true,
|
|
"duration": 1522,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:54:14.332Z",
|
|
"passed": true,
|
|
"duration": 731,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:55:55.651Z",
|
|
"passed": false,
|
|
"duration": 7061,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:56:27.181Z",
|
|
"passed": true,
|
|
"duration": 3432,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:56:28.770Z",
|
|
"passed": true,
|
|
"duration": 1583,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:56:31.322Z",
|
|
"passed": true,
|
|
"duration": 2548,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\"cat\", \"fox\"]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:56:34.465Z",
|
|
"passed": true,
|
|
"duration": 3138,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:57:18.569Z",
|
|
"passed": true,
|
|
"duration": 867,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:57:20.692Z",
|
|
"passed": true,
|
|
"duration": 2117,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\"factorial\", \"bubbleSort\"]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:57:21.814Z",
|
|
"passed": true,
|
|
"duration": 1117,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:57:23.423Z",
|
|
"passed": true,
|
|
"duration": 1603,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:57:24.145Z",
|
|
"passed": true,
|
|
"duration": 717,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:57:26.292Z",
|
|
"passed": true,
|
|
"duration": 2142,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images? Return as JSON array.",
|
|
"result": [
|
|
"[\n \"cat\",\n \"fox\"\n]"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:57:29.395Z",
|
|
"passed": true,
|
|
"duration": 3099,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubble sort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:59:17.877Z",
|
|
"passed": true,
|
|
"duration": 4686,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"Bubble Sort\n"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:59:20.537Z",
|
|
"passed": true,
|
|
"duration": 2653,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:59:21.334Z",
|
|
"passed": true,
|
|
"duration": 793,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"[\n \"factorial\",\n \"bubbleSort\"\n]"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:59:25.325Z",
|
|
"passed": true,
|
|
"duration": 3986,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:59:25.873Z",
|
|
"passed": true,
|
|
"duration": 543,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel\n"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T21:59:27.328Z",
|
|
"passed": true,
|
|
"duration": 1451,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"The image shows a wildcat on the left and a red fox on the right."
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T21:59:30.021Z",
|
|
"passed": true,
|
|
"duration": 2689,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:11:44.531Z",
|
|
"passed": false,
|
|
"duration": 26687,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"The image shows a cat and a fox.\n"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"router": "google/gemini-2.0-flash-exp:free",
|
|
"timestamp": "2025-06-05T22:12:19.670Z",
|
|
"passed": true,
|
|
"duration": 4801,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:18:06.005Z",
|
|
"passed": false,
|
|
"duration": 7536,
|
|
"reason": "Model returned empty response",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"{\"animals\":[\"cat\",\"fox\"]}"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:25:35.300Z",
|
|
"passed": true,
|
|
"duration": 20975,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"{\"animals\":[\"cat\",\"fox\"]}"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:26:40.769Z",
|
|
"passed": true,
|
|
"duration": 16606,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubbleSort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:26:41.541Z",
|
|
"passed": false,
|
|
"duration": 765,
|
|
"reason": "Expected bubble sort, but got bubbleSort",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"{\"algorithms\":[\"factorial\",\"bubbleSort\"]}"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:26:42.264Z",
|
|
"passed": true,
|
|
"duration": 718,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:26:43.192Z",
|
|
"passed": true,
|
|
"duration": 919,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"{\"animals\":[\"cat\",\"fox\"]}"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:27:59.823Z",
|
|
"passed": true,
|
|
"duration": 2365,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubbleSort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:28:00.651Z",
|
|
"passed": false,
|
|
"duration": 822,
|
|
"reason": "Expected bubble sort, but got bubbleSort",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"{\"algorithms\":[\"factorial\",\"bubbleSort\"]}"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:28:01.338Z",
|
|
"passed": true,
|
|
"duration": 682,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"{\"animals\":[\"cat\",\"fox\"]}"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:29:44.549Z",
|
|
"passed": true,
|
|
"duration": 3234,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubbleSort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:29:45.518Z",
|
|
"passed": false,
|
|
"duration": 961,
|
|
"reason": "Expected bubble sort, but got bubbleSort",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"{\"algorithms\":[\"factorial\",\"bubbleSort\"]}"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:29:46.232Z",
|
|
"passed": true,
|
|
"duration": 709,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:29:46.851Z",
|
|
"passed": true,
|
|
"duration": 614,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What animals are shown in these images?",
|
|
"result": [
|
|
"{\"animals\":[\"cat\",\"fox\"]}"
|
|
],
|
|
"expected": "[\"cat\",\"fox\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:55:45.624Z",
|
|
"passed": true,
|
|
"duration": 5694,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the name of the algorithm implemented in these files? Return only the name.",
|
|
"result": [
|
|
"bubbleSort"
|
|
],
|
|
"expected": "bubble sort",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:55:46.325Z",
|
|
"passed": false,
|
|
"duration": 694,
|
|
"reason": "Expected bubble sort, but got bubbleSort",
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "List all algorithms implemented in these files, as JSON array.",
|
|
"result": [
|
|
"{\"algorithms\":[\"factorial\",\"bubbleSort\"]}"
|
|
],
|
|
"expected": "[\"bubble sort\",\"factorial\"]",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:55:47.088Z",
|
|
"passed": true,
|
|
"duration": 758,
|
|
"category": "files"
|
|
},
|
|
{
|
|
"test": "file-inclusion",
|
|
"prompt": "What is the title of the product in data.json? Return only the title.",
|
|
"result": [
|
|
"Injection Barrel"
|
|
],
|
|
"expected": "Injection Barrel",
|
|
"model": "openai/gpt-4o",
|
|
"router": "openai/gpt-4o",
|
|
"timestamp": "2025-06-05T22:55:47.875Z",
|
|
"passed": true,
|
|
"duration": 782,
|
|
"category": "files"
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "file-inclusion",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o",
|
|
"duration": 782,
|
|
"duration_secs": 0.782
|
|
},
|
|
{
|
|
"model": "google/gemini-2.0-flash-exp:free",
|
|
"duration": 4801,
|
|
"duration_secs": 4.801
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-06-05T22:55:47.876Z"
|
|
} |