333 lines
10 KiB
JSON
333 lines
10 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content contain information about Kenya's \"Human prehistory\"? Reply with \"yes\" if it does, \"no\" if it does not.",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:36:42.598Z",
|
|
"passed": false,
|
|
"duration": 4790,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "How many users are in the data? Return just the number.",
|
|
"result": [],
|
|
"expected": "10",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:36:43.396Z",
|
|
"passed": false,
|
|
"duration": 783,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_cache_first",
|
|
"prompt": "Check if content loaded successfully. Reply with \"ok\" if it did.",
|
|
"result": [],
|
|
"expected": "ok",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:36:46.904Z",
|
|
"passed": false,
|
|
"duration": 3494,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_cache_second",
|
|
"prompt": "Was the content loaded from cache? If you can't tell, just reply \"unknown\".",
|
|
"result": [],
|
|
"expected": "unknown",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:36:50.381Z",
|
|
"passed": false,
|
|
"duration": 3468,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:37:26.448Z",
|
|
"passed": false,
|
|
"duration": 4081,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:37:27.153Z",
|
|
"passed": false,
|
|
"duration": 693,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_cache_first",
|
|
"prompt": "Check if content loaded successfully. Reply with \"ok\" if it did.",
|
|
"result": [],
|
|
"expected": "ok",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:37:30.678Z",
|
|
"passed": false,
|
|
"duration": 3515,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_cache_second",
|
|
"prompt": "Was the content loaded from cache? If you can't tell, just reply \"unknown\".",
|
|
"result": [],
|
|
"expected": "unknown",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:37:34.158Z",
|
|
"passed": false,
|
|
"duration": 3471,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:38:21.584Z",
|
|
"passed": false,
|
|
"duration": 4029,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:38:22.352Z",
|
|
"passed": false,
|
|
"duration": 755,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:40:19.387Z",
|
|
"passed": false,
|
|
"duration": 4165,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:40:20.265Z",
|
|
"passed": false,
|
|
"duration": 863,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:41:51.321Z",
|
|
"passed": false,
|
|
"duration": 3707,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:41:53.081Z",
|
|
"passed": false,
|
|
"duration": 737,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:06.952Z",
|
|
"passed": false,
|
|
"duration": 3542,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:10.527Z",
|
|
"passed": false,
|
|
"duration": 2556,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:46:52.443Z",
|
|
"passed": false,
|
|
"duration": 4035,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:46:54.153Z",
|
|
"passed": false,
|
|
"duration": 679,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:48:04.211Z",
|
|
"passed": false,
|
|
"duration": 3670,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:48:06.447Z",
|
|
"passed": false,
|
|
"duration": 1215,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_wikipedia",
|
|
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:48:31.738Z",
|
|
"passed": false,
|
|
"duration": 4125,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
|
|
"result": [],
|
|
"expected": "yes",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:48:33.801Z",
|
|
"passed": false,
|
|
"duration": 1033,
|
|
"reason": "Model returned empty response",
|
|
"category": "web"
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "web_wikipedia",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 4125,
|
|
"duration_secs": 4.125
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "web_json",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 1033,
|
|
"duration_secs": 1.033
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "web_cache_first",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 3515,
|
|
"duration_secs": 3.515
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "web_cache_second",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 3471,
|
|
"duration_secs": 3.471
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-04-06T15:48:33.801Z"
|
|
} |