mono/packages/kbot/tests/unit/reports/web.json
2025-04-06 17:49:35 +02:00

333 lines
10 KiB
JSON

{
"results": [
{
"test": "web_wikipedia",
"prompt": "Does the content contain information about Kenya's \"Human prehistory\"? Reply with \"yes\" if it does, \"no\" if it does not.",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:36:42.598Z",
"passed": false,
"duration": 4790,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "How many users are in the data? Return just the number.",
"result": [],
"expected": "10",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:36:43.396Z",
"passed": false,
"duration": 783,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_cache_first",
"prompt": "Check if content loaded successfully. Reply with \"ok\" if it did.",
"result": [],
"expected": "ok",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:36:46.904Z",
"passed": false,
"duration": 3494,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_cache_second",
"prompt": "Was the content loaded from cache? If you can't tell, just reply \"unknown\".",
"result": [],
"expected": "unknown",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:36:50.381Z",
"passed": false,
"duration": 3468,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:37:26.448Z",
"passed": false,
"duration": 4081,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:37:27.153Z",
"passed": false,
"duration": 693,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_cache_first",
"prompt": "Check if content loaded successfully. Reply with \"ok\" if it did.",
"result": [],
"expected": "ok",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:37:30.678Z",
"passed": false,
"duration": 3515,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_cache_second",
"prompt": "Was the content loaded from cache? If you can't tell, just reply \"unknown\".",
"result": [],
"expected": "unknown",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:37:34.158Z",
"passed": false,
"duration": 3471,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:38:21.584Z",
"passed": false,
"duration": 4029,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:38:22.352Z",
"passed": false,
"duration": 755,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:40:19.387Z",
"passed": false,
"duration": 4165,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:40:20.265Z",
"passed": false,
"duration": 863,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:41:51.321Z",
"passed": false,
"duration": 3707,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:41:53.081Z",
"passed": false,
"duration": 737,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:06.952Z",
"passed": false,
"duration": 3542,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:10.527Z",
"passed": false,
"duration": 2556,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:46:52.443Z",
"passed": false,
"duration": 4035,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:46:54.153Z",
"passed": false,
"duration": 679,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:48:04.211Z",
"passed": false,
"duration": 3670,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:48:06.447Z",
"passed": false,
"duration": 1215,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_wikipedia",
"prompt": "Does the content have information about Kenya? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:48:31.738Z",
"passed": false,
"duration": 4125,
"reason": "Model returned empty response",
"category": "web"
},
{
"test": "web_json",
"prompt": "Is this data in JSON format? Answer with only \"yes\" or \"no\".",
"result": [],
"expected": "yes",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:48:33.801Z",
"passed": false,
"duration": 1033,
"reason": "Model returned empty response",
"category": "web"
}
],
"highscores": [
{
"test": "web_wikipedia",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 4125,
"duration_secs": 4.125
}
]
},
{
"test": "web_json",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 1033,
"duration_secs": 1.033
}
]
},
{
"test": "web_cache_first",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 3515,
"duration_secs": 3.515
}
]
},
{
"test": "web_cache_second",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 3471,
"duration_secs": 3.471
}
]
}
],
"lastUpdated": "2025-04-06T15:48:33.801Z"
}