mono/packages/kbot/tests/unit/reports/language.json
2025-06-28 10:37:04 +02:00

2315 lines
77 KiB
JSON

{
"results": [
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T05:14:34.735Z",
"passed": false,
"duration": 942,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:14:35.415Z",
"passed": false,
"duration": 677,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T05:14:35.981Z",
"passed": false,
"duration": 564,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:14:36.670Z",
"passed": false,
"duration": 684,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"The quick brown fox jumps over the lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T05:14:37.351Z",
"passed": false,
"duration": 678,
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:14:38.015Z",
"passed": false,
"duration": 661,
"reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T05:14:39.002Z",
"passed": false,
"duration": 985,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:14:39.575Z",
"passed": false,
"duration": 571,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T05:14:40.170Z",
"passed": true,
"duration": 594,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T05:14:40.790Z",
"passed": true,
"duration": 617,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:07.876Z",
"passed": false,
"duration": 1153,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:08.421Z",
"passed": false,
"duration": 540,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:08.967Z",
"passed": false,
"duration": 542,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:09.649Z",
"passed": false,
"duration": 677,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"The quick brown fox jumps over the lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:10.358Z",
"passed": false,
"duration": 706,
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:10.973Z",
"passed": false,
"duration": 612,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:11.549Z",
"passed": false,
"duration": 573,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:12.127Z",
"passed": false,
"duration": 575,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:46:13.050Z",
"passed": true,
"duration": 920,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:46:13.615Z",
"passed": true,
"duration": 562,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:50:29.662Z",
"passed": false,
"duration": 756,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:50:30.776Z",
"passed": false,
"duration": 1109,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:50:31.560Z",
"passed": false,
"duration": 781,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:50:32.159Z",
"passed": false,
"duration": 595,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A quick fox jumps over a lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:50:33.261Z",
"passed": false,
"duration": 1099,
"reason": "Expected A fox jumps over a dog, but got a quick fox jumps over a lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox jumps over a lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:50:34.108Z",
"passed": false,
"duration": 840,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a lazy dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:50:34.652Z",
"passed": false,
"duration": 541,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:50:35.143Z",
"passed": false,
"duration": 487,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:50:35.656Z",
"passed": true,
"duration": 510,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:50:36.124Z",
"passed": true,
"duration": 465,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:25.151Z",
"passed": false,
"duration": 871,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:26.099Z",
"passed": false,
"duration": 943,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:26.669Z",
"passed": false,
"duration": 567,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:27.226Z",
"passed": false,
"duration": 554,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, compact, no explanation.",
"result": [
"The quick brown fox jumps over the lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:28.019Z",
"passed": false,
"duration": 791,
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, compact, no explanation.",
"result": [
"A quick fox jumps over a lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:28.771Z",
"passed": false,
"duration": 746,
"reason": "Expected A fox jumps over a dog, but got a quick fox jumps over a lazy dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:29.352Z",
"passed": false,
"duration": 578,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:30.320Z",
"passed": false,
"duration": 966,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:30.862Z",
"passed": true,
"duration": 539,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:33.736Z",
"passed": true,
"duration": 2872,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:53.263Z",
"passed": false,
"duration": 831,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:53.884Z",
"passed": false,
"duration": 617,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:54.462Z",
"passed": false,
"duration": 575,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:55.104Z",
"passed": false,
"duration": 639,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A quick brown fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:55.894Z",
"passed": false,
"duration": 787,
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:51:56.484Z",
"passed": false,
"duration": 582,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:51:59.835Z",
"passed": false,
"duration": 3348,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:52:00.400Z",
"passed": false,
"duration": 562,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:52:01.616Z",
"passed": true,
"duration": 1214,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:52:02.542Z",
"passed": true,
"duration": 923,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:09.330Z",
"passed": false,
"duration": 844,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:53:10.262Z",
"passed": false,
"duration": 928,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:11.043Z",
"passed": false,
"duration": 779,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:53:11.724Z",
"passed": false,
"duration": 678,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:12.663Z",
"passed": false,
"duration": 937,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:53:13.482Z",
"passed": false,
"duration": 817,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:14.188Z",
"passed": false,
"duration": 704,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:53:14.748Z",
"passed": false,
"duration": 557,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:15.311Z",
"passed": true,
"duration": 559,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:53:15.852Z",
"passed": true,
"duration": 538,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:35.433Z",
"passed": false,
"duration": 941,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:53:36.309Z",
"passed": false,
"duration": 871,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:53:36.924Z",
"passed": false,
"duration": 612,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:54:06.162Z",
"passed": false,
"duration": 818,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:06.810Z",
"passed": false,
"duration": 642,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:54:07.390Z",
"passed": false,
"duration": 576,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:08.237Z",
"passed": false,
"duration": 844,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"The quick brown fox jumps over the dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:54:08.852Z",
"passed": false,
"duration": 612,
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:09.559Z",
"passed": false,
"duration": 699,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:54:10.257Z",
"passed": false,
"duration": 695,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:10.757Z",
"passed": false,
"duration": 497,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-06-05T18:54:11.331Z",
"passed": true,
"duration": 570,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:12.093Z",
"passed": true,
"duration": 760,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:54:43.059Z",
"passed": false,
"duration": 2067,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:43.754Z",
"passed": false,
"duration": 689,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:54:45.466Z",
"passed": false,
"duration": 1708,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:46.074Z",
"passed": false,
"duration": 605,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A brown fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:54:48.340Z",
"passed": false,
"duration": 2263,
"reason": "Expected A fox jumps over a dog, but got a brown fox leaps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:49.025Z",
"passed": false,
"duration": 675,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:54:50.753Z",
"passed": false,
"duration": 1724,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:51.307Z",
"passed": false,
"duration": 551,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:54:53.244Z",
"passed": true,
"duration": 1934,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:54:53.740Z",
"passed": true,
"duration": 493,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:55:31.636Z",
"passed": false,
"duration": 1317,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:55:32.306Z",
"passed": false,
"duration": 666,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T18:55:37.706Z",
"passed": false,
"duration": 5397,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:55:39.433Z",
"passed": false,
"duration": 1722,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:55:40.607Z",
"passed": false,
"duration": 1171,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T18:55:45.810Z",
"passed": false,
"duration": 5199,
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:55:47.634Z",
"passed": false,
"duration": 1820,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:55:48.336Z",
"passed": false,
"duration": 699,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A quick brown fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T18:55:55.720Z",
"passed": false,
"duration": 7380,
"reason": "Expected A fox jumps over a dog, but got a quick brown fox leaps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:55:57.453Z",
"passed": false,
"duration": 1725,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:55:58.233Z",
"passed": false,
"duration": 776,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T18:56:03.483Z",
"passed": false,
"duration": 5247,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Content"
],
"expected": "joyful",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T18:56:05.453Z",
"passed": false,
"duration": 1967,
"reason": "Expected joyful, but got content",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T18:56:06.005Z",
"passed": true,
"duration": 548,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T18:56:08.626Z",
"passed": true,
"duration": 2616,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:30:21.412Z",
"passed": true,
"duration": 1560,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:30:22.869Z",
"passed": true,
"duration": 1451,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:30:28.307Z",
"passed": true,
"duration": 5434,
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:30:29.513Z",
"passed": false,
"duration": 1201,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:30:30.212Z",
"passed": false,
"duration": 695,
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:30:33.611Z",
"passed": false,
"duration": 3395,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A brown fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:30:34.920Z",
"passed": false,
"duration": 1304,
"reason": "Expected A fox jumps over a dog, but got A brown fox leaps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:30:35.620Z",
"passed": false,
"duration": 692,
"reason": "Expected A fox jumps over a dog, but got A fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"\"A quick brown fox leaps over a dog.\""
],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:30:49.662Z",
"passed": false,
"duration": 14038,
"reason": "Expected A fox jumps over a dog, but got \"A quick brown fox leaps over a dog.\"",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:30:50.805Z",
"passed": true,
"duration": 1137,
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:30:51.269Z",
"passed": true,
"duration": 459,
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:30:55.198Z",
"passed": true,
"duration": 3924,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:30:56.455Z",
"passed": true,
"duration": 1251,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:30:57.083Z",
"passed": true,
"duration": 622,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"joyful"
],
"expected": "joyful",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:31:00.924Z",
"passed": true,
"duration": 3836,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:41:10.838Z",
"passed": true,
"duration": 1465,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:41:11.649Z",
"passed": true,
"duration": 805,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:41:16.717Z",
"passed": true,
"duration": 5063,
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:41:17.368Z",
"passed": false,
"duration": 646,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:41:18.259Z",
"passed": false,
"duration": 886,
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:41:23.006Z",
"passed": false,
"duration": 4742,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A brown fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:41:32.126Z",
"passed": false,
"duration": 9115,
"reason": "Expected A fox jumps over a dog, but got A brown fox leaps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A brown fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:44:29.781Z",
"passed": false,
"duration": 6689,
"reason": "Expected A fox jumps over a dog, but got A brown fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:45:17.372Z",
"passed": true,
"duration": 47581,
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:52:22.430Z",
"passed": true,
"duration": 27328,
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:52:30.984Z",
"passed": true,
"duration": 8548,
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:53:31.003Z",
"passed": false,
"duration": 60014,
"error": {
"message": "API call timed out",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:296:33)\n at listOnTimeout (node:internal/timers:588:17)\n at processTimers (node:internal/timers:523:7)",
"message": "API call timed out"
}
},
"reason": "API call timed out",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:53:32.418Z",
"passed": true,
"duration": 1408,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:53:33.063Z",
"passed": true,
"duration": 639,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:53:35.309Z",
"passed": true,
"duration": 2241,
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:53:35.959Z",
"passed": false,
"duration": 645,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:53:36.606Z",
"passed": false,
"duration": 641,
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:53:41.701Z",
"passed": false,
"duration": 5090,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:53:42.454Z",
"passed": true,
"duration": 747,
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:53:43.116Z",
"passed": true,
"duration": 657,
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:53:47.420Z",
"passed": true,
"duration": 4299,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:53:48.762Z",
"passed": true,
"duration": 1336,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:54:17.429Z",
"passed": true,
"duration": 561,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:54:21.005Z",
"passed": true,
"duration": 3571,
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"Fox jumps over dog."
],
"expected": "A fox jumps over a dog",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:55:58.691Z",
"passed": true,
"duration": 1621,
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:56:00.205Z",
"passed": true,
"duration": 1508,
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
"result": [
"A quick brown fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:56:06.210Z",
"passed": true,
"duration": 6000,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:56:07.465Z",
"passed": true,
"duration": 1250,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:56:07.992Z",
"passed": true,
"duration": 521,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:56:10.966Z",
"passed": true,
"duration": 2969,
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:56:12.216Z",
"passed": false,
"duration": 1246,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"\"I went to the store yesterday.\""
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:56:12.844Z",
"passed": false,
"duration": 623,
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:56:17.444Z",
"passed": false,
"duration": 4594,
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:56:18.557Z",
"passed": true,
"duration": 1107,
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:56:19.252Z",
"passed": true,
"duration": 689,
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:56:23.475Z",
"passed": true,
"duration": 4218,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "anthropic/claude-sonnet-4",
"router": "anthropic/claude-sonnet-4",
"timestamp": "2025-06-05T22:56:25.120Z",
"passed": true,
"duration": 1639,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-06-05T22:56:25.785Z",
"passed": true,
"duration": 661,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "deepseek/deepseek-r1:free",
"router": "deepseek/deepseek-r1:free",
"timestamp": "2025-06-05T22:56:29.261Z",
"passed": true,
"duration": 3471,
"category": "language"
}
],
"highscores": [
{
"test": "translation",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 521,
"duration_secs": 0.521
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 818,
"duration_secs": 0.818
}
]
},
{
"test": "grammar",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 576,
"duration_secs": 0.576
},
{
"model": "openai/gpt-4o-mini",
"duration": 623,
"duration_secs": 0.623
}
]
},
{
"test": "summarization",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 612,
"duration_secs": 0.612
},
{
"model": "openai/gpt-4o-mini",
"duration": 1508,
"duration_secs": 1.508
}
]
},
{
"test": "language_detection",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 689,
"duration_secs": 0.689
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 695,
"duration_secs": 0.695
}
]
},
{
"test": "synonyms",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 570,
"duration_secs": 0.57
},
{
"model": "openai/gpt-4o-mini",
"duration": 661,
"duration_secs": 0.661
}
]
}
],
"lastUpdated": "2025-06-05T22:56:29.262Z"
}