mono/packages/kbot/tests/unit/reports/language.json

669 lines
22 KiB
JSON

{
"results": [
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:04.594Z",
"passed": false,
"duration": 1183,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:07.871Z",
"passed": false,
"duration": 3265,
"reason": "Expected ¡Hola, mundo!, but got hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:09.128Z",
"passed": false,
"duration": 1244,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:10.051Z",
"passed": false,
"duration": 914,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:10.948Z",
"passed": false,
"duration": 886,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:15.782Z",
"passed": false,
"duration": 4822,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:16.691Z",
"passed": false,
"duration": 895,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:17.494Z",
"passed": false,
"duration": 789,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A quick brown fox jumps over a lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:18.910Z",
"passed": false,
"duration": 1405,
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"\"The quick brown fox jumps over the lazy dog.\""
],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:23.978Z",
"passed": false,
"duration": 5056,
"reason": "Expected A fox jumps over a dog, but got \"the quick brown fox jumps over the lazy dog.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:24.931Z",
"passed": false,
"duration": 942,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:25.886Z",
"passed": false,
"duration": 944,
"reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:26.837Z",
"passed": false,
"duration": 939,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [],
"expected": "French",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:27.292Z",
"passed": false,
"duration": 442,
"reason": "Model returned empty response",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:28.460Z",
"passed": false,
"duration": 1152,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:29.493Z",
"passed": false,
"duration": 1022,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:30.442Z",
"passed": true,
"duration": 938,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [],
"expected": "joyful",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:30.888Z",
"passed": false,
"duration": 436,
"reason": "Model returned empty response",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:31.838Z",
"passed": true,
"duration": 947,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:32.705Z",
"passed": true,
"duration": 857,
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T22:28:08.128Z",
"passed": false,
"duration": 1322,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T22:28:12.115Z",
"passed": false,
"duration": 3972,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T22:28:12.895Z",
"passed": false,
"duration": 769,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T22:28:13.738Z",
"passed": false,
"duration": 832,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"**Corrected Sentence:** I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T22:28:14.567Z",
"passed": false,
"duration": 819,
"reason": "Expected I went to the store yesterday, but got **corrected sentence:** i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T22:28:21.611Z",
"passed": false,
"duration": 7029,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T22:28:22.737Z",
"passed": false,
"duration": 1113,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T22:28:23.760Z",
"passed": false,
"duration": 1011,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"Summary: The quick brown fox jumps over the lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T22:28:24.602Z",
"passed": false,
"duration": 832,
"reason": "Expected A fox jumps over a dog, but got summary: the quick brown fox jumps over the lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A quick brown fox jumps over a lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T22:28:29.608Z",
"passed": false,
"duration": 4994,
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A swift fox leaps over a sluggish dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T22:28:30.667Z",
"passed": false,
"duration": 1048,
"reason": "Expected A fox jumps over a dog, but got a swift fox leaps over a sluggish dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T22:28:31.511Z",
"passed": false,
"duration": 832,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T22:28:32.264Z",
"passed": false,
"duration": 741,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [],
"expected": "French",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T22:28:32.694Z",
"passed": false,
"duration": 419,
"reason": "Model returned empty response",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T22:28:33.593Z",
"passed": false,
"duration": 887,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T22:28:34.490Z",
"passed": false,
"duration": 886,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Delighted"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T22:28:35.215Z",
"passed": false,
"duration": 715,
"reason": "Expected joyful, but got delighted",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [],
"expected": "joyful",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T22:28:35.639Z",
"passed": false,
"duration": 411,
"reason": "Model returned empty response",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T22:28:36.294Z",
"passed": true,
"duration": 644,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T22:28:37.021Z",
"passed": true,
"duration": 716,
"category": "language"
}
],
"highscores": [
{
"test": "translation",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 769,
"duration_secs": 0.769
},
{
"model": "openrouter/quasar-alpha",
"duration": 832,
"duration_secs": 0.832
}
]
},
{
"test": "grammar",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 819,
"duration_secs": 0.819
},
{
"model": "openrouter/quasar-alpha",
"duration": 1011,
"duration_secs": 1.011
}
]
},
{
"test": "summarization",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 832,
"duration_secs": 0.832
},
{
"model": "openrouter/quasar-alpha",
"duration": 832,
"duration_secs": 0.832
}
]
},
{
"test": "language_detection",
"rankings": [
{
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"duration": 419,
"duration_secs": 0.419
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 741,
"duration_secs": 0.741
}
]
},
{
"test": "synonyms",
"rankings": [
{
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"duration": 411,
"duration_secs": 0.411
},
{
"model": "openai/gpt-4o-mini",
"duration": 644,
"duration_secs": 0.644
}
]
}
],
"lastUpdated": "2025-04-06T22:28:37.021Z"
}