mono/packages/kbot/tests/unit/reports/language.json
2025-04-06 17:49:35 +02:00

375 lines
12 KiB
JSON

{
"results": [
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:04.594Z",
"passed": false,
"duration": 1183,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:07.871Z",
"passed": false,
"duration": 3265,
"reason": "Expected ¡Hola, mundo!, but got hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:09.128Z",
"passed": false,
"duration": 1244,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "translation",
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
"result": [
"¡Hola, mundo!"
],
"expected": "¡Hola, mundo!",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:10.051Z",
"passed": false,
"duration": 914,
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:10.948Z",
"passed": false,
"duration": 886,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:15.782Z",
"passed": false,
"duration": 4822,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:16.691Z",
"passed": false,
"duration": 895,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "grammar",
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
"result": [
"I went to the store yesterday."
],
"expected": "I went to the store yesterday",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:17.494Z",
"passed": false,
"duration": 789,
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A quick brown fox jumps over a lazy dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:18.910Z",
"passed": false,
"duration": 1405,
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"\"The quick brown fox jumps over the lazy dog.\""
],
"expected": "A fox jumps over a dog",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:23.978Z",
"passed": false,
"duration": 5056,
"reason": "Expected A fox jumps over a dog, but got \"the quick brown fox jumps over the lazy dog.\"",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox jumps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:24.931Z",
"passed": false,
"duration": 942,
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
"category": "language"
},
{
"test": "summarization",
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
"result": [
"A fox leaps over a dog."
],
"expected": "A fox jumps over a dog",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:25.886Z",
"passed": false,
"duration": 944,
"reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:26.837Z",
"passed": false,
"duration": 939,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [],
"expected": "French",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:27.292Z",
"passed": false,
"duration": 442,
"reason": "Model returned empty response",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:28.460Z",
"passed": false,
"duration": 1152,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "language_detection",
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
"result": [
"French"
],
"expected": "French",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:29.493Z",
"passed": false,
"duration": 1022,
"reason": "Expected French, but got french",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-06T15:42:30.442Z",
"passed": true,
"duration": 938,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [],
"expected": "joyful",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-06T15:42:30.888Z",
"passed": false,
"duration": 436,
"reason": "Model returned empty response",
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-06T15:42:31.838Z",
"passed": true,
"duration": 947,
"category": "language"
},
{
"test": "synonyms",
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
"result": [
"Joyful"
],
"expected": "joyful",
"model": "openrouter/quasar-alpha",
"router": "openrouter/quasar-alpha",
"timestamp": "2025-04-06T15:42:32.705Z",
"passed": true,
"duration": 857,
"category": "language"
}
],
"highscores": [
{
"test": "translation",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 914,
"duration_secs": 0.914
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 1183,
"duration_secs": 1.183
}
]
},
{
"test": "grammar",
"rankings": [
{
"model": "openrouter/quasar-alpha",
"duration": 789,
"duration_secs": 0.789
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 886,
"duration_secs": 0.886
}
]
},
{
"test": "summarization",
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 942,
"duration_secs": 0.942
},
{
"model": "openrouter/quasar-alpha",
"duration": 944,
"duration_secs": 0.944
}
]
},
{
"test": "language_detection",
"rankings": [
{
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"duration": 442,
"duration_secs": 0.442
},
{
"model": "openai/gpt-3.5-turbo",
"duration": 939,
"duration_secs": 0.939
}
]
},
{
"test": "synonyms",
"rankings": [
{
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"duration": 436,
"duration_secs": 0.436
},
{
"model": "openrouter/quasar-alpha",
"duration": 857,
"duration_secs": 0.857
}
]
}
],
"lastUpdated": "2025-04-06T15:42:32.705Z"
}