669 lines
22 KiB
JSON
669 lines
22 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:04.594Z",
|
|
"passed": false,
|
|
"duration": 1183,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T15:42:07.871Z",
|
|
"passed": false,
|
|
"duration": 3265,
|
|
"reason": "Expected ¡Hola, mundo!, but got hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T15:42:09.128Z",
|
|
"passed": false,
|
|
"duration": 1244,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T15:42:10.051Z",
|
|
"passed": false,
|
|
"duration": 914,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:10.948Z",
|
|
"passed": false,
|
|
"duration": 886,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T15:42:15.782Z",
|
|
"passed": false,
|
|
"duration": 4822,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T15:42:16.691Z",
|
|
"passed": false,
|
|
"duration": 895,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T15:42:17.494Z",
|
|
"passed": false,
|
|
"duration": 789,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A quick brown fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:18.910Z",
|
|
"passed": false,
|
|
"duration": 1405,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"\"The quick brown fox jumps over the lazy dog.\""
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T15:42:23.978Z",
|
|
"passed": false,
|
|
"duration": 5056,
|
|
"reason": "Expected A fox jumps over a dog, but got \"the quick brown fox jumps over the lazy dog.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T15:42:24.931Z",
|
|
"passed": false,
|
|
"duration": 942,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T15:42:25.886Z",
|
|
"passed": false,
|
|
"duration": 944,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:26.837Z",
|
|
"passed": false,
|
|
"duration": 939,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [],
|
|
"expected": "French",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T15:42:27.292Z",
|
|
"passed": false,
|
|
"duration": 442,
|
|
"reason": "Model returned empty response",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T15:42:28.460Z",
|
|
"passed": false,
|
|
"duration": 1152,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T15:42:29.493Z",
|
|
"passed": false,
|
|
"duration": 1022,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T15:42:30.442Z",
|
|
"passed": true,
|
|
"duration": 938,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [],
|
|
"expected": "joyful",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T15:42:30.888Z",
|
|
"passed": false,
|
|
"duration": 436,
|
|
"reason": "Model returned empty response",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T15:42:31.838Z",
|
|
"passed": true,
|
|
"duration": 947,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T15:42:32.705Z",
|
|
"passed": true,
|
|
"duration": 857,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T22:28:08.128Z",
|
|
"passed": false,
|
|
"duration": 1322,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T22:28:12.115Z",
|
|
"passed": false,
|
|
"duration": 3972,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T22:28:12.895Z",
|
|
"passed": false,
|
|
"duration": 769,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T22:28:13.738Z",
|
|
"passed": false,
|
|
"duration": 832,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"**Corrected Sentence:** I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T22:28:14.567Z",
|
|
"passed": false,
|
|
"duration": 819,
|
|
"reason": "Expected I went to the store yesterday, but got **corrected sentence:** i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T22:28:21.611Z",
|
|
"passed": false,
|
|
"duration": 7029,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T22:28:22.737Z",
|
|
"passed": false,
|
|
"duration": 1113,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T22:28:23.760Z",
|
|
"passed": false,
|
|
"duration": 1011,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"Summary: The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T22:28:24.602Z",
|
|
"passed": false,
|
|
"duration": 832,
|
|
"reason": "Expected A fox jumps over a dog, but got summary: the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A quick brown fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T22:28:29.608Z",
|
|
"passed": false,
|
|
"duration": 4994,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A swift fox leaps over a sluggish dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T22:28:30.667Z",
|
|
"passed": false,
|
|
"duration": 1048,
|
|
"reason": "Expected A fox jumps over a dog, but got a swift fox leaps over a sluggish dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T22:28:31.511Z",
|
|
"passed": false,
|
|
"duration": 832,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T22:28:32.264Z",
|
|
"passed": false,
|
|
"duration": 741,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [],
|
|
"expected": "French",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T22:28:32.694Z",
|
|
"passed": false,
|
|
"duration": 419,
|
|
"reason": "Model returned empty response",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T22:28:33.593Z",
|
|
"passed": false,
|
|
"duration": 887,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T22:28:34.490Z",
|
|
"passed": false,
|
|
"duration": 886,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Delighted"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-04-06T22:28:35.215Z",
|
|
"passed": false,
|
|
"duration": 715,
|
|
"reason": "Expected joyful, but got delighted",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [],
|
|
"expected": "joyful",
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"timestamp": "2025-04-06T22:28:35.639Z",
|
|
"passed": false,
|
|
"duration": 411,
|
|
"reason": "Model returned empty response",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-04-06T22:28:36.294Z",
|
|
"passed": true,
|
|
"duration": 644,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openrouter/quasar-alpha",
|
|
"router": "openrouter/quasar-alpha",
|
|
"timestamp": "2025-04-06T22:28:37.021Z",
|
|
"passed": true,
|
|
"duration": 716,
|
|
"category": "language"
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "translation",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 769,
|
|
"duration_secs": 0.769
|
|
},
|
|
{
|
|
"model": "openrouter/quasar-alpha",
|
|
"duration": 832,
|
|
"duration_secs": 0.832
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 819,
|
|
"duration_secs": 0.819
|
|
},
|
|
{
|
|
"model": "openrouter/quasar-alpha",
|
|
"duration": 1011,
|
|
"duration_secs": 1.011
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 832,
|
|
"duration_secs": 0.832
|
|
},
|
|
{
|
|
"model": "openrouter/quasar-alpha",
|
|
"duration": 832,
|
|
"duration_secs": 0.832
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"rankings": [
|
|
{
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"duration": 419,
|
|
"duration_secs": 0.419
|
|
},
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 741,
|
|
"duration_secs": 0.741
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"rankings": [
|
|
{
|
|
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
|
|
"duration": 411,
|
|
"duration_secs": 0.411
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 644,
|
|
"duration_secs": 0.644
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-04-06T22:28:37.021Z"
|
|
} |