1534 lines
51 KiB
JSON
1534 lines
51 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:34.735Z",
|
|
"passed": false,
|
|
"duration": 942,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:35.415Z",
|
|
"passed": false,
|
|
"duration": 677,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:35.981Z",
|
|
"passed": false,
|
|
"duration": 564,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:36.670Z",
|
|
"passed": false,
|
|
"duration": 684,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:37.351Z",
|
|
"passed": false,
|
|
"duration": 678,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:38.015Z",
|
|
"passed": false,
|
|
"duration": 661,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:39.002Z",
|
|
"passed": false,
|
|
"duration": 985,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:39.575Z",
|
|
"passed": false,
|
|
"duration": 571,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:40.170Z",
|
|
"passed": true,
|
|
"duration": 594,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:40.790Z",
|
|
"passed": true,
|
|
"duration": 617,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:07.876Z",
|
|
"passed": false,
|
|
"duration": 1153,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:08.421Z",
|
|
"passed": false,
|
|
"duration": 540,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:08.967Z",
|
|
"passed": false,
|
|
"duration": 542,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:09.649Z",
|
|
"passed": false,
|
|
"duration": 677,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:10.358Z",
|
|
"passed": false,
|
|
"duration": 706,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:10.973Z",
|
|
"passed": false,
|
|
"duration": 612,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:11.549Z",
|
|
"passed": false,
|
|
"duration": 573,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:12.127Z",
|
|
"passed": false,
|
|
"duration": 575,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:13.050Z",
|
|
"passed": true,
|
|
"duration": 920,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:13.615Z",
|
|
"passed": true,
|
|
"duration": 562,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:29.662Z",
|
|
"passed": false,
|
|
"duration": 756,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:30.776Z",
|
|
"passed": false,
|
|
"duration": 1109,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:31.560Z",
|
|
"passed": false,
|
|
"duration": 781,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:32.159Z",
|
|
"passed": false,
|
|
"duration": 595,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A quick fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:33.261Z",
|
|
"passed": false,
|
|
"duration": 1099,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:34.108Z",
|
|
"passed": false,
|
|
"duration": 840,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:34.652Z",
|
|
"passed": false,
|
|
"duration": 541,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:35.143Z",
|
|
"passed": false,
|
|
"duration": 487,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:35.656Z",
|
|
"passed": true,
|
|
"duration": 510,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:36.124Z",
|
|
"passed": true,
|
|
"duration": 465,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:25.151Z",
|
|
"passed": false,
|
|
"duration": 871,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:26.099Z",
|
|
"passed": false,
|
|
"duration": 943,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:26.669Z",
|
|
"passed": false,
|
|
"duration": 567,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:27.226Z",
|
|
"passed": false,
|
|
"duration": 554,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:28.019Z",
|
|
"passed": false,
|
|
"duration": 791,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A quick fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:28.771Z",
|
|
"passed": false,
|
|
"duration": 746,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:29.352Z",
|
|
"passed": false,
|
|
"duration": 578,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:30.320Z",
|
|
"passed": false,
|
|
"duration": 966,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:30.862Z",
|
|
"passed": true,
|
|
"duration": 539,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:33.736Z",
|
|
"passed": true,
|
|
"duration": 2872,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:53.263Z",
|
|
"passed": false,
|
|
"duration": 831,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:53.884Z",
|
|
"passed": false,
|
|
"duration": 617,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:54.462Z",
|
|
"passed": false,
|
|
"duration": 575,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:55.104Z",
|
|
"passed": false,
|
|
"duration": 639,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A quick brown fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:55.894Z",
|
|
"passed": false,
|
|
"duration": 787,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:56.484Z",
|
|
"passed": false,
|
|
"duration": 582,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:59.835Z",
|
|
"passed": false,
|
|
"duration": 3348,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:52:00.400Z",
|
|
"passed": false,
|
|
"duration": 562,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:52:01.616Z",
|
|
"passed": true,
|
|
"duration": 1214,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:52:02.542Z",
|
|
"passed": true,
|
|
"duration": 923,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:09.330Z",
|
|
"passed": false,
|
|
"duration": 844,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:10.262Z",
|
|
"passed": false,
|
|
"duration": 928,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:11.043Z",
|
|
"passed": false,
|
|
"duration": 779,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:11.724Z",
|
|
"passed": false,
|
|
"duration": 678,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:12.663Z",
|
|
"passed": false,
|
|
"duration": 937,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:13.482Z",
|
|
"passed": false,
|
|
"duration": 817,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:14.188Z",
|
|
"passed": false,
|
|
"duration": 704,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:14.748Z",
|
|
"passed": false,
|
|
"duration": 557,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:15.311Z",
|
|
"passed": true,
|
|
"duration": 559,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:15.852Z",
|
|
"passed": true,
|
|
"duration": 538,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:35.433Z",
|
|
"passed": false,
|
|
"duration": 941,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:36.309Z",
|
|
"passed": false,
|
|
"duration": 871,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:36.924Z",
|
|
"passed": false,
|
|
"duration": 612,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:06.162Z",
|
|
"passed": false,
|
|
"duration": 818,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:06.810Z",
|
|
"passed": false,
|
|
"duration": 642,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:07.390Z",
|
|
"passed": false,
|
|
"duration": 576,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:08.237Z",
|
|
"passed": false,
|
|
"duration": 844,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:08.852Z",
|
|
"passed": false,
|
|
"duration": 612,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:09.559Z",
|
|
"passed": false,
|
|
"duration": 699,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:10.257Z",
|
|
"passed": false,
|
|
"duration": 695,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:10.757Z",
|
|
"passed": false,
|
|
"duration": 497,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:11.331Z",
|
|
"passed": true,
|
|
"duration": 570,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:12.093Z",
|
|
"passed": true,
|
|
"duration": 760,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:43.059Z",
|
|
"passed": false,
|
|
"duration": 2067,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:43.754Z",
|
|
"passed": false,
|
|
"duration": 689,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:45.466Z",
|
|
"passed": false,
|
|
"duration": 1708,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:46.074Z",
|
|
"passed": false,
|
|
"duration": 605,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A brown fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:48.340Z",
|
|
"passed": false,
|
|
"duration": 2263,
|
|
"reason": "Expected A fox jumps over a dog, but got a brown fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:49.025Z",
|
|
"passed": false,
|
|
"duration": 675,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:50.753Z",
|
|
"passed": false,
|
|
"duration": 1724,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:51.307Z",
|
|
"passed": false,
|
|
"duration": 551,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:53.244Z",
|
|
"passed": true,
|
|
"duration": 1934,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:53.740Z",
|
|
"passed": true,
|
|
"duration": 493,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:31.636Z",
|
|
"passed": false,
|
|
"duration": 1317,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:32.306Z",
|
|
"passed": false,
|
|
"duration": 666,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:55:37.706Z",
|
|
"passed": false,
|
|
"duration": 5397,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:39.433Z",
|
|
"passed": false,
|
|
"duration": 1722,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:40.607Z",
|
|
"passed": false,
|
|
"duration": 1171,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:55:45.810Z",
|
|
"passed": false,
|
|
"duration": 5199,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:47.634Z",
|
|
"passed": false,
|
|
"duration": 1820,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:48.336Z",
|
|
"passed": false,
|
|
"duration": 699,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A quick brown fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:55:55.720Z",
|
|
"passed": false,
|
|
"duration": 7380,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick brown fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:57.453Z",
|
|
"passed": false,
|
|
"duration": 1725,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:58.233Z",
|
|
"passed": false,
|
|
"duration": 776,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:56:03.483Z",
|
|
"passed": false,
|
|
"duration": 5247,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Content"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:56:05.453Z",
|
|
"passed": false,
|
|
"duration": 1967,
|
|
"reason": "Expected joyful, but got content",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:56:06.005Z",
|
|
"passed": true,
|
|
"duration": 548,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:56:08.626Z",
|
|
"passed": true,
|
|
"duration": 2616,
|
|
"category": "language"
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "translation",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 666,
|
|
"duration_secs": 0.666
|
|
},
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 818,
|
|
"duration_secs": 0.818
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 576,
|
|
"duration_secs": 0.576
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 1171,
|
|
"duration_secs": 1.171
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 612,
|
|
"duration_secs": 0.612
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 699,
|
|
"duration_secs": 0.699
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 695,
|
|
"duration_secs": 0.695
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 776,
|
|
"duration_secs": 0.776
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 548,
|
|
"duration_secs": 0.548
|
|
},
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 570,
|
|
"duration_secs": 0.57
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-06-05T18:56:08.627Z"
|
|
} |