2074 lines
69 KiB
JSON
2074 lines
69 KiB
JSON
{
|
|
"results": [
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:34.735Z",
|
|
"passed": false,
|
|
"duration": 942,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:35.415Z",
|
|
"passed": false,
|
|
"duration": 677,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:35.981Z",
|
|
"passed": false,
|
|
"duration": 564,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:36.670Z",
|
|
"passed": false,
|
|
"duration": 684,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:37.351Z",
|
|
"passed": false,
|
|
"duration": 678,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:38.015Z",
|
|
"passed": false,
|
|
"duration": 661,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:39.002Z",
|
|
"passed": false,
|
|
"duration": 985,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:39.575Z",
|
|
"passed": false,
|
|
"duration": 571,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T05:14:40.170Z",
|
|
"passed": true,
|
|
"duration": 594,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T05:14:40.790Z",
|
|
"passed": true,
|
|
"duration": 617,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:07.876Z",
|
|
"passed": false,
|
|
"duration": 1153,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:08.421Z",
|
|
"passed": false,
|
|
"duration": 540,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:08.967Z",
|
|
"passed": false,
|
|
"duration": 542,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:09.649Z",
|
|
"passed": false,
|
|
"duration": 677,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:10.358Z",
|
|
"passed": false,
|
|
"duration": 706,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:10.973Z",
|
|
"passed": false,
|
|
"duration": 612,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:11.549Z",
|
|
"passed": false,
|
|
"duration": 573,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:12.127Z",
|
|
"passed": false,
|
|
"duration": 575,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:46:13.050Z",
|
|
"passed": true,
|
|
"duration": 920,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:46:13.615Z",
|
|
"passed": true,
|
|
"duration": 562,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:29.662Z",
|
|
"passed": false,
|
|
"duration": 756,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:30.776Z",
|
|
"passed": false,
|
|
"duration": 1109,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:31.560Z",
|
|
"passed": false,
|
|
"duration": 781,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:32.159Z",
|
|
"passed": false,
|
|
"duration": 595,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A quick fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:33.261Z",
|
|
"passed": false,
|
|
"duration": 1099,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:34.108Z",
|
|
"passed": false,
|
|
"duration": 840,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:34.652Z",
|
|
"passed": false,
|
|
"duration": 541,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:35.143Z",
|
|
"passed": false,
|
|
"duration": 487,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:50:35.656Z",
|
|
"passed": true,
|
|
"duration": 510,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:50:36.124Z",
|
|
"passed": true,
|
|
"duration": 465,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:25.151Z",
|
|
"passed": false,
|
|
"duration": 871,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:26.099Z",
|
|
"passed": false,
|
|
"duration": 943,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:26.669Z",
|
|
"passed": false,
|
|
"duration": 567,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:27.226Z",
|
|
"passed": false,
|
|
"duration": 554,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:28.019Z",
|
|
"passed": false,
|
|
"duration": 791,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A quick fox jumps over a lazy dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:28.771Z",
|
|
"passed": false,
|
|
"duration": 746,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick fox jumps over a lazy dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:29.352Z",
|
|
"passed": false,
|
|
"duration": 578,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:30.320Z",
|
|
"passed": false,
|
|
"duration": 966,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:30.862Z",
|
|
"passed": true,
|
|
"duration": 539,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:33.736Z",
|
|
"passed": true,
|
|
"duration": 2872,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:53.263Z",
|
|
"passed": false,
|
|
"duration": 831,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:53.884Z",
|
|
"passed": false,
|
|
"duration": 617,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:54.462Z",
|
|
"passed": false,
|
|
"duration": 575,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:55.104Z",
|
|
"passed": false,
|
|
"duration": 639,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A quick brown fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:55.894Z",
|
|
"passed": false,
|
|
"duration": 787,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:51:56.484Z",
|
|
"passed": false,
|
|
"duration": 582,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:51:59.835Z",
|
|
"passed": false,
|
|
"duration": 3348,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:52:00.400Z",
|
|
"passed": false,
|
|
"duration": 562,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:52:01.616Z",
|
|
"passed": true,
|
|
"duration": 1214,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:52:02.542Z",
|
|
"passed": true,
|
|
"duration": 923,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:09.330Z",
|
|
"passed": false,
|
|
"duration": 844,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:10.262Z",
|
|
"passed": false,
|
|
"duration": 928,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:11.043Z",
|
|
"passed": false,
|
|
"duration": 779,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:11.724Z",
|
|
"passed": false,
|
|
"duration": 678,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:12.663Z",
|
|
"passed": false,
|
|
"duration": 937,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:13.482Z",
|
|
"passed": false,
|
|
"duration": 817,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:14.188Z",
|
|
"passed": false,
|
|
"duration": 704,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:14.748Z",
|
|
"passed": false,
|
|
"duration": 557,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:15.311Z",
|
|
"passed": true,
|
|
"duration": 559,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:15.852Z",
|
|
"passed": true,
|
|
"duration": 538,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:35.433Z",
|
|
"passed": false,
|
|
"duration": 941,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:53:36.309Z",
|
|
"passed": false,
|
|
"duration": 871,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:53:36.924Z",
|
|
"passed": false,
|
|
"duration": 612,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:06.162Z",
|
|
"passed": false,
|
|
"duration": 818,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:06.810Z",
|
|
"passed": false,
|
|
"duration": 642,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:07.390Z",
|
|
"passed": false,
|
|
"duration": 576,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:08.237Z",
|
|
"passed": false,
|
|
"duration": 844,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"The quick brown fox jumps over the dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:08.852Z",
|
|
"passed": false,
|
|
"duration": 612,
|
|
"reason": "Expected A fox jumps over a dog, but got the quick brown fox jumps over the dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:09.559Z",
|
|
"passed": false,
|
|
"duration": 699,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:10.257Z",
|
|
"passed": false,
|
|
"duration": 695,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:10.757Z",
|
|
"passed": false,
|
|
"duration": 497,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"router": "openai/gpt-3.5-turbo",
|
|
"timestamp": "2025-06-05T18:54:11.331Z",
|
|
"passed": true,
|
|
"duration": 570,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:12.093Z",
|
|
"passed": true,
|
|
"duration": 760,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:43.059Z",
|
|
"passed": false,
|
|
"duration": 2067,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:43.754Z",
|
|
"passed": false,
|
|
"duration": 689,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:45.466Z",
|
|
"passed": false,
|
|
"duration": 1708,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:46.074Z",
|
|
"passed": false,
|
|
"duration": 605,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A brown fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:48.340Z",
|
|
"passed": false,
|
|
"duration": 2263,
|
|
"reason": "Expected A fox jumps over a dog, but got a brown fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:49.025Z",
|
|
"passed": false,
|
|
"duration": 675,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:50.753Z",
|
|
"passed": false,
|
|
"duration": 1724,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:51.307Z",
|
|
"passed": false,
|
|
"duration": 551,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:54:53.244Z",
|
|
"passed": true,
|
|
"duration": 1934,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:54:53.740Z",
|
|
"passed": true,
|
|
"duration": 493,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:31.636Z",
|
|
"passed": false,
|
|
"duration": 1317,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:32.306Z",
|
|
"passed": false,
|
|
"duration": 666,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:55:37.706Z",
|
|
"passed": false,
|
|
"duration": 5397,
|
|
"reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:39.433Z",
|
|
"passed": false,
|
|
"duration": 1722,
|
|
"reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:40.607Z",
|
|
"passed": false,
|
|
"duration": 1171,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:55:45.810Z",
|
|
"passed": false,
|
|
"duration": 5199,
|
|
"reason": "Expected I went to the store yesterday, but got \"i went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:47.634Z",
|
|
"passed": false,
|
|
"duration": 1820,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:48.336Z",
|
|
"passed": false,
|
|
"duration": 699,
|
|
"reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A quick brown fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:55:55.720Z",
|
|
"passed": false,
|
|
"duration": 7380,
|
|
"reason": "Expected A fox jumps over a dog, but got a quick brown fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:55:57.453Z",
|
|
"passed": false,
|
|
"duration": 1725,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:55:58.233Z",
|
|
"passed": false,
|
|
"duration": 776,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:56:03.483Z",
|
|
"passed": false,
|
|
"duration": 5247,
|
|
"reason": "Expected French, but got french",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Content"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T18:56:05.453Z",
|
|
"passed": false,
|
|
"duration": 1967,
|
|
"reason": "Expected joyful, but got content",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T18:56:06.005Z",
|
|
"passed": true,
|
|
"duration": 548,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T18:56:08.626Z",
|
|
"passed": true,
|
|
"duration": 2616,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:30:21.412Z",
|
|
"passed": true,
|
|
"duration": 1560,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:30:22.869Z",
|
|
"passed": true,
|
|
"duration": 1451,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:30:28.307Z",
|
|
"passed": true,
|
|
"duration": 5434,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:30:29.513Z",
|
|
"passed": false,
|
|
"duration": 1201,
|
|
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:30:30.212Z",
|
|
"passed": false,
|
|
"duration": 695,
|
|
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:30:33.611Z",
|
|
"passed": false,
|
|
"duration": 3395,
|
|
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A brown fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:30:34.920Z",
|
|
"passed": false,
|
|
"duration": 1304,
|
|
"reason": "Expected A fox jumps over a dog, but got A brown fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:30:35.620Z",
|
|
"passed": false,
|
|
"duration": 692,
|
|
"reason": "Expected A fox jumps over a dog, but got A fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"\"A quick brown fox leaps over a dog.\""
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:30:49.662Z",
|
|
"passed": false,
|
|
"duration": 14038,
|
|
"reason": "Expected A fox jumps over a dog, but got \"A quick brown fox leaps over a dog.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:30:50.805Z",
|
|
"passed": true,
|
|
"duration": 1137,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:30:51.269Z",
|
|
"passed": true,
|
|
"duration": 459,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:30:55.198Z",
|
|
"passed": true,
|
|
"duration": 3924,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:30:56.455Z",
|
|
"passed": true,
|
|
"duration": 1251,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:30:57.083Z",
|
|
"passed": true,
|
|
"duration": 622,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:31:00.924Z",
|
|
"passed": true,
|
|
"duration": 3836,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:41:10.838Z",
|
|
"passed": true,
|
|
"duration": 1465,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:41:11.649Z",
|
|
"passed": true,
|
|
"duration": 805,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:41:16.717Z",
|
|
"passed": true,
|
|
"duration": 5063,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:41:17.368Z",
|
|
"passed": false,
|
|
"duration": 646,
|
|
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:41:18.259Z",
|
|
"passed": false,
|
|
"duration": 886,
|
|
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:41:23.006Z",
|
|
"passed": false,
|
|
"duration": 4742,
|
|
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A brown fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:41:32.126Z",
|
|
"passed": false,
|
|
"duration": 9115,
|
|
"reason": "Expected A fox jumps over a dog, but got A brown fox leaps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A brown fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:44:29.781Z",
|
|
"passed": false,
|
|
"duration": 6689,
|
|
"reason": "Expected A fox jumps over a dog, but got A brown fox jumps over a dog.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:45:17.372Z",
|
|
"passed": true,
|
|
"duration": 47581,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox jumps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:52:22.430Z",
|
|
"passed": true,
|
|
"duration": 27328,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [
|
|
"A fox leaps over a dog."
|
|
],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:52:30.984Z",
|
|
"passed": true,
|
|
"duration": 8548,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"prompt": "Summarize: \"The quick brown fox jumps over the dog\". Return only the summary, compact, no explanation.",
|
|
"result": [],
|
|
"expected": "A fox jumps over a dog",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:53:31.003Z",
|
|
"passed": false,
|
|
"duration": 60014,
|
|
"error": {
|
|
"message": "API call timed out",
|
|
"code": "UNKNOWN",
|
|
"type": "Error",
|
|
"details": {
|
|
"stack": "Error: API call timed out\n at Timeout._onTimeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\commons.ts:296:33)\n at listOnTimeout (node:internal/timers:588:17)\n at processTimers (node:internal/timers:523:7)",
|
|
"message": "API call timed out"
|
|
}
|
|
},
|
|
"reason": "API call timed out",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:53:32.418Z",
|
|
"passed": true,
|
|
"duration": 1408,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:53:33.063Z",
|
|
"passed": true,
|
|
"duration": 639,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "translation",
|
|
"prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
|
|
"result": [
|
|
"¡Hola, mundo!"
|
|
],
|
|
"expected": "¡Hola, mundo!",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:53:35.309Z",
|
|
"passed": true,
|
|
"duration": 2241,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:53:35.959Z",
|
|
"passed": false,
|
|
"duration": 645,
|
|
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"\"I went to the store yesterday.\""
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:53:36.606Z",
|
|
"passed": false,
|
|
"duration": 641,
|
|
"reason": "Expected I went to the store yesterday, but got \"I went to the store yesterday.\"",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
|
|
"result": [
|
|
"I went to the store yesterday."
|
|
],
|
|
"expected": "I went to the store yesterday",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:53:41.701Z",
|
|
"passed": false,
|
|
"duration": 5090,
|
|
"reason": "Expected I went to the store yesterday, but got I went to the store yesterday.",
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:53:42.454Z",
|
|
"passed": true,
|
|
"duration": 747,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "openai/gpt-4o-mini",
|
|
"router": "openai/gpt-4o-mini",
|
|
"timestamp": "2025-06-05T22:53:43.116Z",
|
|
"passed": true,
|
|
"duration": 657,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
|
|
"result": [
|
|
"French"
|
|
],
|
|
"expected": "French",
|
|
"model": "deepseek/deepseek-r1:free",
|
|
"router": "deepseek/deepseek-r1:free",
|
|
"timestamp": "2025-06-05T22:53:47.420Z",
|
|
"passed": true,
|
|
"duration": 4299,
|
|
"category": "language"
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
|
|
"result": [
|
|
"Joyful"
|
|
],
|
|
"expected": "joyful",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"router": "anthropic/claude-sonnet-4",
|
|
"timestamp": "2025-06-05T22:53:48.762Z",
|
|
"passed": true,
|
|
"duration": 1336,
|
|
"category": "language"
|
|
}
|
|
],
|
|
"highscores": [
|
|
{
|
|
"test": "translation",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 639,
|
|
"duration_secs": 0.639
|
|
},
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 818,
|
|
"duration_secs": 0.818
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "grammar",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 576,
|
|
"duration_secs": 0.576
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 641,
|
|
"duration_secs": 0.641
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "summarization",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 612,
|
|
"duration_secs": 0.612
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 8548,
|
|
"duration_secs": 8.548
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "language_detection",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 657,
|
|
"duration_secs": 0.657
|
|
},
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 695,
|
|
"duration_secs": 0.695
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"test": "synonyms",
|
|
"rankings": [
|
|
{
|
|
"model": "openai/gpt-3.5-turbo",
|
|
"duration": 570,
|
|
"duration_secs": 0.57
|
|
},
|
|
{
|
|
"model": "openai/gpt-4o-mini",
|
|
"duration": 622,
|
|
"duration_secs": 0.622
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"lastUpdated": "2025-06-05T22:53:48.763Z"
|
|
} |