mono/packages/kbot/tests/unit/reports/language.json

{
  "results": [
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T15:42:04.594Z",
      "passed": false,
      "duration": 1183,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T15:42:07.871Z",
      "passed": false,
      "duration": 3265,
      "reason": "Expected ¡Hola, mundo!, but got hola, mundo!",
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T15:42:09.128Z",
      "passed": false,
      "duration": 1244,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T15:42:10.051Z",
      "passed": false,
      "duration": 914,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T15:42:10.948Z",
      "passed": false,
      "duration": 886,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T15:42:15.782Z",
      "passed": false,
      "duration": 4822,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T15:42:16.691Z",
      "passed": false,
      "duration": 895,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T15:42:17.494Z",
      "passed": false,
      "duration": 789,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "A quick brown fox jumps over a lazy dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T15:42:18.910Z",
      "passed": false,
      "duration": 1405,
      "reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "\"The quick brown fox jumps over the lazy dog.\""
      ],
      "expected": "A fox jumps over a dog",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T15:42:23.978Z",
      "passed": false,
      "duration": 5056,
      "reason": "Expected A fox jumps over a dog, but got \"the quick brown fox jumps over the lazy dog.\"",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "A fox jumps over a dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T15:42:24.931Z",
      "passed": false,
      "duration": 942,
      "reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "A fox leaps over a dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T15:42:25.886Z",
      "passed": false,
      "duration": 944,
      "reason": "Expected A fox jumps over a dog, but got a fox leaps over a dog.",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [
        "French"
      ],
      "expected": "French",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T15:42:26.837Z",
      "passed": false,
      "duration": 939,
      "reason": "Expected French, but got french",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [],
      "expected": "French",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T15:42:27.292Z",
      "passed": false,
      "duration": 442,
      "reason": "Model returned empty response",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [
        "French"
      ],
      "expected": "French",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T15:42:28.460Z",
      "passed": false,
      "duration": 1152,
      "reason": "Expected French, but got french",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [
        "French"
      ],
      "expected": "French",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T15:42:29.493Z",
      "passed": false,
      "duration": 1022,
      "reason": "Expected French, but got french",
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [
        "Joyful"
      ],
      "expected": "joyful",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T15:42:30.442Z",
      "passed": true,
      "duration": 938,
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [],
      "expected": "joyful",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T15:42:30.888Z",
      "passed": false,
      "duration": 436,
      "reason": "Model returned empty response",
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [
        "Joyful"
      ],
      "expected": "joyful",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T15:42:31.838Z",
      "passed": true,
      "duration": 947,
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [
        "Joyful"
      ],
      "expected": "joyful",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T15:42:32.705Z",
      "passed": true,
      "duration": 857,
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T22:28:08.128Z",
      "passed": false,
      "duration": 1322,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T22:28:12.115Z",
      "passed": false,
      "duration": 3972,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T22:28:12.895Z",
      "passed": false,
      "duration": 769,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "translation",
      "prompt": "Translate \"Hello, world!\" to Spanish. Return only the translation, no explanation.",
      "result": [
        "¡Hola, mundo!"
      ],
      "expected": "¡Hola, mundo!",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T22:28:13.738Z",
      "passed": false,
      "duration": 832,
      "reason": "Expected ¡Hola, mundo!, but got ¡hola, mundo!",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "**Corrected Sentence:** I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T22:28:14.567Z",
      "passed": false,
      "duration": 819,
      "reason": "Expected I went to the store yesterday, but got **corrected sentence:** i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T22:28:21.611Z",
      "passed": false,
      "duration": 7029,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T22:28:22.737Z",
      "passed": false,
      "duration": 1113,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "grammar",
      "prompt": "Correct the grammar in: \"I goes to the store yesterday\". Return only the corrected sentence, no explanation.",
      "result": [
        "I went to the store yesterday."
      ],
      "expected": "I went to the store yesterday",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T22:28:23.760Z",
      "passed": false,
      "duration": 1011,
      "reason": "Expected I went to the store yesterday, but got i went to the store yesterday.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "Summary: The quick brown fox jumps over the lazy dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T22:28:24.602Z",
      "passed": false,
      "duration": 832,
      "reason": "Expected A fox jumps over a dog, but got summary: the quick brown fox jumps over the lazy dog.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "A quick brown fox jumps over a lazy dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T22:28:29.608Z",
      "passed": false,
      "duration": 4994,
      "reason": "Expected A fox jumps over a dog, but got a quick brown fox jumps over a lazy dog.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "A swift fox leaps over a sluggish dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T22:28:30.667Z",
      "passed": false,
      "duration": 1048,
      "reason": "Expected A fox jumps over a dog, but got a swift fox leaps over a sluggish dog.",
      "category": "language"
    },
    {
      "test": "summarization",
      "prompt": "Summarize: \"The quick brown fox jumps over the lazy dog\". Return only the summary, no explanation.",
      "result": [
        "A fox jumps over a dog."
      ],
      "expected": "A fox jumps over a dog",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T22:28:31.511Z",
      "passed": false,
      "duration": 832,
      "reason": "Expected A fox jumps over a dog, but got a fox jumps over a dog.",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [
        "French"
      ],
      "expected": "French",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T22:28:32.264Z",
      "passed": false,
      "duration": 741,
      "reason": "Expected French, but got french",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [],
      "expected": "French",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T22:28:32.694Z",
      "passed": false,
      "duration": 419,
      "reason": "Model returned empty response",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [
        "French"
      ],
      "expected": "French",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T22:28:33.593Z",
      "passed": false,
      "duration": 887,
      "reason": "Expected French, but got french",
      "category": "language"
    },
    {
      "test": "language_detection",
      "prompt": "Identify the language of: \"Bonjour, comment allez-vous?\". Return only the language name, no explanation.",
      "result": [
        "French"
      ],
      "expected": "French",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T22:28:34.490Z",
      "passed": false,
      "duration": 886,
      "reason": "Expected French, but got french",
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [
        "Delighted"
      ],
      "expected": "joyful",
      "model": "openai/gpt-3.5-turbo",
      "router": "openai/gpt-3.5-turbo",
      "timestamp": "2025-04-06T22:28:35.215Z",
      "passed": false,
      "duration": 715,
      "reason": "Expected joyful, but got delighted",
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [],
      "expected": "joyful",
      "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "router": "deepseek/deepseek-r1-distill-qwen-14b:free",
      "timestamp": "2025-04-06T22:28:35.639Z",
      "passed": false,
      "duration": 411,
      "reason": "Model returned empty response",
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [
        "Joyful"
      ],
      "expected": "joyful",
      "model": "openai/gpt-4o-mini",
      "router": "openai/gpt-4o-mini",
      "timestamp": "2025-04-06T22:28:36.294Z",
      "passed": true,
      "duration": 644,
      "category": "language"
    },
    {
      "test": "synonyms",
      "prompt": "Provide a synonym for \"happy\". Return only the synonym, no explanation.",
      "result": [
        "Joyful"
      ],
      "expected": "joyful",
      "model": "openrouter/quasar-alpha",
      "router": "openrouter/quasar-alpha",
      "timestamp": "2025-04-06T22:28:37.021Z",
      "passed": true,
      "duration": 716,
      "category": "language"
    }
  ],
  "highscores": [
    {
      "test": "translation",
      "rankings": [
        {
          "model": "openai/gpt-4o-mini",
          "duration": 769,
          "duration_secs": 0.769
        },
        {
          "model": "openrouter/quasar-alpha",
          "duration": 832,
          "duration_secs": 0.832
        }
      ]
    },
    {
      "test": "grammar",
      "rankings": [
        {
          "model": "openai/gpt-3.5-turbo",
          "duration": 819,
          "duration_secs": 0.819
        },
        {
          "model": "openrouter/quasar-alpha",
          "duration": 1011,
          "duration_secs": 1.011
        }
      ]
    },
    {
      "test": "summarization",
      "rankings": [
        {
          "model": "openai/gpt-3.5-turbo",
          "duration": 832,
          "duration_secs": 0.832
        },
        {
          "model": "openrouter/quasar-alpha",
          "duration": 832,
          "duration_secs": 0.832
        }
      ]
    },
    {
      "test": "language_detection",
      "rankings": [
        {
          "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
          "duration": 419,
          "duration_secs": 0.419
        },
        {
          "model": "openai/gpt-3.5-turbo",
          "duration": 741,
          "duration_secs": 0.741
        }
      ]
    },
    {
      "test": "synonyms",
      "rankings": [
        {
          "model": "deepseek/deepseek-r1-distill-qwen-14b:free",
          "duration": 411,
          "duration_secs": 0.411
        },
        {
          "model": "openai/gpt-4o-mini",
          "duration": 644,
          "duration_secs": 0.644
        }
      ]
    }
  ],
  "lastUpdated": "2025-04-06T22:28:37.021Z"
}