import * as path from 'node:path'

import { E_OPENROUTER_MODEL, run } from '../../src/index'
import { sync as write } from '@polymech/fs/write'
import { sync as read } from '@polymech/fs/read'
import { sync as exists } from '@polymech/fs/exists'
import { sync as mkdirp } from 'mkdirp'

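/**
 * Categories used to pick a default model pool for a test run.
 * Note that `FILES` maps to the singular string 'file'; report
 * filenames and category strings rely on these exact values.
 */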
export enum ModelCategory {
    FAST = 'fast',
    LANGUAGE = 'language',
    TOOL = 'tool',
    ALL = 'all',
    CODING = 'coding',
    FILES = 'file'
}

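// Curated per-category model pools. These lists are snapshots and are
// expected to change as models are added to or removed from OpenRouter.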
export const getFastModels = (): string[] => {
    return [
        E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_3_5_TURBO,
        E_OPENROUTER_MODEL.MODEL_DEEPSEEK_DEEPSEEK_R1_DISTILL_QWEN_14B_FREE,
        E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI,
        E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA
    ]
}

export const getCodingModels = (): string[] => {
    return [
        E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA
    ]
}

export const getFileModels = (): string[] => {
    return [
        E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI,
        E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA,
        E_OPENROUTER_MODEL.MODEL_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE
    ]
}

export const getLanguageModels = (): string[] => {
    return [
        E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET,
        E_OPENROUTER_MODEL.MODEL_QWEN_QWQ_32B,
        E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI,
        E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_3_5_TURBO
    ]
}

export const getToolModels = (): string[] => {
    return [
        E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O
    ]
}

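/**
 * Resolves the model pool for a given test category. `ALL` (and any
 * unknown category) falls back to a broad mixed pool.
 *
 * Example (illustrative):
 *   const models = getDefaultModels(ModelCategory.CODING)
 *   // -> [E_OPENROUTER_MODEL.MODEL_OPENROUTER_QUASAR_ALPHA]
 */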
export const getDefaultModels = (category: ModelCategory = ModelCategory.FAST): string[] => {
    switch (category) {
        case ModelCategory.FAST:
            return getFastModels()
        case ModelCategory.LANGUAGE:
            return getLanguageModels()
        case ModelCategory.TOOL:
            return getToolModels()
        case ModelCategory.CODING:
            return getCodingModels()
        case ModelCategory.FILES:
            return getFileModels()
        case ModelCategory.ALL:
        default:
            return [
                E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_3_5_SONNET,
                E_OPENROUTER_MODEL.MODEL_QWEN_QWQ_32B,
                E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI,
                E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_3_5_TURBO,
                E_OPENROUTER_MODEL.MODEL_DEEPSEEK_DEEPSEEK_R1,
                E_OPENROUTER_MODEL.MODEL_DEEPSEEK_DEEPSEEK_R1_DISTILL_QWEN_14B_FREE
            ]
    }
}

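/**
 * Checks whether a model id is one of the known OpenRouter models.
 *
 * Example (illustrative):
 *   isOpenRouterModel(E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O) // true
 *   isOpenRouterModel('not-a-model')                          // false
 */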
export const isOpenRouterModel = (model: string): boolean => {
    return Object.values(E_OPENROUTER_MODEL).includes(model as E_OPENROUTER_MODEL)
}

export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
export const TEST_TIMEOUT = 30000 // 30-second timeout for API calls

// Report paths configuration
export const REPORTS_DIR = path.resolve(__dirname, './reports')

// Ensure the reports directory exists before any test writes to it
if (exists(REPORTS_DIR) !== 'directory') {
    mkdirp(REPORTS_DIR)
}

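/**
 * Builds the report file path for a category, e.g.
 * getReportPaths('basic', 'json') -> '<REPORTS_DIR>/basic.json'.
 */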
export const getReportPaths = (category: string, type: 'json' | 'md'): string => {
    const base = path.resolve(REPORTS_DIR, category)
    return `${base}.${type}`
}

export interface TestResult {
    test: string;
    prompt: string;
    result: string[];
    expected: string;
    model: string;
    router: string;
    timestamp: string;
    passed: boolean;
    reason?: string;
    error?: {
        message: string;
        code?: string;
        type?: string;
        details?: any;
    };
    duration?: number;
    category?: string;
}

export interface TestHighscore {
    test: string;
    rankings: {
        model: string;
        duration: number;
        duration_secs: number;
    }[];
}

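/**
 * Normalizes an arbitrary thrown value into the TestResult error shape.
 * `details` prefers `error.response.data`, so HTTP client errors (an
 * Axios-like response shape is assumed here) keep their payload.
 */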
export const formatError = (error: any): TestResult['error'] => {
    return {
        message: error?.message || 'Unknown error',
        code: error?.code || 'UNKNOWN',
        type: error?.type || error?.constructor?.name || 'Error',
        details: error?.response?.data || error?.response || error
    }
}

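/**
 * Treats null/undefined, empty arrays, and arrays of blank strings as empty.
 *
 * Example (illustrative):
 *   isEmptyResponse(undefined)   // true
 *   isEmptyResponse(['  ', ''])  // true
 *   isEmptyResponse(['answer'])  // false
 */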
export const isEmptyResponse = (result: string[] | null | undefined): boolean => {
    return !result || result.length === 0 || result.every(r => !r || r.trim() === '')
}

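/**
 * Ranks models per test by duration (ascending) and keeps the two fastest.
 * Results without a recorded duration are treated as 0 ms, which sorts
 * them first; callers should be aware of that edge case.
 */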
export const generateHighscores = (latestResults: Map<string, Map<string, TestResult>>): TestHighscore[] => {
    const highscores: TestHighscore[] = []

    for (const [testName, modelResults] of latestResults) {
        // Convert model results to an array, sort by duration, keep the top 2
        const sortedResults = Array.from(modelResults.entries())
            .map(([model, result]) => ({ model, result }))
            .sort((a, b) => (a.result.duration || 0) - (b.result.duration || 0))
            .slice(0, 2)

        if (sortedResults.length > 0) {
            highscores.push({
                test: testName,
                rankings: sortedResults.map(({ model, result }) => ({
                    model,
                    duration: result.duration || 0,
                    duration_secs: (result.duration || 0) / 1000
                }))
            })
        }
    }

    return highscores
}

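/**
 * Runs a single prompt against a model, races it against TEST_TIMEOUT, and
 * persists the result (plus recomputed highscores) to the category log and
 * the central all.json. A pass means the first line of output, trimmed and
 * lower-cased, equals `expected` exactly. Errors are logged and re-thrown.
 *
 * Example (illustrative; the test name and expectation are made up):
 *   const result = await runTest(
 *       'Reply with exactly: ok',
 *       'ok',
 *       'sanity-check',
 *       E_OPENROUTER_MODEL.MODEL_OPENAI_GPT_4O_MINI,
 *       getReportPaths('basic', 'json')
 *   )
 */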
export const runTest = async (
    prompt: string,
    expected: string,
    testName: string,
    modelName: string,
    logPath: string,
    mode: 'completion' | 'tools' | 'assistant' | 'custom' = 'completion',
    options: any = {}
): Promise<TestResult> => {
    let model = 'unknown'
    let router = 'openrouter'
    const startTime = Date.now()
    let error: TestResult['error'] | undefined
    let testResult: TestResult | undefined
    const defaultOptions = {
        filters: 'code'
    }
    try {
        const result = await Promise.race([
            run({
                prompt,
                mode,
                model: modelName,
                path: TEST_BASE_PATH,
                logs: TEST_LOGS_PATH,
                preferences: TEST_PREFERENCES_PATH,
                logLevel: 2,
                ...defaultOptions,
                ...options,
                onRun: async (resolvedOptions) => {
                    // Record the model the run actually resolved to
                    model = resolvedOptions.model || 'unknown'
                    router = resolvedOptions.model as string
                    return resolvedOptions
                }
            }),
            new Promise((_, reject) =>
                setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
            )
        ]) as string[]

        if (isEmptyResponse(result)) {
            testResult = {
                test: testName,
                prompt,
                result: [],
                expected,
                model,
                router,
                timestamp: new Date().toISOString(),
                passed: false,
                duration: Date.now() - startTime,
                reason: 'Model returned empty response'
            }
        } else {
            const actual = result?.[0]?.trim()?.toLowerCase() || ''
            const passed = actual === expected

            testResult = {
                test: testName,
                prompt,
                result: result || [],
                expected,
                model,
                router,
                timestamp: new Date().toISOString(),
                passed,
                duration: Date.now() - startTime,
                reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
            }
        }
    } catch (e) {
        error = formatError(e)
        testResult = {
            test: testName,
            prompt,
            result: [],
            expected,
            model,
            router,
            timestamp: new Date().toISOString(),
            passed: false,
            duration: Date.now() - startTime,
            error,
            reason: error?.message || 'Unknown error occurred'
        }
        throw e
    } finally {
        if (testResult) {
            const finalResult = testResult

            // Extract category from logPath (e.g., 'reports/basic.json' -> 'basic')
            finalResult.category = path.basename(logPath, path.extname(logPath))

            // Group results by test and model, keeping only the latest run per pair
            const groupLatest = (results: TestResult[]): Map<string, Map<string, TestResult>> => {
                const latest = new Map<string, Map<string, TestResult>>()
                results.forEach(result => {
                    if (!latest.has(result.test)) {
                        latest.set(result.test, new Map())
                    }
                    const testMap = latest.get(result.test)!
                    const existingResult = testMap.get(result.model)
                    if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
                        testMap.set(result.model, result)
                    }
                })
                return latest
            }

            // Append the new result to a log file and recompute its highscores
            const updateLog = (targetPath: string) => {
                const existingData = exists(targetPath) === 'file'
                    ? JSON.parse(read(targetPath) as string)
                    : { results: [], highscores: [] }
                const updatedResults = [...(existingData.results || []), finalResult]
                write(targetPath, JSON.stringify({
                    results: updatedResults,
                    highscores: generateHighscores(groupLatest(updatedResults)),
                    lastUpdated: new Date().toISOString()
                }, null, 2))
            }

            // Update the category-specific log, then the central all.json log
            updateLog(logPath)
            updateLog(path.resolve(REPORTS_DIR, 'all.json'))
        }
    }
    return testResult
}

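/**
 * Renders a markdown report (highscores, summary, failed and passed tests)
 * from a list of results and writes it to reportPath. Only the latest
 * result per test+model pair is reported.
 *
 * Example (illustrative; assumes results were produced by runTest):
 *   generateTestReport(results, 'Basic Tests', getReportPaths('basic', 'md'))
 */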
export const generateTestReport = (
    testResults: TestResult[],
    reportTitle: string,
    reportPath: string
): void => {
    // Group results by test and model, keeping only the latest result
    // for each test+model combination
    const latestResults = new Map<string, Map<string, TestResult>>()
    testResults.forEach(result => {
        if (!latestResults.has(result.test)) {
            latestResults.set(result.test, new Map())
        }
        const testMap = latestResults.get(result.test)!
        const existingResult = testMap.get(result.model)
        if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
            testMap.set(result.model, result)
        }
    })

    // Generate markdown report
    let report = `# ${reportTitle}\n\n`

    // Add highscore section
    report += '## Highscores\n\n'

    // Add regular test rankings
    report += '### Performance Rankings (Duration)\n\n'
    report += '| Test | Model | Duration (ms) | Duration (s) |\n'
    report += '|------|-------|--------------|--------------|\n'

    Array.from(latestResults.entries()).forEach(([testName, modelResults]) => {
        const sortedResults = Array.from(modelResults.entries())
            .map(([model, result]) => ({
                model,
                duration: result.duration || 0
            }))
            .sort((a, b) => a.duration - b.duration)

        sortedResults.forEach(({ model, duration }) => {
            report += `| ${testName} | ${model} | ${duration.toFixed(0)} | ${(duration / 1000).toFixed(2)} |\n`
        })
    })
    report += '\n'

    // Add summary section (guard against division by zero when there are no results)
    report += '## Summary\n\n'
    const totalTests = testResults.length
    const passedTests = testResults.filter(r => r.passed).length
    const failedTests = totalTests - passedTests
    const avgDuration = totalTests > 0
        ? testResults.reduce((sum, r) => sum + (r.duration || 0), 0) / totalTests
        : 0
    const successRate = totalTests > 0 ? (passedTests / totalTests) * 100 : 0
    report += `- Total Tests: ${totalTests}\n`
    report += `- Passed: ${passedTests}\n`
    report += `- Failed: ${failedTests}\n`
    report += `- Success Rate: ${successRate.toFixed(2)}%\n`
    report += `- Average Duration: ${avgDuration.toFixed(0)}ms (${(avgDuration / 1000).toFixed(2)}s)\n\n`

    // First list failed tests
    report += '## Failed Tests\n\n'
    let hasFailures = false
    for (const [testName, modelResults] of latestResults) {
        for (const [model, result] of modelResults) {
            if (!result.passed) {
                hasFailures = true
                report += `### ${testName} - ${model}\n\n`
                report += `- Prompt: \`${result.prompt}\`\n`
                report += `- Expected: \`${result.expected}\`\n`
                report += `- Actual: \`${result.result[0] || ''}\`\n`
                report += `- Duration: ${result.duration || 0}ms (${((result.duration || 0) / 1000).toFixed(2)}s)\n`
                if (result.error) {
                    report += `- Error Type: ${result.error.type}\n`
                    report += `- Error Code: ${result.error.code}\n`
                    report += `- Error Message: ${result.error.message}\n`
                    if (result.error.details?.message) {
                        report += `- Error Details: ${result.error.details.message}\n`
                    }
                }
                report += `- Reason: ${result.reason}\n`
                report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
            }
        }
    }

    if (!hasFailures) {
        report += '*No failed tests*\n\n'
    }

    // Then list passed tests
    report += '## Passed Tests\n\n'
    let hasPassed = false
    for (const [testName, modelResults] of latestResults) {
        for (const [model, result] of modelResults) {
            if (result.passed) {
                hasPassed = true
                report += `### ${testName} - ${model}\n\n`
                report += `- Prompt: \`${result.prompt}\`\n`
                report += `- Expected: \`${result.expected}\`\n`
                report += `- Actual: \`${result.result[0] || ''}\`\n`
                report += `- Duration: ${result.duration || 0}ms (${((result.duration || 0) / 1000).toFixed(2)}s)\n`
                report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
            }
        }
    }

    if (!hasPassed) {
        report += '*No passed tests*\n\n'
    }

    // Write report to file
    write(reportPath, report)
}