// mono/packages/kbot/tests/unit/math.test.ts

import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'

import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL  } from '../../src/index'

// Absolute path of `math.json` in this test file's directory. The suite reads
// earlier results from it and rewrites it after every model test.
const TEST_LOG_PATH = path.resolve(__dirname, './math.json')

describe('Advanced Math Operations', () => {
  // Results gathered by the tests below. Starts empty and is seeded from the
  // log of earlier runs when that file exists and holds a JSON array.
  let testResults: TestResult[] = []

  if (exists(TEST_LOG_PATH)) {
    const previous = read(TEST_LOG_PATH, 'json')
    // Anything that is not an array is ignored and the list stays empty.
    if (Array.isArray(previous)) {
      testResults = previous
    }
  }

  /**
   * Asks every configured model for 5! in completion mode and requires the
   * trimmed first completion to be exactly "120".
   *
   * The outcome is appended to `testResults` and written to TEST_LOG_PATH
   * before the assertion runs, so a wrong answer is logged together with its
   * reason instead of being lost when `expect` throws.
   */
  it.each(models)('should calculate factorial of 5 with model %s', async (modelName) => {
    const prompt = 'calculate the factorial of 5 (5!). Return only the number, no explanation.'
    const expected = '120'
    // Replaced inside onRun with the model/router found on the run options.
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    const actual = result?.[0]?.trim() ?? ''
    const passed = actual === expected

    // An empty answer gets its own message; a passing run carries no reason.
    let reason: string | undefined
    if (!actual) {
      reason = 'No result returned from model'
    } else if (!passed) {
      reason = `Expected ${expected}, but got ${actual}`
    }

    // Add test result to array
    testResults.push({
      test: 'factorial',
      prompt,
      result: result ?? [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      reason
    })

    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

    // Assert last: a failing expectation throws, and anything after it is skipped.
    expect(actual).toEqual(expected)
  })

  /**
   * Asks every configured model for the first five Fibonacci numbers and
   * requires them to be 0,1,1,2,3 once whitespace around the commas is
   * removed.
   *
   * The outcome is appended to `testResults` and written to TEST_LOG_PATH
   * before the assertion runs, so a wrong answer is logged together with its
   * reason instead of being lost when `expect` throws.
   */
  it.each(models)('should calculate fibonacci sequence up to 5th number with model %s', async (modelName) => {
    const prompt = 'calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.'
    const expected = '0,1,1,2,3'
    // Replaced inside onRun with the model/router found on the run options.
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    // Handle both formats: "0,1,1,2,3" and "0, 1, 1, 2, 3"
    const numbers = result?.[0]?.trim().split(',').map(n => n.trim()) ?? []
    const actual = numbers.join(',')
    const passed = actual === expected

    // An empty answer gets its own message; a passing run carries no reason.
    let reason: string | undefined
    if (!actual) {
      reason = 'No result returned from model'
    } else if (!passed) {
      reason = `Expected ${expected}, but got ${actual}`
    }

    // Add test result to array
    testResults.push({
      test: 'fibonacci',
      prompt,
      result: result ?? [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      reason
    })

    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

    // Assert last: a failing expectation throws, and anything after it is skipped.
    // The expected list is derived from `expected` so the two cannot drift apart.
    expect(numbers).toEqual(expected.split(','))
  })

  /**
   * Asks every configured model for the roots of x² + 5x + 6 = 0 as a JSON
   * array (run with the 'code' filter) and requires them to be -3 and -2 in
   * any order.
   *
   * Parsing is lenient: the first completion is parsed as JSON, and when that
   * fails, or the value is not an array, integers are pulled out of the raw
   * text instead. The outcome is appended to `testResults` and written to
   * TEST_LOG_PATH before the assertion runs, so a wrong answer is logged
   * together with its reason instead of being lost when `expect` throws.
   */
  it.each(models)('should solve quadratic equation x² + 5x + 6 = 0 with model %s', async (modelName) => {
    const prompt = 'solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.'
    const expectedDisplay = '[-3,-2]'
    const expectedRoots = [-3, -2]
    // Replaced inside onRun with the model/router found on the run options.
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      filters: 'code',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    const raw = result?.[0]?.trim() ?? ''

    // Parse the result as JSON (markdown already stripped by filter)
    let roots: number[]
    try {
      if (!raw) {
        throw new Error('No result returned from model')
      }
      const parsed: unknown = JSON.parse(raw)
      if (!Array.isArray(parsed)) {
        throw new Error('Result is not an array')
      }
      // Convert any string numbers to actual numbers
      roots = parsed.map(n => (typeof n === 'string' ? parseFloat(n) : n))
    } catch {
      // If parsing fails, try to extract numbers from the string
      roots = raw.match(/-?\d+/g)?.map(n => parseInt(n, 10)) ?? []
    }

    // Compare order-independently: numeric sort on a copy. A bare sort()
    // orders by string value and mutates the array it is called on.
    const sortedRoots = [...roots].sort((a, b) => a - b)
    const passed = JSON.stringify(sortedRoots) === JSON.stringify(expectedRoots)

    // An empty answer gets its own message; a passing run carries no reason.
    let reason: string | undefined
    if (!result?.[0]) {
      reason = 'No result returned from model'
    } else if (!passed) {
      reason = `Expected ${expectedDisplay}, but got ${result[0]}`
    }

    // Add test result to array
    testResults.push({
      test: 'quadratic',
      prompt,
      result: result ?? [],
      expected: expectedDisplay,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      reason
    })

    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

    // Assert last: a failing expectation throws, and anything after it is skipped.
    expect(sortedRoots).toEqual(expectedRoots)
  })

  /**
   * Writes `math-report.md` next to this file from `testResults`.
   *
   * Only the most recent entry per test + model pair is reported. Failed
   * entries come first and include their reason; passed entries follow.
   * The test asserts that the report exists as a file afterwards.
   */
  it('should generate markdown report', () => {
    // test name -> (model name -> most recent result for that pair)
    const newestByTest = new Map<string, Map<string, TestResult>>()

    for (const entry of testResults) {
      let perModel = newestByTest.get(entry.test)
      if (perModel === undefined) {
        perModel = new Map<string, TestResult>()
        newestByTest.set(entry.test, perModel)
      }
      const known = perModel.get(entry.model)
      const isNewer = known === undefined || new Date(entry.timestamp) > new Date(known.timestamp)
      if (isNewer) {
        perModel.set(entry.model, entry)
      }
    }

    // Renders one "## heading" section holding the entries whose pass state
    // matches `wantPassed`; only failed entries get a "Reason" line.
    const renderSection = (heading: string, wantPassed: boolean): string => {
      const parts: string[] = [`## ${heading}\n\n`]
      newestByTest.forEach((perModel, testName) => {
        perModel.forEach((entry, model) => {
          if (Boolean(entry.passed) !== wantPassed) {
            return
          }
          parts.push(`### ${testName} - ${model}\n`)
          parts.push(`- Prompt: \`${entry.prompt}\`\n`)
          parts.push(`- Expected: \`${entry.expected}\`\n`)
          parts.push(`- Actual: \`${entry.result[0] || ''}\`\n`)
          if (!wantPassed) {
            parts.push(`- Reason: ${entry.reason}\n`)
          }
          parts.push(`- Timestamp: ${new Date(entry.timestamp).toLocaleString()}\n\n`)
        })
      })
      return parts.join('')
    }

    const report =
      '# Math Test Results\n\n' +
      renderSection('Failed Tests', false) +
      renderSection('Passed Tests', true)

    // Write report to file
    const reportPath = path.resolve(__dirname, './math-report.md')
    write(reportPath, report)

    // Verify report was written
    expect(exists(reportPath) === 'file').toBe(true)
  })
})