// mono/packages/kbot/tests/unit/format.test.ts

import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { z } from 'zod'
import {
  models,
  TEST_BASE_PATH,
  TEST_LOGS_PATH,
  TEST_PREFERENCES_PATH,
  TEST_TIMEOUT,
  TestResult,
  formatError,
  isEmptyResponse
} from './commons'

/** Resolves a relative file name against the directory that contains this test file. */
const resolveBesideTest = (relativePath: string): string => path.resolve(__dirname, relativePath)

/** JSON file the recorded test results are written to. */
const TEST_LOG_PATH = resolveBesideTest('./format.json')
/** File the sample JSON Schema is written to so it can be passed to `format` by path. */
const TEST_SCHEMA_PATH = resolveBesideTest('./test-schema.json')

/**
 * Sample draft-07 JSON Schema ("User Profile") used as the `format` option in
 * the tests below, both inline and via the file it is written to.
 *
 * `name`, `age` and `email` are required and no properties beyond the ones
 * listed are allowed at the top level.
 */
const testJsonSchema = {
  $schema: "http://json-schema.org/draft-07/schema#",
  $id: "https://example.com/user-profile.schema.json",
  title: "User Profile",
  description: "A user profile containing name, age, and tags",
  type: "object",
  properties: {
    name: { type: "string", description: "User's full name", minLength: 1, pattern: "^[A-Za-z\\s]+$" },
    age: { type: "number", description: "User's age in years", minimum: 0, maximum: 150 },
    email: { type: "string", description: "User's email address", format: "email" },
    tags: {
      type: "array",
      description: "List of user's tags",
      items: { type: "string", enum: ["developer", "designer", "manager", "admin", "user"] },
      minItems: 1,
      maxItems: 5,
      uniqueItems: true
    },
    address: {
      type: "object",
      description: "User's address",
      properties: {
        street: { type: "string", description: "Street address" },
        city: { type: "string", description: "City name" },
        country: { type: "string", description: "Country name", enum: ["US", "UK", "CA", "AU"] },
        zipCode: { type: "string", description: "ZIP/Postal code", pattern: "^[0-9]{5}(-[0-9]{4})?$" }
      },
      required: ["street", "city", "country"]
    },
    preferences: {
      type: "object",
      description: "User preferences",
      properties: {
        theme: { type: "string", enum: ["light", "dark", "system"], default: "system" },
        notifications: { type: "boolean", default: true },
        language: { type: "string", enum: ["en", "es", "fr", "de", "ja"], default: "en" }
      }
    }
  },
  required: ["name", "age", "email"],
  additionalProperties: false
}

// Persist the sample schema (pretty-printed with 2-space indentation) when this
// module is loaded, so the test that passes `format` as a file path can use it.
const serializedTestSchema = JSON.stringify(testJsonSchema, null, 2)
write(TEST_SCHEMA_PATH, serializedTestSchema)

/**
 * Canonicalises a JSON string so two payloads can be compared textually.
 *
 * A Markdown code fence around the payload is removed first. The opening fence
 * may carry any language tag (or none) in any letter case, and both LF and
 * CRLF line endings are accepted. The remaining text is then parsed and
 * re-serialised, which drops insignificant whitespace.
 *
 * @param json Raw text, optionally wrapped in a Markdown code fence.
 * @returns The compact JSON serialisation, or `json` unchanged when the text
 *          is not valid JSON.
 */
const normalizeJson = (json: string): string => {
  const unfenced = json
    .trim()
    // Opening fence: ``` + optional language tag + end of line.
    .replace(/^```[A-Za-z0-9_-]*[ \t]*\r?\n/, '')
    // Closing fence on its own (last) line.
    .replace(/\r?\n[ \t]*```$/, '')
    .trim()

  try {
    return JSON.stringify(JSON.parse(unfenced))
  } catch {
    // Not JSON: hand the caller back exactly what it passed in.
    return json
  }
}

/**
 * Loose shape check for an email address: some text, an `@`, some text, a dot
 * and more text, with no whitespace or additional `@` anywhere.
 *
 * @param email Candidate address.
 * @returns `true` when the whole string has that shape.
 */
const isValidEmail = (email: string): boolean =>
  /^[^\s@]+@[^\s@]+\.[^\s@]+$/.exec(email) !== null

/**
 * Tells whether `num` lies in the closed interval [`min`, `max`].
 *
 * @param num Value to check.
 * @param min Inclusive lower bound.
 * @param max Inclusive upper bound.
 * @returns `true` when `min <= num <= max`; `false` otherwise, including when
 *          any comparison involves `NaN`.
 */
const isNumberInRange = (num: number, min: number, max: number): boolean => {
  const reachesLowerBound = num >= min
  if (!reachesLowerBound) {
    return false
  }
  return num <= max
}

/**
 * Checks that a value is an array of exactly `length` entries, all of them
 * strings.
 *
 * @param arr Value to inspect; anything that is not an array is rejected.
 * @param length Required number of entries.
 * @returns `true` only when all three conditions hold.
 */
const hasValidArrayLength = (arr: any[], length: number) => {
  if (!Array.isArray(arr)) {
    return false
  }
  if (arr.length !== length) {
    return false
  }
  const containsNonString = arr.some(entry => typeof entry !== 'string')
  return !containsNonString
}

describe('Format Options', () => {
  // Start from whatever an earlier run left in the log file (if it exists and
  // holds an array); results recorded during this run are appended to it.
  const previousLog: unknown = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
  const testResults: TestResult[] = Array.isArray(previousLog) ? previousLog : []

  /**
   * Runs a single completion, compares the JSON-normalised first reply with
   * `expected`, and appends the outcome to the JSON results log.
   *
   * The call is raced against a `TEST_TIMEOUT` timer. The outcome is recorded
   * before the assertion runs, so a mismatch is logged with the actual reply
   * and an "Expected …, but got …" reason rather than as a generic error.
   *
   * @param prompt Prompt sent to the model.
   * @param expected Expected reply; compared after `normalizeJson`.
   * @param testName Identifier stored in the `test` field of the log entry.
   * @param modelName Model to request; `options.model` still overrides it.
   * @param options Extra options spread into the `run` call (e.g. `format`).
   * @throws Rethrows any error from `run`, the timeout, an empty response or
   *         the failed comparison, after the result has been logged.
   */
  const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
    // Defaults for the log entry; `onRun` replaces them with what the run reports.
    let model = modelName
    let router = 'openai'
    const startTime = Date.now()
    let testResult: TestResult | undefined
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined

    try {
      const timeout = new Promise<never>((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })

      const result = await Promise.race([
        run({
          prompt,
          mode: 'completion',
          model: modelName,
          router: 'openai',
          path: TEST_BASE_PATH,
          logs: TEST_LOGS_PATH,
          preferences: TEST_PREFERENCES_PATH,
          ...options,
          onRun: async (runOptions) => {
            model = runOptions.model || 'unknown'
            router = runOptions.router || 'unknown'
            return runOptions
          }
        }),
        timeout
      ]) as string[]

      if (isEmptyResponse(result)) {
        throw new Error('Model returned empty response')
      }

      const actual = result?.[0]?.trim() || ''
      const normalizedActual = normalizeJson(actual)
      const normalizedExpected = normalizeJson(expected)
      const passed = normalizedActual === normalizedExpected

      // Record first, assert second: the log entry must keep the actual reply
      // even when the assertion below throws.
      testResult = {
        test: testName,
        prompt,
        result: result || [],
        expected,
        model,
        router,
        timestamp: new Date().toISOString(),
        passed,
        duration: Date.now() - startTime,
        reason: passed ? undefined : `Expected ${normalizedExpected}, but got ${normalizedActual}`,
      }

      expect(normalizedActual).toEqual(normalizedExpected)
    } catch (e) {
      // Only build an error entry when nothing was recorded yet, i.e. the
      // failure happened before the comparison (run error, timeout, empty reply).
      if (!testResult) {
        const error = formatError(e)
        testResult = {
          test: testName,
          prompt,
          result: [],
          expected,
          model,
          router,
          timestamp: new Date().toISOString(),
          passed: false,
          duration: Date.now() - startTime,
          error,
          reason: error?.message || 'Unknown error occurred'
        }
      }
      throw e
    } finally {
      // Stop the timeout timer once the race is settled so it does not stay pending.
      clearTimeout(timeoutHandle)
      if (testResult) {
        testResults.push(testResult)
        write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
      }
    }
  }

  // Test JSON Schema format using file path
  it('should format response according to JSON Schema file', async () => {
    // The requested profile must itself satisfy the schema written to
    // TEST_SCHEMA_PATH: `email` is required and every tag has to come from the
    // schema's enum. Otherwise a schema-conforming reply could never equal
    // `expected`. Keys are listed in the schema's property order because the
    // comparison is textual.
    const prompt = 'Create a user profile with name John Doe, age 30, email john.doe@example.com, and tags ["developer", "admin"]. Return only the JSON object, no explanation.'
    const expected = JSON.stringify({
      name: "John Doe",
      age: 30,
      email: "john.doe@example.com",
      tags: ["developer", "admin"]
    })

    await runFormatTest(
      prompt,
      expected,
      'json-schema-file-format',
      'gpt-4o',
      {
        format: TEST_SCHEMA_PATH
      }
    )
  }, 10000)

  // Test JSON Schema format using schema object
  it('should format response according to JSON Schema object', async () => {
    const prompt = `Create a user profile with the following details:
    - Name: Jane Smith
    - Age: 25
    - Email: jane.smith@company.com
    - Tags: ["developer", "designer"]
    - Address: 123 Main St, New York, US, 10001
    - Preferences: light theme, notifications enabled, English language
    Return only the JSON object, no explanation.`

    const result = await run({
      prompt,
      mode: 'completion',
      model: 'gpt-4o',
      router: 'openai',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: testJsonSchema
    }) as string[]

    // An empty reply parses to `{}`, so the field assertions below fail
    // instead of JSON.parse throwing on an empty string.
    const response = JSON.parse(normalizeJson(result?.[0] || '{}'))

    // Validate required fields
    expect(response.name).toBe('Jane Smith')
    expect(response.age).toBe(25)
    expect(response.email).toBe('jane.smith@company.com')

    // Validate tags
    expect(Array.isArray(response.tags)).toBe(true)
    expect(response.tags).toContain('developer')
    expect(response.tags).toContain('designer')

    // Validate address
    expect(response.address.street).toBe('123 Main St')
    expect(response.address.city).toBe('New York')
    expect(response.address.country).toBe('US')
    expect(response.address.zipCode || response.address.postal_code).toMatch(/^[0-9]{5}$/)

    // Validate preferences (a few equivalent spellings are tolerated)
    expect(response.preferences.theme).toBe('light')
    expect(['true', true, 'enabled'].includes(response.preferences.notifications)).toBe(true)
    expect(['en', 'English'].includes(response.preferences.language)).toBe(true)
  }, 10000)

  // Test a string property constrained by `format: "email"`.
  // (The test title says "Zod", but the schema passed is a plain JSON Schema object.)
  it('should format response according to Zod string schema', async () => {
    const prompt = 'Generate a valid email address for a business domain. Return only the email, no explanation.'

    const result = await run({
      prompt,
      mode: 'completion',
      model: 'gpt-4o',
      router: 'openai',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: {
        type: "object",
        properties: {
          email: {
            type: "string",
            format: "email"
          }
        },
        required: ["email"]
      }
    }) as string[]

    const raw = result?.[0]?.trim() || ''

    // The schema wraps the address in an `email` property while the prompt asks
    // for the bare address, so accept either shape of reply.
    let email: unknown = raw
    try {
      const parsed: unknown = JSON.parse(raw)
      if (typeof parsed === 'object' && parsed !== null && 'email' in parsed) {
        email = (parsed as Record<string, unknown>).email
      }
    } catch {
      // Not JSON: keep treating the reply as a bare email address.
    }

    expect(typeof email === 'string' && isValidEmail(email)).toBe(true)
  }, 10000)

  // Test a number property constrained by `minimum` / `maximum`.
  // (The test title says "Zod", but the schema passed is a plain JSON Schema object.)
  it('should format response according to Zod number schema', async () => {
    const prompt = 'Generate a random age between 18 and 65. Return only the number, no explanation.'

    const result = await run({
      prompt,
      mode: 'completion',
      model: 'gpt-4o',
      router: 'openai',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: {
        type: "object",
        properties: {
          age: {
            type: "number",
            minimum: 18,
            maximum: 65
          }
        },
        required: ["age"]
      }
    }) as string[]

    const raw = result?.[0]?.trim() || '0'

    // The schema wraps the value in an `age` property while the prompt asks for
    // the bare number, so accept either shape of reply.
    let ageValue: unknown = raw
    try {
      const parsed: unknown = JSON.parse(raw)
      if (typeof parsed === 'object' && parsed !== null && 'age' in parsed) {
        ageValue = (parsed as Record<string, unknown>).age
      }
    } catch {
      // Not JSON: fall through and parse the raw text as an integer.
    }

    const age = typeof ageValue === 'number' ? ageValue : parseInt(String(ageValue), 10)
    expect(isNumberInRange(age, 18, 65)).toBe(true)
  }, 10000)

  // Test an array property constrained to exactly three string items.
  // (The test title says "Zod", but the schema passed is a plain JSON Schema object.)
  it('should format response according to Zod array schema', async () => {
    const prompt = 'Generate a list of 3 programming languages. Return only the array, no explanation.'

    const result = await run({
      prompt,
      mode: 'completion',
      model: 'gpt-4o',
      router: 'openai',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: {
        type: "object",
        properties: {
          languages: {
            type: "array",
            items: {
              type: "string"
            },
            minItems: 3,
            maxItems: 3
          }
        },
        required: ["languages"]
      }
    }) as string[]

    const parsed: unknown = JSON.parse(result?.[0]?.trim() || '[]')

    // The schema wraps the list in a `languages` property while the prompt asks
    // for the bare array, so accept either shape of reply.
    const languages: unknown = Array.isArray(parsed)
      ? parsed
      : (parsed as { languages?: unknown } | null)?.languages

    // hasValidArrayLength rejects non-arrays at runtime, so the cast is safe.
    expect(hasValidArrayLength(languages as unknown[], 3)).toBe(true)
  }, 10000)

  // Test invalid format option
  it('should handle invalid format option', async () => {
    const prompt = 'Generate a random number.'

    // Capture the rejection instead of asserting inside the try block, so the
    // "did not fail" error below cannot be swallowed by our own catch.
    let rejected = false
    let failure: unknown
    try {
      await run({
        prompt,
        mode: 'completion',
        model: 'gpt-4o',
        router: 'openai',
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        format: {
          type: "invalid",
          properties: {}
        }
      })
    } catch (e: unknown) {
      rejected = true
      failure = e
    }

    // If run resolved, the format validation didn't work
    if (!rejected) {
      throw new Error('Expected format validation to fail')
    }

    // Works for Error instances and error-like objects; anything else is stringified.
    const rawMessage = (failure as { message?: unknown } | null | undefined)?.message
    const message = typeof rawMessage === 'string' ? rawMessage : String(failure)

    // The error should be about invalid format/schema
    if (!/invalid|Invalid|schema|Schema/.test(message)) {
      throw new Error(`Unexpected error: ${message}`)
    }
  }, 10000)

  it('should generate markdown report', () => {
    // Keep only the most recent entry (by timestamp) for each test + model pair.
    const latestResults = new Map<string, Map<string, TestResult>>()
    for (const result of testResults) {
      let byModel = latestResults.get(result.test)
      if (!byModel) {
        byModel = new Map<string, TestResult>()
        latestResults.set(result.test, byModel)
      }
      const previous = byModel.get(result.model)
      if (!previous || new Date(result.timestamp) > new Date(previous.timestamp)) {
        byModel.set(result.model, result)
      }
    }

    // Renders the "###" entry for one result. Error details and the failure
    // reason are only included for failed results.
    const renderEntry = (testName: string, model: string, result: TestResult, withFailureDetails: boolean): string => {
      let entry = `### ${testName} - ${model}\n`
      entry += `- Prompt: \`${result.prompt}\`\n`
      entry += `- Expected: \`${result.expected}\`\n`
      entry += `- Actual: \`${result.result[0] || ''}\`\n`
      entry += `- Duration: ${result.duration}ms\n`
      if (withFailureDetails) {
        if (result.error) {
          entry += `- Error Type: ${result.error.type}\n`
          entry += `- Error Code: ${result.error.code}\n`
          entry += `- Error Message: ${result.error.message}\n`
          if (result.error.details?.message) {
            entry += `- Error Details: ${result.error.details.message}\n`
          }
        }
        entry += `- Reason: ${result.reason}\n`
      }
      entry += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
      return entry
    }

    // Renders one "##" section holding every latest result whose pass state
    // matches `wantPassed`, or `emptyNotice` when there is none.
    const renderSection = (title: string, wantPassed: boolean, emptyNotice: string): string => {
      let section = `## ${title}\n\n`
      let hasEntries = false
      for (const [testName, modelResults] of latestResults) {
        for (const [model, result] of modelResults) {
          if (Boolean(result.passed) !== wantPassed) {
            continue
          }
          hasEntries = true
          section += renderEntry(testName, model, result, !wantPassed)
        }
      }
      if (!hasEntries) {
        section += `${emptyNotice}\n\n`
      }
      return section
    }

    // Generate markdown report: failures first, then passes
    let report = '# Format Test Results\n\n'
    report += renderSection('Failed Tests', false, '*No failed tests*')
    report += renderSection('Passed Tests', true, '*No passed tests*')

    // Add summary section
    // NOTE(review): these counts cover every entry in the log, including
    // superseded runs, whereas the sections above list only the latest result
    // per test + model. Confirm whether that is intended.
    report += '## Summary\n\n'
    const totalTests = testResults.length
    const passedTests = testResults.filter(r => r.passed).length
    const failedTests = totalTests - passedTests
    // Guard against 0 / 0, which would otherwise render as "NaN%".
    const successRate = totalTests === 0 ? 0 : (passedTests / totalTests) * 100
    report += `- Total Tests: ${totalTests}\n`
    report += `- Passed: ${passedTests}\n`
    report += `- Failed: ${failedTests}\n`
    report += `- Success Rate: ${successRate.toFixed(2)}%\n\n`

    // Write report to file
    const reportPath = path.resolve(__dirname, './format-report.md')
    write(reportPath, report)

    // Verify report was written
    expect(exists(reportPath) === 'file').toBe(true)
  })
})