mono/packages/kbot/tests/unit/format.test.ts
2025-04-02 12:49:47 +02:00

507 lines
15 KiB
TypeScript

import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { z } from 'zod'
import {
TEST_BASE_PATH,
TEST_LOGS_PATH,
TEST_PREFERENCES_PATH,
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse
} from './commons'
// Resolves a file name relative to the directory containing this test file.
const besideThisFile = (relativeName: string): string => path.resolve(__dirname, relativeName)
// JSON log of recorded results and the on-disk copy of the schema fixture.
const TEST_LOG_PATH = besideThisFile('./format.json')
const TEST_SCHEMA_PATH = besideThisFile('./test-schema.json')
// Model and router identifiers. TEST_MODEL_FAST is not referenced in the
// visible part of this file.
const TEST_MODEL_FAST = 'mistralai/codestral-2501'
const TEST_MODEL = 'mistralai/mistral-tiny'
const TEST_ROUTER = 'openrouter'
// JSON Schema (draft-07) fixture describing a user profile. It is used both as
// an in-memory `format` option and, once serialised, as a schema file.
const testJsonSchema = {
  $schema: "http://json-schema.org/draft-07/schema#",
  $id: "https://example.com/user-profile.schema.json",
  title: "User Profile",
  description: "A user profile containing name, age, and tags",
  type: "object",
  properties: {
    name: {
      type: "string",
      description: "User's full name",
      minLength: 1,
      pattern: "^[A-Za-z\\s]+$"
    },
    age: { type: "number", description: "User's age in years", minimum: 0, maximum: 150 },
    email: { type: "string", description: "User's email address", format: "email" },
    tags: {
      type: "array",
      description: "List of user's tags",
      items: { type: "string", enum: ["developer", "designer", "manager", "admin", "user"] },
      minItems: 1,
      maxItems: 5,
      uniqueItems: true
    },
    address: {
      type: "object",
      description: "User's address",
      properties: {
        street: { type: "string", description: "Street address" },
        city: { type: "string", description: "City name" },
        country: { type: "string", description: "Country name", enum: ["US", "UK", "CA", "AU"] },
        zipCode: { type: "string", description: "ZIP/Postal code", pattern: "^[0-9]{5}(-[0-9]{4})?$" }
      },
      required: ["street", "city", "country"]
    },
    preferences: {
      type: "object",
      description: "User preferences",
      properties: {
        theme: { type: "string", enum: ["light", "dark", "system"], default: "system" },
        notifications: { type: "boolean", default: true },
        language: { type: "string", enum: ["en", "es", "fr", "de", "ja"], default: "en" }
      }
    }
  },
  // Only these three top-level properties are mandatory; unknown top-level
  // properties are rejected.
  required: ["name", "age", "email"],
  additionalProperties: false
}
// Serialise the schema fixture (2-space indent) and store it at
// TEST_SCHEMA_PATH. This runs when the module is loaded.
const serializedTestSchema = JSON.stringify(testJsonSchema, null, 2)
write(TEST_SCHEMA_PATH, serializedTestSchema)
/**
 * Canonicalises a JSON string so that documents differing only in whitespace
 * or in a surrounding markdown code fence compare equal.
 *
 * Fence delimiters are removed whether or not they carry a `json` tag, in any
 * letter case, and with either LF or CRLF line endings.
 *
 * @param json Raw text, optionally wrapped in a ``` fence.
 * @returns The re-serialised compact JSON, or the untouched input when the
 *          cleaned text is not valid JSON.
 */
const normalizeJson = (json: string) => {
  try {
    // First alternative: opening fence line (``` or ```json) with its line break.
    // Second alternative: the line break plus closing ```.
    const cleanJson = json.replace(/```(?:json)?[ \t]*\r?\n|\r?\n[ \t]*```/gi, '').trim()
    return JSON.stringify(JSON.parse(cleanJson))
  } catch {
    // Not JSON: hand the input back unchanged so callers can still compare it.
    return json
  }
}
/**
 * Loose shape check for an email address: one `@`, at least one `.` after it,
 * and no whitespace anywhere.
 */
const isValidEmail = (email: string) => {
  const emailShape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
  return emailShape.test(email)
}
/**
 * Reports whether `num` lies in the closed interval [min, max].
 * Any NaN operand makes the result false.
 */
const isNumberInRange = (num: number, min: number, max: number) => {
  // Written as a negated ">=" so a NaN operand still fails the lower bound.
  if (!(num >= min)) {
    return false
  }
  return num <= max
}
/**
 * True only when `arr` is an array of exactly `length` elements and every
 * element is a string.
 */
const hasValidArrayLength = (arr: any[], length: number) => {
  if (!Array.isArray(arr)) {
    return false
  }
  if (arr.length !== length) {
    return false
  }
  const containsNonString = arr.some(entry => typeof entry !== 'string')
  return !containsNonString
}
describe('Format Options', () => {
let testResults: TestResult[] = []
/**
 * Sends one completion request through `run`, compares the first response
 * entry with `expected` (both passed through `normalizeJson` and lower-cased),
 * and appends a `TestResult` to `testResults`, rewriting the JSON log file.
 *
 * @param prompt     Prompt sent to the model.
 * @param expected   Expected response text (typically serialised JSON).
 * @param testName   Identifier stored in the recorded result.
 * @param modelName  Model to request; falls back to TEST_MODEL when empty.
 * @param options    Extra `run` options (e.g. `format`); they override the defaults.
 * @throws Rethrows any error from `run`, the timeout, the empty-response check
 *         or the equality assertion, after the result has been recorded.
 */
const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
  const requestedModel = modelName || TEST_MODEL
  // Until `onRun` reports the effective values, assume the requested ones.
  let model = requestedModel
  let router = TEST_ROUTER
  const startTime = Date.now()
  let testResult: TestResult | undefined
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined
  try {
    const timeout = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
    })
    const result = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: requestedModel,
        router: TEST_ROUTER,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        ...options,
        // Listed after the spread so the hook cannot be overridden by `options`.
        onRun: async (runOptions) => {
          model = runOptions.model || requestedModel
          router = runOptions.router || TEST_ROUTER
          return runOptions
        }
      }),
      timeout
    ]) as string[]
    if (isEmptyResponse(result)) {
      throw new Error('Model returned empty response')
    }
    const actual = result?.[0]?.trim() || ''
    const normalizedActual = normalizeJson(actual).toLowerCase()
    const normalizedExpected = normalizeJson(expected).toLowerCase()
    const passed = normalizedActual === normalizedExpected
    // Record the outcome before asserting so a mismatch keeps the model's
    // real output and the "expected vs. got" reason in the log.
    testResult = {
      test: testName,
      prompt,
      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      duration: Date.now() - startTime,
      reason: passed ? undefined : `Expected ${normalizedExpected}, but got ${normalizedActual}`,
    }
    expect(normalizedActual).toEqual(normalizedExpected)
  } catch (e) {
    // When the assertion above failed, the result is already recorded; only
    // build an error entry for failures that happened before that point.
    if (!testResult) {
      const error = formatError(e)
      testResult = {
        test: testName,
        prompt,
        result: [],
        expected,
        model,
        router,
        timestamp: new Date().toISOString(),
        passed: false,
        duration: Date.now() - startTime,
        error,
        reason: error?.message || 'Unknown error occurred'
      }
    }
    throw e
  } finally {
    // Cancel the timer so it does not stay pending after the call settled.
    clearTimeout(timeoutHandle)
    if (testResult) {
      testResults.push(testResult)
      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
    }
  }
}
// Test JSON Schema format using file path
it('should format response according to JSON Schema file', async () => {
  // The expected profile has to be valid against the schema written to
  // TEST_SCHEMA_PATH: `email` is required there and every tag must come from
  // the schema's enum. Prompt and expectation are derived from one object so
  // they cannot drift apart.
  const profile = {
    name: "John Doe",
    age: 30,
    email: "john.doe@example.com",
    tags: ["developer", "admin"]
  }
  const prompt = `Create a user profile with name ${profile.name}, age ${profile.age}, email ${profile.email}, and tags ${JSON.stringify(profile.tags)}. Return only the JSON object, no explanation.`
  const expected = JSON.stringify(profile)
  await runFormatTest(
    prompt,
    expected,
    'json-schema-file-format',
    TEST_MODEL,
    {
      format: TEST_SCHEMA_PATH
    }
  )
}, TEST_TIMEOUT) // numeric timeout: accepted by every Vitest version
// Test JSON Schema format using schema object
it('should format response according to JSON Schema object', async () => {
  // Every value below is valid against `testJsonSchema`: `notifications` is a
  // boolean, `language` is one of the enum codes, and the postal code uses the
  // schema's `zipCode` property. The prompt is generated from the same object
  // that forms the expectation.
  const profile = {
    name: "Jane Smith",
    age: 25,
    email: "jane.smith@company.com",
    tags: ["developer", "designer"],
    address: {
      street: "123 Main St",
      city: "New York",
      country: "US",
      zipCode: "10001"
    },
    preferences: {
      theme: "light",
      notifications: true,
      language: "en"
    }
  }
  const prompt = [
    'Create a user profile with the following details:',
    `- Name: ${profile.name}`,
    `- Age: ${profile.age}`,
    `- Email: ${profile.email}`,
    `- Tags: ${JSON.stringify(profile.tags)}`,
    `- Address: ${JSON.stringify(profile.address)}`,
    `- Preferences: ${JSON.stringify(profile.preferences)}`,
    'Return only the JSON object, no explanation.'
  ].join('\n')
  const expected = JSON.stringify(profile)
  await runFormatTest(
    prompt,
    expected,
    'json-schema-object-format',
    TEST_MODEL,
    {
      format: testJsonSchema
    }
  )
}, TEST_TIMEOUT) // numeric timeout: accepted by every Vitest version
// Test Zod Schema format with string
// NOTE(review): despite the name, the `format` passed here is a plain JSON
// Schema object, not a Zod schema; confirm which one was intended.
it('should format response according to Zod string schema', async () => {
  const prompt = 'Generate a valid email address for a business domain. Return only the email, no explanation.'
  const result = await run({
    prompt,
    mode: 'completion',
    model: TEST_MODEL,
    router: TEST_ROUTER,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        email: {
          type: "string",
          format: "email"
        }
      },
      required: ["email"]
    }
  }) as string[]
  const raw = result?.[0]?.trim() || ''
  // The requested format is an object with an `email` property, so unwrap it
  // when the response is JSON; otherwise treat the response as a bare address.
  let email = raw
  try {
    const parsed: unknown = JSON.parse(normalizeJson(raw))
    if (typeof parsed === 'string') {
      email = parsed
    } else if (parsed !== null && typeof parsed === 'object' && 'email' in parsed) {
      email = String((parsed as { email: unknown }).email)
    }
  } catch {
    // Response was not JSON; validate the raw text as-is.
  }
  expect(isValidEmail(email.trim())).toBe(true)
}, TEST_TIMEOUT) // numeric timeout: accepted by every Vitest version
// Test Zod Schema format with number
// NOTE(review): despite the name, the `format` passed here is a plain JSON
// Schema object, not a Zod schema; confirm which one was intended.
it('should format response according to Zod number schema', async () => {
  const prompt = 'Generate a random age between 18 and 65. Return only the number, no explanation.'
  const result = await run({
    prompt,
    mode: 'completion',
    model: TEST_MODEL,
    router: TEST_ROUTER,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        age: {
          type: "number",
          minimum: 18,
          maximum: 65
        }
      },
      required: ["age"]
    }
  }) as string[]
  const raw = result?.[0]?.trim() || ''
  // The requested format is an object with an `age` property. Accept that
  // shape as well as a bare number; anything unreadable stays NaN and fails
  // the range check below.
  let age = Number.NaN
  try {
    const parsed: unknown = JSON.parse(normalizeJson(raw))
    const candidate = parsed !== null && typeof parsed === 'object' && 'age' in parsed
      ? (parsed as { age: unknown }).age
      : parsed
    if (typeof candidate === 'number') {
      age = candidate
    } else if (typeof candidate === 'string') {
      age = Number.parseFloat(candidate)
    }
  } catch {
    // Not JSON (e.g. "42 years"): fall back to a leading-number parse.
    age = Number.parseFloat(raw)
  }
  expect(isNumberInRange(age, 18, 65)).toBe(true)
}, TEST_TIMEOUT) // numeric timeout: accepted by every Vitest version
// Test Zod Schema format with array
// NOTE(review): despite the name, the `format` passed here is a plain JSON
// Schema object, not a Zod schema; confirm which one was intended.
it('should format response according to Zod array schema', async () => {
  const prompt = 'Generate a list of 3 programming languages. Return only the array, no explanation.'
  const result = await run({
    prompt,
    mode: 'completion',
    model: TEST_MODEL,
    router: TEST_ROUTER,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        languages: {
          type: "array",
          items: {
            type: "string"
          },
          minItems: 3,
          maxItems: 3
        }
      },
      required: ["languages"]
    }
  }) as string[]
  const raw = result?.[0]?.trim() || '[]'
  // `normalizeJson` removes a markdown fence when present. A response that is
  // still not JSON makes JSON.parse throw, which fails the test.
  const parsed = JSON.parse(normalizeJson(raw))
  // The requested format wraps the list in a `languages` property; a bare
  // array is accepted too.
  const languages = Array.isArray(parsed) ? parsed : parsed?.languages
  expect(hasValidArrayLength(languages, 3)).toBe(true)
}, TEST_TIMEOUT) // numeric timeout: accepted by every Vitest version
// Test invalid format option
it('should handle invalid format option', async () => {
  const prompt = 'Generate a random number'
  // Wrapped in an async IIFE so a synchronous throw from `run` also surfaces
  // as a rejection.
  const attempt = (async () => run({
    prompt,
    mode: 'completion',
    model: TEST_MODEL,
    router: TEST_ROUTER,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        number: {
          type: "not_a_valid_type"
        }
      }
    }
  }))()
  // `run` must reject, and the message must mention the invalid
  // format/schema/type. Asserting on the rejection directly means a call that
  // unexpectedly succeeds fails the test, instead of a locally thrown sentinel
  // error being caught and matched by this same pattern.
  await expect(attempt).rejects.toThrow(/invalid|schema|type|format/i)
}, TEST_TIMEOUT) // numeric timeout: accepted by every Vitest version
// Builds a markdown report from the results recorded in `testResults` and
// writes it to format-report.md next to this file.
it('should generate markdown report', () => {
  // Keep only the most recent result for each test name + model pair.
  const latestResults = new Map<string, Map<string, TestResult>>()
  for (const result of testResults) {
    let perModel = latestResults.get(result.test)
    if (!perModel) {
      perModel = new Map<string, TestResult>()
      latestResults.set(result.test, perModel)
    }
    const existingResult = perModel.get(result.model)
    if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
      perModel.set(result.model, result)
    }
  }
  // Flattened in insertion order: tests first, then models within a test.
  const latest = [...latestResults.values()].flatMap(perModel => [...perModel.values()])

  // Renders one result; failure-only details are added for failed entries.
  const renderEntry = (result: TestResult): string => {
    let entry = `### ${result.test} - ${result.model}\n`
    entry += `- Prompt: \`${result.prompt}\`\n`
    entry += `- Expected: \`${result.expected}\`\n`
    entry += `- Actual: \`${result.result[0] || ''}\`\n`
    entry += `- Duration: ${result.duration}ms\n`
    if (!result.passed) {
      if (result.error) {
        entry += `- Error Type: ${result.error.type}\n`
        entry += `- Error Code: ${result.error.code}\n`
        entry += `- Error Message: ${result.error.message}\n`
        if (result.error.details?.message) {
          entry += `- Error Details: ${result.error.details.message}\n`
        }
      }
      entry += `- Reason: ${result.reason}\n`
    }
    entry += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
    return entry
  }

  // Renders a section, or the placeholder line when it has no entries.
  const renderSection = (title: string, entries: TestResult[], emptyNote: string): string =>
    `## ${title}\n\n` + (entries.length > 0 ? entries.map(renderEntry).join('') : `${emptyNote}\n\n`)

  let report = '# Format Test Results\n\n'
  report += renderSection('Failed Tests', latest.filter(r => !r.passed), '*No failed tests*')
  report += renderSection('Passed Tests', latest.filter(r => r.passed), '*No passed tests*')

  // Summary over every recorded result.
  report += '## Summary\n\n'
  const totalTests = testResults.length
  const passedTests = testResults.filter(r => r.passed).length
  const failedTests = totalTests - passedTests
  // Guard the division so an empty run reports 0.00% instead of NaN%.
  const successRate = totalTests > 0 ? ((passedTests / totalTests) * 100).toFixed(2) : '0.00'
  report += `- Total Tests: ${totalTests}\n`
  report += `- Passed: ${passedTests}\n`
  report += `- Failed: ${failedTests}\n`
  report += `- Success Rate: ${successRate}%\n\n`

  // Write report to file and verify it exists as a regular file.
  const reportPath = path.resolve(__dirname, './format-report.md')
  write(reportPath, report)
  expect(exists(reportPath)).toBe('file')
})
})