mono/packages/kbot/tests/unit/basic.test.ts
2025-04-02 00:34:47 +02:00

210 lines
6.4 KiB
TypeScript

import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import {
models,
TEST_BASE_PATH,
TEST_LOGS_PATH,
TEST_PREFERENCES_PATH,
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse
} from './commons'
// JSON file, next to this test file, that receives the accumulated TestResult list after each run.
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
describe('Basic Operations', () => {
let testResults: TestResult[] = []
/**
 * Sends `prompt` to `modelName` in completion mode and checks that the first
 * response entry, trimmed and lower-cased, equals `expected`.
 *
 * Every call appends exactly one TestResult to `testResults` and rewrites
 * TEST_LOG_PATH with the whole list, whether the call passed, produced a
 * wrong answer, or threw.
 *
 * @param prompt - Prompt text handed to `run`.
 * @param expected - Exact answer the normalised response must equal.
 * @param testName - Label stored in the result's `test` field.
 * @param modelName - Model identifier handed to `run`.
 * @throws Rethrows whatever `run` rejects with, the 'API call timed out'
 *   error, the 'Model returned empty response' error, and the assertion
 *   error raised when the answer does not match.
 */
const runBasicTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
  let model = 'unknown'
  let router = 'unknown'
  const startTime = Date.now()
  let testResult: TestResult | undefined
  // Normalised answer; declared out here so it can be asserted after logging.
  let actual = ''
  // Watchdog timer handle, kept so the timer can be cancelled once the race settles.
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined

  try {
    const result = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: modelName,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        // Capture the model/router found on the options passed to this hook.
        onRun: async (options) => {
          model = options.model || 'unknown'
          router = options.router || 'unknown'
          return options
        }
      }),
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })
    ]) as string[]

    if (isEmptyResponse(result)) {
      throw new Error('Model returned empty response')
    }

    actual = result?.[0]?.trim()?.toLowerCase() || ''
    const passed = actual === expected
    testResult = {
      test: testName,
      prompt,
      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      duration: Date.now() - startTime,
      reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
    }
  } catch (e) {
    const error: TestResult['error'] = formatError(e)
    testResult = {
      test: testName,
      prompt,
      result: [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed: false,
      duration: Date.now() - startTime,
      error,
      reason: error?.message || 'Unknown error occurred'
    }
    throw e
  } finally {
    // Cancel the watchdog so no timer stays pending after the call has settled.
    if (timeoutHandle !== undefined) {
      clearTimeout(timeoutHandle)
    }
    if (testResult) {
      testResults.push(testResult)
      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
    }
  }

  // Assert only after the outcome has been logged: a wrong answer is then
  // recorded together with the model's actual response and the mismatch
  // reason, instead of being logged with an empty result.
  expect(actual).toEqual(expected)
}
// Addition: each entry of `models` must answer exactly "8".
// The timeout is passed as a plain number (milliseconds): an options object in
// the last position is deprecated in current Vitest, while the numeric form
// remains supported.
it.each(models)('should add two numbers with model %s', async (modelName) => {
  await runBasicTest(
    'add 5 and 3. Return only the number, no explanation.',
    '8',
    'addition',
    modelName
  )
}, 10000)
// Multiplication: each entry of `models` must answer exactly "24".
// The timeout is passed as a plain number (milliseconds): an options object in
// the last position is deprecated in current Vitest, while the numeric form
// remains supported.
it.each(models)('should multiply two numbers with model %s', async (modelName) => {
  await runBasicTest(
    'multiply 8 and 3. Return only the number, no explanation.',
    '24',
    'multiplication',
    modelName
  )
}, 10000)
// Division: each entry of `models` must answer exactly "5".
// The timeout is passed as a plain number (milliseconds): an options object in
// the last position is deprecated in current Vitest, while the numeric form
// remains supported.
it.each(models)('should divide two numbers with model %s', async (modelName) => {
  await runBasicTest(
    'divide 15 by 3. Return only the number, no explanation.',
    '5',
    'division',
    modelName
  )
}, 10000)
// Builds a markdown report from the results collected in `testResults`
// (failed entries first, then passed entries, then a summary), writes it to
// basic-report.md next to this file and checks that the file exists.
it('should generate markdown report', () => {
  // test name -> model -> most recent result for that combination
  const latestResults = new Map<string, Map<string, TestResult>>()
  for (const result of testResults) {
    let perModel = latestResults.get(result.test)
    if (!perModel) {
      perModel = new Map<string, TestResult>()
      latestResults.set(result.test, perModel)
    }
    const existingResult = perModel.get(result.model)
    if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
      perModel.set(result.model, result)
    }
  }

  // Renders one result; error details and the reason are emitted for failures only.
  const renderEntry = (testName: string, model: string, result: TestResult): string => {
    let entry = `### ${testName} - ${model}\n`
    entry += `- Prompt: \`${result.prompt}\`\n`
    entry += `- Expected: \`${result.expected}\`\n`
    entry += `- Actual: \`${result.result[0] ?? ''}\`\n`
    entry += `- Duration: ${result.duration}ms\n`
    if (!result.passed) {
      if (result.error) {
        entry += `- Error Type: ${result.error.type}\n`
        entry += `- Error Code: ${result.error.code}\n`
        entry += `- Error Message: ${result.error.message}\n`
        if (result.error.details?.message) {
          entry += `- Error Details: ${result.error.details.message}\n`
        }
      }
      entry += `- Reason: ${result.reason}\n`
    }
    entry += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
    return entry
  }

  // Renders every latest result whose `passed` flag equals `wantPassed`,
  // or `emptyNote` when there is none.
  const renderSection = (title: string, wantPassed: boolean, emptyNote: string): string => {
    let entries = ''
    for (const [testName, modelResults] of latestResults) {
      for (const [model, result] of modelResults) {
        if (result.passed === wantPassed) {
          entries += renderEntry(testName, model, result)
        }
      }
    }
    return `## ${title}\n\n` + (entries || `${emptyNote}\n\n`)
  }

  let report = '# Basic Operations Test Results\n\n'
  report += renderSection('Failed Tests', false, '*No failed tests*')
  report += renderSection('Passed Tests', true, '*No passed tests*')

  // The summary counts every recorded run, not only the latest per test+model.
  const totalTests = testResults.length
  const passedTests = testResults.filter(r => r.passed).length
  const failedTests = totalTests - passedTests
  // Guard the division: with no recorded runs 0 / 0 would print "NaN%".
  const successRate = totalTests > 0
    ? `${((passedTests / totalTests) * 100).toFixed(2)}%`
    : 'n/a'
  report += '## Summary\n\n'
  report += `- Total Tests: ${totalTests}\n`
  report += `- Passed: ${passedTests}\n`
  report += `- Failed: ${failedTests}\n`
  report += `- Success Rate: ${successRate}\n\n`

  // Write the report and verify it landed on disk as a regular file.
  const reportPath = path.resolve(__dirname, './basic-report.md')
  write(reportPath, report)
  expect(exists(reportPath)).toBe('file')
})
})