import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"

import {
  models,
  TEST_BASE_PATH,
  TEST_LOGS_PATH,
  TEST_PREFERENCES_PATH,
  TEST_TIMEOUT,
  TestResult,
  formatError,
  isEmptyResponse
} from './commons'

// JSON log of every result recorded by this suite; rewritten after each test.
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')

describe('Basic Operations', () => {
  // Results accumulated across the suite; the final test turns them into a markdown report.
  const testResults: TestResult[] = []

  /**
   * Sends `prompt` to `modelName` in completion mode, compares the first
   * response entry (trimmed, lower-cased) with `expected`, and records the
   * outcome in `testResults` and in the JSON log at `TEST_LOG_PATH`.
   *
   * @param prompt - Prompt text passed to `run`.
   * @param expected - Exact string the normalised first response must equal.
   * @param testName - Label stored in the result's `test` field.
   * @param modelName - Model identifier passed to `run`.
   * @throws Rethrows any failure (timeout, empty response, assertion mismatch)
   *         after the failing result has been recorded.
   */
  const runBasicTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
    let model = 'unknown'
    let router = 'unknown'
    const startTime = Date.now()
    // Raw model output, hoisted so the failure record can still show what the model returned.
    let response: string[] = []
    let testResult: TestResult | undefined
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined

    try {
      const result = await Promise.race([
        run({
          prompt,
          mode: 'completion',
          model: modelName,
          path: TEST_BASE_PATH,
          logs: TEST_LOGS_PATH,
          preferences: TEST_PREFERENCES_PATH,
          // Capture the model/router reported in the run options for the result record.
          onRun: async (options) => {
            model = options.model || 'unknown'
            router = options.router || 'unknown'
            return options
          }
        }),
        new Promise((_, reject) => {
          timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
        })
      ]) as string[]

      if (isEmptyResponse(result)) {
        throw new Error('Model returned empty response')
      }

      response = result || []
      const actual = result?.[0]?.trim()?.toLowerCase() || ''
      const passed = actual === expected

      // Throws on mismatch; the catch block below then records the failure.
      expect(actual).toEqual(expected)

      testResult = {
        test: testName,
        prompt,
        result: response,
        expected,
        model,
        router,
        timestamp: new Date().toISOString(),
        passed,
        duration: Date.now() - startTime,
        reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
      }
    } catch (e) {
      const error: TestResult['error'] | undefined = formatError(e)
      testResult = {
        test: testName,
        prompt,
        result: response,
        expected,
        model,
        router,
        timestamp: new Date().toISOString(),
        passed: false,
        duration: Date.now() - startTime,
        error,
        reason: error?.message || 'Unknown error occurred'
      }
      throw e
    } finally {
      // Cancel the pending timeout so it does not outlive the call.
      clearTimeout(timeoutHandle)
      if (testResult) {
        testResults.push(testResult)
        write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
      }
    }
  }

  /**
   * Renders one result as a markdown entry. Failed results additionally list
   * the error fields (when present) and the failure reason.
   */
  const renderReportEntry = (testName: string, model: string, result: TestResult): string => {
    const lines = [
      `### ${testName} - ${model}`,
      `- Prompt: \`${result.prompt}\``,
      `- Expected: \`${result.expected}\``,
      `- Actual: \`${result.result[0] || ''}\``,
      `- Duration: ${result.duration}ms`
    ]
    if (!result.passed) {
      if (result.error) {
        lines.push(`- Error Type: ${result.error.type}`)
        lines.push(`- Error Code: ${result.error.code}`)
        lines.push(`- Error Message: ${result.error.message}`)
        if (result.error.details?.message) {
          lines.push(`- Error Details: ${result.error.details.message}`)
        }
      }
      lines.push(`- Reason: ${result.reason}`)
    }
    lines.push(`- Timestamp: ${new Date(result.timestamp).toLocaleString()}`)
    return lines.join('\n') + '\n\n'
  }

  it.each(models)('should add two numbers with model %s', async (modelName) => {
    await runBasicTest(
      'add 5 and 3. Return only the number, no explanation.',
      '8',
      'addition',
      modelName
    )
  }, 10000)

  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
    await runBasicTest(
      'multiply 8 and 3. Return only the number, no explanation.',
      '24',
      'multiplication',
      modelName
    )
  }, 10000)

  it.each(models)('should divide two numbers with model %s', async (modelName) => {
    await runBasicTest(
      'divide 15 by 3. Return only the number, no explanation.',
      '5',
      'division',
      modelName
    )
  }, 10000)

  it('should generate markdown report', () => {
    // Group results by test name, then by model, keeping only the newest
    // result for each test+model combination.
    const latestResults = new Map<string, Map<string, TestResult>>()
    for (const result of testResults) {
      let testMap = latestResults.get(result.test)
      if (!testMap) {
        testMap = new Map<string, TestResult>()
        latestResults.set(result.test, testMap)
      }
      const existingResult = testMap.get(result.model)
      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
        testMap.set(result.model, result)
      }
    }

    // Flatten to (test, model, result) triples in insertion order.
    const entries = Array.from(latestResults).flatMap(([testName, modelResults]) =>
      Array.from(modelResults).map(([model, result]) => ({ testName, model, result }))
    )
    const renderAll = (items: typeof entries): string =>
      items.map(({ testName, model, result }) => renderReportEntry(testName, model, result)).join('')

    const failedEntries = entries.filter(({ result }) => !result.passed)
    const passedEntries = entries.filter(({ result }) => result.passed)

    let report = '# Basic Operations Test Results\n\n'

    // First list failed tests
    report += '## Failed Tests\n\n'
    report += failedEntries.length > 0 ? renderAll(failedEntries) : '*No failed tests*\n\n'

    // Then list passed tests
    report += '## Passed Tests\n\n'
    report += passedEntries.length > 0 ? renderAll(passedEntries) : '*No passed tests*\n\n'

    // Add summary section
    report += '## Summary\n\n'
    const totalTests = testResults.length
    const passedTests = testResults.filter(r => r.passed).length
    const failedTests = totalTests - passedTests
    // Guard the ratio so an empty run reports 0.00% rather than NaN%.
    const successRate = totalTests > 0 ? ((passedTests / totalTests) * 100).toFixed(2) : '0.00'
    report += `- Total Tests: ${totalTests}\n`
    report += `- Passed: ${passedTests}\n`
    report += `- Failed: ${failedTests}\n`
    report += `- Success Rate: ${successRate}%\n\n`

    // Write report to file
    const reportPath = path.resolve(__dirname, './basic-report.md')
    write(reportPath, report)

    // Verify report was written
    expect(exists(reportPath) === 'file').toBe(true)
  })
})