223 lines
7.7 KiB
TypeScript
223 lines
7.7 KiB
TypeScript
import { describe, it, expect } from 'vitest'
|
|
import { run } from '../../src/index'
|
|
import * as path from 'node:path'
|
|
import { sync as write } from "@polymech/fs/write"
|
|
import { sync as read } from "@polymech/fs/read"
|
|
import { sync as exists } from "@polymech/fs/exists"
|
|
import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'
|
|
|
|
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
|
|
|
// JSON file, resolved relative to this file's directory, where the tests below persist their results.
const TEST_LOG_PATH = path.resolve(__dirname, './math.json')
|
|
|
|
describe('Advanced Math Operations', () => {
|
|
// Start from whatever is already stored in the log file so that results
// accumulate across runs; anything that is not an array is ignored.
const persisted: unknown = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
let testResults: TestResult[] = Array.isArray(persisted) ? persisted : []
|
|
|
|
// Asks each configured model for 5! and expects the bare number "120".
// The outcome is appended to the shared JSON log *before* the assertion runs,
// so failing runs are persisted too (a failing `expect` throws and would
// otherwise skip the logging code entirely).
it.each(models)('should calculate factorial of 5 with model %s', async (modelName) => {
  const prompt = 'calculate the factorial of 5 (5!). Return only the number, no explanation.'
  const expected = '120'

  // Captured from the options object handed to the onRun hook.
  let model = 'unknown'
  let router = 'unknown'

  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]

  // Only the first returned entry is inspected; surrounding whitespace is ignored.
  const actual = result?.[0]?.trim() || ''
  const passed = actual === expected

  // Record the outcome first ...
  testResults.push({
    test: 'factorial',
    prompt,
    result: result || [],
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
  })

  // ... and write all results to the same file ...
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

  // ... then assert, so a failure cannot prevent the result from being logged.
  expect(actual).toEqual(expected)
})
|
|
|
|
// Asks each configured model for the first five Fibonacci numbers and expects
// "0,1,1,2,3" (spaces around the commas are tolerated).
// The outcome is appended to the shared JSON log *before* the assertion runs,
// so failing runs are persisted too (a failing `expect` throws and would
// otherwise skip the logging code entirely).
it.each(models)('should calculate fibonacci sequence up to 5th number with model %s', async (modelName) => {
  const prompt = 'calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.'
  const expected = '0,1,1,2,3'

  // Captured from the options object handed to the onRun hook.
  let model = 'unknown'
  let router = 'unknown'

  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]

  // Handle both formats: "0,1,1,2,3" and "0, 1, 1, 2, 3".
  // The optional chain short-circuits as a whole, so one `?.` per nullable link is enough.
  const numbers = result?.[0]?.trim().split(',').map(n => n.trim()) ?? []
  const actual = numbers.join(',')
  const passed = actual === expected

  // Record the outcome first ...
  testResults.push({
    test: 'fibonacci',
    prompt,
    result: result || [],
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: !actual ? 'No result returned from model' : passed ? undefined : `Expected ${expected}, but got ${actual}`
  })

  // ... and write all results to the same file ...
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

  // ... then assert, so a failure cannot prevent the result from being logged.
  expect(numbers).toEqual(['0', '1', '1', '2', '3'])
})
|
|
|
|
// Asks each configured model to solve x² + 5x + 6 = 0 and expects the roots
// -3 and -2 in any order, returned as a JSON array.
// The outcome is appended to the shared JSON log *before* the assertion runs,
// so failing runs are persisted too (a failing `expect` throws and would
// otherwise skip the logging code entirely).
it.each(models)('should solve quadratic equation x² + 5x + 6 = 0 with model %s', async (modelName) => {
  const prompt = 'solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.'
  const expectedRoots = [-3, -2]
  const expectedDisplay = JSON.stringify(expectedRoots) // '[-3,-2]'

  // Captured from the options object handed to the onRun hook.
  let model = 'unknown'
  let router = 'unknown'

  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    filters: 'code',
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]

  // Parse the first returned entry as JSON; a missing/empty entry is treated as "[]".
  let roots: number[]
  try {
    const parsed: unknown = JSON.parse(result?.[0]?.trim() || '[]')
    if (!Array.isArray(parsed)) {
      throw new Error('Result is not an array')
    }
    // Convert any string numbers to actual numbers
    roots = parsed.map(n => typeof n === 'string' ? parseFloat(n) : n)
  } catch {
    // Deliberate best-effort fallback: if the text is not a JSON array,
    // pull every (optionally negative) integer out of the raw string instead.
    roots = result?.[0]?.match(/-?\d+/g)?.map(n => parseInt(n, 10)) || []
  }

  // Compare order-insensitively. Sort copies with a numeric comparator:
  // the default sort compares elements as strings and mutates in place.
  const ascending = (a: number, b: number): number => a - b
  const sortedRoots = [...roots].sort(ascending)
  const sortedExpected = [...expectedRoots].sort(ascending)
  const passed = JSON.stringify(sortedRoots) === JSON.stringify(sortedExpected)

  // Record the outcome first ...
  testResults.push({
    test: 'quadratic',
    prompt,
    result: result || [],
    expected: expectedDisplay,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: !result?.[0] ? 'No result returned from model' : passed ? undefined : `Expected ${expectedDisplay}, but got ${result?.[0] || ''}`
  })

  // ... and write all results to the same file ...
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

  // ... then assert, so a failure cannot prevent the result from being logged.
  expect(sortedRoots).toEqual(sortedExpected)
})
|
|
|
|
// Turns the accumulated results into a markdown report (failed entries first,
// then passed ones), writes it next to this file and checks that it exists.
it('should generate markdown report', () => {
  // test name -> model name -> most recent result for that combination
  const newestByTest = new Map<string, Map<string, TestResult>>()

  for (const entry of testResults) {
    let perModel = newestByTest.get(entry.test)
    if (!perModel) {
      perModel = new Map<string, TestResult>()
      newestByTest.set(entry.test, perModel)
    }

    // Keep an entry only when none is stored yet or it has a strictly later timestamp.
    const known = perModel.get(entry.model)
    const isNewer = !known || new Date(entry.timestamp) > new Date(known.timestamp)
    if (isNewer) {
      perModel.set(entry.model, entry)
    }
  }

  // Renders every stored entry whose pass state matches `wantPassed`;
  // the "Reason" line is emitted for failed entries only.
  const renderSection = (wantPassed: boolean): string => {
    const lines: string[] = []
    for (const [testName, perModel] of newestByTest) {
      for (const [model, entry] of perModel) {
        if (Boolean(entry.passed) !== wantPassed) {
          continue
        }
        lines.push(`### ${testName} - ${model}\n`)
        lines.push(`- Prompt: \`${entry.prompt}\`\n`)
        lines.push(`- Expected: \`${entry.expected}\`\n`)
        lines.push(`- Actual: \`${entry.result[0] || ''}\`\n`)
        if (!wantPassed) {
          lines.push(`- Reason: ${entry.reason}\n`)
        }
        lines.push(`- Timestamp: ${new Date(entry.timestamp).toLocaleString()}\n\n`)
      }
    }
    return lines.join('')
  }

  // Assemble the document: title, failed section, passed section.
  const report = [
    '# Math Test Results\n\n',
    '## Failed Tests\n\n',
    renderSection(false),
    '## Passed Tests\n\n',
    renderSection(true)
  ].join('')

  // Write report to file
  const reportPath = path.resolve(__dirname, './math-report.md')
  write(reportPath, report)

  // Verify report was written
  expect(exists(reportPath) === 'file').toBe(true)
})
|
|
})
|