295 lines
8.3 KiB
TypeScript
295 lines
8.3 KiB
TypeScript
import { describe, it, expect } from 'vitest'
|
|
import { run } from '../../src/index'
|
|
import * as path from 'node:path'
|
|
import { sync as write } from "@polymech/fs/write"
|
|
import { sync as read } from "@polymech/fs/read"
|
|
import { sync as exists } from "@polymech/fs/exists"
|
|
import { z } from 'zod'
|
|
|
|
|
|
import {
|
|
models_premium as models,
|
|
TEST_BASE_PATH,
|
|
TEST_LOGS_PATH,
|
|
TEST_PREFERENCES_PATH,
|
|
TEST_TIMEOUT,
|
|
TestResult,
|
|
formatError,
|
|
isEmptyResponse,
|
|
getRouterForModel,
|
|
getApiKeyForRouter
|
|
} from './commons'
|
|
|
|
// JSON file next to this spec; it is read when the suite is set up and
// rewritten with the full result list after every format test run.
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
|
|
|
|
describe('Structured Output Format', () => {
|
|
// Seed the in-memory result list from the log file when one is already on
// disk; anything that is not an array is discarded in favour of an empty list.
const persisted = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
const testResults: TestResult[] = Array.isArray(persisted) ? persisted : []
|
|
|
|
/**
 * Runs one structured-output completion and records its outcome.
 *
 * The prompt is sent through `run` in `completion` mode with `format` as the
 * requested schema, racing against a `TEST_TIMEOUT` timer. The first element
 * of the response is JSON-decoded when it is a string, validated with
 * `format.parse`, and asserted to equal `expected`.
 *
 * Whatever happens, one `TestResult` describing the real outcome (pass or
 * fail, the raw response when one was received, and the formatted error if
 * any) is appended to `testResults` and the whole list is written to
 * `TEST_LOG_PATH`.
 *
 * @param prompt    Prompt text forwarded to `run`.
 * @param format    Zod schema forwarded to `run` and used to validate the reply.
 * @param expected  Value the validated reply must deep-equal.
 * @param testName  Label stored in the `test` field of the logged result.
 * @param modelName Model identifier forwarded to `run`.
 * @returns A summary object for the successful run.
 * @throws Rethrows any failure (timeout, empty response, parse/validation
 *         error, assertion mismatch) after it has been logged.
 */
const runFormatTest = async (prompt: string, format: z.ZodType<any>, expected: any, testName: string, modelName: string) => {
  let model = 'unknown'
  let router = 'unknown'
  const startTime = Date.now()
  let error: TestResult['error'] | undefined
  // Outcome state shared with the `finally` block so the log entry reflects
  // what actually happened instead of always describing a failure.
  let result: any[] = []
  let passed = false
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined

  try {
    const response = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: modelName,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        format,
        // Capture the model/router reported in the run options for the log entry.
        onRun: async (options) => {
          model = options.model || 'unknown'
          router = options.router || 'unknown'
          return options
        }
      }),
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })
    ]) as any[]

    result = response || []

    // Check for an empty reply before parsing; otherwise the parse step fails
    // first and hides the more specific error.
    if (isEmptyResponse(response)) {
      throw new Error('Model returned empty response')
    }

    const actual = response?.[0]
    let parsed: any

    try {
      // Validate against the format schema
      parsed = format.parse(typeof actual === 'string' ? JSON.parse(actual) : actual)
    } catch (parseError) {
      const detail = parseError instanceof Error ? parseError.message : String(parseError)
      throw new Error(`Failed to parse or validate response: ${detail}`)
    }

    // `expect` throws on mismatch, so reaching the next line means the run passed.
    expect(parsed).toEqual(expected)
    passed = true

    return {
      test: testName,
      prompt,
      result,
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      duration: Date.now() - startTime,
      reason: undefined,
      config: {
        router: getRouterForModel(modelName)
      }
    }
  } catch (e) {
    error = formatError(e)
    throw e
  } finally {
    // Stop the timeout timer so it does not stay pending after the race settles.
    clearTimeout(timeoutHandle)

    const testResult: TestResult = {
      test: testName,
      prompt,
      result,
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      duration: Date.now() - startTime,
      error,
      reason: passed ? undefined : error?.message || 'Unknown error occurred',
      config: {
        router: getRouterForModel(modelName),
        // NOTE(review): this persists the router API key into the JSON log on
        // disk; kept to preserve the existing log schema - confirm it is intended.
        apiKey: getApiKeyForRouter(getRouterForModel(modelName))
      }
    }

    testResults.push(testResult)
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  }
}
|
|
|
|
// Flat object with one string field and one number field.
it.each(models)('should return basic structured output with model %s', async (modelName) => {
  const schema = z.object({ greeting: z.string(), count: z.number() })
  const wanted = { greeting: 'hello', count: 42 }

  await runFormatTest('return a greeting "hello" with count 42', schema, wanted, 'basic_structure', modelName)
})
|
|
|
|
// Two nested objects: a user record and a settings record.
it.each(models)('should handle nested structures with model %s', async (modelName) => {
  const userSchema = z.object({ name: z.string(), age: z.number() })
  const settingsSchema = z.object({ theme: z.string(), notifications: z.boolean() })
  const schema = z.object({ user: userSchema, settings: settingsSchema })

  const wanted = {
    user: { name: 'John', age: 30 },
    settings: { theme: 'dark', notifications: true }
  }

  await runFormatTest(
    'return user John age 30 with dark theme and notifications enabled',
    schema,
    wanted,
    'nested_structure',
    modelName
  )
})
|
|
|
|
// Object wrapping an array of { id, name } entries.
it.each(models)('should handle arrays with model %s', async (modelName) => {
  const itemSchema = z.object({ id: z.number(), name: z.string() })
  const schema = z.object({ items: z.array(itemSchema) })

  const wanted = {
    items: [
      { id: 1, name: 'first' },
      { id: 2, name: 'second' }
    ]
  }

  await runFormatTest(
    'return a list of 2 items with ids 1 and 2, names "first" and "second"',
    schema,
    wanted,
    'array_structure',
    modelName
  )
})
|
|
|
|
// Status restricted to a fixed set of string literals, plus a free-form message.
it.each(models)('should handle enums with model %s', async (modelName) => {
  const schema = z.object({
    status: z.enum(['success', 'error', 'pending']),
    message: z.string()
  })
  const wanted = { status: 'success', message: 'Operation completed' }

  await runFormatTest(
    'return status success with message "Operation completed"',
    schema,
    wanted,
    'enum_structure',
    modelName
  )
})
|
|
|
|
// Required name with optional age and optional e-mail address.
it.each(models)('should handle optional fields with model %s', async (modelName) => {
  const schema = z.object({
    name: z.string(),
    age: z.number().optional(),
    email: z.string().email().optional()
  })
  const wanted = { name: 'John', age: 30, email: 'john@example.com' }

  await runFormatTest(
    'return name "John" with age 30 and email "john@example.com"',
    schema,
    wanted,
    'optional_fields',
    modelName
  )
})
|
|
|
|
// Builds format-report.md from the newest logged result of every
// test/model pair: failed runs first, then passed runs.
it('should generate markdown report', () => {
  // test name -> model -> most recent result for that pair
  const latestResults = new Map<string, Map<string, TestResult>>()

  for (const entry of testResults) {
    let perModel = latestResults.get(entry.test)
    if (!perModel) {
      perModel = new Map<string, TestResult>()
      latestResults.set(entry.test, perModel)
    }
    const known = perModel.get(entry.model)
    const isNewer = !known || new Date(entry.timestamp) > new Date(known.timestamp)
    if (isNewer) {
      perModel.set(entry.model, entry)
    }
  }

  // Renders every latest result whose pass state matches `wantPassed`;
  // an empty string means the section has no entries.
  const renderSection = (wantPassed: boolean): string => {
    let section = ''
    for (const [testName, modelResults] of latestResults) {
      for (const [model, result] of modelResults) {
        if (Boolean(result.passed) !== wantPassed) {
          continue
        }
        const lines = [
          `### ${testName} - ${model}\n`,
          `- Prompt: \`${result.prompt}\`\n`,
          `- Expected: \`${JSON.stringify(result.expected)}\`\n`,
          `- Actual: \`${JSON.stringify(result.result[0] || '')}\`\n`,
          `- Duration: ${result.duration}ms\n`
        ]
        if (!wantPassed) {
          // Failure entries additionally carry the error details and the reason.
          if (result.error) {
            lines.push(
              `- Error Type: ${result.error.type}\n`,
              `- Error Code: ${result.error.code}\n`,
              `- Error Message: ${result.error.message}\n`
            )
          }
          lines.push(`- Reason: ${result.reason}\n`)
        }
        lines.push(`- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`)
        section += lines.join('')
      }
    }
    return section
  }

  const failedSection = renderSection(false)
  const passedSection = renderSection(true)

  const report = [
    '# Format Test Results\n\n',
    '## Failed Tests\n\n',
    failedSection === '' ? '*No failed tests*\n\n' : failedSection,
    '## Passed Tests\n\n',
    passedSection === '' ? '*No passed tests*\n\n' : passedSection
  ].join('')

  // Persist the report, then confirm a file now exists at that path.
  const reportPath = path.resolve(__dirname, './format-report.md')
  write(reportPath, report)

  expect(exists(reportPath) === 'file').toBe(true)
})
|
|
})
|