508 lines
16 KiB
TypeScript
508 lines
16 KiB
TypeScript
import { describe, it, expect } from 'vitest'
|
|
import { run } from '../../src/index'
|
|
import * as path from 'node:path'
|
|
import { sync as write } from "@polymech/fs/write"
|
|
import { sync as read } from "@polymech/fs/read"
|
|
import { sync as exists } from "@polymech/fs/exists"
|
|
import { z } from 'zod'
|
|
import {
|
|
models,
|
|
TEST_BASE_PATH,
|
|
TEST_LOGS_PATH,
|
|
TEST_PREFERENCES_PATH,
|
|
TEST_TIMEOUT,
|
|
TestResult,
|
|
formatError,
|
|
isEmptyResponse
|
|
} from './commons'
|
|
|
|
// JSON log next to this file; the suite appends every recorded test result to it.
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
// Location the sample JSON Schema is written to so a test can pass it by file path.
const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
|
|
|
|
/**
 * Sample draft-07 JSON Schema ("User Profile") used as the `format` fixture.
 *
 * `name`, `age` and `email` are required, `tags` is limited to a fixed enum,
 * and no properties beyond the ones listed here are allowed.
 */
const testJsonSchema = {
  $schema: "http://json-schema.org/draft-07/schema#",
  $id: "https://example.com/user-profile.schema.json",
  title: "User Profile",
  description: "A user profile containing name, age, and tags",
  type: "object",
  properties: {
    name: {
      type: "string",
      description: "User's full name",
      minLength: 1,
      pattern: "^[A-Za-z\\s]+$"
    },
    age: {
      type: "number",
      description: "User's age in years",
      minimum: 0,
      maximum: 150
    },
    email: {
      type: "string",
      description: "User's email address",
      format: "email"
    },
    tags: {
      type: "array",
      description: "List of user's tags",
      items: {
        type: "string",
        enum: ["developer", "designer", "manager", "admin", "user"]
      },
      minItems: 1,
      maxItems: 5,
      uniqueItems: true
    },
    address: {
      type: "object",
      description: "User's address",
      properties: {
        street: {
          type: "string",
          description: "Street address"
        },
        city: {
          type: "string",
          description: "City name"
        },
        country: {
          type: "string",
          description: "Country name",
          enum: ["US", "UK", "CA", "AU"]
        },
        zipCode: {
          type: "string",
          description: "ZIP/Postal code",
          pattern: "^[0-9]{5}(-[0-9]{4})?$"
        }
      },
      required: ["street", "city", "country"]
    },
    preferences: {
      type: "object",
      description: "User preferences",
      properties: {
        theme: {
          type: "string",
          enum: ["light", "dark", "system"],
          default: "system"
        },
        notifications: {
          type: "boolean",
          default: true
        },
        language: {
          type: "string",
          enum: ["en", "es", "fr", "de", "ja"],
          default: "en"
        }
      }
    }
  },
  required: ["name", "age", "email"],
  additionalProperties: false
}
|
|
|
|
// Serialize the sample schema (2-space indent) and write it to disk when this module loads.
const serializedTestSchema = JSON.stringify(testJsonSchema, null, 2)
write(TEST_SCHEMA_PATH, serializedTestSchema)
|
|
|
|
/**
 * Canonicalizes a JSON string so two payloads can be compared as text.
 *
 * Markdown code fences around the payload are removed first: an opening fence
 * with or without a `json` tag (any letter case) and the closing fence, with
 * LF or CRLF line endings. The remainder is parsed and re-serialized without
 * insignificant whitespace.
 *
 * @param json - Raw text, optionally wrapped in a fenced code block.
 * @returns The compact JSON text, or `json` unchanged when it cannot be parsed.
 */
const normalizeJson = (json: string) => {
  try {
    // First alternative: opening fence plus its line break; second: line break plus closing fence.
    const withoutFences = json.replace(/```(?:json)?[ \t]*\r?\n|\r?\n[ \t]*```/gi, '').trim()
    return JSON.stringify(JSON.parse(withoutFences))
  } catch {
    // Not JSON (for example plain text): hand the input back untouched.
    return json
  }
}
|
|
|
|
/**
 * Loose e-mail shape check: exactly one "@" between two non-empty parts, no
 * whitespace anywhere, and a "." with text on both sides after the "@".
 *
 * @param email - Candidate address.
 * @returns `true` when the text has that shape.
 */
const isValidEmail = (email: string) => {
  const emailShape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
  return emailShape.test(email)
}
|
|
|
|
/**
 * Inclusive range check.
 *
 * @param num - Value to test.
 * @param min - Lower bound (inclusive).
 * @param max - Upper bound (inclusive).
 * @returns `true` when `min <= num <= max`; `NaN` is never in range.
 */
const isNumberInRange = (num: number, min: number, max: number) => min <= num && num <= max
|
|
|
|
/**
 * Checks that a value is an array of exactly `length` entries, all of them strings.
 *
 * @param arr - Value to inspect.
 * @param length - Required number of entries.
 * @returns `true` only when all three conditions hold.
 */
const hasValidArrayLength = (arr: any[], length: number) => {
  if (!Array.isArray(arr)) {
    return false
  }
  if (arr.length !== length) {
    return false
  }
  return arr.every(entry => typeof entry === 'string')
}
|
|
|
|
describe('Format Options', () => {
|
|
// Results collected by this suite, seeded from the JSON log when that file already exists.
let testResults: TestResult[] = []
if (exists(TEST_LOG_PATH)) {
  const previous = read(TEST_LOG_PATH, 'json')
  // Anything that is not an array is ignored and the list stays empty.
  if (Array.isArray(previous)) {
    testResults = previous
  }
}
|
|
|
|
/**
 * Runs one completion with the given format options, compares the normalized
 * first response with the normalized expectation, and appends the outcome to
 * the JSON log.
 *
 * Every outcome is logged before the test is failed: a mismatch is stored
 * with the actual response and a reason, an error (timeout, empty response,
 * API failure) is stored with the formatted error.
 *
 * @param prompt - Prompt sent to the model.
 * @param expected - Expected response as JSON text (normalized before comparing).
 * @param testName - Identifier stored in the log.
 * @param modelName - Model requested for the run; `options.model` still overrides it.
 * @param options - Extra options spread into the `run` call (for example `format`).
 * @throws The original error when the run fails, or the assertion error on a mismatch.
 */
const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
  let model = modelName
  let router = 'openai'
  const startTime = Date.now()
  const normalizedExpected = normalizeJson(expected)
  let normalizedActual = ''
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined
  let testResult: TestResult | undefined

  try {
    // Keep the timer handle so it can be cancelled once the race is settled.
    const timeout = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
    })

    const result = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: modelName,
        router: 'openai',
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        ...options,
        // Record the model/router actually used so the log shows the effective values.
        onRun: async (runOptions) => {
          model = runOptions.model || 'unknown'
          router = runOptions.router || 'unknown'
          return runOptions
        }
      }),
      timeout
    ]) as string[]

    if (isEmptyResponse(result)) {
      throw new Error('Model returned empty response')
    }

    const actual = result?.[0]?.trim() || ''
    normalizedActual = normalizeJson(actual)
    const passed = normalizedActual === normalizedExpected

    testResult = {
      test: testName,
      prompt,
      result: result || [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      duration: Date.now() - startTime,
      reason: passed ? undefined : `Expected ${normalizedExpected}, but got ${normalizedActual}`,
    }
  } catch (e) {
    const error: TestResult['error'] | undefined = formatError(e)
    testResult = {
      test: testName,
      prompt,
      result: [],
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed: false,
      duration: Date.now() - startTime,
      error,
      reason: error?.message || 'Unknown error occurred'
    }
    throw e
  } finally {
    // Cancel the pending timeout so it does not outlive the call.
    clearTimeout(timeoutHandle)
    if (testResult) {
      testResults.push(testResult)
      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
    }
  }

  // Assert only after the outcome has been logged, so a mismatch keeps its actual output and reason.
  expect(normalizedActual).toEqual(normalizedExpected)
}
|
|
|
|
// Test JSON Schema format using file path.
// The requested profile must itself satisfy the schema written to
// TEST_SCHEMA_PATH: `email` is required, tags are limited to the schema's
// enum, and additional properties are not allowed.
it('should format response according to JSON Schema file', async () => {
  const prompt = 'Create a user profile with name John Doe, age 30, email john.doe@example.com, and tags ["developer", "designer"]. Return only the JSON object, no explanation.'
  // Key order follows the schema's property order because the comparison is textual.
  const expected = JSON.stringify({
    name: "John Doe",
    age: 30,
    email: "john.doe@example.com",
    tags: ["developer", "designer"]
  })

  await runFormatTest(
    prompt,
    expected,
    'json-schema-file-format',
    'gpt-4o',
    {
      format: TEST_SCHEMA_PATH
    }
  )
}, { timeout: 10000 })
|
|
|
|
// Test JSON Schema format using the in-memory schema object.
// Each field of the parsed response is checked individually rather than
// comparing the whole payload as text.
it('should format response according to JSON Schema object', async () => {
  const prompt = `Create a user profile with the following details:
- Name: Jane Smith
- Age: 25
- Email: jane.smith@company.com
- Tags: ["developer", "designer"]
- Address: 123 Main St, New York, US, 10001
- Preferences: light theme, notifications enabled, English language
Return only the JSON object, no explanation.`

  const output = await run({
    prompt,
    mode: 'completion',
    model: 'gpt-4o',
    router: 'openai',
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: testJsonSchema
  }) as string[]

  // An absent first entry is treated as an empty object.
  const firstEntry = output?.[0] || '{}'
  const response = JSON.parse(normalizeJson(firstEntry))

  // Required fields
  expect(response.name).toBe('Jane Smith')
  expect(response.age).toBe(25)
  expect(response.email).toBe('jane.smith@company.com')

  // Tags
  expect(Array.isArray(response.tags)).toBe(true)
  expect(response.tags).toContain('developer')
  expect(response.tags).toContain('designer')

  // Address (the ZIP code is accepted under either key)
  expect(response.address.street).toBe('123 Main St')
  expect(response.address.city).toBe('New York')
  expect(response.address.country).toBe('US')
  const zip = response.address.zipCode || response.address.postal_code
  expect(zip).toMatch(/^[0-9]{5}$/)

  // Preferences (several spellings of "enabled" / "English" are accepted)
  expect(response.preferences.theme).toBe('light')
  const acceptedNotifications = ['true', true, 'enabled']
  expect(acceptedNotifications.includes(response.preferences.notifications)).toBe(true)
  const acceptedLanguages = ['en', 'English']
  expect(acceptedLanguages.includes(response.preferences.language)).toBe(true)
}, { timeout: 10000 })
|
|
|
|
// Test a schema with a single string property (`email`).
// The schema describes an object, so the response may be `{"email": "..."}`,
// a JSON string, or bare text. The address is extracted from whichever shape
// was returned before it is validated.
it('should format response according to Zod string schema', async () => {
  const prompt = 'Generate a valid email address for a business domain. Return only the email, no explanation.'

  const result = await run({
    prompt,
    mode: 'completion',
    model: 'gpt-4o',
    router: 'openai',
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        email: {
          type: "string",
          format: "email"
        }
      },
      required: ["email"]
    }
  }) as string[]

  const raw = result?.[0]?.trim() || ''
  let email = raw
  try {
    const parsed = JSON.parse(normalizeJson(raw))
    if (typeof parsed === 'string') {
      email = parsed.trim()
    } else if (typeof parsed?.email === 'string') {
      email = parsed.email.trim()
    }
  } catch {
    // Not JSON: the response is a bare address, keep the raw text.
  }

  expect(isValidEmail(email)).toBe(true)
}, { timeout: 10000 })
|
|
|
|
// Test a schema with a single bounded number property (`age`).
// The schema describes an object, so the response may be `{"age": 42}`, a
// JSON number, or bare text. The value is extracted from whichever shape was
// returned before the range check.
it('should format response according to Zod number schema', async () => {
  const prompt = 'Generate a random age between 18 and 65. Return only the number, no explanation.'

  const result = await run({
    prompt,
    mode: 'completion',
    model: 'gpt-4o',
    router: 'openai',
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        age: {
          type: "number",
          minimum: 18,
          maximum: 65
        }
      },
      required: ["age"]
    }
  }) as string[]

  // An absent response falls back to '0', which is outside the range and fails the test.
  const raw = result?.[0]?.trim() || '0'
  let age = parseInt(raw, 10)
  try {
    const parsed = JSON.parse(normalizeJson(raw))
    if (typeof parsed === 'number') {
      age = parsed
    } else if (typeof parsed?.age === 'number') {
      age = parsed.age
    }
  } catch {
    // Not JSON: keep the value parsed from the leading digits of the raw text.
  }

  expect(isNumberInRange(age, 18, 65)).toBe(true)
}, { timeout: 10000 })
|
|
|
|
// Test a schema with a single fixed-length string array property (`languages`).
// The schema describes an object, so the response may be `{"languages": [...]}`
// or a bare array. Markdown fences are stripped before parsing.
it('should format response according to Zod array schema', async () => {
  const prompt = 'Generate a list of 3 programming languages. Return only the array, no explanation.'

  const result = await run({
    prompt,
    mode: 'completion',
    model: 'gpt-4o',
    router: 'openai',
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    format: {
      type: "object",
      properties: {
        languages: {
          type: "array",
          items: {
            type: "string"
          },
          minItems: 3,
          maxItems: 3
        }
      },
      required: ["languages"]
    }
  }) as string[]

  // An absent response falls back to an empty array, which fails the length check.
  const raw = result?.[0]?.trim() || '[]'
  // Unparseable output still throws here and fails the test.
  const parsed = JSON.parse(normalizeJson(raw))
  const languages = Array.isArray(parsed) ? parsed : parsed?.languages

  expect(hasValidArrayLength(languages, 3)).toBe(true)
}, { timeout: 10000 })
|
|
|
|
// Test invalid format option: `run` is expected to reject with an error that
// mentions the invalid format/schema.
it('should handle invalid format option', async () => {
  const prompt = 'Generate a random number.'

  let rejected = false
  let failure: unknown
  try {
    await run({
      prompt,
      mode: 'completion',
      model: 'gpt-4o',
      router: 'openai',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: {
        type: "invalid",
        properties: {}
      }
    })
  } catch (e: unknown) {
    rejected = true
    failure = e
  }

  // Checked outside the try block so this error is not caught by the handler above.
  if (!rejected) {
    throw new Error('Expected format validation to fail')
  }

  // Non-Error rejections are stringified instead of crashing on a missing `message`.
  const message = failure instanceof Error ? failure.message : String(failure)
  if (!/invalid|Invalid|schema|Schema/.test(message)) {
    throw new Error(`Unexpected error: ${message}`)
  }
}, { timeout: 10000 })
|
|
|
|
// Builds format-report.md from the recorded results: the latest failed and
// passed result per test/model pair, followed by totals over every recorded run.
it('should generate markdown report', () => {
  // Latest result per test name, then per model.
  const latestResults = new Map<string, Map<string, TestResult>>()
  for (const result of testResults) {
    let perModel = latestResults.get(result.test)
    if (!perModel) {
      perModel = new Map<string, TestResult>()
      latestResults.set(result.test, perModel)
    }
    const previous = perModel.get(result.model)
    if (!previous || new Date(result.timestamp) > new Date(previous.timestamp)) {
      perModel.set(result.model, result)
    }
  }

  // Renders one result; failed entries also carry error details and the reason.
  const renderResult = (testName: string, model: string, result: TestResult) => {
    let section = `### ${testName} - ${model}\n`
    section += `- Prompt: \`${result.prompt}\`\n`
    section += `- Expected: \`${result.expected}\`\n`
    section += `- Actual: \`${result.result[0] || ''}\`\n`
    section += `- Duration: ${result.duration}ms\n`
    if (!result.passed) {
      if (result.error) {
        section += `- Error Type: ${result.error.type}\n`
        section += `- Error Code: ${result.error.code}\n`
        section += `- Error Message: ${result.error.message}\n`
        if (result.error.details?.message) {
          section += `- Error Details: ${result.error.details.message}\n`
        }
      }
      section += `- Reason: ${result.reason}\n`
    }
    section += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
    return section
  }

  // Renders every latest result with the requested outcome, in insertion order.
  const renderSection = (wantPassed: boolean) => {
    let body = ''
    for (const [testName, modelResults] of latestResults) {
      for (const [model, result] of modelResults) {
        if (Boolean(result.passed) === wantPassed) {
          body += renderResult(testName, model, result)
        }
      }
    }
    return body
  }

  let report = '# Format Test Results\n\n'

  // Failed tests first
  report += '## Failed Tests\n\n'
  report += renderSection(false) || '*No failed tests*\n\n'

  // Then passed tests
  report += '## Passed Tests\n\n'
  report += renderSection(true) || '*No passed tests*\n\n'

  // Summary over every recorded run, not only the latest ones.
  report += '## Summary\n\n'
  const totalTests = testResults.length
  const passedTests = testResults.filter(r => r.passed).length
  const failedTests = totalTests - passedTests
  // With no recorded results the rate is reported as 0 instead of NaN.
  const successRate = totalTests === 0 ? 0 : (passedTests / totalTests) * 100
  report += `- Total Tests: ${totalTests}\n`
  report += `- Passed: ${passedTests}\n`
  report += `- Failed: ${failedTests}\n`
  report += `- Success Rate: ${successRate.toFixed(2)}%\n\n`

  // Write report to file
  const reportPath = path.resolve(__dirname, './format-report.md')
  write(reportPath, report)

  // Verify report was written
  expect(exists(reportPath) === 'file').toBe(true)
})
|
|
})
|