// File: mono/packages/kbot/tests/unit/format.test.ts
// (viewer metadata: 564 lines, 18 KiB, TypeScript)

import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { z } from 'zod'
import {
getDefaultModels,
TEST_BASE_PATH,
TEST_LOGS_PATH,
TEST_PREFERENCES_PATH,
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse,
runTest,
generateTestReport,
getReportPaths
} from './commons'
// Models the parameterized tests iterate over (see the it.each(models) calls).
// Taken from the shared defaults in ./commons; replace this call to override
// the list for this test file only.
const models = getDefaultModels()
// Path of the JSON results log: handed to runTest() and rewritten by
// runFormatTest() after every recorded result.
const TEST_LOG_PATH = getReportPaths('format', 'json')
// Where the sample JSON Schema defined below is written, next to this test file.
// Used as the file-path form of the `format` option.
const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
// NOTE(review): not referenced anywhere in the code visible here — confirm it is still needed.
const TEST_MODEL_FAST = 'mistralai/codestral-2501'
// Default model and router passed to run() by the 'Format Options' suite.
const TEST_MODEL = 'mistralai/mistral-tiny'
const TEST_ROUTER = 'openrouter'
/**
 * Sample draft-07 JSON Schema ("User Profile") used as the `format` option in
 * the tests below, both inline and after being written to a file.
 *
 * Top level: `name`, `age` and `email` are required and no additional
 * properties are allowed. `tags`, `address` and `preferences` are optional.
 */
const testJsonSchema = {
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://example.com/user-profile.schema.json",
  title: "User Profile",
  description: "A user profile containing name, age, and tags",
  type: "object",
  properties: {
    // Letters and whitespace only, at least one character.
    name: {
      type: "string",
      description: "User's full name",
      minLength: 1,
      pattern: "^[A-Za-z\\s]+$"
    },
    age: {
      type: "number",
      description: "User's age in years",
      minimum: 0,
      maximum: 150
    },
    email: {
      type: "string",
      description: "User's email address",
      format: "email"
    },
    // One to five distinct values drawn from a fixed vocabulary.
    tags: {
      type: "array",
      description: "List of user's tags",
      items: {
        type: "string",
        enum: ["developer", "designer", "manager", "admin", "user"]
      },
      minItems: 1,
      maxItems: 5,
      uniqueItems: true
    },
    // zipCode is optional; street, city and country are required.
    address: {
      type: "object",
      description: "User's address",
      properties: {
        street: {
          type: "string",
          description: "Street address"
        },
        city: {
          type: "string",
          description: "City name"
        },
        country: {
          type: "string",
          description: "Country name",
          enum: ["US", "UK", "CA", "AU"]
        },
        zipCode: {
          type: "string",
          description: "ZIP/Postal code",
          pattern: "^[0-9]{5}(-[0-9]{4})?$"
        }
      },
      required: ["street", "city", "country"]
    },
    // Every preference carries a default and none is required.
    preferences: {
      type: "object",
      description: "User preferences",
      properties: {
        theme: {
          type: "string",
          enum: ["light", "dark", "system"],
          default: "system"
        },
        notifications: {
          type: "boolean",
          default: true
        },
        language: {
          type: "string",
          enum: ["en", "es", "fr", "de", "ja"],
          default: "en"
        }
      }
    }
  },
  required: ["name", "age", "email"],
  additionalProperties: false
}
// Persist the sample schema (pretty-printed with 2-space indentation) so the
// file-path form of the `format` option has a file to point at.
// This is a top-level statement, so it runs when the module is loaded.
write(TEST_SCHEMA_PATH, JSON.stringify(testJsonSchema, null, 2))
/**
 * Normalizes text that is expected to contain a JSON document so two
 * responses can be compared independent of whitespace and wrapping.
 *
 * Steps:
 *  1. strip markdown code fences and surrounding whitespace;
 *  2. skip any prose in front of the first `{` or `[`;
 *  3. parse and re-serialize compactly;
 *  4. if parsing fails, retry once with everything after the last matching
 *     closer (`}` for an object, `]` for an array) removed, so trailing prose
 *     is tolerated the same way leading prose is.
 *
 * @param json - Raw response text; may be undefined or empty.
 * @returns The compact JSON string on success; `''` for empty input; the
 *   fence-stripped text when it contains no `{` or `[`; otherwise the trimmed
 *   original input.
 */
const normalizeJson = (json: string | undefined): string => {
  if (!json) return '';

  // Remove markdown code block markers if present and trim.
  const cleanJson = json.replace(/```json\n?|\n?```/g, '').trim();

  // Locate the start of the JSON object or array, whichever comes first.
  const firstBrace = cleanJson.indexOf('{');
  const firstBracket = cleanJson.indexOf('[');
  if (firstBrace === -1 && firstBracket === -1) {
    // Nothing that looks like JSON: hand back the cleaned text, which will
    // simply fail whatever comparison the caller performs.
    return cleanJson;
  }
  let startIndex: number;
  if (firstBrace === -1) {
    startIndex = firstBracket;
  } else if (firstBracket === -1) {
    startIndex = firstBrace;
  } else {
    startIndex = Math.min(firstBrace, firstBracket);
  }
  const candidate = cleanJson.substring(startIndex);

  try {
    return JSON.stringify(JSON.parse(candidate));
  } catch {
    // Fall through: the document may be followed by explanatory text.
  }

  // Retry with trailing text removed. Input that parsed above already ends in
  // its closer, so this step only affects input that used to fail.
  const closer = candidate[0] === '{' ? '}' : ']';
  const endIndex = candidate.lastIndexOf(closer);
  if (endIndex > 0) {
    try {
      return JSON.stringify(JSON.parse(candidate.substring(0, endIndex + 1)));
    } catch {
      // Still not valid JSON; use the fallback below.
    }
  }

  // Parsing failed: return the trimmed original so the caller sees what the
  // model actually produced.
  return json.trim();
};
/**
 * Loose shape check for an email address: one or more characters, `@`, a
 * domain part, a dot, and a final part, with no whitespace or extra `@`
 * anywhere in the string.
 *
 * @param email - Candidate address.
 * @returns true when the whole string matches that shape.
 */
const isValidEmail = (email: string) => {
  const emailShape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
  return emailShape.test(email)
}
/**
 * Inclusive range check.
 *
 * @param num - Value under test.
 * @param min - Lowest accepted value.
 * @param max - Highest accepted value.
 * @returns true when `min <= num <= max`; NaN is never in range.
 */
const isNumberInRange = (num: number, min: number, max: number) => {
  // Written as two positive comparisons so NaN fails both, as before.
  const atOrAboveMin = min <= num
  const atOrBelowMax = max >= num
  return atOrAboveMin && atOrBelowMax
}
/**
 * Checks that a value is an array of exactly `length` entries, all strings.
 *
 * @param arr - Value to inspect (checked at runtime, so non-arrays are rejected).
 * @param length - Required number of entries.
 * @returns true only when all three conditions hold.
 */
const hasValidArrayLength = (arr: any[], length: number) => {
  if (!Array.isArray(arr)) return false
  if (arr.length !== length) return false
  return arr.every(entry => typeof entry === 'string')
}
/**
 * Compares two code snippets line by line, ignoring indentation and trailing
 * whitespace on each line as well as leading/trailing blank space overall.
 *
 * @param actual - Produced code; undefined or empty never matches.
 * @param expected - Reference code.
 * @returns true when both have the same number of lines and every trimmed
 *   line is identical.
 */
const compareCodeLines = (actual: string | undefined, expected: string): boolean => {
  if (!actual) return false;

  const toTrimmedLines = (code: string): string[] =>
    code.trim().split('\n').map(row => row.trim());

  const producedRows = toTrimmedLines(actual);
  const referenceRows = toTrimmedLines(expected);

  return (
    producedRows.length === referenceRows.length &&
    producedRows.every((row, idx) => row === referenceRows[idx])
  );
};
/**
 * Collapses every run of line breaks, together with the whitespace around it,
 * into a single `\n`, then trims the result.
 *
 * @param text - Markdown text; undefined or empty yields `''`.
 * @returns The text with exactly one newline between non-empty lines.
 */
const normalizeMarkdownNewlines = (text: string | undefined): string => {
  const collapsed = text ? text.replace(/[\s]*\n+[\s]*/g, '\n') : '';
  return collapsed.trim();
};
/**
 * Formatting checks that run once per entry in `models`.
 *
 * Each case delegates to runTest() (from ./commons) with a prompt, the
 * expected answer, a test id, the model name and the JSON log path, then
 * compares the first entry of `result.result` against the expectation after a
 * case-specific normalization.
 *
 * Timeouts are passed as a plain number in the third argument. Vitest accepts
 * that form, whereas an options object in that position is deprecated in
 * recent Vitest versions.
 */
describe('Format Operations', () => {
  // One entry per executed case. The report test at the end reads this list,
  // so it has to stay the last test of the suite.
  const testResults: TestResult[] = []
  const TEST_REPORT_PATH = getReportPaths('format', 'md')

  it.each(models)('should format JSON with model %s', async (modelName) => {
    // Single source of truth for the value handed to runTest and asserted on.
    const expectedJson = '{\n "name": "John",\n "age": 30\n}'
    const result = await runTest(
      'Format this JSON: {"name":"John","age":30}. Return only the formatted JSON, no explanation.',
      expectedJson,
      'json_formatting',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)

    // Compare the parsed-and-reserialized forms so whitespace and code fences
    // in the response do not matter.
    const normalizedActual = normalizeJson(result.result[0]?.trim() || '')
    const normalizedExpected = normalizeJson(expectedJson)
    expect(normalizedActual.toLowerCase()).toEqual(normalizedExpected.toLowerCase())
  }, TEST_TIMEOUT)

  it.each(models)('should format markdown with model %s', async (modelName) => {
    const expectedMarkdown = '# The Title\n\n## The Subtitle\n\nThis is the body text.'
    const result = await runTest(
      'Create markdown: H1=The Title, H2=The Subtitle, P=This is the body text. Respond ONLY with markdown. Do not mention \'user preferences\' or \'undefined\'.',
      expectedMarkdown,
      'markdown_formatting',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)

    // Case-insensitive comparison that also ignores the number of blank lines.
    const normalizedActual = normalizeMarkdownNewlines(result.result[0]?.toLowerCase())
    const normalizedExpected = normalizeMarkdownNewlines(expectedMarkdown.toLowerCase())
    expect(normalizedActual).toEqual(normalizedExpected)
  }, TEST_TIMEOUT)

  it.each(models)('should format code with model %s', async (modelName) => {
    const expectedCode = 'function add(a, b) {\n return a + b;\n}'
    const result = await runTest(
      'Format this code: function add(a,b){return a+b}. Return only the formatted code, no explanation.',
      expectedCode,
      'code_formatting',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)

    // Indentation differences are ignored by compareCodeLines.
    const actualCode = result.result[0]
    expect(compareCodeLines(actualCode, expectedCode)).toBe(true)
  }, TEST_TIMEOUT)

  it.each(models)('should format date with model %s', async (modelName) => {
    const expectedDate = '03/15/2024'
    const result = await runTest(
      'Format this date: 2024-03-15. Return only the formatted date in MM/DD/YYYY format, no explanation.',
      expectedDate,
      'date_formatting',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual(expectedDate)
  }, TEST_TIMEOUT)

  it.each(models)('should format currency with model %s', async (modelName) => {
    const expectedCurrency = '$1,234.56'
    const result = await runTest(
      'Format this number as USD currency: 1234.56. Return only the formatted currency, no explanation.',
      expectedCurrency,
      'currency_formatting',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual(expectedCurrency)
  }, TEST_TIMEOUT)

  it('should generate markdown report', () => {
    generateTestReport(testResults, 'Format Operations Test Results', TEST_REPORT_PATH)
    // exists() reports the entry kind; the report must be a regular file.
    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
  })
})
/**
 * Exercises the `format` option of run(): a JSON Schema given as a file path,
 * the same schema given inline, small inline schemas for string / number /
 * array payloads, and a deliberately invalid schema.
 *
 * Timeouts are passed as a plain number in the third argument of `it`. Vitest
 * accepts that form, whereas an options object in that position is deprecated
 * in recent Vitest versions.
 */
describe('Format Options', () => {
  // Results recorded by runFormatTest. The whole list is rewritten to
  // TEST_LOG_PATH after every call.
  const testResults: TestResult[] = []

  /**
   * Runs one completion through run() with the given `format`-style options,
   * compares the normalized first response with the normalized expectation,
   * records a TestResult and writes the accumulated results to TEST_LOG_PATH.
   *
   * @param prompt - Prompt sent to the model.
   * @param expected - Expected response; compared after normalizeJson() and lower-casing.
   * @param testName - Identifier stored in the recorded TestResult.
   * @param modelName - Currently unused; kept so existing call sites stay valid.
   *   The request starts from TEST_MODEL unless `options` overrides it.
   * @param options - Extra options spread into the run() call (e.g. `format`).
   * @throws Rethrows any failure (timeout, empty response, failed expectation)
   *   after the result has been recorded.
   */
  const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
    let model = TEST_MODEL
    let router = TEST_ROUTER
    const startTime = Date.now()
    let testResult: TestResult | undefined
    // Handle of the race timer, so it can be cancelled once run() settles
    // instead of staying pending for up to TEST_TIMEOUT.
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined

    try {
      const result = await Promise.race([
        run({
          prompt,
          mode: 'completion',
          model: TEST_MODEL,
          router: TEST_ROUTER,
          path: TEST_BASE_PATH,
          logs: TEST_LOGS_PATH,
          preferences: TEST_PREFERENCES_PATH,
          ...options,
          // Capture the model/router actually used so the record reflects them.
          onRun: async (runOptions) => {
            model = runOptions.model || TEST_MODEL
            router = runOptions.router || TEST_ROUTER
            return runOptions
          }
        }),
        new Promise((_, reject) => {
          timeoutHandle = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
        })
      ]) as string[]

      if (isEmptyResponse(result)) {
        throw new Error('Model returned empty response')
      }

      const actual = result?.[0]?.trim() || ''
      const normalizedActual = normalizeJson(actual).toLowerCase()
      const normalizedExpected = normalizeJson(expected).toLowerCase()
      const passed = normalizedActual === normalizedExpected

      // Record first, assert second: a mismatch is then logged together with
      // the actual model output and the "Expected …, but got …" reason.
      testResult = {
        test: testName,
        prompt,
        result: result || [],
        expected,
        model,
        router,
        timestamp: new Date().toISOString(),
        passed,
        duration: Date.now() - startTime,
        reason: passed ? undefined : `Expected ${normalizedExpected}, but got ${normalizedActual}`,
      }
      expect(normalizedActual).toEqual(normalizedExpected)
    } catch (e) {
      // Only build an error record when nothing was recorded yet (timeout,
      // empty response, run() failure). A failed expectation keeps its record.
      if (!testResult) {
        const error = formatError(e)
        testResult = {
          test: testName,
          prompt,
          result: [],
          expected,
          model,
          router,
          timestamp: new Date().toISOString(),
          passed: false,
          duration: Date.now() - startTime,
          error,
          reason: error?.message || 'Unknown error occurred'
        }
      }
      throw e
    } finally {
      clearTimeout(timeoutHandle)
      if (testResult) {
        testResults.push(testResult)
        write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
      }
    }
  }

  // Test JSON Schema format using file path
  // NOTE(review): the expected object omits `email` and uses the tag
  // "javascript"; testJsonSchema marks `email` as required and restricts tags
  // to an enum that does not contain "javascript" — confirm the expectation.
  it('should format response according to JSON Schema file', async () => {
    const prompt = 'Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.'
    const expected = JSON.stringify({
      name: "John Doe",
      age: 30,
      tags: ["developer", "javascript"]
    })
    await runFormatTest(
      prompt,
      expected,
      'json-schema-file-format',
      TEST_MODEL,
      {
        format: TEST_SCHEMA_PATH
      }
    )
  }, TEST_TIMEOUT)

  // Test JSON Schema format using schema object
  // NOTE(review): prompt and expectation use `postal_code`, notifications
  // "enabled" and language "English", while testJsonSchema declares `zipCode`,
  // a boolean `notifications` and a language enum of short codes — confirm.
  it('should format response according to JSON Schema object', async () => {
    // The template literal is kept flush-left on purpose: its exact text,
    // including the absence of indentation, is what gets sent as the prompt.
    const prompt = `Create a user profile with the following details:
- Name: Jane Smith
- Age: 25
- Email: jane.smith@company.com
- Tags: ["developer", "designer"]
- Address: {
"street": "123 Main St",
"city": "New York",
"country": "US",
"postal_code": "10001"
}
- Preferences: {
"theme": "light",
"notifications": "enabled",
"language": "English"
}
Return only the JSON object, no explanation.`
    const expected = JSON.stringify({
      name: "Jane Smith",
      age: 25,
      email: "jane.smith@company.com",
      tags: ["developer", "designer"],
      address: {
        street: "123 Main St",
        city: "New York",
        country: "US",
        postal_code: "10001"
      },
      preferences: {
        theme: "light",
        notifications: "enabled",
        language: "English"
      }
    })
    await runFormatTest(
      prompt,
      expected,
      'json-schema-object-format',
      TEST_MODEL,
      {
        format: testJsonSchema
      }
    )
  }, TEST_TIMEOUT)

  // String payload. The test name mentions Zod, but the `format` value passed
  // here is a plain JSON-Schema-style object.
  it('should format response according to Zod string schema', async () => {
    const prompt = 'Generate a valid email address for a business domain. Return only the email, no explanation.'
    const result = await run({
      prompt,
      mode: 'completion',
      model: TEST_MODEL,
      router: TEST_ROUTER,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: {
        type: "object",
        properties: {
          email: {
            type: "string",
            format: "email"
          }
        },
        required: ["email"]
      }
    }) as string[]

    const rawResult = result?.[0]?.trim() || ''
    // Extract the first email-like string
    const emailMatch = rawResult.match(/[^\s@]+@[^\s@]+\.[^\s@]+/)
    const email = emailMatch ? emailMatch[0] : ''
    expect(isValidEmail(email)).toBe(true)
  }, TEST_TIMEOUT)

  // Number payload (same remark about the "Zod" wording as above).
  it('should format response according to Zod number schema', async () => {
    const prompt = 'Generate a random age between 18 and 65. Return only the number, no explanation.'
    const result = await run({
      prompt,
      mode: 'completion',
      model: TEST_MODEL,
      router: TEST_ROUTER,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      format: {
        type: "object",
        properties: {
          age: {
            type: "number",
            minimum: 18,
            maximum: 65
          }
        },
        required: ["age"]
      }
    }) as string[]

    const rawResult = result?.[0]?.trim() || ''
    // Take the first integer found in the response. This accepts a bare "42"
    // as well as a schema-shaped {"age": 42}, in line with the sibling tests
    // that tolerate wrapped output. No digits at all yields NaN, which is
    // never in range.
    const ageMatch = rawResult.match(/-?\d+/)
    const age = ageMatch ? parseInt(ageMatch[0], 10) : Number.NaN
    expect(isNumberInRange(age, 18, 65)).toBe(true)
  }, TEST_TIMEOUT)

  // Array payload (same remark about the "Zod" wording as above).
  it('should format response according to Zod array schema', async () => {
    const prompt = 'Generate a valid JSON array containing exactly 3 programming language names as strings. Respond ONLY with the JSON array (e.g., ["Python", "JavaScript", "Java"]). No other text.'
    let languages: string[] = []
    let rawResult = ''
    try {
      const result = await run({
        prompt,
        mode: 'completion',
        model: TEST_MODEL,
        router: TEST_ROUTER,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        format: {
          type: "object",
          properties: {
            languages: {
              type: "array",
              items: {
                type: "string"
              },
              minItems: 3,
              maxItems: 3
            }
          },
          required: ["languages"]
        }
      }) as string[]
      rawResult = result?.[0]?.trim() || ''

      // Attempt to parse the result, handle potential errors
      try {
        const parsedResult = JSON.parse(rawResult)
        // Accept either a bare array or an object wrapping it under `languages`.
        languages = Array.isArray(parsedResult) ? parsedResult : parsedResult.languages || []
      } catch (parseError) {
        console.error(`Failed to parse Zod array response: ${parseError}. Raw: ${rawResult}`)
        // Keep languages as empty array, the expect below will fail
      }
      expect(hasValidArrayLength(languages, 3)).toBe(true)
    } catch (e) {
      // Log the raw response for diagnosis, then let the failure propagate.
      console.error(`Error during Zod array test. Raw result: ${rawResult}`)
      throw e
    }
  }, TEST_TIMEOUT)

  // Test invalid format option
  it('should handle invalid format option', async () => {
    const prompt = 'Generate a random number'

    // Capture the rejection separately from the "did not reject" check.
    // Throwing the sentinel inside the same try block would let the catch
    // swallow it, because the sentinel's own message contains "format".
    let rejected = false
    let failure: unknown
    try {
      await run({
        prompt,
        mode: 'completion',
        model: TEST_MODEL,
        router: TEST_ROUTER,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        format: {
          type: "object",
          properties: {
            number: {
              type: "not_a_valid_type"
            }
          }
        }
      })
    } catch (e) {
      rejected = true
      failure = e
    }

    if (!rejected) {
      throw new Error('Expected format validation to fail')
    }

    // The error should be about invalid format/schema
    const message = failure instanceof Error ? failure.message : String(failure)
    if (!/invalid|schema|type|format/i.test(message)) {
      console.error('Actual error message:', message)
      throw new Error('Expected format validation to fail with an invalid format/schema/type error')
    }
  }, TEST_TIMEOUT)
})