import { describe, it, expect } from 'vitest'
import * as path from 'node:path'
import { sync as exists } from "@polymech/fs/exists"
import { sync as write } from "@polymech/fs/write"
import {
    getDefaultModels,
    TEST_BASE_PATH,
    TEST_LOGS_PATH,
    TEST_PREFERENCES_PATH,
    TEST_TIMEOUT,
    TestResult,
    runTest,
    getReportPaths
} from './commons'

// Optionally override models for this specific test file
|
|
const models = getDefaultModels()
|
|
|
|
describe('SEO Keyword Generation', () => {
|
|
let testResults: TestResult[] = []
|
|
const TEST_LOG_PATH = getReportPaths('seo', 'json')
|
|
const TEST_REPORT_PATH = getReportPaths('seo', 'md')
|
|
|
|
// Track model performance
|
|
const modelScores: Record<string, { total: number, tests: number }> = {}
|
|
|
|
it.each(models)('should generate SEO keywords for a descriptive text with model %s', async (modelName) => {
|
|
try {
|
|
const result = await runTest(
|
|
'Generate 5 SEO keywords for this text: "The ancient city of Rome, with its magnificent Colosseum, historic Roman Forum, and stunning Vatican City, offers visitors a unique blend of history, art, and culture." Return only the keywords separated by commas, no explanation.',
|
|
'Rome, Colosseum, Roman Forum, Vatican City, ancient history',
|
|
'seo_keywords_text',
|
|
modelName,
|
|
TEST_LOG_PATH
|
|
)
|
|
testResults.push(result)
|
|
|
|
// Handle potential empty or invalid results
|
|
if (!result.result?.[0]) {
|
|
result.passed = false
|
|
result.reason = 'No keywords generated'
|
|
return
|
|
}
|
|
|
|
const resultKeywords = result.result[0]
|
|
.trim()
|
|
.toLowerCase()
|
|
.split(',')
|
|
.map(k => k.trim())
|
|
.filter(k => k.length > 0)
|
|
|
|
const expectedKeywords = ['rome', 'colosseum', 'roman forum', 'vatican city', 'ancient history']
|
|
const matchedKeywords = expectedKeywords.filter(keyword =>
|
|
resultKeywords.some(result => result.includes(keyword))
|
|
)
|
|
|
|
// Update model score
|
|
if (!modelScores[modelName]) {
|
|
modelScores[modelName] = { total: 0, tests: 0 }
|
|
}
|
|
modelScores[modelName].total += matchedKeywords.length
|
|
modelScores[modelName].tests += 1
|
|
|
|
// Log the actual results for debugging
|
|
console.log(`Model ${modelName} generated keywords:`, resultKeywords)
|
|
console.log(`Matched keywords:`, matchedKeywords)
|
|
console.log(`Current score for ${modelName}: ${matchedKeywords.length}/${expectedKeywords.length}`)
|
|
|
|
// Update test result to reflect passing if we have at least 2 matches
|
|
result.passed = matchedKeywords.length >= 2
|
|
result.reason = result.passed ? undefined : `Only matched ${matchedKeywords.length} keywords, expected at least 2`
|
|
} catch (error) {
|
|
// Handle timeout or other errors gracefully
|
|
const result: TestResult = {
|
|
test: 'seo_keywords_text',
|
|
prompt: 'Generate SEO keywords for descriptive text',
|
|
result: [],
|
|
expected: 'Rome, Colosseum, Roman Forum, Vatican City, ancient history',
|
|
model: modelName,
|
|
router: 'openrouter',
|
|
timestamp: new Date().toISOString(),
|
|
passed: false,
|
|
duration: TEST_TIMEOUT,
|
|
reason: error instanceof Error ? error.message : 'Unknown error occurred'
|
|
}
|
|
testResults.push(result)
|
|
}
|
|
}, { timeout: TEST_TIMEOUT })
|
|
|
|
it.each(models)('should generate SEO keywords for a technical text with model %s', async (modelName) => {
|
|
try {
|
|
const result = await runTest(
|
|
'Generate 5 SEO keywords for this text: "Machine learning algorithms use statistical methods to enable computers to learn from data and improve their performance over time without being explicitly programmed." Return only the keywords separated by commas, no explanation.',
|
|
'machine learning, algorithms, artificial intelligence, data science, statistical methods',
|
|
'seo_keywords_technical',
|
|
modelName,
|
|
TEST_LOG_PATH
|
|
)
|
|
testResults.push(result)
|
|
|
|
// Handle potential empty or invalid results
|
|
if (!result.result?.[0]) {
|
|
result.passed = false
|
|
result.reason = 'No keywords generated'
|
|
return
|
|
}
|
|
|
|
const resultKeywords = result.result[0]
|
|
.trim()
|
|
.toLowerCase()
|
|
.split(',')
|
|
.map(k => k.trim())
|
|
.filter(k => k.length > 0)
|
|
|
|
const expectedKeywords = ['machine learning', 'algorithms', 'artificial intelligence', 'data science', 'statistical methods']
|
|
const matchedKeywords = expectedKeywords.filter(keyword =>
|
|
resultKeywords.some(result => result.includes(keyword))
|
|
)
|
|
|
|
// Update model score
|
|
if (!modelScores[modelName]) {
|
|
modelScores[modelName] = { total: 0, tests: 0 }
|
|
}
|
|
modelScores[modelName].total += matchedKeywords.length
|
|
modelScores[modelName].tests += 1
|
|
|
|
// Log the actual results for debugging
|
|
console.log(`Model ${modelName} generated keywords:`, resultKeywords)
|
|
console.log(`Matched keywords:`, matchedKeywords)
|
|
console.log(`Current score for ${modelName}: ${matchedKeywords.length}/${expectedKeywords.length}`)
|
|
|
|
// Update test result to reflect passing if we have at least 2 matches
|
|
result.passed = matchedKeywords.length >= 2
|
|
result.reason = result.passed ? undefined : `Only matched ${matchedKeywords.length} keywords, expected at least 2`
|
|
} catch (error) {
|
|
// Handle timeout or other errors gracefully
|
|
const result: TestResult = {
|
|
test: 'seo_keywords_technical',
|
|
prompt: 'Generate SEO keywords for technical text',
|
|
result: [],
|
|
expected: 'machine learning, algorithms, artificial intelligence, data science, statistical methods',
|
|
model: modelName,
|
|
router: 'openrouter',
|
|
timestamp: new Date().toISOString(),
|
|
passed: false,
|
|
duration: TEST_TIMEOUT,
|
|
reason: error instanceof Error ? error.message : 'Unknown error occurred'
|
|
}
|
|
testResults.push(result)
|
|
}
|
|
}, { timeout: TEST_TIMEOUT })
|
|
|
|
it('should generate markdown report with model rankings', () => {
|
|
// Calculate average scores and sort models
|
|
const modelRankings = Object.entries(modelScores)
|
|
.map(([model, scores]) => ({
|
|
model,
|
|
averageScore: scores.total / scores.tests,
|
|
totalMatches: scores.total,
|
|
testsRun: scores.tests,
|
|
maxPossibleMatches: scores.tests * 5 // Each test has 5 expected keywords
|
|
}))
|
|
.sort((a, b) => b.averageScore - a.averageScore)
|
|
|
|
// Log the rankings
|
|
console.log('\nSEO Keyword Generation Rankings:')
|
|
console.log('--------------------------------')
|
|
modelRankings.forEach((ranking, index) => {
|
|
console.log(`${index + 1}. ${ranking.model}:`)
|
|
console.log(` Average matches per test: ${ranking.averageScore.toFixed(2)}`)
|
|
console.log(` Total matches: ${ranking.totalMatches}/${ranking.maxPossibleMatches}`)
|
|
console.log(` Tests run: ${ranking.testsRun}`)
|
|
})
|
|
|
|
// Generate markdown report
|
|
let report = `# SEO Keyword Generation Test Results\n\n`
|
|
|
|
// Add SEO rankings section
|
|
report += '## Model Rankings\n\n'
|
|
report += '| Model | Avg Matches | Total Matches | Tests Run |\n'
|
|
report += '|-------|-------------|---------------|-----------|'
|
|
|
|
modelRankings.forEach(({ model, averageScore, totalMatches, testsRun, maxPossibleMatches }) => {
|
|
report += `\n| ${model} | ${averageScore.toFixed(2)} | ${totalMatches}/${maxPossibleMatches} | ${testsRun} |`
|
|
})
|
|
report += '\n\n'
|
|
|
|
// Add summary section
|
|
const totalTests = testResults.length
|
|
const passedTests = testResults.filter(r => r.passed).length
|
|
const failedTests = totalTests - passedTests
|
|
const avgDuration = testResults.reduce((sum, r) => sum + (r.duration || 0), 0) / totalTests
|
|
|
|
report += '## Summary\n\n'
|
|
report += `- Total Tests: ${totalTests}\n`
|
|
report += `- Passed: ${passedTests}\n`
|
|
report += `- Failed: ${failedTests}\n`
|
|
report += `- Success Rate: ${((passedTests / totalTests) * 100).toFixed(2)}%\n`
|
|
report += `- Average Duration: ${avgDuration.toFixed(0)}ms (${(avgDuration / 1000).toFixed(2)}s)\n\n`
|
|
|
|
// Add failed tests section
|
|
report += '## Failed Tests\n\n'
|
|
const failedResults = testResults.filter(r => !r.passed)
|
|
|
|
if (failedResults.length === 0) {
|
|
report += '*No failed tests*\n\n'
|
|
} else {
|
|
failedResults.forEach(result => {
|
|
report += `### ${result.test} - ${result.model}\n\n`
|
|
report += `- Prompt: \`${result.prompt}\`\n`
|
|
report += `- Expected: \`${result.expected}\`\n`
|
|
report += `- Actual: \`${result.result[0] || ''}\`\n`
|
|
report += `- Duration: ${result.duration || 0}ms (${((result.duration || 0) / 1000).toFixed(2)}s)\n`
|
|
if (result.error) {
|
|
report += `- Error: ${result.error.message}\n`
|
|
}
|
|
if (result.reason) {
|
|
report += `- Reason: ${result.reason}\n`
|
|
}
|
|
report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
|
|
})
|
|
}
|
|
|
|
// Add passed tests section
|
|
report += '## Passed Tests\n\n'
|
|
const passedResults = testResults.filter(r => r.passed)
|
|
|
|
if (passedResults.length === 0) {
|
|
report += '*No passed tests*\n\n'
|
|
} else {
|
|
passedResults.forEach(result => {
|
|
report += `### ${result.test} - ${result.model}\n\n`
|
|
report += `- Prompt: \`${result.prompt}\`\n`
|
|
report += `- Expected: \`${result.expected}\`\n`
|
|
report += `- Actual: \`${result.result[0] || ''}\`\n`
|
|
report += `- Duration: ${result.duration || 0}ms (${((result.duration || 0) / 1000).toFixed(2)}s)\n`
|
|
report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
|
|
})
|
|
}
|
|
|
|
// Write report to file
|
|
write(TEST_REPORT_PATH, report)
|
|
expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
|
|
})
|
|
})
|