mono/packages/kbot/tests/unit/math.test.ts

139 lines
4.9 KiB
TypeScript

import { describe, it, expect } from 'vitest'
import * as path from 'node:path'
import { sync as exists } from "@polymech/fs/exists"
import {
getDefaultModels,
TEST_BASE_PATH,
TEST_LOGS_PATH,
TEST_PREFERENCES_PATH,
TEST_TIMEOUT,
TestResult,
runTest,
generateTestReport,
getReportPaths
} from './commons'
/**
 * Extracts the trailing numeric answer(s) from a free-form model response.
 *
 * The text is lower-cased, LaTeX `\boxed{...}` wrappers are unwrapped, a few
 * conversational prefixes ("the answer is", "solutions:", "answer:") are
 * removed, and the remainder is scanned for integer or decimal literals with
 * an optional leading minus sign.
 *
 * @param text - Raw response text. `undefined` or an empty string yields `null`.
 * @param expectedCount - Number of values the answer consists of; intended to
 *   be a positive integer. Defaults to 1.
 * @returns When `expectedCount` is 1, the last number exactly as it appears in
 *   the cleaned text. Otherwise the last `expectedCount` numbers, converted via
 *   `Number`, sorted ascending and joined with commas. `null` when no number is
 *   found or fewer numbers than requested are present.
 */
function extractFinalNumberOrNumbers(text: string | undefined, expectedCount: number = 1): string | null {
  if (!text) {
    return null;
  }

  const cleanedText = text
    .toLowerCase()
    // Typographic minus (U+2212) is not matched by `-?` below; fold it into
    // an ASCII hyphen so negative answers keep their sign.
    .replace(/\u2212/g, '-')
    // The backslash has to be escaped: a bare `\b` is a word-boundary
    // assertion and would never match the literal text "\boxed{".
    .replace(/\\boxed\{(.*?)\}/g, '$1')
    .replace(/the answer is/g, '')
    .replace(/solutions:/g, '')
    .replace(/answer:/g, '');

  // All numbers in order of appearance (integer or decimal, optionally negative).
  const numbers = cleanedText.match(/-?\d+(\.\d+)?/g);
  if (!numbers || numbers.length === 0) {
    return null;
  }

  if (expectedCount === 1) {
    // The last number in the response is assumed to be the final answer.
    return numbers[numbers.length - 1] ?? null;
  }

  // Multi-value answers: take the trailing `expectedCount` numbers.
  const relevantNumbers = numbers.slice(-expectedCount);
  if (relevantNumbers.length < expectedCount) {
    return null;
  }

  // Sort numerically so the result does not depend on the order the model
  // happened to list the values in.
  return relevantNumbers
    .map(Number)
    .sort((a, b) => a - b)
    .map(String)
    .join(',');
}
// Models every case below is run against; change this assignment to override
// the list for this test file only.
const models = getDefaultModels()

/**
 * Arithmetic prompts executed once per model. Each case sends a prompt through
 * `runTest`, records the returned result, and checks the number(s) extracted
 * from the first entry of `result.result`. A final test writes a markdown
 * report from everything that was recorded.
 *
 * Timeouts are passed as a plain number in the last position: Vitest has
 * deprecated an options object there, while a numeric timeout is accepted
 * across versions. Assumes TEST_TIMEOUT is a millisecond count — confirm in
 * ./commons.
 */
describe('Math Operations', () => {
  // Filled by the per-model cases and consumed by the report test at the end.
  const testResults: TestResult[] = []
  const TEST_LOG_PATH = getReportPaths('math', 'json')
  const TEST_REPORT_PATH = getReportPaths('math', 'md')

  it.each(models)('should solve quadratic equation with model %s', async (modelName) => {
    const result = await runTest(
      'Solve the quadratic equation x² + 5x + 6 = 0. Respond ONLY with the solutions as comma-separated numbers (e.g., -3,-2). No other text.',
      '-3,-2', // Expected sorted result
      'quadratic',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    // A quadratic has two roots, so ask the extractor for two numbers.
    const actualResult = extractFinalNumberOrNumbers(result.result[0], 2)
    expect(actualResult).toEqual('-3,-2')
  }, TEST_TIMEOUT)

  it.each(models)('should calculate factorial with model %s', async (modelName) => {
    const result = await runTest(
      'Calculate 5! (factorial of 5). Respond ONLY with the final numerical answer. No explanation, no other text.',
      '120',
      'factorial',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    const actualResult = extractFinalNumberOrNumbers(result.result[0], 1)
    expect(actualResult).toEqual('120')
  }, TEST_TIMEOUT)

  it.each(models)('should calculate fibonacci sequence with model %s', async (modelName) => {
    const result = await runTest(
      'Calculate the 6th number in the Fibonacci sequence (assuming F(1)=1, F(2)=1). Respond ONLY with the final numerical answer. No other text.',
      '8', // F(6) = 8
      'fibonacci',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    const actualResult = extractFinalNumberOrNumbers(result.result[0], 1)
    // gpt-4o-mini sometimes indexes from F(0)=0 and answers 5; tolerate that
    // for this one model only, every other model must answer 8.
    const acceptedAnswers = modelName === 'openai/gpt-4o-mini' ? ['8', '5'] : ['8']
    expect(acceptedAnswers).toContain(actualResult)
  }, TEST_TIMEOUT)

  it.each(models)('should calculate square root with model %s', async (modelName) => {
    const result = await runTest(
      'Calculate the square root of 16. Respond ONLY with the final numerical answer. No other text.',
      '4',
      'square_root',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    const actualResult = extractFinalNumberOrNumbers(result.result[0], 1)
    expect(actualResult).toEqual('4')
  }, TEST_TIMEOUT)

  it.each(models)('should calculate power with model %s', async (modelName) => {
    const result = await runTest(
      'Calculate 2 raised to the power of 3. Respond ONLY with the final numerical answer. No other text.',
      '8',
      'power',
      modelName,
      TEST_LOG_PATH
    )
    testResults.push(result)
    const actualResult = extractFinalNumberOrNumbers(result.result[0], 1)
    expect(actualResult).toEqual('8')
  }, TEST_TIMEOUT)

  // Declared last so the cases above have already pushed into testResults
  // (tests in a file run in declaration order unless configured otherwise).
  it('should generate markdown report', () => {
    generateTestReport(testResults, 'Math Operations Test Results', TEST_REPORT_PATH)
    // Comparing the returned value directly gives a more useful failure
    // message than asserting on a pre-computed boolean.
    expect(exists(TEST_REPORT_PATH)).toBe('file')
  })
})