tests:math - habsch selbstgemacht :)

This commit is contained in:
lovebird 2025-04-01 12:45:50 +02:00
parent 4b43c8b2dd
commit 4ec4053898
7 changed files with 1069 additions and 16 deletions

View File

@ -3,7 +3,7 @@
"messages": [
{
"role": "user",
"content": "return the result of 2+2, dont comment"
"content": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation."
},
{
"role": "user",

View File

@ -18,6 +18,7 @@
"lint": "eslint src --ext .ts",
"test": "vitest run",
"test:unit": "vitest run tests/unit",
"test:math": "vitest run tests/unit/math.test.ts",
"test2:watch": "vitest",
"test2:coverage": "vitest run --coverage",
"webpack": "webpack --config webpack.config.js --stats-error-details",

View File

@ -58,5 +58,175 @@
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:21:52.849Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:24:35.975Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:24:37.455Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:24:38.366Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"{\"name\": \"test\", \"value\": 42}"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:24:39.480Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"{\"name\": \"test\", \"value\": 42}"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:24:42.228Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:25:06.304Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:25:07.392Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:25:08.429Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"```json\n{\"name\":\"test\",\"value\":42}\n```"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:25:09.627Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"```json\n{\n \"name\": \"test\",\n \"value\": 42\n}\n```\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:25:10.720Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"{\"name\": \"test\", \"value\": 42}"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:25:12.099Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:26:30.128Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:26:31.316Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:26:32.279Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"{\"name\":\"test\",\"value\":42}"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:26:33.459Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"{\n \"name\": \"test\",\n \"value\": 42\n}"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:26:35.012Z"
},
{
"test": "json_structure",
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
"result": [
"{\"name\": \"test\", \"value\": 42}"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:26:36.239Z"
}
]

View File

@ -7,35 +7,41 @@ import { sync as exists } from "@polymech/fs/exists"
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
const DEFAULT_MODEL = E_OPENROUTER_MODEL_FREE.MODEL_FREE_MISTRALAI_MISTRAL_NEMO_FREE
const models = [
E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
E_OPENAI_MODEL.MODEL_GPT_4
]
const TEST_BASE_PATH = path.resolve(__dirname, '../../')
const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
interface TestResult {
test: string;
prompt: string;
result: string[];
expected: string;
model: string;
router: string;
timestamp: string;
passed: boolean;
reason?: string;
}
describe('Basic Functionality', () => {
const logPath = path.resolve(__dirname, './basic.json')
describe('Basic Operations', () => {
let testResults: TestResult[] = []
// Load existing results if any
if (exists(logPath)) {
const data = read(logPath, 'json')
if (exists(TEST_LOG_PATH)) {
const data = read(TEST_LOG_PATH, 'json')
testResults = Array.isArray(data) ? data : []
}
it.each(models)('should return 4 when given "return the result of 2+2, dont comment" in completion mode with model %s', async (modelName) => {
const prompt = 'return the result of 2+2, dont comment'
it.each(models)('should add two numbers with model %s', async (modelName) => {
const prompt = 'add 5 and 3. Return only the number, no explanation.'
const expected = '8'
let model = 'unknown'
let router = 'unknown'
@ -43,28 +49,116 @@ describe('Basic Functionality', () => {
prompt,
mode: 'completion',
model: modelName,
path: path.resolve(__dirname, '../../'),
logs: path.resolve(__dirname, '../../logs'),
preferences: path.resolve(__dirname, '../../preferences.md'),
path: TEST_BASE_PATH,
logs: TEST_LOGS_PATH,
preferences: TEST_PREFERENCES_PATH,
onRun: async (options) => {
model = options.model || 'unknown'
router = options.router || 'unknown'
return options
}
}) as string[]
expect(result.map(r => r.trim())).toEqual(['4'])
const actual = result.map(r => r.trim())[0]
const passed = actual === expected
expect(actual).toEqual(expected)
// Add test result to array
testResults.push({
test: 'basic_arithmetic',
test: 'addition',
prompt,
result,
expected,
model,
router,
timestamp: new Date().toISOString()
timestamp: new Date().toISOString(),
passed,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
})
// Write all results to the same file
write(logPath, JSON.stringify(testResults, null, 2))
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
})
it.each(models)('should multiply two numbers with model %s', async (modelName) => {
const prompt = 'multiply 4 and 6. Return only the number, no explanation.'
const expected = '24'
let model = 'unknown'
let router = 'unknown'
const result = await run({
prompt,
mode: 'completion',
model: modelName,
path: TEST_BASE_PATH,
logs: TEST_LOGS_PATH,
preferences: TEST_PREFERENCES_PATH,
onRun: async (options) => {
model = options.model || 'unknown'
router = options.router || 'unknown'
return options
}
}) as string[]
const actual = result.map(r => r.trim())[0]
const passed = actual === expected
expect(actual).toEqual(expected)
// Add test result to array
testResults.push({
test: 'multiplication',
prompt,
result,
expected,
model,
router,
timestamp: new Date().toISOString(),
passed,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
})
// Write all results to the same file
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
})
it.each(models)('should divide two numbers with model %s', async (modelName) => {
const prompt = 'divide 15 by 3. Return only the number, no explanation.'
const expected = '5'
let model = 'unknown'
let router = 'unknown'
const result = await run({
prompt,
mode: 'completion',
model: modelName,
path: TEST_BASE_PATH,
logs: TEST_LOGS_PATH,
preferences: TEST_PREFERENCES_PATH,
onRun: async (options) => {
model = options.model || 'unknown'
router = options.router || 'unknown'
return options
}
}) as string[]
const actual = result.map(r => r.trim())[0]
const passed = actual === expected
expect(actual).toEqual(expected)
// Add test result to array
testResults.push({
test: 'division',
prompt,
result,
expected,
model,
router,
timestamp: new Date().toISOString(),
passed,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
})
// Write all results to the same file
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
})
})

View File

@ -0,0 +1,62 @@
# Math Test Results
## Failed Tests
## Passed Tests
### factorial - deepseek/deepseek-chat:free
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120`
- Timestamp: 4/1/2025, 12:42:00 PM
### factorial - google/gemini-2.0-flash-exp:free
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120
`
- Timestamp: 4/1/2025, 12:42:02 PM
### factorial - gpt-4
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120`
- Timestamp: 4/1/2025, 12:42:03 PM
### fibonacci - deepseek/deepseek-chat:free
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0,1,1,2,3`
- Timestamp: 4/1/2025, 12:41:49 PM
### fibonacci - google/gemini-2.0-flash-exp:free
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0,1,1,2,3
`
- Timestamp: 4/1/2025, 12:42:10 PM
### fibonacci - gpt-4
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0, 1, 1, 2, 3`
- Timestamp: 4/1/2025, 12:42:12 PM
### quadratic - deepseek/deepseek-chat:free
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Timestamp: 4/1/2025, 12:42:16 PM
### quadratic - google/gemini-2.0-flash-exp:free
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2.0, -3.0]`
- Timestamp: 4/1/2025, 12:42:17 PM
### quadratic - gpt-4
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Timestamp: 4/1/2025, 12:42:18 PM

View File

@ -0,0 +1,556 @@
[
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:34:59.561Z"
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:01.881Z"
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:02.849Z"
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:04.513Z"
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:05.862Z"
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:42.778Z"
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:44.194Z"
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:45.420Z"
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:46.805Z"
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:48.153Z"
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0, 1, 1, 2, 3"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:49.492Z"
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:50.745Z"
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:51.918Z"
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:35:53.368Z"
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:08.969Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:10.154Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:11.066Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:12.258Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:14.007Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0, 1, 1, 2, 3"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:15.272Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:16.397Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:17.622Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:18.768Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:41.644Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:43.969Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:44.945Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:46.261Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:47.915Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0, 1, 1, 2, 3"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:49.298Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:50.593Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:52.768Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:38:54.254Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:40:45.957Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"expected": "120",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:40:47.272Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:40:48.185Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3"
],
"expected": "0,1,1,2,3",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:40:51.236Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3\n"
],
"expected": "0,1,1,2,3",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:40:52.419Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0, 1, 1, 2, 3"
],
"expected": "0,1,1,2,3",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:40:54.327Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:41:45.219Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"expected": "120",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:41:46.489Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:41:47.701Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3"
],
"expected": "0,1,1,2,3",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:41:49.929Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:00.754Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120\n"
],
"expected": "120",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:02.899Z",
"passed": true
},
{
"test": "factorial",
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
"result": [
"120"
],
"expected": "120",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:03.927Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0,1,1,2,3\n"
],
"expected": "0,1,1,2,3",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:10.515Z",
"passed": true
},
{
"test": "fibonacci",
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
"result": [
"0, 1, 1, 2, 3"
],
"expected": "0,1,1,2,3",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:12.203Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"expected": "[-3,-2]",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:16.443Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2.0, -3.0]"
],
"expected": "[-3,-2]",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:17.598Z",
"passed": true
},
{
"test": "quadratic",
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
"result": [
"[-2, -3]"
],
"expected": "[-3,-2]",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:42:18.707Z",
"passed": true
}
]

View File

@ -0,0 +1,170 @@
import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
// Models under test. Every `it.each(models)` case in this file runs once per
// entry, and the entry is passed to `run()` as its `model` option.
const models = [
E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
E_OPENAI_MODEL.MODEL_GPT_4
]
// Directory two levels above this test file's directory; passed to `run()` as `path`.
const TEST_BASE_PATH = path.resolve(__dirname, '../../')
// `logs` directory under the base directory; passed to `run()` as `logs`.
const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
// `preferences.md` under the base directory; passed to `run()` as `preferences`.
const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
// JSON results log next to this test file: read once when the suite is set up
// and rewritten with the accumulated results after each test case.
const TEST_LOG_PATH = path.resolve(__dirname, './math.json')
/** One entry of the JSON results log written to TEST_LOG_PATH. */
interface TestResult {
/** Short identifier of the test case, e.g. 'factorial', 'fibonacci', 'quadratic'. */
test: string;
/** Prompt text that was sent through `run()`. */
prompt: string;
/** Raw value returned by `run()` (untrimmed), treated as an array of strings. */
result: string[];
/** Expected answer in its display form, e.g. '120' or '[-3,-2]'. */
expected: string;
/** Model reported by the `onRun` callback; 'unknown' if it was not reported. */
model: string;
/** Router reported by the `onRun` callback; 'unknown' if it was not reported. */
router: string;
/** ISO-8601 time at which the entry was created (`Date#toISOString`). */
timestamp: string;
/** Whether the normalised answer matched the expected value. */
passed: boolean;
/** Mismatch description; only set when `passed` is false. */
reason?: string;
}
/**
 * Advanced math prompts (factorial, fibonacci, quadratic roots), each executed
 * once per entry of `models`.
 *
 * Every case appends one `TestResult` to `testResults` and rewrites
 * TEST_LOG_PATH. The entry is recorded BEFORE the vitest assertion runs: a
 * failing `expect` throws, so recording afterwards would silently drop every
 * failed case from the log and `passed: false` / `reason` would never be written.
 */
describe('Advanced Math Operations', () => {
  let testResults: TestResult[] = []

  // Load existing results if any, so the log keeps accumulating across runs.
  if (exists(TEST_LOG_PATH)) {
    const data = read(TEST_LOG_PATH, 'json')
    testResults = Array.isArray(data) ? data : []
  }

  /**
   * Returns the first completion with surrounding whitespace removed.
   * An empty result array yields '' so the case fails through a normal
   * assertion instead of a TypeError on `undefined.trim()`.
   */
  const firstAnswer = (result: string[]): string => (result[0] ?? '').trim()

  /**
   * Appends one entry to the in-memory results and rewrites the log file.
   *
   * @param entry  The per-case fields; key order here is the key order in the JSON log.
   * @param passed Whether the normalised answer matched the expectation.
   * @param actual Text shown after "but got" in the failure reason.
   */
  const record = (
    entry: Pick<TestResult, 'test' | 'prompt' | 'result' | 'expected' | 'model' | 'router'>,
    passed: boolean,
    actual: string
  ): void => {
    testResults.push({
      ...entry,
      timestamp: new Date().toISOString(),
      passed,
      reason: passed ? undefined : `Expected ${entry.expected}, but got ${actual}`
    })
    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  }

  it.each(models)('should calculate factorial of 5 with model %s', async (modelName) => {
    const prompt = 'calculate the factorial of 5 (5!). Return only the number, no explanation.'
    const expected = '120'
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      // Capture the model/router that were actually used for the log entry.
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    const actual = firstAnswer(result)

    // Log first, assert second (see the suite comment).
    record({ test: 'factorial', prompt, result, expected, model, router }, actual === expected, actual)
    expect(actual).toEqual(expected)
  })

  it.each(models)('should calculate fibonacci sequence up to 5th number with model %s', async (modelName) => {
    const prompt = 'calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.'
    const expected = '0,1,1,2,3'
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      // Capture the model/router that were actually used for the log entry.
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    // Handle both formats: "0,1,1,2,3" and "0, 1, 1, 2, 3"
    const numbers = firstAnswer(result).split(',').map(n => n.trim())
    const actual = numbers.join(',')

    // Log first, assert second (see the suite comment).
    record({ test: 'fibonacci', prompt, result, expected, model, router }, actual === expected, actual)
    expect(numbers).toEqual(['0', '1', '1', '2', '3'])
  })

  it.each(models)('should solve quadratic equation x² + 5x + 6 = 0 with model %s', async (modelName) => {
    const prompt = 'solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.'
    const expectedDisplay = '[-3,-2]'
    // Ascending order; the parsed roots are sorted the same way before comparing.
    const expectedRoots = [-3, -2]
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      filters: 'code',
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      // Capture the model/router that were actually used for the log entry.
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    // Parse the result as JSON (the original code relies on the 'code' filter
    // having stripped any markdown fence) and validate its shape before use.
    const raw = result[0] ?? ''
    let roots: number[] | undefined
    try {
      const parsed: unknown = JSON.parse(raw.trim())
      if (Array.isArray(parsed) && parsed.every((v): v is number => typeof v === 'number')) {
        // Copy before sorting and compare numerically: the default sort()
        // orders by string value and mutates the array in place.
        roots = [...parsed].sort((a, b) => a - b)
      }
    } catch {
      // Not valid JSON: leave `roots` undefined so the case is logged as
      // failed and the assertion below reports it.
    }

    const passed =
      roots !== undefined &&
      roots.length === expectedRoots.length &&
      roots.every((root, i) => root === expectedRoots[i])

    // Log first, assert second (see the suite comment).
    record({ test: 'quadratic', prompt, result, expected: expectedDisplay, model, router }, passed, raw)
    expect(roots).toEqual(expectedRoots)
  })
})