tests:math - habsch selbstgemacht :)
This commit is contained in:
parent
4b43c8b2dd
commit
4ec4053898
@ -3,7 +3,7 @@
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "return the result of 2+2, dont comment"
|
||||
"content": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
"lint": "eslint src --ext .ts",
|
||||
"test": "vitest run",
|
||||
"test:unit": "vitest run tests/unit",
|
||||
"test:math": "vitest run tests/unit/math.test.ts",
|
||||
"test2:watch": "vitest",
|
||||
"test2:coverage": "vitest run --coverage",
|
||||
"webpack": "webpack --config webpack.config.js --stats-error-details",
|
||||
|
||||
@ -58,5 +58,175 @@
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:21:52.849Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:24:35.975Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:24:37.455Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:24:38.366Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"{\"name\": \"test\", \"value\": 42}"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:24:39.480Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"{\"name\": \"test\", \"value\": 42}"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:24:42.228Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:25:06.304Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:25:07.392Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:25:08.429Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"```json\n{\"name\":\"test\",\"value\":42}\n```"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:25:09.627Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"test\",\n \"value\": 42\n}\n```\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:25:10.720Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"{\"name\": \"test\", \"value\": 42}"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:25:12.099Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:26:30.128Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:26:31.316Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:26:32.279Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"{\"name\":\"test\",\"value\":42}"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:26:33.459Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"{\n \"name\": \"test\",\n \"value\": 42\n}"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:26:35.012Z"
|
||||
},
|
||||
{
|
||||
"test": "json_structure",
|
||||
"prompt": "return a JSON object with two fields: \"name\" as \"test\" and \"value\" as 42. Return only the JSON, no other text.",
|
||||
"result": [
|
||||
"{\"name\": \"test\", \"value\": 42}"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:26:36.239Z"
|
||||
}
|
||||
]
|
||||
@ -7,35 +7,41 @@ import { sync as exists } from "@polymech/fs/exists"
|
||||
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
||||
|
||||
const DEFAULT_MODEL = E_OPENROUTER_MODEL_FREE.MODEL_FREE_MISTRALAI_MISTRAL_NEMO_FREE
|
||||
|
||||
const models = [
|
||||
E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
|
||||
E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
|
||||
E_OPENAI_MODEL.MODEL_GPT_4
|
||||
]
|
||||
|
||||
const TEST_BASE_PATH = path.resolve(__dirname, '../../')
|
||||
const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
|
||||
const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
|
||||
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
|
||||
|
||||
interface TestResult {
|
||||
test: string;
|
||||
prompt: string;
|
||||
result: string[];
|
||||
expected: string;
|
||||
model: string;
|
||||
router: string;
|
||||
timestamp: string;
|
||||
passed: boolean;
|
||||
reason?: string;
|
||||
}
|
||||
|
||||
describe('Basic Functionality', () => {
|
||||
const logPath = path.resolve(__dirname, './basic.json')
|
||||
describe('Basic Operations', () => {
|
||||
let testResults: TestResult[] = []
|
||||
|
||||
// Load existing results if any
|
||||
if (exists(logPath)) {
|
||||
const data = read(logPath, 'json')
|
||||
if (exists(TEST_LOG_PATH)) {
|
||||
const data = read(TEST_LOG_PATH, 'json')
|
||||
testResults = Array.isArray(data) ? data : []
|
||||
}
|
||||
|
||||
it.each(models)('should return 4 when given "return the result of 2+2, dont comment" in completion mode with model %s', async (modelName) => {
|
||||
const prompt = 'return the result of 2+2, dont comment'
|
||||
it.each(models)('should add two numbers with model %s', async (modelName) => {
|
||||
const prompt = 'add 5 and 3. Return only the number, no explanation.'
|
||||
const expected = '8'
|
||||
let model = 'unknown'
|
||||
let router = 'unknown'
|
||||
|
||||
@ -43,28 +49,116 @@ describe('Basic Functionality', () => {
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: modelName,
|
||||
path: path.resolve(__dirname, '../../'),
|
||||
logs: path.resolve(__dirname, '../../logs'),
|
||||
preferences: path.resolve(__dirname, '../../preferences.md'),
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
onRun: async (options) => {
|
||||
model = options.model || 'unknown'
|
||||
router = options.router || 'unknown'
|
||||
return options
|
||||
}
|
||||
}) as string[]
|
||||
expect(result.map(r => r.trim())).toEqual(['4'])
|
||||
|
||||
const actual = result.map(r => r.trim())[0]
|
||||
const passed = actual === expected
|
||||
expect(actual).toEqual(expected)
|
||||
|
||||
// Add test result to array
|
||||
testResults.push({
|
||||
test: 'basic_arithmetic',
|
||||
test: 'addition',
|
||||
prompt,
|
||||
result,
|
||||
expected,
|
||||
model,
|
||||
router,
|
||||
timestamp: new Date().toISOString()
|
||||
timestamp: new Date().toISOString(),
|
||||
passed,
|
||||
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
|
||||
})
|
||||
|
||||
// Write all results to the same file
|
||||
write(logPath, JSON.stringify(testResults, null, 2))
|
||||
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
|
||||
})
|
||||
|
||||
it.each(models)('should multiply two numbers with model %s', async (modelName) => {
|
||||
const prompt = 'multiply 4 and 6. Return only the number, no explanation.'
|
||||
const expected = '24'
|
||||
let model = 'unknown'
|
||||
let router = 'unknown'
|
||||
|
||||
const result = await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: modelName,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
onRun: async (options) => {
|
||||
model = options.model || 'unknown'
|
||||
router = options.router || 'unknown'
|
||||
return options
|
||||
}
|
||||
}) as string[]
|
||||
|
||||
const actual = result.map(r => r.trim())[0]
|
||||
const passed = actual === expected
|
||||
expect(actual).toEqual(expected)
|
||||
|
||||
// Add test result to array
|
||||
testResults.push({
|
||||
test: 'multiplication',
|
||||
prompt,
|
||||
result,
|
||||
expected,
|
||||
model,
|
||||
router,
|
||||
timestamp: new Date().toISOString(),
|
||||
passed,
|
||||
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
|
||||
})
|
||||
|
||||
// Write all results to the same file
|
||||
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
|
||||
})
|
||||
|
||||
it.each(models)('should divide two numbers with model %s', async (modelName) => {
|
||||
const prompt = 'divide 15 by 3. Return only the number, no explanation.'
|
||||
const expected = '5'
|
||||
let model = 'unknown'
|
||||
let router = 'unknown'
|
||||
|
||||
const result = await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: modelName,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
onRun: async (options) => {
|
||||
model = options.model || 'unknown'
|
||||
router = options.router || 'unknown'
|
||||
return options
|
||||
}
|
||||
}) as string[]
|
||||
|
||||
const actual = result.map(r => r.trim())[0]
|
||||
const passed = actual === expected
|
||||
expect(actual).toEqual(expected)
|
||||
|
||||
// Add test result to array
|
||||
testResults.push({
|
||||
test: 'division',
|
||||
prompt,
|
||||
result,
|
||||
expected,
|
||||
model,
|
||||
router,
|
||||
timestamp: new Date().toISOString(),
|
||||
passed,
|
||||
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
|
||||
})
|
||||
|
||||
// Write all results to the same file
|
||||
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
|
||||
})
|
||||
})
|
||||
62
packages/kbot/tests/unit/math-report.md
Normal file
62
packages/kbot/tests/unit/math-report.md
Normal file
@ -0,0 +1,62 @@
|
||||
# Math Test Results
|
||||
|
||||
## Failed Tests
|
||||
|
||||
## Passed Tests
|
||||
|
||||
### factorial - deepseek/deepseek-chat:free
|
||||
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
|
||||
- Expected: `120`
|
||||
- Actual: `120`
|
||||
- Timestamp: 4/1/2025, 12:42:00 PM
|
||||
|
||||
### factorial - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
|
||||
- Expected: `120`
|
||||
- Actual: `120
|
||||
`
|
||||
- Timestamp: 4/1/2025, 12:42:02 PM
|
||||
|
||||
### factorial - gpt-4
|
||||
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
|
||||
- Expected: `120`
|
||||
- Actual: `120`
|
||||
- Timestamp: 4/1/2025, 12:42:03 PM
|
||||
|
||||
### fibonacci - deepseek/deepseek-chat:free
|
||||
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
|
||||
- Expected: `0,1,1,2,3`
|
||||
- Actual: `0,1,1,2,3`
|
||||
- Timestamp: 4/1/2025, 12:41:49 PM
|
||||
|
||||
### fibonacci - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
|
||||
- Expected: `0,1,1,2,3`
|
||||
- Actual: `0,1,1,2,3
|
||||
`
|
||||
- Timestamp: 4/1/2025, 12:42:10 PM
|
||||
|
||||
### fibonacci - gpt-4
|
||||
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
|
||||
- Expected: `0,1,1,2,3`
|
||||
- Actual: `0, 1, 1, 2, 3`
|
||||
- Timestamp: 4/1/2025, 12:42:12 PM
|
||||
|
||||
### quadratic - deepseek/deepseek-chat:free
|
||||
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
|
||||
- Expected: `[-3,-2]`
|
||||
- Actual: `[-2, -3]`
|
||||
- Timestamp: 4/1/2025, 12:42:16 PM
|
||||
|
||||
### quadratic - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
|
||||
- Expected: `[-3,-2]`
|
||||
- Actual: `[-2.0, -3.0]`
|
||||
- Timestamp: 4/1/2025, 12:42:17 PM
|
||||
|
||||
### quadratic - gpt-4
|
||||
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
|
||||
- Expected: `[-3,-2]`
|
||||
- Actual: `[-2, -3]`
|
||||
- Timestamp: 4/1/2025, 12:42:18 PM
|
||||
|
||||
556
packages/kbot/tests/unit/math.json
Normal file
556
packages/kbot/tests/unit/math.json
Normal file
@ -0,0 +1,556 @@
|
||||
[
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:34:59.561Z"
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:01.881Z"
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:02.849Z"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:04.513Z"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:05.862Z"
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:42.778Z"
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:44.194Z"
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:45.420Z"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:46.805Z"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:48.153Z"
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0, 1, 1, 2, 3"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:49.492Z"
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:50.745Z"
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:51.918Z"
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:35:53.368Z"
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:08.969Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:10.154Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:11.066Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:12.258Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:14.007Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0, 1, 1, 2, 3"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:15.272Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:16.397Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:17.622Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:18.768Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:41.644Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:43.969Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:44.945Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:46.261Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:47.915Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0, 1, 1, 2, 3"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:49.298Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:50.593Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:52.768Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:38:54.254Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:40:45.957Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:40:47.272Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:40:48.185Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3"
|
||||
],
|
||||
"expected": "0,1,1,2,3",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:40:51.236Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3\n"
|
||||
],
|
||||
"expected": "0,1,1,2,3",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:40:52.419Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0, 1, 1, 2, 3"
|
||||
],
|
||||
"expected": "0,1,1,2,3",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:40:54.327Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:41:45.219Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:41:46.489Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:41:47.701Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3"
|
||||
],
|
||||
"expected": "0,1,1,2,3",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:41:49.929Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:00.754Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120\n"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:02.899Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "factorial",
|
||||
"prompt": "calculate the factorial of 5 (5!). Return only the number, no explanation.",
|
||||
"result": [
|
||||
"120"
|
||||
],
|
||||
"expected": "120",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:03.927Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0,1,1,2,3\n"
|
||||
],
|
||||
"expected": "0,1,1,2,3",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:10.515Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "fibonacci",
|
||||
"prompt": "calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.",
|
||||
"result": [
|
||||
"0, 1, 1, 2, 3"
|
||||
],
|
||||
"expected": "0,1,1,2,3",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:12.203Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"expected": "[-3,-2]",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:16.443Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2.0, -3.0]"
|
||||
],
|
||||
"expected": "[-3,-2]",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:17.598Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "quadratic",
|
||||
"prompt": "solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.",
|
||||
"result": [
|
||||
"[-2, -3]"
|
||||
],
|
||||
"expected": "[-3,-2]",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:42:18.707Z",
|
||||
"passed": true
|
||||
}
|
||||
]
|
||||
170
packages/kbot/tests/unit/math.test.ts
Normal file
170
packages/kbot/tests/unit/math.test.ts
Normal file
@ -0,0 +1,170 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { run } from '../../src/index'
|
||||
import * as path from 'node:path'
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
||||
|
||||
/**
 * Models every math test is executed against (one `it.each` case per entry):
 * two free OpenRouter models followed by GPT-4.
 */
const models = [
  // Free OpenRouter models
  E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
  E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
  // OpenAI model
  E_OPENAI_MODEL.MODEL_GPT_4,
]
|
||||
|
||||
/** Resolves `relative` against the directory that contains this test file. */
const fromTestDir = (relative: string): string => path.resolve(__dirname, relative)

/** Directory handed to `run` as its `path` option. */
const TEST_BASE_PATH = fromTestDir('../../')

/** Directory handed to `run` as its `logs` option. */
const TEST_LOGS_PATH = fromTestDir('../../logs')

/** File handed to `run` as its `preferences` option. */
const TEST_PREFERENCES_PATH = fromTestDir('../../preferences.md')

/** JSON file, next to this test, that accumulates the recorded test results. */
const TEST_LOG_PATH = fromTestDir('./math.json')
|
||||
|
||||
/**
 * One entry of the JSON results log written by the math tests.
 */
interface TestResult {
  /** Short identifier of the test case, e.g. `factorial`. */
  test: string

  /** Prompt that was sent to the model. */
  prompt: string

  /** Raw, unnormalized output returned by `run`. */
  result: string[]

  /** Expected answer in its display form. */
  expected: string

  /** Model name reported through the `onRun` callback (`unknown` if never reported). */
  model: string

  /** Router name reported through the `onRun` callback (`unknown` if never reported). */
  router: string

  /** ISO-8601 timestamp of when the entry was recorded. */
  timestamp: string

  /** Whether the normalized answer matched the expectation. */
  passed: boolean

  /** Explanation of the mismatch; left undefined for passing entries. */
  reason?: string
}
|
||||
|
||||
/**
 * Sends small math prompts to every entry of `models` and checks the answers.
 *
 * Each run is appended to the JSON log at `TEST_LOG_PATH` *before* the
 * assertion is evaluated, so failing runs are logged too (with `passed: false`
 * and a `reason`) instead of being lost when `expect` throws.
 */
describe('Advanced Math Operations', () => {
  type RunOptions = NonNullable<Parameters<typeof run>[0]>
  type ModelName = (typeof models)[number]

  let testResults: TestResult[] = []

  // Load existing results if any, so this run appends to the log.
  if (exists(TEST_LOG_PATH)) {
    const data = read(TEST_LOG_PATH, 'json')
    testResults = Array.isArray(data) ? data : []
  }

  /**
   * Runs one completion and reports which model/router handled it.
   *
   * @param prompt - Prompt sent to the model.
   * @param modelName - Model to request.
   * @param extra - Additional `run` options for a single test (e.g. `filters`).
   * @returns The raw result plus the model/router seen by the `onRun` callback
   *          (`'unknown'` when the callback never provided them).
   */
  const complete = async (prompt: string, modelName: ModelName, extra: Partial<RunOptions> = {}) => {
    let model = 'unknown'
    let router = 'unknown'

    const result = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      ...extra,
      path: TEST_BASE_PATH,
      logs: TEST_LOGS_PATH,
      preferences: TEST_PREFERENCES_PATH,
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    }) as string[]

    return { result, model, router }
  }

  /**
   * Returns the first answer trimmed, or `undefined` when the model produced
   * no usable string (empty or malformed result).
   */
  const firstAnswer = (result: string[]): string | undefined => {
    const first = Array.isArray(result) ? result[0] : undefined
    return typeof first === 'string' ? first.trim() : undefined
  }

  /**
   * Appends one entry to the in-memory results and rewrites the log file.
   *
   * @param entry - Everything except the timestamp and reason.
   * @param failure - Message stored as `reason` when `entry.passed` is false.
   */
  const record = (entry: Omit<TestResult, 'timestamp' | 'reason'>, failure: string): void => {
    const { test, prompt, result, expected, model, router, passed } = entry

    // Key order is kept stable so the JSON log layout does not change.
    testResults.push({
      test,
      prompt,
      result,
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      reason: passed ? undefined : failure
    })

    // Write all results to the same file
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  }

  it.each(models)('should calculate factorial of 5 with model %s', async (modelName) => {
    const prompt = 'calculate the factorial of 5 (5!). Return only the number, no explanation.'
    const expected = '120'

    const { result, model, router } = await complete(prompt, modelName)
    const actual = firstAnswer(result)
    const passed = actual === expected

    // Log first, assert afterwards: a failing expect must not skip the log entry.
    record(
      { test: 'factorial', prompt, result, expected, model, router, passed },
      `Expected ${expected}, but got ${actual}`
    )

    expect(actual).toEqual(expected)
  })

  it.each(models)('should calculate fibonacci sequence up to 5th number with model %s', async (modelName) => {
    const prompt = 'calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.'
    const expected = '0,1,1,2,3'

    const { result, model, router } = await complete(prompt, modelName)

    // Handle both formats: "0,1,1,2,3" and "0, 1, 1, 2, 3".
    // A missing answer becomes [''] and fails the assertion below instead of throwing here.
    const numbers = (firstAnswer(result) ?? '').split(',').map(n => n.trim())
    const actual = numbers.join(',')
    const passed = actual === expected

    record(
      { test: 'fibonacci', prompt, result, expected, model, router, passed },
      `Expected ${expected}, but got ${actual}`
    )

    expect(numbers).toEqual(['0', '1', '1', '2', '3'])
  })

  it.each(models)('should solve quadratic equation x² + 5x + 6 = 0 with model %s', async (modelName) => {
    const prompt = 'solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.'
    const expectedDisplay = '[-3,-2]'
    const expectedRoots = [-3, -2]

    // Numeric comparator on a copy: the default sort compares as strings and mutates its input.
    const sortNumeric = (values: number[]): number[] => [...values].sort((a, b) => a - b)

    const { result, model, router } = await complete(prompt, modelName, { filters: 'code' })

    // Parse the result as JSON (markdown already stripped by filter).
    // Anything that is not a JSON array of numbers leaves `actualRoots` undefined,
    // which is logged and asserted as a failure rather than thrown from here.
    let actualRoots: number[] | undefined
    try {
      const parsed: unknown = JSON.parse(firstAnswer(result) ?? '')
      if (Array.isArray(parsed) && parsed.every((x): x is number => typeof x === 'number')) {
        actualRoots = sortNumeric(parsed)
      }
    } catch {
      actualRoots = undefined
    }

    const expectedSorted = sortNumeric(expectedRoots)
    const passed =
      actualRoots !== undefined &&
      JSON.stringify(actualRoots) === JSON.stringify(expectedSorted)

    record(
      { test: 'quadratic', prompt, result, expected: expectedDisplay, model, router, passed },
      `Expected ${expectedDisplay}, but got ${Array.isArray(result) ? result[0] : result}`
    )

    expect(actualRoots).toEqual(expectedSorted)
  })
})
|
||||
Loading…
Reference in New Issue
Block a user