model testing :)

This commit is contained in:
lovebird 2025-04-01 12:22:12 +02:00
parent 7d66da3ed8
commit 4b43c8b2dd
8 changed files with 159 additions and 1 deletions

File diff suppressed because one or more lines are too long

View File

@ -19,3 +19,7 @@
### Initialize folder
```kbot init```
### Internal : Build
```kbot build```

View File

@ -0,0 +1,14 @@
{
"model": "gpt-4",
"messages": [
{
"role": "user",
"content": "return the result of 2+2, dont comment"
},
{
"role": "user",
"content": "USER Preferences : # Preferences\r\n\r\nYou are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments. "
}
],
"tools": []
}

View File

@ -17,6 +17,7 @@
"dev": "tsc -p . --watch",
"lint": "eslint src --ext .ts",
"test": "vitest run",
"test:unit": "vitest run tests/unit",
"test2:watch": "vitest",
"test2:coverage": "vitest run --coverage",
"webpack": "webpack --config webpack.config.js --stats-error-details",

View File

@ -0,0 +1,3 @@
# Preferences
You are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments.

View File

@ -54,6 +54,7 @@ export const processRun = async (opts: IKBotTask) => {
}
options.client = client
options.collector = collector(options, client)
options.onRun = options.onRun || (async (options) => options)
let messages: Array<ChatCompletionMessageParam> = []
@ -85,6 +86,7 @@ export const processRun = async (opts: IKBotTask) => {
write(paramsPath, JSON.stringify({ ...params }, null, 2))
logger.debug(`Read ${files.length} files from project ${path.resolve(options.path)} with ${options.include}`, files.map(f => f.path), params.tools.map(t => `${t.function.name} : ${t.function.description}`))
let ret = null
options = await options.onRun(options) || options
try {
switch (options.mode) {
case EMode.COMPLETION:

View File

@ -0,0 +1,62 @@
[
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:20:31.673Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:20:33.287Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:20:34.328Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:21:49.790Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4\n"
],
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T10:21:51.896Z"
},
{
"test": "basic_arithmetic",
"prompt": "return the result of 2+2, dont comment",
"result": [
"4"
],
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T10:21:52.849Z"
}
]

View File

@ -0,0 +1,70 @@
import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
// NOTE(review): DEFAULT_MODEL is not referenced anywhere in the visible part of this file.
const DEFAULT_MODEL = E_OPENROUTER_MODEL_FREE.MODEL_FREE_MISTRALAI_MISTRAL_NEMO_FREE
// Model identifiers the parameterised test below iterates over via `it.each(models)`:
// two entries from E_OPENROUTER_MODEL_FREE and one from E_OPENAI_MODEL.
const models = [
E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
E_OPENAI_MODEL.MODEL_GPT_4
]
/**
 * One entry of the JSON results log written by the tests in this file.
 */
interface TestResult {
  /** Identifier of the test case that produced this entry (e.g. 'basic_arithmetic'). */
  test: string
  /** Prompt text that was passed to `run`. */
  prompt: string
  /** Value returned by `run`, stored as received (not trimmed). */
  result: string[]
  /** Model name captured in the `onRun` callback, or 'unknown' if it was not set. */
  model: string
  /** Router name captured in the `onRun` callback, or 'unknown' if it was not set. */
  router: string
  /** ISO-8601 timestamp (`Date#toISOString`) taken when the entry was recorded. */
  timestamp: string
}
/**
 * Smoke test: every model listed in `models` must answer a trivial arithmetic
 * prompt with exactly "4" when `run` is invoked in completion mode.
 *
 * Each passing run is appended to `basic.json` next to this file; the file is
 * re-read when the suite is collected, so entries accumulate across test runs.
 * A run whose assertion fails is not logged.
 */
describe('Basic Functionality', () => {
  // Each case awaits `run()` against a model (presumably a remote API call), which can
  // exceed vitest's default 5 s per-test timeout; give it an explicit, larger budget.
  const MODEL_TIMEOUT_MS = 30000
  const logPath = path.resolve(__dirname, './basic.json')
  let testResults: TestResult[] = []

  // Load existing results if any; anything that is not an array is discarded.
  if (exists(logPath)) {
    const data = read(logPath, 'json')
    testResults = Array.isArray(data) ? data : []
  }

  it.each(models)(
    'should return 4 when given "return the result of 2+2, dont comment" in completion mode with model %s',
    async (modelName) => {
      const prompt = 'return the result of 2+2, dont comment'
      // Filled in by the onRun hook with the values `run` actually resolved.
      let model = 'unknown'
      let router = 'unknown'

      const result = await run({
        prompt,
        mode: 'completion',
        model: modelName,
        path: path.resolve(__dirname, '../../'),
        logs: path.resolve(__dirname, '../../logs'),
        preferences: path.resolve(__dirname, '../../preferences.md'),
        onRun: async (options) => {
          model = options.model || 'unknown'
          router = options.router || 'unknown'
          return options
        }
      }) as string[]

      // The cast above is unchecked: fail with a readable assertion instead of a
      // TypeError from `.map` when `run` yields null/undefined or a non-array.
      expect(Array.isArray(result)).toBe(true)
      // Models may append whitespace/newlines (e.g. "4\n"); compare trimmed values.
      expect(result.map(r => r.trim())).toEqual(['4'])

      // Add test result to array (raw, untrimmed result is kept in the log)
      testResults.push({
        test: 'basic_arithmetic',
        prompt,
        result,
        model,
        router,
        timestamp: new Date().toISOString()
      })

      // Write all results to the same file
      write(logPath, JSON.stringify(testResults, null, 2))
    },
    MODEL_TIMEOUT_MS
  )
})