model testing :)
This commit is contained in:
parent
7d66da3ed8
commit
4b43c8b2dd
File diff suppressed because one or more lines are too long
@ -19,3 +19,7 @@
|
||||
### Initialize folder
|
||||
|
||||
```kbot init```
|
||||
|
||||
### Internal : Build
|
||||
|
||||
```kbot build```
|
||||
|
||||
14
packages/kbot/logs/params.json
Normal file
14
packages/kbot/logs/params.json
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
"model": "gpt-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "return the result of 2+2, dont comment"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "USER Preferences : # Preferences\r\n\r\nYou are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments. "
|
||||
}
|
||||
],
|
||||
"tools": []
|
||||
}
|
||||
@ -17,6 +17,7 @@
|
||||
"dev": "tsc -p . --watch",
|
||||
"lint": "eslint src --ext .ts",
|
||||
"test": "vitest run",
|
||||
"test:unit": "vitest run tests/unit",
|
||||
"test2:watch": "vitest",
|
||||
"test2:coverage": "vitest run --coverage",
|
||||
"webpack": "webpack --config webpack.config.js --stats-error-details",
|
||||
|
||||
3
packages/kbot/preferences.md
Normal file
3
packages/kbot/preferences.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Preferences
|
||||
|
||||
You are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments.
|
||||
@ -54,6 +54,7 @@ export const processRun = async (opts: IKBotTask) => {
|
||||
}
|
||||
options.client = client
|
||||
options.collector = collector(options, client)
|
||||
options.onRun = options.onRun || (async (options) => options)
|
||||
|
||||
let messages: Array<ChatCompletionMessageParam> = []
|
||||
|
||||
@ -85,6 +86,7 @@ export const processRun = async (opts: IKBotTask) => {
|
||||
write(paramsPath, JSON.stringify({ ...params }, null, 2))
|
||||
logger.debug(`Read ${files.length} files from project ${path.resolve(options.path)} with ${options.include}`, files.map(f => f.path), params.tools.map(t => `${t.function.name} : ${t.function.description}`))
|
||||
let ret = null
|
||||
options = await options.onRun(options) || options
|
||||
try {
|
||||
switch (options.mode) {
|
||||
case EMode.COMPLETION:
|
||||
|
||||
62
packages/kbot/tests/unit/basic.json
Normal file
62
packages/kbot/tests/unit/basic.json
Normal file
@ -0,0 +1,62 @@
|
||||
[
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:20:31.673Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:20:33.287Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:20:34.328Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:21:49.790Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4\n"
|
||||
],
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:21:51.896Z"
|
||||
},
|
||||
{
|
||||
"test": "basic_arithmetic",
|
||||
"prompt": "return the result of 2+2, dont comment",
|
||||
"result": [
|
||||
"4"
|
||||
],
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T10:21:52.849Z"
|
||||
}
|
||||
]
|
||||
70
packages/kbot/tests/unit/basic.test.ts
Normal file
70
packages/kbot/tests/unit/basic.test.ts
Normal file
@ -0,0 +1,70 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { run } from '../../src/index'
|
||||
import * as path from 'node:path'
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
||||
|
||||
// NOTE(review): DEFAULT_MODEL is not referenced anywhere else in this test file — confirm whether it is still needed.
const DEFAULT_MODEL = E_OPENROUTER_MODEL_FREE.MODEL_FREE_MISTRALAI_MISTRAL_NEMO_FREE

// Model identifiers the parameterised test below is executed against, one test case per entry.
const models = [
  E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
  E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
  E_OPENAI_MODEL.MODEL_GPT_4,
]
|
||||
|
||||
/**
 * One entry of the JSON results log that the test suite appends to after a
 * test case has passed.
 */
interface TestResult {
  /** Label of the test case that produced this entry. */
  test: string
  /** Prompt text that was passed to `run`. */
  prompt: string
  /** Value returned by `run`, stored as returned (not trimmed). */
  result: string[]
  /** Model name captured from the options seen by the `onRun` hook. */
  model: string
  /** Router name captured from the options seen by the `onRun` hook. */
  router: string
  /** ISO-8601 string produced by `Date.prototype.toISOString()` when the entry is recorded. */
  timestamp: string
}
|
||||
|
||||
/**
 * Runs a single arithmetic prompt through `run` in completion mode once per
 * entry in `models`, asserts that the trimmed answer is exactly `['4']`, and
 * appends every passing result to `basic.json` next to this file.
 */
describe('Basic Functionality', () => {
  // Each case awaits a model completion; responses are presumably fetched over
  // the network, so do not rely on the runner's default per-test timeout.
  const REQUEST_TIMEOUT_MS = 30000

  const logPath = path.resolve(__dirname, './basic.json')
  let testResults: TestResult[] = []

  // Load existing results if any, so new entries are appended instead of replacing the log.
  if (exists(logPath)) {
    const data = read(logPath, 'json')
    testResults = Array.isArray(data) ? data : []
  }

  /** Runtime check that a value is an array consisting only of strings. */
  const isStringArray = (value: unknown): value is string[] =>
    Array.isArray(value) && value.every((item) => typeof item === 'string')

  it.each(models)('should return 4 when given "return the result of 2+2, dont comment" in completion mode with model %s', async (modelName) => {
    const prompt = 'return the result of 2+2, dont comment'
    let model = 'unknown'
    let router = 'unknown'

    const output: unknown = await run({
      prompt,
      mode: 'completion',
      model: modelName,
      path: path.resolve(__dirname, '../../'),
      logs: path.resolve(__dirname, '../../logs'),
      preferences: path.resolve(__dirname, '../../preferences.md'),
      // Capture the model/router from the options handed to the hook so they can be logged.
      onRun: async (options) => {
        model = options.model || 'unknown'
        router = options.router || 'unknown'
        return options
      }
    })

    // Fail with a readable message instead of a TypeError from `.map`/`.trim`
    // when `run` yields something other than an array of strings.
    if (!isStringArray(output)) {
      throw new Error(`Expected run() to resolve to string[] for model ${String(modelName)}, received: ${JSON.stringify(output)}`)
    }
    const result = output

    expect(result.map(r => r.trim())).toEqual(['4'])

    // Add test result to array (only reached when the assertion above passed).
    testResults.push({
      test: 'basic_arithmetic',
      prompt,
      result,
      model,
      router,
      timestamp: new Date().toISOString()
    })

    // Write all results to the same file
    write(logPath, JSON.stringify(testResults, null, 2))
  }, REQUEST_TIMEOUT_MS)
})
|
||||
Loading…
Reference in New Issue
Block a user