kbot:format tests

This commit is contained in:
lovebird 2025-04-02 09:04:12 +02:00
parent fb34d15369
commit 2afc1a8051
5 changed files with 31 additions and 1426 deletions

View File

@ -1,5 +1,5 @@
{
"model": "gpt-4o",
"model": "google/gemini-2.0-flash-lite-001",
"messages": [
{
"role": "user",

View File

@ -31,7 +31,6 @@ export const EMode = {
CUSTOM: 'custom'
} as const
export const EType = z.enum([
EMode.COMPLETION,
EMode.TOOLS,
@ -291,14 +290,11 @@ export const OptionsSchema = (opts?: any): any => {
.passthrough()
.describe('IKBotOptions')
}
// Entry point that runs the interface generation for the options schema and
// then calls schemas().
// Each generate_interfaces call receives its own fresh OptionsSchema() result
// wrapped in a single-element array, plus a target path.
// NOTE(review): this listing comes from a diff view with the +/- markers
// stripped, so the two generate_interfaces lines may be the removed and added
// versions of one call rather than two calls — confirm against the real file.
export const types = () => {
// Target given as a relative path: 'src/zod_types.ts'.
generate_interfaces([OptionsSchema()], 'src/zod_types.ts')
// Target is in a sibling package; the inner `resolve` is presumably a
// project helper distinct from path.resolve — TODO confirm.
generate_interfaces([OptionsSchema()], path.resolve(resolve('../ai-tools/src/types_kbot.ts')))
// Hands off to schemas(), declared right after this function.
schemas()
}
export const schemas = () => {
write([OptionsSchema()], 'schema.json', 'kbot', {})
writeFS('schema_ui.json', schemaMap.getUISchema())

View File

@ -2,189 +2,11 @@
## Failed Tests
### json-schema-file-format - anthropic/claude-3.5-sonnet
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
- Actual: ``
- Duration: 1690ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '{\n "name": "John Doe",\n "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
- Error Details: expected '{\n "name": "John Doe",\n "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
- Reason: expected '{\n "name": "John Doe",\n "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
- Timestamp: 4/2/2025, 12:23:43 AM
### json-schema-file-format - qwen/qwq-32b
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
- Actual: ``
- Duration: 3426ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '"John Doe",\n "age": 30,\n "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
- Error Details: expected '"John Doe",\n "age": 30,\n "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
- Reason: expected '"John Doe",\n "age": 30,\n "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
- Timestamp: 4/2/2025, 12:23:47 AM
### json-schema-object-format - anthropic/claude-3.5-sonnet
- Prompt: `Create a user profile with name Jane Smith, age 25, and tags ["designer", "ui"]. Return only the JSON object, no explanation.`
- Expected: `{"name":"Jane Smith","age":25,"tags":["designer","ui"]}`
- Actual: ``
- Duration: 1918ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '{\n "name": "Jane Smith",\n "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
- Error Details: expected '{\n "name": "Jane Smith",\n "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
- Reason: expected '{\n "name": "Jane Smith",\n "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
- Timestamp: 4/2/2025, 12:23:49 AM
### json-schema-object-format - qwen/qwq-32b
- Prompt: `Create a user profile with name Jane Smith, age 25, and tags ["designer", "ui"]. Return only the JSON object, no explanation.`
- Expected: `{"name":"Jane Smith","age":25,"tags":["designer","ui"]}`
- Actual: ``
- Duration: 9789ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
- Error Details: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
- Reason: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
- Timestamp: 4/2/2025, 12:23:58 AM
### zod-string-format - anthropic/claude-3.5-sonnet
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
- Expected: `john.doe@company.com`
- Actual: ``
- Duration: 1347ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
- Error Details: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
- Reason: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
- Timestamp: 4/2/2025, 12:24:00 AM
### zod-string-format - qwen/qwq-32b
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
- Expected: `john.doe@company.com`
- Actual: ``
- Duration: 13704ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
- Error Details: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
- Reason: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
- Timestamp: 4/2/2025, 12:24:13 AM
### zod-string-format - gpt-4o
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
- Expected: `john.doe@company.com`
- Actual: ``
- Duration: 1794ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
- Error Details: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
- Reason: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
- Timestamp: 4/2/2025, 12:32:20 AM
### zod-number-format - anthropic/claude-3.5-sonnet
- Prompt: `Generate a random age between 18 and 65. Return only the number, no explanation.`
- Expected: `25`
- Actual: ``
- Duration: 1376ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '42' to deeply equal '25'
- Error Details: expected '42' to deeply equal '25'
- Reason: expected '42' to deeply equal '25'
- Timestamp: 4/2/2025, 12:24:11 AM
### zod-number-format - gpt-4o
- Prompt: `Generate a random age between 18 and 65. Return only the number, no explanation.`
- Expected: `25`
- Actual: ``
- Duration: 2399ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '39' to deeply equal '25'
- Error Details: expected '39' to deeply equal '25'
- Reason: expected '39' to deeply equal '25'
- Timestamp: 4/2/2025, 12:32:23 AM
### zod-array-format - anthropic/claude-3.5-sonnet
- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
- Expected: `["JavaScript","Python","Java"]`
- Actual: ``
- Duration: 1009ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
- Error Details: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
- Reason: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
- Timestamp: 4/2/2025, 12:24:22 AM
### zod-array-format - qwen/qwq-32b
- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
- Expected: `["JavaScript","Python","Java"]`
- Actual: ``
- Duration: 4147ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
- Error Details: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
- Reason: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
- Timestamp: 4/2/2025, 12:24:26 AM
### zod-array-format - gpt-4o
- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
- Expected: `["JavaScript","Python","Java"]`
- Actual: ``
- Duration: 693ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
- Error Details: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
- Reason: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
- Timestamp: 4/2/2025, 12:32:23 AM
### invalid-format - anthropic/claude-3.5-sonnet
- Prompt: `Generate a random number.`
- Expected: `Invalid format option`
- Actual: ``
- Duration: 1026ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '73' to deeply equal 'Invalid format option'
- Error Details: expected '73' to deeply equal 'Invalid format option'
- Reason: expected '73' to deeply equal 'Invalid format option'
- Timestamp: 4/2/2025, 12:24:27 AM
### invalid-format - qwen/qwq-32b
- Prompt: `Generate a random number.`
- Expected: `Invalid format option`
- Actual: ``
- Duration: 7614ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '72' to deeply equal 'Invalid format option'
- Error Details: expected '72' to deeply equal 'Invalid format option'
- Reason: expected '72' to deeply equal 'Invalid format option'
- Timestamp: 4/2/2025, 12:24:35 AM
### invalid-format - gpt-4o
- Prompt: `Generate a random number.`
- Expected: `Invalid format option`
- Actual: ``
- Duration: 826ms
- Error Type: AssertionError
- Error Code: UNKNOWN
- Error Message: expected '786984' to deeply equal 'Invalid format option'
- Error Details: expected '786984' to deeply equal 'Invalid format option'
- Reason: expected '786984' to deeply equal 'Invalid format option'
- Timestamp: 4/2/2025, 12:32:24 AM
*No failed tests*
## Passed Tests
### json-schema-file-format - gpt-4o
### json-schema-file-format - google/gemini-2.0-flash-lite-001
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
- Actual: ````json
@ -193,11 +15,12 @@
"age": 30,
"tags": ["developer", "javascript"]
}
````
- Duration: 960ms
- Timestamp: 4/2/2025, 12:38:51 AM
```
`
- Duration: 1122ms
- Timestamp: 4/2/2025, 9:03:33 AM
### json-schema-object-format - gpt-4o
### json-schema-object-format - google/gemini-2.0-flash-lite-001
- Prompt: `Create a user profile with the following details:
- Name: Jane Smith
- Age: 25
@ -234,14 +57,15 @@
"language": "English"
}
}
````
- Duration: 1505ms
- Timestamp: 4/2/2025, 12:38:52 AM
```
`
- Duration: 1289ms
- Timestamp: 4/2/2025, 9:03:34 AM
## Summary
- Total Tests: 51
- Passed: 13
- Failed: 38
- Success Rate: 25.49%
- Total Tests: 2
- Passed: 2
- Failed: 0
- Success Rate: 100.00%

File diff suppressed because it is too large Load Diff

View File

@ -17,8 +17,8 @@ import {
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
const TEST_MODEL = 'gpt-4o'
const TEST_ROUTER = 'openai'
const TEST_MODEL = 'google/gemini-2.0-flash-lite-001'
const TEST_ROUTER = 'openrouter'
// Sample JSON Schema for testing
const testJsonSchema = {
@ -138,12 +138,6 @@ const hasValidArrayLength = (arr: any[], length: number) => {
describe('Format Options', () => {
let testResults: TestResult[] = []
// Load existing results if any
if (exists(TEST_LOG_PATH)) {
const data = read(TEST_LOG_PATH, 'json')
testResults = Array.isArray(data) ? data : []
}
const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
let model = TEST_MODEL
let router = TEST_ROUTER
@ -178,8 +172,8 @@ describe('Format Options', () => {
}
const actual = result?.[0]?.trim() || ''
const normalizedActual = normalizeJson(actual)
const normalizedExpected = normalizeJson(expected)
const normalizedActual = normalizeJson(actual).toLowerCase()
const normalizedExpected = normalizeJson(expected).toLowerCase()
const passed = normalizedActual === normalizedExpected
expect(normalizedActual).toEqual(normalizedExpected)