kbot:format tests
This commit is contained in:
parent
fb34d15369
commit
2afc1a8051
@ -1,5 +1,5 @@
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"model": "google/gemini-2.0-flash-lite-001",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
|
||||
@ -31,7 +31,6 @@ export const EMode = {
|
||||
CUSTOM: 'custom'
|
||||
} as const
|
||||
|
||||
|
||||
export const EType = z.enum([
|
||||
EMode.COMPLETION,
|
||||
EMode.TOOLS,
|
||||
@ -291,14 +290,11 @@ export const OptionsSchema = (opts?: any): any => {
|
||||
.passthrough()
|
||||
.describe('IKBotOptions')
|
||||
}
|
||||
|
||||
|
||||
export const types = () => {
|
||||
generate_interfaces([OptionsSchema()], 'src/zod_types.ts')
|
||||
generate_interfaces([OptionsSchema()], path.resolve(resolve('../ai-tools/src/types_kbot.ts')))
|
||||
schemas()
|
||||
}
|
||||
|
||||
export const schemas = () => {
|
||||
write([OptionsSchema()], 'schema.json', 'kbot', {})
|
||||
writeFS('schema_ui.json', schemaMap.getUISchema())
|
||||
|
||||
@ -2,189 +2,11 @@
|
||||
|
||||
## Failed Tests
|
||||
|
||||
### json-schema-file-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
|
||||
- Actual: ``
|
||||
- Duration: 1690ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '{\n "name": "John Doe",\n "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
|
||||
- Error Details: expected '{\n "name": "John Doe",\n "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
|
||||
- Reason: expected '{\n "name": "John Doe",\n "age": 30…' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
|
||||
- Timestamp: 4/2/2025, 12:23:43 AM
|
||||
|
||||
### json-schema-file-format - qwen/qwq-32b
|
||||
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
|
||||
- Actual: ``
|
||||
- Duration: 3426ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '"John Doe",\n "age": 30,\n "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
|
||||
- Error Details: expected '"John Doe",\n "age": 30,\n "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
|
||||
- Reason: expected '"John Doe",\n "age": 30,\n "tags": …' to deeply equal '{"name":"John Doe","age":30,"tags":["…'
|
||||
- Timestamp: 4/2/2025, 12:23:47 AM
|
||||
|
||||
### json-schema-object-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Create a user profile with name Jane Smith, age 25, and tags ["designer", "ui"]. Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"Jane Smith","age":25,"tags":["designer","ui"]}`
|
||||
- Actual: ``
|
||||
- Duration: 1918ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '{\n "name": "Jane Smith",\n "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Error Details: expected '{\n "name": "Jane Smith",\n "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Reason: expected '{\n "name": "Jane Smith",\n "age": …' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Timestamp: 4/2/2025, 12:23:49 AM
|
||||
|
||||
### json-schema-object-format - qwen/qwq-32b
|
||||
- Prompt: `Create a user profile with name Jane Smith, age 25, and tags ["designer", "ui"]. Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"Jane Smith","age":25,"tags":["designer","ui"]}`
|
||||
- Actual: ``
|
||||
- Duration: 9789ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Error Details: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Reason: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Timestamp: 4/2/2025, 12:23:58 AM
|
||||
|
||||
### zod-string-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
|
||||
- Expected: `john.doe@company.com`
|
||||
- Actual: ``
|
||||
- Duration: 1347ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
|
||||
- Error Details: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
|
||||
- Reason: expected 'sales@companyplus.com' to deeply equal 'john.doe@company.com'
|
||||
- Timestamp: 4/2/2025, 12:24:00 AM
|
||||
|
||||
### zod-string-format - qwen/qwq-32b
|
||||
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
|
||||
- Expected: `john.doe@company.com`
|
||||
- Actual: ``
|
||||
- Duration: 13704ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
|
||||
- Error Details: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
|
||||
- Reason: expected 'info@techstart.com' to deeply equal 'john.doe@company.com'
|
||||
- Timestamp: 4/2/2025, 12:24:13 AM
|
||||
|
||||
### zod-string-format - gpt-4o
|
||||
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
|
||||
- Expected: `john.doe@company.com`
|
||||
- Actual: ``
|
||||
- Duration: 1794ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
|
||||
- Error Details: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
|
||||
- Reason: expected 'contact@businessdomain.com' to deeply equal 'john.doe@company.com'
|
||||
- Timestamp: 4/2/2025, 12:32:20 AM
|
||||
|
||||
### zod-number-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Generate a random age between 18 and 65. Return only the number, no explanation.`
|
||||
- Expected: `25`
|
||||
- Actual: ``
|
||||
- Duration: 1376ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '42' to deeply equal '25'
|
||||
- Error Details: expected '42' to deeply equal '25'
|
||||
- Reason: expected '42' to deeply equal '25'
|
||||
- Timestamp: 4/2/2025, 12:24:11 AM
|
||||
|
||||
### zod-number-format - gpt-4o
|
||||
- Prompt: `Generate a random age between 18 and 65. Return only the number, no explanation.`
|
||||
- Expected: `25`
|
||||
- Actual: ``
|
||||
- Duration: 2399ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '39' to deeply equal '25'
|
||||
- Error Details: expected '39' to deeply equal '25'
|
||||
- Reason: expected '39' to deeply equal '25'
|
||||
- Timestamp: 4/2/2025, 12:32:23 AM
|
||||
|
||||
### zod-array-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
|
||||
- Expected: `["JavaScript","Python","Java"]`
|
||||
- Actual: ``
|
||||
- Duration: 1009ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Error Details: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Reason: expected '["Python", "Java", "JavaScript"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Timestamp: 4/2/2025, 12:24:22 AM
|
||||
|
||||
### zod-array-format - qwen/qwq-32b
|
||||
- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
|
||||
- Expected: `["JavaScript","Python","Java"]`
|
||||
- Actual: ``
|
||||
- Duration: 4147ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Error Details: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Reason: expected '["Python", "JavaScript", "Java"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Timestamp: 4/2/2025, 12:24:26 AM
|
||||
|
||||
### zod-array-format - gpt-4o
|
||||
- Prompt: `Generate a list of 3 programming languages. Return only the array, no explanation.`
|
||||
- Expected: `["JavaScript","Python","Java"]`
|
||||
- Actual: ``
|
||||
- Duration: 693ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Error Details: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Reason: expected '["Python","JavaScript","Java"]' to deeply equal '["JavaScript","Python","Java"]'
|
||||
- Timestamp: 4/2/2025, 12:32:23 AM
|
||||
|
||||
### invalid-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Generate a random number.`
|
||||
- Expected: `Invalid format option`
|
||||
- Actual: ``
|
||||
- Duration: 1026ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '73' to deeply equal 'Invalid format option'
|
||||
- Error Details: expected '73' to deeply equal 'Invalid format option'
|
||||
- Reason: expected '73' to deeply equal 'Invalid format option'
|
||||
- Timestamp: 4/2/2025, 12:24:27 AM
|
||||
|
||||
### invalid-format - qwen/qwq-32b
|
||||
- Prompt: `Generate a random number.`
|
||||
- Expected: `Invalid format option`
|
||||
- Actual: ``
|
||||
- Duration: 7614ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '72' to deeply equal 'Invalid format option'
|
||||
- Error Details: expected '72' to deeply equal 'Invalid format option'
|
||||
- Reason: expected '72' to deeply equal 'Invalid format option'
|
||||
- Timestamp: 4/2/2025, 12:24:35 AM
|
||||
|
||||
### invalid-format - gpt-4o
|
||||
- Prompt: `Generate a random number.`
|
||||
- Expected: `Invalid format option`
|
||||
- Actual: ``
|
||||
- Duration: 826ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '786984' to deeply equal 'Invalid format option'
|
||||
- Error Details: expected '786984' to deeply equal 'Invalid format option'
|
||||
- Reason: expected '786984' to deeply equal 'Invalid format option'
|
||||
- Timestamp: 4/2/2025, 12:32:24 AM
|
||||
*No failed tests*
|
||||
|
||||
## Passed Tests
|
||||
|
||||
### json-schema-file-format - gpt-4o
|
||||
### json-schema-file-format - google/gemini-2.0-flash-lite-001
|
||||
- Prompt: `Create a user profile with name John Doe, age 30, and tags ["developer", "javascript"]. Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
|
||||
- Actual: ````json
|
||||
@ -193,11 +15,12 @@
|
||||
"age": 30,
|
||||
"tags": ["developer", "javascript"]
|
||||
}
|
||||
````
|
||||
- Duration: 960ms
|
||||
- Timestamp: 4/2/2025, 12:38:51 AM
|
||||
```
|
||||
`
|
||||
- Duration: 1122ms
|
||||
- Timestamp: 4/2/2025, 9:03:33 AM
|
||||
|
||||
### json-schema-object-format - gpt-4o
|
||||
### json-schema-object-format - google/gemini-2.0-flash-lite-001
|
||||
- Prompt: `Create a user profile with the following details:
|
||||
- Name: Jane Smith
|
||||
- Age: 25
|
||||
@ -234,14 +57,15 @@
|
||||
"language": "English"
|
||||
}
|
||||
}
|
||||
````
|
||||
- Duration: 1505ms
|
||||
- Timestamp: 4/2/2025, 12:38:52 AM
|
||||
```
|
||||
`
|
||||
- Duration: 1289ms
|
||||
- Timestamp: 4/2/2025, 9:03:34 AM
|
||||
|
||||
## Summary
|
||||
|
||||
- Total Tests: 51
|
||||
- Passed: 13
|
||||
- Failed: 38
|
||||
- Success Rate: 25.49%
|
||||
- Total Tests: 2
|
||||
- Passed: 2
|
||||
- Failed: 0
|
||||
- Success Rate: 100.00%
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -17,8 +17,8 @@ import {
|
||||
|
||||
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
|
||||
const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
|
||||
const TEST_MODEL = 'gpt-4o'
|
||||
const TEST_ROUTER = 'openai'
|
||||
const TEST_MODEL = 'google/gemini-2.0-flash-lite-001'
|
||||
const TEST_ROUTER = 'openrouter'
|
||||
|
||||
// Sample JSON Schema for testing
|
||||
const testJsonSchema = {
|
||||
@ -138,12 +138,6 @@ const hasValidArrayLength = (arr: any[], length: number) => {
|
||||
describe('Format Options', () => {
|
||||
let testResults: TestResult[] = []
|
||||
|
||||
// Load existing results if any
|
||||
if (exists(TEST_LOG_PATH)) {
|
||||
const data = read(TEST_LOG_PATH, 'json')
|
||||
testResults = Array.isArray(data) ? data : []
|
||||
}
|
||||
|
||||
const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
|
||||
let model = TEST_MODEL
|
||||
let router = TEST_ROUTER
|
||||
@ -178,8 +172,8 @@ describe('Format Options', () => {
|
||||
}
|
||||
|
||||
const actual = result?.[0]?.trim() || ''
|
||||
const normalizedActual = normalizeJson(actual)
|
||||
const normalizedExpected = normalizeJson(expected)
|
||||
const normalizedActual = normalizeJson(actual).toLowerCase()
|
||||
const normalizedExpected = normalizeJson(expected).toLowerCase()
|
||||
const passed = normalizedActual === normalizedExpected
|
||||
|
||||
expect(normalizedActual).toEqual(normalizedExpected)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user