tests:response_format
This commit is contained in:
parent
20cb6861af
commit
fb34d15369
@ -3,7 +3,7 @@
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Generate a random number."
|
||||
"content": "Generate a random number"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
|
||||
@ -50,25 +50,6 @@
|
||||
- Reason: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…'
|
||||
- Timestamp: 4/2/2025, 12:23:58 AM
|
||||
|
||||
### json-schema-object-format - gpt-4o
|
||||
- Prompt: `Create a user profile with the following details:
|
||||
- Name: Jane Smith
|
||||
- Age: 25
|
||||
- Email: jane.smith@company.com
|
||||
- Tags: ["developer", "designer"]
|
||||
- Address: 123 Main St, New York, US, 10001
|
||||
- Preferences: light theme, notifications enabled, English language
|
||||
Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","zipCode":"10001"},"preferences":{"theme":"light","notifications":true,"language":"en"}}`
|
||||
- Actual: ``
|
||||
- Duration: 2618ms
|
||||
- Error Type: AssertionError
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…'
|
||||
- Error Details: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…'
|
||||
- Reason: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…'
|
||||
- Timestamp: 4/2/2025, 12:33:08 AM
|
||||
|
||||
### zod-string-format - anthropic/claude-3.5-sonnet
|
||||
- Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.`
|
||||
- Expected: `john.doe@company.com`
|
||||
@ -208,18 +189,59 @@
|
||||
- Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}`
|
||||
- Actual: ````json
|
||||
{
|
||||
"name": "John Doe",
|
||||
"age": 30,
|
||||
"tags": ["developer", "javascript"]
|
||||
"name": "John Doe",
|
||||
"age": 30,
|
||||
"tags": ["developer", "javascript"]
|
||||
}
|
||||
````
|
||||
- Duration: 995ms
|
||||
- Timestamp: 4/2/2025, 12:34:28 AM
|
||||
- Duration: 960ms
|
||||
- Timestamp: 4/2/2025, 12:38:51 AM
|
||||
|
||||
### json-schema-object-format - gpt-4o
|
||||
- Prompt: `Create a user profile with the following details:
|
||||
- Name: Jane Smith
|
||||
- Age: 25
|
||||
- Email: jane.smith@company.com
|
||||
- Tags: ["developer", "designer"]
|
||||
- Address: {
|
||||
"street": "123 Main St",
|
||||
"city": "New York",
|
||||
"country": "US",
|
||||
"postal_code": "10001"
|
||||
}
|
||||
- Preferences: {
|
||||
"theme": "light",
|
||||
"notifications": "enabled",
|
||||
"language": "English"
|
||||
}
|
||||
Return only the JSON object, no explanation.`
|
||||
- Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","postal_code":"10001"},"preferences":{"theme":"light","notifications":"enabled","language":"English"}}`
|
||||
- Actual: ````json
|
||||
{
|
||||
"name": "Jane Smith",
|
||||
"age": 25,
|
||||
"email": "jane.smith@company.com",
|
||||
"tags": ["developer", "designer"],
|
||||
"address": {
|
||||
"street": "123 Main St",
|
||||
"city": "New York",
|
||||
"country": "US",
|
||||
"postal_code": "10001"
|
||||
},
|
||||
"preferences": {
|
||||
"theme": "light",
|
||||
"notifications": "enabled",
|
||||
"language": "English"
|
||||
}
|
||||
}
|
||||
````
|
||||
- Duration: 1505ms
|
||||
- Timestamp: 4/2/2025, 12:38:52 AM
|
||||
|
||||
## Summary
|
||||
|
||||
- Total Tests: 38
|
||||
- Passed: 5
|
||||
- Failed: 33
|
||||
- Success Rate: 13.16%
|
||||
- Total Tests: 51
|
||||
- Passed: 13
|
||||
- Failed: 38
|
||||
- Success Rate: 25.49%
|
||||
|
||||
|
||||
@ -991,5 +991,247 @@
|
||||
"timestamp": "2025-04-01T22:34:28.660Z",
|
||||
"passed": true,
|
||||
"duration": 995
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:35:05.732Z",
|
||||
"passed": true,
|
||||
"duration": 1101
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:35:37.746Z",
|
||||
"passed": true,
|
||||
"duration": 1187
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:36:19.070Z",
|
||||
"passed": true,
|
||||
"duration": 1238
|
||||
},
|
||||
{
|
||||
"test": "json-schema-object-format",
|
||||
"prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.",
|
||||
"result": [],
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:36:20.664Z",
|
||||
"passed": false,
|
||||
"duration": 1592,
|
||||
"error": {
|
||||
"message": "expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"code": "UNKNOWN",
|
||||
"type": "AssertionError",
|
||||
"details": {
|
||||
"message": "expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"actual": "{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\":\"jane.smith@company.com\",\"Tags\":[\"developer\",\"designer\"],\"Address\":{\"Street\":\"123 Main St\",\"City\":\"New York\",\"Country\":\"US\",\"PostalCode\":\"10001\"},\"Preferences\":{\"Theme\":\"light\",\"Notifications\":\"enabled\",\"Language\":\"English\"}}",
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"showDiff": true,
|
||||
"operator": "strictEqual",
|
||||
"stackStr": "AssertionError: expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:184:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:271:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "AssertionError",
|
||||
"diff": "Expected: \u001b[32m\"{\"\u001b[7mn\u001b[27mame\":\"Jane Smith\",\"\u001b[7ma\u001b[27mge\":25,\"\u001b[7me\u001b[27mmail\":\"jane.smith@company.com\",\"\u001b[7mt\u001b[27mags\":[\"developer\",\"designer\"],\"\u001b[7ma\u001b[27mddress\":{\"\u001b[7ms\u001b[27mtreet\":\"123 Main St\",\"\u001b[7mc\u001b[27mity\":\"New York\",\"\u001b[7mc\u001b[27mountry\":\"US\",\"\u001b[7mp\u001b[27mostalCode\":\"10001\"},\"\u001b[7mp\u001b[27mreferences\":{\"\u001b[7mt\u001b[27mheme\":\"light\",\"\u001b[7mn\u001b[27motifications\":\"enabled\",\"\u001b[7ml\u001b[27manguage\":\"English\"}}\"\u001b[39m\nReceived: \u001b[31m\"{\"\u001b[7mN\u001b[27mame\":\"Jane Smith\",\"\u001b[7mA\u001b[27mge\":25,\"\u001b[7mE\u001b[27mmail\":\"jane.smith@company.com\",\"\u001b[7mT\u001b[27mags\":[\"developer\",\"designer\"],\"\u001b[7mA\u001b[27mddress\":{\"\u001b[7mS\u001b[27mtreet\":\"123 Main St\",\"\u001b[7mC\u001b[27mity\":\"New York\",\"\u001b[7mC\u001b[27mountry\":\"US\",\"\u001b[7mP\u001b[27mostalCode\":\"10001\"},\"\u001b[7mP\u001b[27mreferences\":{\"\u001b[7mT\u001b[27mheme\":\"light\",\"\u001b[7mN\u001b[27motifications\":\"enabled\",\"\u001b[7mL\u001b[27manguage\":\"English\"}}\"\u001b[39m",
|
||||
"name": "AssertionError",
|
||||
"ok": false,
|
||||
"stack": "AssertionError: expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:184:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:271:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
|
||||
}
|
||||
},
|
||||
"reason": "expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'"
|
||||
},
|
||||
{
|
||||
"test": "json-schema-object-format",
|
||||
"prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.",
|
||||
"result": [],
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:36:23.463Z",
|
||||
"passed": false,
|
||||
"duration": 2794,
|
||||
"error": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"code": "UNKNOWN",
|
||||
"type": "AssertionError",
|
||||
"details": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":\"123 Main St, New York, US, 10001\",\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"showDiff": true,
|
||||
"operator": "strictEqual",
|
||||
"name": "AssertionError",
|
||||
"ok": false,
|
||||
"stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:184:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:271:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
|
||||
}
|
||||
},
|
||||
"reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'"
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:37:16.453Z",
|
||||
"passed": true,
|
||||
"duration": 2477
|
||||
},
|
||||
{
|
||||
"test": "json-schema-object-format",
|
||||
"prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.",
|
||||
"result": [],
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:37:19.610Z",
|
||||
"passed": false,
|
||||
"duration": 3155,
|
||||
"error": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"code": "UNKNOWN",
|
||||
"type": "AssertionError",
|
||||
"details": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":true,\"language\":\"English\"}}",
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"showDiff": true,
|
||||
"operator": "strictEqual",
|
||||
"name": "AssertionError",
|
||||
"ok": false,
|
||||
"stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:183:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:270:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
|
||||
}
|
||||
},
|
||||
"reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'"
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\n \"developer\",\n \"javascript\"\n ]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:38:14.271Z",
|
||||
"passed": true,
|
||||
"duration": 1276
|
||||
},
|
||||
{
|
||||
"test": "json-schema-object-format",
|
||||
"prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.",
|
||||
"result": [],
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:38:15.883Z",
|
||||
"passed": false,
|
||||
"duration": 1609,
|
||||
"error": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"code": "UNKNOWN",
|
||||
"type": "AssertionError",
|
||||
"details": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"showDiff": true,
|
||||
"operator": "strictEqual",
|
||||
"name": "AssertionError",
|
||||
"ok": false,
|
||||
"stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:185:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:272:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
|
||||
}
|
||||
},
|
||||
"reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'"
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:38:32.055Z",
|
||||
"passed": true,
|
||||
"duration": 1134
|
||||
},
|
||||
{
|
||||
"test": "json-schema-object-format",
|
||||
"prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.",
|
||||
"result": [],
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:38:33.304Z",
|
||||
"passed": false,
|
||||
"duration": 1247,
|
||||
"error": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"code": "UNKNOWN",
|
||||
"type": "AssertionError",
|
||||
"details": {
|
||||
"message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'",
|
||||
"actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":\"123 Main St, New York, US, 10001\",\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"showDiff": true,
|
||||
"operator": "strictEqual",
|
||||
"name": "AssertionError",
|
||||
"ok": false,
|
||||
"stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:185:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:272:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
|
||||
}
|
||||
},
|
||||
"reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'"
|
||||
},
|
||||
{
|
||||
"test": "json-schema-file-format",
|
||||
"prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:38:51.385Z",
|
||||
"passed": true,
|
||||
"duration": 960
|
||||
},
|
||||
{
|
||||
"test": "json-schema-object-format",
|
||||
"prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: {\n \"street\": \"123 Main St\",\n \"city\": \"New York\",\n \"country\": \"US\",\n \"postal_code\": \"10001\"\n }\n - Preferences: {\n \"theme\": \"light\",\n \"notifications\": \"enabled\",\n \"language\": \"English\"\n }\n Return only the JSON object, no explanation.",
|
||||
"result": [
|
||||
"```json\n{\n \"name\": \"Jane Smith\",\n \"age\": 25,\n \"email\": \"jane.smith@company.com\",\n \"tags\": [\"developer\", \"designer\"],\n \"address\": {\n \"street\": \"123 Main St\",\n \"city\": \"New York\",\n \"country\": \"US\",\n \"postal_code\": \"10001\"\n },\n \"preferences\": {\n \"theme\": \"light\",\n \"notifications\": \"enabled\",\n \"language\": \"English\"\n }\n}\n```"
|
||||
],
|
||||
"expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}",
|
||||
"model": "gpt-4o",
|
||||
"router": "openai",
|
||||
"timestamp": "2025-04-01T22:38:52.892Z",
|
||||
"passed": true,
|
||||
"duration": 1505
|
||||
}
|
||||
]
|
||||
@ -6,7 +6,6 @@ import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { z } from 'zod'
|
||||
import {
|
||||
models,
|
||||
TEST_BASE_PATH,
|
||||
TEST_LOGS_PATH,
|
||||
TEST_PREFERENCES_PATH,
|
||||
@ -18,6 +17,8 @@ import {
|
||||
|
||||
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
|
||||
const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json')
|
||||
const TEST_MODEL = 'gpt-4o'
|
||||
const TEST_ROUTER = 'openai'
|
||||
|
||||
// Sample JSON Schema for testing
|
||||
const testJsonSchema = {
|
||||
@ -144,8 +145,8 @@ describe('Format Options', () => {
|
||||
}
|
||||
|
||||
const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => {
|
||||
let model = 'gpt-4o'
|
||||
let router = 'openai'
|
||||
let model = TEST_MODEL
|
||||
let router = TEST_ROUTER
|
||||
let startTime = Date.now()
|
||||
let error: TestResult['error'] | undefined
|
||||
let testResult: TestResult | undefined
|
||||
@ -155,15 +156,15 @@ describe('Format Options', () => {
|
||||
run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: 'gpt-4o',
|
||||
router: 'openai',
|
||||
model: TEST_MODEL,
|
||||
router: TEST_ROUTER,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
...options,
|
||||
onRun: async (options) => {
|
||||
model = options.model || 'unknown'
|
||||
router = options.router || 'unknown'
|
||||
model = options.model || TEST_MODEL
|
||||
router = options.router || TEST_ROUTER
|
||||
return options
|
||||
}
|
||||
}),
|
||||
@ -232,12 +233,12 @@ describe('Format Options', () => {
|
||||
prompt,
|
||||
expected,
|
||||
'json-schema-file-format',
|
||||
'gpt-4o',
|
||||
TEST_MODEL,
|
||||
{
|
||||
format: TEST_SCHEMA_PATH
|
||||
}
|
||||
)
|
||||
}, { timeout: 10000 })
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
// Test JSON Schema format using schema object
|
||||
it('should format response according to JSON Schema object', async () => {
|
||||
@ -246,48 +247,47 @@ describe('Format Options', () => {
|
||||
- Age: 25
|
||||
- Email: jane.smith@company.com
|
||||
- Tags: ["developer", "designer"]
|
||||
- Address: 123 Main St, New York, US, 10001
|
||||
- Preferences: light theme, notifications enabled, English language
|
||||
- Address: {
|
||||
"street": "123 Main St",
|
||||
"city": "New York",
|
||||
"country": "US",
|
||||
"postal_code": "10001"
|
||||
}
|
||||
- Preferences: {
|
||||
"theme": "light",
|
||||
"notifications": "enabled",
|
||||
"language": "English"
|
||||
}
|
||||
Return only the JSON object, no explanation.`
|
||||
|
||||
try {
|
||||
const result = await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: 'gpt-4o',
|
||||
router: 'openai',
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
format: testJsonSchema
|
||||
}) as string[]
|
||||
const expected = JSON.stringify({
|
||||
name: "Jane Smith",
|
||||
age: 25,
|
||||
email: "jane.smith@company.com",
|
||||
tags: ["developer", "designer"],
|
||||
address: {
|
||||
street: "123 Main St",
|
||||
city: "New York",
|
||||
country: "US",
|
||||
postal_code: "10001"
|
||||
},
|
||||
preferences: {
|
||||
theme: "light",
|
||||
notifications: "enabled",
|
||||
language: "English"
|
||||
}
|
||||
})
|
||||
|
||||
const response = JSON.parse(normalizeJson(result?.[0] || '{}'))
|
||||
|
||||
// Validate required fields
|
||||
expect(response.name).toBe('Jane Smith')
|
||||
expect(response.age).toBe(25)
|
||||
expect(response.email).toBe('jane.smith@company.com')
|
||||
|
||||
// Validate tags
|
||||
expect(Array.isArray(response.tags)).toBe(true)
|
||||
expect(response.tags).toContain('developer')
|
||||
expect(response.tags).toContain('designer')
|
||||
|
||||
// Validate address
|
||||
expect(response.address.street).toBe('123 Main St')
|
||||
expect(response.address.city).toBe('New York')
|
||||
expect(response.address.country).toBe('US')
|
||||
expect(response.address.zipCode || response.address.postal_code).toMatch(/^[0-9]{5}$/)
|
||||
|
||||
// Validate preferences
|
||||
expect(response.preferences.theme).toBe('light')
|
||||
expect(['true', true, 'enabled'].includes(response.preferences.notifications)).toBe(true)
|
||||
expect(['en', 'English'].includes(response.preferences.language)).toBe(true)
|
||||
} catch (e) {
|
||||
throw e
|
||||
}
|
||||
}, { timeout: 10000 })
|
||||
await runFormatTest(
|
||||
prompt,
|
||||
expected,
|
||||
'json-schema-object-format',
|
||||
TEST_MODEL,
|
||||
{
|
||||
format: testJsonSchema
|
||||
}
|
||||
)
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
// Test Zod Schema format with string
|
||||
it('should format response according to Zod string schema', async () => {
|
||||
@ -297,8 +297,8 @@ describe('Format Options', () => {
|
||||
const result = await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: 'gpt-4o',
|
||||
router: 'openai',
|
||||
model: TEST_MODEL,
|
||||
router: TEST_ROUTER,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
@ -319,7 +319,7 @@ describe('Format Options', () => {
|
||||
} catch (e) {
|
||||
throw e
|
||||
}
|
||||
}, { timeout: 10000 })
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
// Test Zod Schema format with number
|
||||
it('should format response according to Zod number schema', async () => {
|
||||
@ -329,8 +329,8 @@ describe('Format Options', () => {
|
||||
const result = await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: 'gpt-4o',
|
||||
router: 'openai',
|
||||
model: TEST_MODEL,
|
||||
router: TEST_ROUTER,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
@ -352,7 +352,7 @@ describe('Format Options', () => {
|
||||
} catch (e) {
|
||||
throw e
|
||||
}
|
||||
}, { timeout: 10000 })
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
// Test Zod Schema format with array
|
||||
it('should format response according to Zod array schema', async () => {
|
||||
@ -362,8 +362,8 @@ describe('Format Options', () => {
|
||||
const result = await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: 'gpt-4o',
|
||||
router: 'openai',
|
||||
model: TEST_MODEL,
|
||||
router: TEST_ROUTER,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
@ -388,35 +388,39 @@ describe('Format Options', () => {
|
||||
} catch (e) {
|
||||
throw e
|
||||
}
|
||||
}, { timeout: 10000 })
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
// Test invalid format option
|
||||
it('should handle invalid format option', async () => {
|
||||
const prompt = 'Generate a random number.'
|
||||
const prompt = 'Generate a random number'
|
||||
|
||||
try {
|
||||
await run({
|
||||
prompt,
|
||||
mode: 'completion',
|
||||
model: 'gpt-4o',
|
||||
router: 'openai',
|
||||
model: TEST_MODEL,
|
||||
router: TEST_ROUTER,
|
||||
path: TEST_BASE_PATH,
|
||||
logs: TEST_LOGS_PATH,
|
||||
preferences: TEST_PREFERENCES_PATH,
|
||||
format: {
|
||||
type: "invalid",
|
||||
properties: {}
|
||||
type: "object",
|
||||
properties: {
|
||||
number: {
|
||||
type: "not_a_valid_type"
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
// If we get here, the format validation didn't work
|
||||
throw new Error('Expected format validation to fail')
|
||||
} catch (e: any) {
|
||||
// The error should be about invalid format/schema
|
||||
if (!e.message.match(/invalid|Invalid|schema|Schema/)) {
|
||||
throw new Error(`Unexpected error: ${e.message}`)
|
||||
if (!e.message.match(/invalid|Invalid|schema|Schema|type|Type|format|Format/i)) {
|
||||
console.error('Actual error message:', e.message)
|
||||
throw new Error('Expected format validation to fail with an invalid format/schema/type error')
|
||||
}
|
||||
}
|
||||
}, { timeout: 10000 })
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
|
||||
it('should generate markdown report', () => {
|
||||
// Group results by test and model
|
||||
|
||||
Loading…
Reference in New Issue
Block a user