From fb34d1536944b2f2400ada4d97aa2ca5b2388811 Mon Sep 17 00:00:00 2001 From: babayaga Date: Wed, 2 Apr 2025 00:39:15 +0200 Subject: [PATCH] tests:response_format --- packages/kbot/logs/params.json | 2 +- packages/kbot/tests/unit/format-report.md | 78 ++++--- packages/kbot/tests/unit/format.json | 242 ++++++++++++++++++++++ packages/kbot/tests/unit/format.test.ts | 136 ++++++------ 4 files changed, 363 insertions(+), 95 deletions(-) diff --git a/packages/kbot/logs/params.json b/packages/kbot/logs/params.json index 4e9cabb1..cee79f77 100644 --- a/packages/kbot/logs/params.json +++ b/packages/kbot/logs/params.json @@ -3,7 +3,7 @@ "messages": [ { "role": "user", - "content": "Generate a random number." + "content": "Generate a random number" }, { "role": "user", diff --git a/packages/kbot/tests/unit/format-report.md b/packages/kbot/tests/unit/format-report.md index 3e968672..9bd03afe 100644 --- a/packages/kbot/tests/unit/format-report.md +++ b/packages/kbot/tests/unit/format-report.md @@ -50,25 +50,6 @@ - Reason: expected '{"name": "Jane Smith", "age": 25, "ta…' to deeply equal '{"name":"Jane Smith","age":25,"tags":…' - Timestamp: 4/2/2025, 12:23:58 AM -### json-schema-object-format - gpt-4o -- Prompt: `Create a user profile with the following details: - - Name: Jane Smith - - Age: 25 - - Email: jane.smith@company.com - - Tags: ["developer", "designer"] - - Address: 123 Main St, New York, US, 10001 - - Preferences: light theme, notifications enabled, English language - Return only the JSON object, no explanation.` -- Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","zipCode":"10001"},"preferences":{"theme":"light","notifications":true,"language":"en"}}` -- Actual: `` -- Duration: 2618ms -- Error Type: AssertionError -- Error Code: UNKNOWN -- Error Message: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…' -- Error Details: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…' -- Reason: expected '{"name":"Jane Smith","age":25,"email"…' to deeply equal '{"name":"Jane Smith","age":25,"email"…' -- Timestamp: 4/2/2025, 12:33:08 AM - ### zod-string-format - anthropic/claude-3.5-sonnet - Prompt: `Generate a valid email address for a business domain. Return only the email, no explanation.` - Expected: `john.doe@company.com` @@ -208,18 +189,59 @@ - Expected: `{"name":"John Doe","age":30,"tags":["developer","javascript"]}` - Actual: ````json { - "name": "John Doe", - "age": 30, - "tags": ["developer", "javascript"] + "name": "John Doe", + "age": 30, + "tags": ["developer", "javascript"] } ```` -- Duration: 995ms -- Timestamp: 4/2/2025, 12:34:28 AM +- Duration: 960ms +- Timestamp: 4/2/2025, 12:38:51 AM + +### json-schema-object-format - gpt-4o +- Prompt: `Create a user profile with the following details: + - Name: Jane Smith + - Age: 25 + - Email: jane.smith@company.com + - Tags: ["developer", "designer"] + - Address: { + "street": "123 Main St", + "city": "New York", + "country": "US", + "postal_code": "10001" + } + - Preferences: { + "theme": "light", + "notifications": "enabled", + "language": "English" + } + Return only the JSON object, no explanation.` +- Expected: `{"name":"Jane Smith","age":25,"email":"jane.smith@company.com","tags":["developer","designer"],"address":{"street":"123 Main St","city":"New York","country":"US","postal_code":"10001"},"preferences":{"theme":"light","notifications":"enabled","language":"English"}}` +- Actual: ````json +{ + "name": "Jane Smith", + "age": 25, + "email": "jane.smith@company.com", + "tags": ["developer", "designer"], + "address": { + "street": "123 Main St", + "city": "New York", + "country": "US", + "postal_code": "10001" + }, + "preferences": { + "theme": "light", + "notifications": "enabled", + "language": "English" + } +} +```` +- Duration: 1505ms +- Timestamp: 4/2/2025, 12:38:52 AM ## Summary -- Total Tests: 38 -- Passed: 5 -- Failed: 33 -- Success Rate: 13.16% +- Total Tests: 51 +- Passed: 13 +- Failed: 38 +- Success Rate: 25.49% diff --git a/packages/kbot/tests/unit/format.json b/packages/kbot/tests/unit/format.json index b444081e..c9ab7d09 100644 --- a/packages/kbot/tests/unit/format.json +++ b/packages/kbot/tests/unit/format.json @@ -991,5 +991,247 @@ "timestamp": "2025-04-01T22:34:28.660Z", "passed": true, "duration": 995 + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:35:05.732Z", + "passed": true, + "duration": 1101 + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:35:37.746Z", + "passed": true, + "duration": 1187 + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:36:19.070Z", + "passed": true, + "duration": 1238 + }, + { + "test": "json-schema-object-format", + "prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.", + "result": [], + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:36:20.664Z", + "passed": false, + "duration": 1592, + "error": { + "message": "expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "code": "UNKNOWN", + "type": "AssertionError", + "details": { + "message": "expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "actual": "{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\":\"jane.smith@company.com\",\"Tags\":[\"developer\",\"designer\"],\"Address\":{\"Street\":\"123 Main St\",\"City\":\"New York\",\"Country\":\"US\",\"PostalCode\":\"10001\"},\"Preferences\":{\"Theme\":\"light\",\"Notifications\":\"enabled\",\"Language\":\"English\"}}", + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "showDiff": true, + "operator": "strictEqual", + "stackStr": "AssertionError: expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:184:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:271:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11", + "nameStr": "AssertionError", + "diff": "Expected: \u001b[32m\"{\"\u001b[7mn\u001b[27mame\":\"Jane Smith\",\"\u001b[7ma\u001b[27mge\":25,\"\u001b[7me\u001b[27mmail\":\"jane.smith@company.com\",\"\u001b[7mt\u001b[27mags\":[\"developer\",\"designer\"],\"\u001b[7ma\u001b[27mddress\":{\"\u001b[7ms\u001b[27mtreet\":\"123 Main St\",\"\u001b[7mc\u001b[27mity\":\"New York\",\"\u001b[7mc\u001b[27mountry\":\"US\",\"\u001b[7mp\u001b[27mostalCode\":\"10001\"},\"\u001b[7mp\u001b[27mreferences\":{\"\u001b[7mt\u001b[27mheme\":\"light\",\"\u001b[7mn\u001b[27motifications\":\"enabled\",\"\u001b[7ml\u001b[27manguage\":\"English\"}}\"\u001b[39m\nReceived: \u001b[31m\"{\"\u001b[7mN\u001b[27mame\":\"Jane Smith\",\"\u001b[7mA\u001b[27mge\":25,\"\u001b[7mE\u001b[27mmail\":\"jane.smith@company.com\",\"\u001b[7mT\u001b[27mags\":[\"developer\",\"designer\"],\"\u001b[7mA\u001b[27mddress\":{\"\u001b[7mS\u001b[27mtreet\":\"123 Main St\",\"\u001b[7mC\u001b[27mity\":\"New York\",\"\u001b[7mC\u001b[27mountry\":\"US\",\"\u001b[7mP\u001b[27mostalCode\":\"10001\"},\"\u001b[7mP\u001b[27mreferences\":{\"\u001b[7mT\u001b[27mheme\":\"light\",\"\u001b[7mN\u001b[27motifications\":\"enabled\",\"\u001b[7mL\u001b[27manguage\":\"English\"}}\"\u001b[39m", + "name": "AssertionError", + "ok": false, + "stack": "AssertionError: expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:184:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:271:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11" + } + }, + "reason": "expected '{\"Name\":\"Jane Smith\",\"Age\":25,\"Email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'" + }, + { + "test": "json-schema-object-format", + "prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.", + "result": [], + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:36:23.463Z", + "passed": false, + "duration": 2794, + "error": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "code": "UNKNOWN", + "type": "AssertionError", + "details": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":\"123 Main St, New York, US, 10001\",\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "showDiff": true, + "operator": "strictEqual", + "name": "AssertionError", + "ok": false, + "stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:184:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.each.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:271:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11" + } + }, + "reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'" + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:37:16.453Z", + "passed": true, + "duration": 2477 + }, + { + "test": "json-schema-object-format", + "prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.", + "result": [], + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:37:19.610Z", + "passed": false, + "duration": 3155, + "error": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "code": "UNKNOWN", + "type": "AssertionError", + "details": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":true,\"language\":\"English\"}}", + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "showDiff": true, + "operator": "strictEqual", + "name": "AssertionError", + "ok": false, + "stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:183:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:270:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11" + } + }, + "reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'" + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\n \"developer\",\n \"javascript\"\n ]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:38:14.271Z", + "passed": true, + "duration": 1276 + }, + { + "test": "json-schema-object-format", + "prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.", + "result": [], + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:38:15.883Z", + "passed": false, + "duration": 1609, + "error": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "code": "UNKNOWN", + "type": "AssertionError", + "details": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postalCode\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "showDiff": true, + "operator": "strictEqual", + "name": "AssertionError", + "ok": false, + "stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:185:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:272:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11" + } + }, + "reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'" + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:38:32.055Z", + "passed": true, + "duration": 1134 + }, + { + "test": "json-schema-object-format", + "prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: 123 Main St, New York, US, 10001\n - Preferences: light theme, notifications enabled, English language\n Return only the JSON object, no explanation.", + "result": [], + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:38:33.304Z", + "passed": false, + "duration": 1247, + "error": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "code": "UNKNOWN", + "type": "AssertionError", + "details": { + "message": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'", + "actual": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":\"123 Main St, New York, US, 10001\",\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "showDiff": true, + "operator": "strictEqual", + "name": "AssertionError", + "ok": false, + "stack": "AssertionError: expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'\n at runFormatTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:185:32)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at __vite_ssr_import_0__.it.timeout (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\format.test.ts:272:5)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11" + } + }, + "reason": "expected '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…' to deeply equal '{\"name\":\"Jane Smith\",\"age\":25,\"email\"…'" + }, + { + "test": "json-schema-file-format", + "prompt": "Create a user profile with name John Doe, age 30, and tags [\"developer\", \"javascript\"]. Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"John Doe\",\n \"age\": 30,\n \"tags\": [\"developer\", \"javascript\"]\n}\n```" + ], + "expected": "{\"name\":\"John Doe\",\"age\":30,\"tags\":[\"developer\",\"javascript\"]}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:38:51.385Z", + "passed": true, + "duration": 960 + }, + { + "test": "json-schema-object-format", + "prompt": "Create a user profile with the following details:\n - Name: Jane Smith\n - Age: 25\n - Email: jane.smith@company.com\n - Tags: [\"developer\", \"designer\"]\n - Address: {\n \"street\": \"123 Main St\",\n \"city\": \"New York\",\n \"country\": \"US\",\n \"postal_code\": \"10001\"\n }\n - Preferences: {\n \"theme\": \"light\",\n \"notifications\": \"enabled\",\n \"language\": \"English\"\n }\n Return only the JSON object, no explanation.", + "result": [ + "```json\n{\n \"name\": \"Jane Smith\",\n \"age\": 25,\n \"email\": \"jane.smith@company.com\",\n \"tags\": [\"developer\", \"designer\"],\n \"address\": {\n \"street\": \"123 Main St\",\n \"city\": \"New York\",\n \"country\": \"US\",\n \"postal_code\": \"10001\"\n },\n \"preferences\": {\n \"theme\": \"light\",\n \"notifications\": \"enabled\",\n \"language\": \"English\"\n }\n}\n```" + ], + "expected": "{\"name\":\"Jane Smith\",\"age\":25,\"email\":\"jane.smith@company.com\",\"tags\":[\"developer\",\"designer\"],\"address\":{\"street\":\"123 Main St\",\"city\":\"New York\",\"country\":\"US\",\"postal_code\":\"10001\"},\"preferences\":{\"theme\":\"light\",\"notifications\":\"enabled\",\"language\":\"English\"}}", + "model": "gpt-4o", + "router": "openai", + "timestamp": "2025-04-01T22:38:52.892Z", + "passed": true, + "duration": 1505 } ] \ No newline at end of file diff --git a/packages/kbot/tests/unit/format.test.ts b/packages/kbot/tests/unit/format.test.ts index 22294d09..59906af7 100644 --- a/packages/kbot/tests/unit/format.test.ts +++ b/packages/kbot/tests/unit/format.test.ts @@ -6,7 +6,6 @@ import { sync as read } from "@polymech/fs/read" import { sync as exists } from "@polymech/fs/exists" import { z } from 'zod' import { - models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, @@ -18,6 +17,8 @@ import { const TEST_LOG_PATH = path.resolve(__dirname, './format.json') const TEST_SCHEMA_PATH = path.resolve(__dirname, './test-schema.json') +const TEST_MODEL = 'gpt-4o' +const TEST_ROUTER = 'openai' // Sample JSON Schema for testing const testJsonSchema = { @@ -144,8 +145,8 @@ describe('Format Options', () => { } const runFormatTest = async (prompt: string, expected: string, testName: string, modelName: string, options: any = {}) => { - let model = 'gpt-4o' - let router = 'openai' + let model = TEST_MODEL + let router = TEST_ROUTER let startTime = Date.now() let error: TestResult['error'] | undefined let testResult: TestResult | undefined @@ -155,15 +156,15 @@ describe('Format Options', () => { run({ prompt, mode: 'completion', - model: 'gpt-4o', - router: 'openai', + model: TEST_MODEL, + router: TEST_ROUTER, path: TEST_BASE_PATH, logs: TEST_LOGS_PATH, preferences: TEST_PREFERENCES_PATH, ...options, onRun: async (options) => { - model = options.model || 'unknown' - router = options.router || 'unknown' + model = options.model || TEST_MODEL + router = options.router || TEST_ROUTER return options } }), @@ -232,12 +233,12 @@ describe('Format Options', () => { prompt, expected, 'json-schema-file-format', - 'gpt-4o', + TEST_MODEL, { format: TEST_SCHEMA_PATH } ) - }, { timeout: 10000 }) + }, { timeout: TEST_TIMEOUT }) // Test JSON Schema format using schema object it('should format response according to JSON Schema object', async () => { @@ -246,48 +247,47 @@ describe('Format Options', () => { - Age: 25 - Email: jane.smith@company.com - Tags: ["developer", "designer"] - - Address: 123 Main St, New York, US, 10001 - - Preferences: light theme, notifications enabled, English language + - Address: { + "street": "123 Main St", + "city": "New York", + "country": "US", + "postal_code": "10001" + } + - Preferences: { + "theme": "light", + "notifications": "enabled", + "language": "English" + } Return only the JSON object, no explanation.` - try { - const result = await run({ - prompt, - mode: 'completion', - model: 'gpt-4o', - router: 'openai', - path: TEST_BASE_PATH, - logs: TEST_LOGS_PATH, - preferences: TEST_PREFERENCES_PATH, - format: testJsonSchema - }) as string[] + const expected = JSON.stringify({ + name: "Jane Smith", + age: 25, + email: "jane.smith@company.com", + tags: ["developer", "designer"], + address: { + street: "123 Main St", + city: "New York", + country: "US", + postal_code: "10001" + }, + preferences: { + theme: "light", + notifications: "enabled", + language: "English" + } + }) - const response = JSON.parse(normalizeJson(result?.[0] || '{}')) - - // Validate required fields - expect(response.name).toBe('Jane Smith') - expect(response.age).toBe(25) - expect(response.email).toBe('jane.smith@company.com') - - // Validate tags - expect(Array.isArray(response.tags)).toBe(true) - expect(response.tags).toContain('developer') - expect(response.tags).toContain('designer') - - // Validate address - expect(response.address.street).toBe('123 Main St') - expect(response.address.city).toBe('New York') - expect(response.address.country).toBe('US') - expect(response.address.zipCode || response.address.postal_code).toMatch(/^[0-9]{5}$/) - - // Validate preferences - expect(response.preferences.theme).toBe('light') - expect(['true', true, 'enabled'].includes(response.preferences.notifications)).toBe(true) - expect(['en', 'English'].includes(response.preferences.language)).toBe(true) - } catch (e) { - throw e - } - }, { timeout: 10000 }) + await runFormatTest( + prompt, + expected, + 'json-schema-object-format', + TEST_MODEL, + { + format: testJsonSchema + } + ) + }, { timeout: TEST_TIMEOUT }) // Test Zod Schema format with string it('should format response according to Zod string schema', async () => { @@ -297,8 +297,8 @@ describe('Format Options', () => { const result = await run({ prompt, mode: 'completion', - model: 'gpt-4o', - router: 'openai', + model: TEST_MODEL, + router: TEST_ROUTER, path: TEST_BASE_PATH, logs: TEST_LOGS_PATH, preferences: TEST_PREFERENCES_PATH, @@ -319,7 +319,7 @@ describe('Format Options', () => { } catch (e) { throw e } - }, { timeout: 10000 }) + }, { timeout: TEST_TIMEOUT }) // Test Zod Schema format with number it('should format response according to Zod number schema', async () => { @@ -329,8 +329,8 @@ describe('Format Options', () => { const result = await run({ prompt, mode: 'completion', - model: 'gpt-4o', - router: 'openai', + model: TEST_MODEL, + router: TEST_ROUTER, path: TEST_BASE_PATH, logs: TEST_LOGS_PATH, preferences: TEST_PREFERENCES_PATH, @@ -352,7 +352,7 @@ describe('Format Options', () => { } catch (e) { throw e } - }, { timeout: 10000 }) + }, { timeout: TEST_TIMEOUT }) // Test Zod Schema format with array it('should format response according to Zod array schema', async () => { @@ -362,8 +362,8 @@ describe('Format Options', () => { const result = await run({ prompt, mode: 'completion', - model: 'gpt-4o', - router: 'openai', + model: TEST_MODEL, + router: TEST_ROUTER, path: TEST_BASE_PATH, logs: TEST_LOGS_PATH, preferences: TEST_PREFERENCES_PATH, @@ -388,35 +388,39 @@ describe('Format Options', () => { } catch (e) { throw e } - }, { timeout: 10000 }) + }, { timeout: TEST_TIMEOUT }) // Test invalid format option it('should handle invalid format option', async () => { - const prompt = 'Generate a random number.' + const prompt = 'Generate a random number' try { await run({ prompt, mode: 'completion', - model: 'gpt-4o', - router: 'openai', + model: TEST_MODEL, + router: TEST_ROUTER, path: TEST_BASE_PATH, logs: TEST_LOGS_PATH, preferences: TEST_PREFERENCES_PATH, format: { - type: "invalid", - properties: {} + type: "object", + properties: { + number: { + type: "not_a_valid_type" + } + } } }) - // If we get here, the format validation didn't work throw new Error('Expected format validation to fail') } catch (e: any) { // The error should be about invalid format/schema - if (!e.message.match(/invalid|Invalid|schema|Schema/)) { - throw new Error(`Unexpected error: ${e.message}`) + if (!e.message.match(/invalid|Invalid|schema|Schema|type|Type|format|Format/i)) { + console.error('Actual error message:', e.message) + throw new Error('Expected format validation to fail with an invalid format/schema/type error') } } - }, { timeout: 10000 }) + }, { timeout: TEST_TIMEOUT }) it('should generate markdown report', () => { // Group results by test and model