From 54b9c5dd9c152c266ec70e92439a88e5afe22302 Mon Sep 17 00:00:00 2001 From: babayaga Date: Thu, 3 Apr 2025 00:07:12 +0200 Subject: [PATCH] revert --- packages/kbot/dist-in/zod_types.d.ts | 2 +- packages/kbot/logs/params.json | 199 +-------------- packages/kbot/schema.json | 5 +- packages/kbot/schema_ui.json | 5 +- packages/kbot/src/zod_types.ts | 2 +- packages/kbot/tests/unit/reports/all.json | 155 ++++++++++-- packages/kbot/tests/unit/reports/basic.json | 155 ++++++++++-- packages/kbot/tests/unit/reports/basic.md | 254 ++++---------------- 8 files changed, 339 insertions(+), 438 deletions(-) diff --git a/packages/kbot/dist-in/zod_types.d.ts b/packages/kbot/dist-in/zod_types.d.ts index f1c47495..568e8094 100644 --- a/packages/kbot/dist-in/zod_types.d.ts +++ b/packages/kbot/dist-in/zod_types.d.ts @@ -2,7 +2,7 @@ export interface IKBotOptions { /** Target directory */ path?: string; /** The prompt. Supports file paths and environment variables. */ - prompt?: string; + prompt?: string | undefined; /** Optional output path for modified files (Tool mode only) */ output?: string | undefined; /** Optional destination path for the result, will substitute ${MODEL_NAME} and ${ROUTER} in the path. Optional, used for "completion" mode */ diff --git a/packages/kbot/logs/params.json b/packages/kbot/logs/params.json index c1331d05..2926ab73 100644 --- a/packages/kbot/logs/params.json +++ b/packages/kbot/logs/params.json @@ -1,207 +1,14 @@ { - "model": "openai/gpt-4o", + "model": "openai/gpt-4o-mini", "messages": [ { "role": "user", - "content": "List all files in the directory C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\test-data. Return the list as a JSON array of filenames." + "content": "divide 15 by 3. Return only the number, no explanation." }, { "role": "user", "content": "USER Preferences : # Preferences\r\n\r\nYou are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments. " } ], - "tools": [ - { - "type": "function", - "function": { - "name": "list_files", - "description": "List all files in a directory", - "parameters": { - "type": "object", - "properties": { - "directory": { - "type": "string" - }, - "pattern": { - "type": "string", - "optional": true - } - }, - "required": [ - "directory" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "read_files", - "description": "Reads files in a directory with a given pattern", - "parameters": { - "type": "object", - "properties": { - "directory": { - "type": "string" - }, - "pattern": { - "type": "string", - "optional": true - } - }, - "required": [ - "directory" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "remove_file", - "description": "Remove a file at given path", - "parameters": { - "type": "object", - "properties": { - "path": { - "type": "string" - } - }, - "required": [ - "path" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "rename_file", - "description": "Rename or move a file or directory", - "parameters": { - "type": "object", - "properties": { - "src": { - "type": "string" - }, - "dst": { - "type": "string" - } - }, - "required": [ - "path" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "modify_project_files", - "description": "Create or modify existing project files in one shot, preferably used for creating project structure)", - "parameters": { - "type": "object", - "properties": { - "files": { - "type": "array", - "items": { - "type": "object", - "properties": { - "path": { - "type": "string" - }, - "content": { - "type": "string", - "description": "base64 encoded string" - } - }, - "required": [ - "path", - "content" - ] - } - } - }, - "required": [ - "files" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "write_file", - "description": "Writes to a file, given a path and content (base64). No directory or file exists check needed!", - "parameters": { - "type": "object", - "properties": { - "file": { - "type": "object", - "properties": { - "path": { - "type": "string" - }, - "content": { - "type": "string", - "description": "base64 encoded string" - } - } - } - }, - "required": [ - "file" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "file_exists", - "description": "check if a file or folder exists", - "parameters": { - "type": "object", - "properties": { - "file": { - "type": "object", - "properties": { - "path": { - "type": "string" - } - } - } - }, - "required": [ - "file" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "read_file", - "description": "read a file, at given a path", - "parameters": { - "type": "object", - "properties": { - "file": { - "type": "object", - "properties": { - "path": { - "type": "string" - } - } - } - }, - "required": [ - "file" - ] - } - } - } - ], - "tool_choice": "auto", - "parallel_tool_calls": false + "tools": [] } \ No newline at end of file diff --git a/packages/kbot/schema.json b/packages/kbot/schema.json index 299d9285..005143db 100644 --- a/packages/kbot/schema.json +++ b/packages/kbot/schema.json @@ -13,8 +13,7 @@ }, "prompt": { "type": "string", - "description": "The prompt. Supports file paths and environment variables.", - "default": "./prompt.md" + "description": "The prompt. Supports file paths and environment variables." }, "output": { "type": "string", @@ -101,7 +100,7 @@ }, "logLevel": { "type": "number", - "default": 2, + "default": 4, "description": "Logging level for the application" }, "profile": { diff --git a/packages/kbot/schema_ui.json b/packages/kbot/schema_ui.json index 06c8e0f9..4933fb2f 100644 --- a/packages/kbot/schema_ui.json +++ b/packages/kbot/schema_ui.json @@ -15,8 +15,7 @@ }, "prompt": { "ui:description": "The prompt. Supports file paths and environment variables.", - "ui:title": "Prompt", - "ui:placeholder": "./prompt.md" + "ui:title": "Prompt" }, "output": { "ui:description": "Optional output path for modified files (Tool mode only)", @@ -79,7 +78,7 @@ "logLevel": { "ui:description": "Logging level for the application", "ui:title": "Loglevel", - "ui:placeholder": 2 + "ui:placeholder": 4 }, "profile": { "ui:description": "Path to profile for variables. Supports environment variables.", diff --git a/packages/kbot/src/zod_types.ts b/packages/kbot/src/zod_types.ts index 958f2dca..f9fd90ec 100644 --- a/packages/kbot/src/zod_types.ts +++ b/packages/kbot/src/zod_types.ts @@ -2,7 +2,7 @@ export interface IKBotOptions { /** Target directory */ path?: string; /** The prompt. Supports file paths and environment variables. */ - prompt?: string; + prompt?: string | undefined; /** Optional output path for modified files (Tool mode only) */ output?: string | undefined; /** Optional destination path for the result, will substitute ${MODEL_NAME} and ${ROUTER} in the path. Optional, used for "completion" mode */ diff --git a/packages/kbot/tests/unit/reports/all.json b/packages/kbot/tests/unit/reports/all.json index 3d0a789c..b93e3f42 100644 --- a/packages/kbot/tests/unit/reports/all.json +++ b/packages/kbot/tests/unit/reports/all.json @@ -6276,6 +6276,133 @@ "duration": 2274, "reason": "Expected [], but got {\"files\":[]}", "category": "tools" + }, + { + "test": "addition", + "prompt": "add 5 and 3. Return only the number, no explanation.", + "result": [ + "8" + ], + "expected": "8", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-02T22:06:38.904Z", + "passed": true, + "duration": 2263, + "category": "basic" + }, + { + "test": "addition", + "prompt": "add 5 and 3. Return only the number, no explanation.", + "result": [ + "8" + ], + "expected": "8", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-02T22:06:41.138Z", + "passed": true, + "duration": 2228, + "category": "basic" + }, + { + "test": "addition", + "prompt": "add 5 and 3. Return only the number, no explanation.", + "result": [ + "8" + ], + "expected": "8", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-02T22:06:41.934Z", + "passed": true, + "duration": 791, + "category": "basic" + }, + { + "test": "multiplication", + "prompt": "multiply 8 and 3. Return only the number, no explanation.", + "result": [ + "24" + ], + "expected": "24", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-02T22:06:43.239Z", + "passed": true, + "duration": 1300, + "category": "basic" + }, + { + "test": "multiplication", + "prompt": "multiply 8 and 3. Return only the number, no explanation.", + "result": [ + "8 × 3 = 24" + ], + "expected": "24", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-02T22:06:46.025Z", + "passed": false, + "duration": 2782, + "reason": "Expected 24, but got 8 × 3 = 24", + "category": "basic" + }, + { + "test": "multiplication", + "prompt": "multiply 8 and 3. Return only the number, no explanation.", + "result": [ + "24" + ], + "expected": "24", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-02T22:06:47.239Z", + "passed": true, + "duration": 1206, + "category": "basic" + }, + { + "test": "division", + "prompt": "divide 15 by 3. Return only the number, no explanation.", + "result": [ + "5" + ], + "expected": "5", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-02T22:06:48.026Z", + "passed": true, + "duration": 783, + "category": "basic" + }, + { + "test": "division", + "prompt": "divide 15 by 3. Return only the number, no explanation.", + "result": [ + "5" + ], + "expected": "5", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-02T22:06:51.012Z", + "passed": true, + "duration": 2982, + "category": "basic" + }, + { + "test": "division", + "prompt": "divide 15 by 3. Return only the number, no explanation.", + "result": [ + "5" + ], + "expected": "5", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-02T22:06:51.777Z", + "passed": true, + "duration": 760, + "category": "basic" } ], "highscores": [ @@ -6359,8 +6486,8 @@ "rankings": [ { "model": "openai/gpt-4o-mini", - "duration": 885, - "duration_secs": 0.885 + "duration": 791, + "duration_secs": 0.791 }, { "model": "anthropic/claude-3.5-sonnet", @@ -6373,14 +6500,14 @@ "test": "multiplication", "rankings": [ { - "model": "openai/gpt-3.5-turbo", - "duration": 984, - "duration_secs": 0.984 + "model": "anthropic/claude-3.5-sonnet", + "duration": 1190, + "duration_secs": 1.19 }, { "model": "openai/gpt-4o-mini", - "duration": 1111, - "duration_secs": 1.111 + "duration": 1206, + "duration_secs": 1.206 } ] }, @@ -6388,14 +6515,14 @@ "test": "division", "rankings": [ { - "model": "openai/gpt-3.5-turbo", - "duration": 889, - "duration_secs": 0.889 + "model": "openai/gpt-4o-mini", + "duration": 760, + "duration_secs": 0.76 }, { - "model": "qwen/qwq-32b", - "duration": 917, - "duration_secs": 0.917 + "model": "openai/gpt-3.5-turbo", + "duration": 783, + "duration_secs": 0.783 } ] }, @@ -6460,5 +6587,5 @@ ] } ], - "lastUpdated": "2025-04-02T19:29:30.523Z" + "lastUpdated": "2025-04-02T22:06:51.780Z" } \ No newline at end of file diff --git a/packages/kbot/tests/unit/reports/basic.json b/packages/kbot/tests/unit/reports/basic.json index 466ccbcb..5bd79cfc 100644 --- a/packages/kbot/tests/unit/reports/basic.json +++ b/packages/kbot/tests/unit/reports/basic.json @@ -997,6 +997,133 @@ "passed": true, "duration": 3646, "category": "basic" + }, + { + "test": "addition", + "prompt": "add 5 and 3. Return only the number, no explanation.", + "result": [ + "8" + ], + "expected": "8", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-02T22:06:38.904Z", + "passed": true, + "duration": 2263, + "category": "basic" + }, + { + "test": "addition", + "prompt": "add 5 and 3. Return only the number, no explanation.", + "result": [ + "8" + ], + "expected": "8", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-02T22:06:41.138Z", + "passed": true, + "duration": 2228, + "category": "basic" + }, + { + "test": "addition", + "prompt": "add 5 and 3. Return only the number, no explanation.", + "result": [ + "8" + ], + "expected": "8", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-02T22:06:41.934Z", + "passed": true, + "duration": 791, + "category": "basic" + }, + { + "test": "multiplication", + "prompt": "multiply 8 and 3. Return only the number, no explanation.", + "result": [ + "24" + ], + "expected": "24", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-02T22:06:43.239Z", + "passed": true, + "duration": 1300, + "category": "basic" + }, + { + "test": "multiplication", + "prompt": "multiply 8 and 3. Return only the number, no explanation.", + "result": [ + "8 × 3 = 24" + ], + "expected": "24", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-02T22:06:46.025Z", + "passed": false, + "duration": 2782, + "reason": "Expected 24, but got 8 × 3 = 24", + "category": "basic" + }, + { + "test": "multiplication", + "prompt": "multiply 8 and 3. Return only the number, no explanation.", + "result": [ + "24" + ], + "expected": "24", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-02T22:06:47.239Z", + "passed": true, + "duration": 1206, + "category": "basic" + }, + { + "test": "division", + "prompt": "divide 15 by 3. Return only the number, no explanation.", + "result": [ + "5" + ], + "expected": "5", + "model": "openai/gpt-3.5-turbo", + "router": "openai/gpt-3.5-turbo", + "timestamp": "2025-04-02T22:06:48.026Z", + "passed": true, + "duration": 783, + "category": "basic" + }, + { + "test": "division", + "prompt": "divide 15 by 3. Return only the number, no explanation.", + "result": [ + "5" + ], + "expected": "5", + "model": "deepseek/deepseek-r1-distill-qwen-14b:free", + "router": "deepseek/deepseek-r1-distill-qwen-14b:free", + "timestamp": "2025-04-02T22:06:51.012Z", + "passed": true, + "duration": 2982, + "category": "basic" + }, + { + "test": "division", + "prompt": "divide 15 by 3. Return only the number, no explanation.", + "result": [ + "5" + ], + "expected": "5", + "model": "openai/gpt-4o-mini", + "router": "openai/gpt-4o-mini", + "timestamp": "2025-04-02T22:06:51.777Z", + "passed": true, + "duration": 760, + "category": "basic" } ], "highscores": [ @@ -1005,8 +1132,8 @@ "rankings": [ { "model": "openai/gpt-4o-mini", - "duration": 885, - "duration_secs": 0.885 + "duration": 791, + "duration_secs": 0.791 }, { "model": "anthropic/claude-3.5-sonnet", @@ -1019,14 +1146,14 @@ "test": "multiplication", "rankings": [ { - "model": "openai/gpt-3.5-turbo", - "duration": 984, - "duration_secs": 0.984 + "model": "anthropic/claude-3.5-sonnet", + "duration": 1190, + "duration_secs": 1.19 }, { "model": "openai/gpt-4o-mini", - "duration": 1111, - "duration_secs": 1.111 + "duration": 1206, + "duration_secs": 1.206 } ] }, @@ -1034,17 +1161,17 @@ "test": "division", "rankings": [ { - "model": "openai/gpt-3.5-turbo", - "duration": 889, - "duration_secs": 0.889 + "model": "openai/gpt-4o-mini", + "duration": 760, + "duration_secs": 0.76 }, { - "model": "qwen/qwq-32b", - "duration": 917, - "duration_secs": 0.917 + "model": "openai/gpt-3.5-turbo", + "duration": 783, + "duration_secs": 0.783 } ] } ], - "lastUpdated": "2025-04-02T13:45:10.308Z" + "lastUpdated": "2025-04-02T22:06:51.777Z" } \ No newline at end of file diff --git a/packages/kbot/tests/unit/reports/basic.md b/packages/kbot/tests/unit/reports/basic.md index c938831e..76c367ef 100644 --- a/packages/kbot/tests/unit/reports/basic.md +++ b/packages/kbot/tests/unit/reports/basic.md @@ -2,260 +2,102 @@ ## Highscores +### Performance Rankings (Duration) + | Test | Model | Duration (ms) | Duration (s) | |------|-------|--------------|--------------| -| addition | openai/gpt-4o-mini | 885 | 0.89 | -| division | openai/gpt-3.5-turbo | 889 | 0.89 | -| division | qwen/qwq-32b | 917 | 0.92 | -| multiplication | openai/gpt-3.5-turbo | 984 | 0.98 | -| division | openai/gpt-4o-mini | 1104 | 1.10 | -| multiplication | openai/gpt-4o-mini | 1111 | 1.11 | -| multiplication | anthropic/claude-3.5-sonnet | 1190 | 1.19 | -| division | anthropic/claude-3.5-sonnet | 1405 | 1.41 | -| multiplication | deepseek/deepseek-r1-distill-qwen-14b:free | 1558 | 1.56 | -| addition | anthropic/claude-3.5-sonnet | 1689 | 1.69 | -| division | deepseek/deepseek-r1-distill-qwen-14b:free | 3646 | 3.65 | -| addition | qwen/qwq-32b | 3807 | 3.81 | -| multiplication | qwen/qwq-32b | 5008 | 5.01 | -| division | deepseek/deepseek-r1 | 7130 | 7.13 | -| addition | openai/gpt-3.5-turbo | 10455 | 10.46 | -| addition | deepseek/deepseek-r1 | 12064 | 12.06 | +| addition | openai/gpt-4o-mini | 791 | 0.79 | +| addition | deepseek/deepseek-r1-distill-qwen-14b:free | 2228 | 2.23 | +| addition | openai/gpt-3.5-turbo | 2263 | 2.26 | +| multiplication | openai/gpt-4o-mini | 1206 | 1.21 | +| multiplication | openai/gpt-3.5-turbo | 1300 | 1.30 | +| multiplication | deepseek/deepseek-r1-distill-qwen-14b:free | 2782 | 2.78 | +| division | openai/gpt-4o-mini | 760 | 0.76 | +| division | openai/gpt-3.5-turbo | 783 | 0.78 | +| division | deepseek/deepseek-r1-distill-qwen-14b:free | 2982 | 2.98 | ## Summary -- Total Tests: 18 -- Passed: 16 -- Failed: 2 +- Total Tests: 9 +- Passed: 8 +- Failed: 1 - Success Rate: 88.89% -- Average Duration: 3639ms (3.64s) +- Average Duration: 1677ms (1.68s) ## Failed Tests -### addition - deepseek/deepseek-r1-distill-qwen-14b:free - -- Prompt: `add 5 and 3. Return only the number, no explanation.` -- Expected: `8` -- Actual: `The sum of 5 and 3 is 8. Therefore, the result is \boxed{8}.` -- Duration: 6405ms (6405.00s) -- Reason: Expected 8, but got the sum of 5 and 3 is 8. therefore, the result is \boxed{8}. -- Timestamp: 4/2/2025, 3:44:40 PM - -### multiplication - deepseek/deepseek-r1 +### multiplication - deepseek/deepseek-r1-distill-qwen-14b:free - Prompt: `multiply 8 and 3. Return only the number, no explanation.` - Expected: `24` -- Actual: `24 - -24 - -The result is 24. - -24 - -Here's the answer: 24 - -The answer will be 24. - -24 - -24 - -The product of 8 and 3 is 24. - -24 - -The answer is 24. - -24 - -24 - -24 - -The result is 24. - -24 - -Here's the numerical result: 24 - -The answer is 24. - -24 - -24 - -The answer is 24.` -- Duration: 5258ms (5258.00s) -- Reason: Expected 24, but got 24 - -24 - -the result is 24. - -24 - -here's the answer: 24 - -the answer will be 24. - -24 - -24 - -the product of 8 and 3 is 24. - -24 - -the answer is 24. - -24 - -24 - -24 - -the result is 24. - -24 - -here's the numerical result: 24 - -the answer is 24. - -24 - -24 - -the answer is 24. -- Timestamp: 4/2/2025, 3:44:53 PM +- Actual: `8 × 3 = 24` +- Duration: 2782ms (2.78s) +- Reason: Expected 24, but got 8 × 3 = 24 +- Timestamp: 4/3/2025, 12:06:46 AM ## Passed Tests -### addition - anthropic/claude-3.5-sonnet - -- Prompt: `add 5 and 3. Return only the number, no explanation.` -- Expected: `8` -- Actual: `8` -- Duration: 1689ms (1689.00s) -- Timestamp: 4/2/2025, 3:44:06 PM - -### addition - qwen/qwq-32b - -- Prompt: `add 5 and 3. Return only the number, no explanation.` -- Expected: `8` -- Actual: `8` -- Duration: 3807ms (3807.00s) -- Timestamp: 4/2/2025, 3:44:10 PM - -### addition - openai/gpt-4o-mini - -- Prompt: `add 5 and 3. Return only the number, no explanation.` -- Expected: `8` -- Actual: `8` -- Duration: 885ms (885.00s) -- Timestamp: 4/2/2025, 3:44:11 PM - ### addition - openai/gpt-3.5-turbo - Prompt: `add 5 and 3. Return only the number, no explanation.` - Expected: `8` - Actual: `8` -- Duration: 10455ms (10455.00s) -- Timestamp: 4/2/2025, 3:44:21 PM +- Duration: 2263ms (2.26s) +- Timestamp: 4/3/2025, 12:06:38 AM -### addition - deepseek/deepseek-r1 +### addition - deepseek/deepseek-r1-distill-qwen-14b:free - Prompt: `add 5 and 3. Return only the number, no explanation.` - Expected: `8` - Actual: `8` -- Duration: 12064ms (12064.00s) -- Timestamp: 4/2/2025, 3:44:33 PM +- Duration: 2228ms (2.23s) +- Timestamp: 4/3/2025, 12:06:41 AM -### multiplication - anthropic/claude-3.5-sonnet +### addition - openai/gpt-4o-mini -- Prompt: `multiply 8 and 3. Return only the number, no explanation.` -- Expected: `24` -- Actual: `24` -- Duration: 1190ms (1190.00s) -- Timestamp: 4/2/2025, 3:44:41 PM - -### multiplication - qwen/qwq-32b - -- Prompt: `multiply 8 and 3. Return only the number, no explanation.` -- Expected: `24` -- Actual: `24` -- Duration: 5008ms (5008.00s) -- Timestamp: 4/2/2025, 3:44:46 PM - -### multiplication - openai/gpt-4o-mini - -- Prompt: `multiply 8 and 3. Return only the number, no explanation.` -- Expected: `24` -- Actual: `24` -- Duration: 1111ms (1111.00s) -- Timestamp: 4/2/2025, 3:44:47 PM +- Prompt: `add 5 and 3. Return only the number, no explanation.` +- Expected: `8` +- Actual: `8` +- Duration: 791ms (0.79s) +- Timestamp: 4/3/2025, 12:06:41 AM ### multiplication - openai/gpt-3.5-turbo - Prompt: `multiply 8 and 3. Return only the number, no explanation.` - Expected: `24` - Actual: `24` -- Duration: 984ms (984.00s) -- Timestamp: 4/2/2025, 3:44:48 PM +- Duration: 1300ms (1.30s) +- Timestamp: 4/3/2025, 12:06:43 AM -### multiplication - deepseek/deepseek-r1-distill-qwen-14b:free +### multiplication - openai/gpt-4o-mini - Prompt: `multiply 8 and 3. Return only the number, no explanation.` - Expected: `24` - Actual: `24` -- Duration: 1558ms (1558.00s) -- Timestamp: 4/2/2025, 3:44:55 PM - -### division - anthropic/claude-3.5-sonnet - -- Prompt: `divide 15 by 3. Return only the number, no explanation.` -- Expected: `5` -- Actual: `5` -- Duration: 1405ms (1405.00s) -- Timestamp: 4/2/2025, 3:44:56 PM - -### division - qwen/qwq-32b - -- Prompt: `divide 15 by 3. Return only the number, no explanation.` -- Expected: `5` -- Actual: `5` -- Duration: 917ms (917.00s) -- Timestamp: 4/2/2025, 3:44:57 PM - -### division - openai/gpt-4o-mini - -- Prompt: `divide 15 by 3. Return only the number, no explanation.` -- Expected: `5` -- Actual: `5` -- Duration: 1104ms (1104.00s) -- Timestamp: 4/2/2025, 3:44:58 PM +- Duration: 1206ms (1.21s) +- Timestamp: 4/3/2025, 12:06:47 AM ### division - openai/gpt-3.5-turbo - Prompt: `divide 15 by 3. Return only the number, no explanation.` - Expected: `5` - Actual: `5` -- Duration: 889ms (889.00s) -- Timestamp: 4/2/2025, 3:44:59 PM - -### division - deepseek/deepseek-r1 - -- Prompt: `divide 15 by 3. Return only the number, no explanation.` -- Expected: `5` -- Actual: `5` -- Duration: 7130ms (7130.00s) -- Timestamp: 4/2/2025, 3:45:06 PM +- Duration: 783ms (0.78s) +- Timestamp: 4/3/2025, 12:06:48 AM ### division - deepseek/deepseek-r1-distill-qwen-14b:free - Prompt: `divide 15 by 3. Return only the number, no explanation.` - Expected: `5` - Actual: `5` -- Duration: 3646ms (3646.00s) -- Timestamp: 4/2/2025, 3:45:10 PM +- Duration: 2982ms (2.98s) +- Timestamp: 4/3/2025, 12:06:51 AM + +### division - openai/gpt-4o-mini + +- Prompt: `divide 15 by 3. Return only the number, no explanation.` +- Expected: `5` +- Actual: `5` +- Duration: 760ms (0.76s) +- Timestamp: 4/3/2025, 12:06:51 AM