This commit is contained in:
lovebird 2025-04-03 00:07:12 +02:00
parent 66cc6ac754
commit 54b9c5dd9c
8 changed files with 339 additions and 438 deletions

View File

@ -2,7 +2,7 @@ export interface IKBotOptions {
/** Target directory */
path?: string;
/** The prompt. Supports file paths and environment variables. */
prompt?: string;
prompt?: string | undefined;
/** Optional output path for modified files (Tool mode only) */
output?: string | undefined;
/** Optional destination path for the result, will substitute ${MODEL_NAME} and ${ROUTER} in the path. Optional, used for "completion" mode */

View File

@ -1,207 +1,14 @@
{
"model": "openai/gpt-4o",
"model": "openai/gpt-4o-mini",
"messages": [
{
"role": "user",
"content": "List all files in the directory C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\test-data. Return the list as a JSON array of filenames."
"content": "divide 15 by 3. Return only the number, no explanation."
},
{
"role": "user",
"content": "USER Preferences : # Preferences\r\n\r\nYou are a helpful AI assistant. When asked to perform calculations, you should return only the numerical result without any explanation or comments. "
}
],
"tools": [
{
"type": "function",
"function": {
"name": "list_files",
"description": "List all files in a directory",
"parameters": {
"type": "object",
"properties": {
"directory": {
"type": "string"
},
"pattern": {
"type": "string",
"optional": true
}
},
"required": [
"directory"
]
}
}
},
{
"type": "function",
"function": {
"name": "read_files",
"description": "Reads files in a directory with a given pattern",
"parameters": {
"type": "object",
"properties": {
"directory": {
"type": "string"
},
"pattern": {
"type": "string",
"optional": true
}
},
"required": [
"directory"
]
}
}
},
{
"type": "function",
"function": {
"name": "remove_file",
"description": "Remove a file at given path",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string"
}
},
"required": [
"path"
]
}
}
},
{
"type": "function",
"function": {
"name": "rename_file",
"description": "Rename or move a file or directory",
"parameters": {
"type": "object",
"properties": {
"src": {
"type": "string"
},
"dst": {
"type": "string"
}
},
"required": [
"path"
]
}
}
},
{
"type": "function",
"function": {
"name": "modify_project_files",
"description": "Create or modify existing project files in one shot, preferably used for creating project structure)",
"parameters": {
"type": "object",
"properties": {
"files": {
"type": "array",
"items": {
"type": "object",
"properties": {
"path": {
"type": "string"
},
"content": {
"type": "string",
"description": "base64 encoded string"
}
},
"required": [
"path",
"content"
]
}
}
},
"required": [
"files"
]
}
}
},
{
"type": "function",
"function": {
"name": "write_file",
"description": "Writes to a file, given a path and content (base64). No directory or file exists check needed!",
"parameters": {
"type": "object",
"properties": {
"file": {
"type": "object",
"properties": {
"path": {
"type": "string"
},
"content": {
"type": "string",
"description": "base64 encoded string"
}
}
}
},
"required": [
"file"
]
}
}
},
{
"type": "function",
"function": {
"name": "file_exists",
"description": "check if a file or folder exists",
"parameters": {
"type": "object",
"properties": {
"file": {
"type": "object",
"properties": {
"path": {
"type": "string"
}
}
}
},
"required": [
"file"
]
}
}
},
{
"type": "function",
"function": {
"name": "read_file",
"description": "read a file, at given a path",
"parameters": {
"type": "object",
"properties": {
"file": {
"type": "object",
"properties": {
"path": {
"type": "string"
}
}
}
},
"required": [
"file"
]
}
}
}
],
"tool_choice": "auto",
"parallel_tool_calls": false
"tools": []
}

View File

@ -13,8 +13,7 @@
},
"prompt": {
"type": "string",
"description": "The prompt. Supports file paths and environment variables.",
"default": "./prompt.md"
"description": "The prompt. Supports file paths and environment variables."
},
"output": {
"type": "string",
@ -101,7 +100,7 @@
},
"logLevel": {
"type": "number",
"default": 2,
"default": 4,
"description": "Logging level for the application"
},
"profile": {

View File

@ -15,8 +15,7 @@
},
"prompt": {
"ui:description": "The prompt. Supports file paths and environment variables.",
"ui:title": "Prompt",
"ui:placeholder": "./prompt.md"
"ui:title": "Prompt"
},
"output": {
"ui:description": "Optional output path for modified files (Tool mode only)",
@ -79,7 +78,7 @@
"logLevel": {
"ui:description": "Logging level for the application",
"ui:title": "Loglevel",
"ui:placeholder": 2
"ui:placeholder": 4
},
"profile": {
"ui:description": "Path to profile for variables. Supports environment variables.",

View File

@ -2,7 +2,7 @@ export interface IKBotOptions {
/** Target directory */
path?: string;
/** The prompt. Supports file paths and environment variables. */
prompt?: string;
prompt?: string | undefined;
/** Optional output path for modified files (Tool mode only) */
output?: string | undefined;
/** Optional destination path for the result, will substitute ${MODEL_NAME} and ${ROUTER} in the path. Optional, used for "completion" mode */

View File

@ -6276,6 +6276,133 @@
"duration": 2274,
"reason": "Expected [], but got {\"files\":[]}",
"category": "tools"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:38.904Z",
"passed": true,
"duration": 2263,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:41.138Z",
"passed": true,
"duration": 2228,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:41.934Z",
"passed": true,
"duration": 791,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:43.239Z",
"passed": true,
"duration": 1300,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"8 × 3 = 24"
],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:46.025Z",
"passed": false,
"duration": 2782,
"reason": "Expected 24, but got 8 × 3 = 24",
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:47.239Z",
"passed": true,
"duration": 1206,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:48.026Z",
"passed": true,
"duration": 783,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:51.012Z",
"passed": true,
"duration": 2982,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:51.777Z",
"passed": true,
"duration": 760,
"category": "basic"
}
],
"highscores": [
@ -6359,8 +6486,8 @@
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 885,
"duration_secs": 0.885
"duration": 791,
"duration_secs": 0.791
},
{
"model": "anthropic/claude-3.5-sonnet",
@ -6373,14 +6500,14 @@
"test": "multiplication",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 984,
"duration_secs": 0.984
"model": "anthropic/claude-3.5-sonnet",
"duration": 1190,
"duration_secs": 1.19
},
{
"model": "openai/gpt-4o-mini",
"duration": 1111,
"duration_secs": 1.111
"duration": 1206,
"duration_secs": 1.206
}
]
},
@ -6388,14 +6515,14 @@
"test": "division",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 889,
"duration_secs": 0.889
"model": "openai/gpt-4o-mini",
"duration": 760,
"duration_secs": 0.76
},
{
"model": "qwen/qwq-32b",
"duration": 917,
"duration_secs": 0.917
"model": "openai/gpt-3.5-turbo",
"duration": 783,
"duration_secs": 0.783
}
]
},
@ -6460,5 +6587,5 @@
]
}
],
"lastUpdated": "2025-04-02T19:29:30.523Z"
"lastUpdated": "2025-04-02T22:06:51.780Z"
}

View File

@ -997,6 +997,133 @@
"passed": true,
"duration": 3646,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:38.904Z",
"passed": true,
"duration": 2263,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:41.138Z",
"passed": true,
"duration": 2228,
"category": "basic"
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:41.934Z",
"passed": true,
"duration": 791,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:43.239Z",
"passed": true,
"duration": 1300,
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"8 × 3 = 24"
],
"expected": "24",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:46.025Z",
"passed": false,
"duration": 2782,
"reason": "Expected 24, but got 8 × 3 = 24",
"category": "basic"
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:47.239Z",
"passed": true,
"duration": 1206,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-3.5-turbo",
"router": "openai/gpt-3.5-turbo",
"timestamp": "2025-04-02T22:06:48.026Z",
"passed": true,
"duration": 783,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "deepseek/deepseek-r1-distill-qwen-14b:free",
"router": "deepseek/deepseek-r1-distill-qwen-14b:free",
"timestamp": "2025-04-02T22:06:51.012Z",
"passed": true,
"duration": 2982,
"category": "basic"
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "openai/gpt-4o-mini",
"router": "openai/gpt-4o-mini",
"timestamp": "2025-04-02T22:06:51.777Z",
"passed": true,
"duration": 760,
"category": "basic"
}
],
"highscores": [
@ -1005,8 +1132,8 @@
"rankings": [
{
"model": "openai/gpt-4o-mini",
"duration": 885,
"duration_secs": 0.885
"duration": 791,
"duration_secs": 0.791
},
{
"model": "anthropic/claude-3.5-sonnet",
@ -1019,14 +1146,14 @@
"test": "multiplication",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 984,
"duration_secs": 0.984
"model": "anthropic/claude-3.5-sonnet",
"duration": 1190,
"duration_secs": 1.19
},
{
"model": "openai/gpt-4o-mini",
"duration": 1111,
"duration_secs": 1.111
"duration": 1206,
"duration_secs": 1.206
}
]
},
@ -1034,17 +1161,17 @@
"test": "division",
"rankings": [
{
"model": "openai/gpt-3.5-turbo",
"duration": 889,
"duration_secs": 0.889
"model": "openai/gpt-4o-mini",
"duration": 760,
"duration_secs": 0.76
},
{
"model": "qwen/qwq-32b",
"duration": 917,
"duration_secs": 0.917
"model": "openai/gpt-3.5-turbo",
"duration": 783,
"duration_secs": 0.783
}
]
}
],
"lastUpdated": "2025-04-02T13:45:10.308Z"
"lastUpdated": "2025-04-02T22:06:51.777Z"
}

View File

@ -2,260 +2,102 @@
## Highscores
### Performance Rankings (Duration)
| Test | Model | Duration (ms) | Duration (s) |
|------|-------|--------------|--------------|
| addition | openai/gpt-4o-mini | 885 | 0.89 |
| division | openai/gpt-3.5-turbo | 889 | 0.89 |
| division | qwen/qwq-32b | 917 | 0.92 |
| multiplication | openai/gpt-3.5-turbo | 984 | 0.98 |
| division | openai/gpt-4o-mini | 1104 | 1.10 |
| multiplication | openai/gpt-4o-mini | 1111 | 1.11 |
| multiplication | anthropic/claude-3.5-sonnet | 1190 | 1.19 |
| division | anthropic/claude-3.5-sonnet | 1405 | 1.41 |
| multiplication | deepseek/deepseek-r1-distill-qwen-14b:free | 1558 | 1.56 |
| addition | anthropic/claude-3.5-sonnet | 1689 | 1.69 |
| division | deepseek/deepseek-r1-distill-qwen-14b:free | 3646 | 3.65 |
| addition | qwen/qwq-32b | 3807 | 3.81 |
| multiplication | qwen/qwq-32b | 5008 | 5.01 |
| division | deepseek/deepseek-r1 | 7130 | 7.13 |
| addition | openai/gpt-3.5-turbo | 10455 | 10.46 |
| addition | deepseek/deepseek-r1 | 12064 | 12.06 |
| addition | openai/gpt-4o-mini | 791 | 0.79 |
| addition | deepseek/deepseek-r1-distill-qwen-14b:free | 2228 | 2.23 |
| addition | openai/gpt-3.5-turbo | 2263 | 2.26 |
| multiplication | openai/gpt-4o-mini | 1206 | 1.21 |
| multiplication | openai/gpt-3.5-turbo | 1300 | 1.30 |
| multiplication | deepseek/deepseek-r1-distill-qwen-14b:free | 2782 | 2.78 |
| division | openai/gpt-4o-mini | 760 | 0.76 |
| division | openai/gpt-3.5-turbo | 783 | 0.78 |
| division | deepseek/deepseek-r1-distill-qwen-14b:free | 2982 | 2.98 |
## Summary
- Total Tests: 18
- Passed: 16
- Failed: 2
- Total Tests: 9
- Passed: 8
- Failed: 1
- Success Rate: 88.89%
- Average Duration: 3639ms (3.64s)
- Average Duration: 1677ms (1.68s)
## Failed Tests
### addition - deepseek/deepseek-r1-distill-qwen-14b:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `The sum of 5 and 3 is 8. Therefore, the result is \boxed{8}.`
- Duration: 6405ms (6405.00s)
- Reason: Expected 8, but got the sum of 5 and 3 is 8. therefore, the result is \boxed{8}.
- Timestamp: 4/2/2025, 3:44:40 PM
### multiplication - deepseek/deepseek-r1
### multiplication - deepseek/deepseek-r1-distill-qwen-14b:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24
24
The result is 24.
24
Here's the answer: 24
The answer will be 24.
24
24
The product of 8 and 3 is 24.
24
The answer is 24.
24
24
24
The result is 24.
24
Here's the numerical result: 24
The answer is 24.
24
24
The answer is 24.`
- Duration: 5258ms (5258.00s)
- Reason: Expected 24, but got 24
24
the result is 24.
24
here's the answer: 24
the answer will be 24.
24
24
the product of 8 and 3 is 24.
24
the answer is 24.
24
24
24
the result is 24.
24
here's the numerical result: 24
the answer is 24.
24
24
the answer is 24.
- Timestamp: 4/2/2025, 3:44:53 PM
- Actual: `8 × 3 = 24`
- Duration: 2782ms (2.78s)
- Reason: Expected 24, but got 8 × 3 = 24
- Timestamp: 4/3/2025, 12:06:46 AM
## Passed Tests
### addition - anthropic/claude-3.5-sonnet
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Duration: 1689ms (1689.00s)
- Timestamp: 4/2/2025, 3:44:06 PM
### addition - qwen/qwq-32b
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Duration: 3807ms (3807.00s)
- Timestamp: 4/2/2025, 3:44:10 PM
### addition - openai/gpt-4o-mini
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Duration: 885ms (885.00s)
- Timestamp: 4/2/2025, 3:44:11 PM
### addition - openai/gpt-3.5-turbo
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Duration: 10455ms (10455.00s)
- Timestamp: 4/2/2025, 3:44:21 PM
- Duration: 2263ms (2.26s)
- Timestamp: 4/3/2025, 12:06:38 AM
### addition - deepseek/deepseek-r1
### addition - deepseek/deepseek-r1-distill-qwen-14b:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Duration: 12064ms (12064.00s)
- Timestamp: 4/2/2025, 3:44:33 PM
- Duration: 2228ms (2.23s)
- Timestamp: 4/3/2025, 12:06:41 AM
### multiplication - anthropic/claude-3.5-sonnet
### addition - openai/gpt-4o-mini
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Duration: 1190ms (1190.00s)
- Timestamp: 4/2/2025, 3:44:41 PM
### multiplication - qwen/qwq-32b
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Duration: 5008ms (5008.00s)
- Timestamp: 4/2/2025, 3:44:46 PM
### multiplication - openai/gpt-4o-mini
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Duration: 1111ms (1111.00s)
- Timestamp: 4/2/2025, 3:44:47 PM
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Duration: 791ms (0.79s)
- Timestamp: 4/3/2025, 12:06:41 AM
### multiplication - openai/gpt-3.5-turbo
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Duration: 984ms (984.00s)
- Timestamp: 4/2/2025, 3:44:48 PM
- Duration: 1300ms (1.30s)
- Timestamp: 4/3/2025, 12:06:43 AM
### multiplication - deepseek/deepseek-r1-distill-qwen-14b:free
### multiplication - openai/gpt-4o-mini
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Duration: 1558ms (1558.00s)
- Timestamp: 4/2/2025, 3:44:55 PM
### division - anthropic/claude-3.5-sonnet
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 1405ms (1405.00s)
- Timestamp: 4/2/2025, 3:44:56 PM
### division - qwen/qwq-32b
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 917ms (917.00s)
- Timestamp: 4/2/2025, 3:44:57 PM
### division - openai/gpt-4o-mini
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 1104ms (1104.00s)
- Timestamp: 4/2/2025, 3:44:58 PM
- Duration: 1206ms (1.21s)
- Timestamp: 4/3/2025, 12:06:47 AM
### division - openai/gpt-3.5-turbo
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 889ms (889.00s)
- Timestamp: 4/2/2025, 3:44:59 PM
### division - deepseek/deepseek-r1
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 7130ms (7130.00s)
- Timestamp: 4/2/2025, 3:45:06 PM
- Duration: 783ms (0.78s)
- Timestamp: 4/3/2025, 12:06:48 AM
### division - deepseek/deepseek-r1-distill-qwen-14b:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 3646ms (3646.00s)
- Timestamp: 4/2/2025, 3:45:10 PM
- Duration: 2982ms (2.98s)
- Timestamp: 4/3/2025, 12:06:51 AM
### division - openai/gpt-4o-mini
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Duration: 760ms (0.76s)
- Timestamp: 4/3/2025, 12:06:51 AM