tests:language - merci, habsch selbstgemacht :)

This commit is contained in:
lovebird 2025-04-01 13:47:36 +02:00
parent 75af5d1a26
commit 32469204ed
14 changed files with 5131 additions and 327 deletions

View File

@ -1,5 +1,5 @@
{
"model": "gpt-4",
"model": "anthropic/claude-2.0",
"messages": [
{
"role": "user",

View File

@ -48,6 +48,69 @@
- Reason: undefined
- Timestamp: 4/1/2025, 12:26:35 PM
### hello - deepseek/deepseek-chat:free
- Prompt: `say "hello"`
- Expected: `hello`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:37 PM
### hello - google/gemini-2.0-flash-exp:free
- Prompt: `say "hello"`
- Expected: `hello`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:37 PM
### hello - gpt-4
- Prompt: `say "hello"`
- Expected: `hello`
- Actual: ``
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:36:42 PM
### goodbye - deepseek/deepseek-chat:free
- Prompt: `say "goodbye"`
- Expected: `goodbye`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:42 PM
### goodbye - google/gemini-2.0-flash-exp:free
- Prompt: `say "goodbye"`
- Expected: `goodbye`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:43 PM
### goodbye - gpt-4
- Prompt: `say "goodbye"`
- Expected: `goodbye`
- Actual: ``
- Reason: expected 'goodbye.' to deeply equal 'goodbye'
- Timestamp: 4/1/2025, 1:36:44 PM
### yes - deepseek/deepseek-chat:free
- Prompt: `say "yes"`
- Expected: `yes`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:45 PM
### yes - google/gemini-2.0-flash-exp:free
- Prompt: `say "yes"`
- Expected: `yes`
- Actual: ``
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:36:45 PM
### yes - gpt-4
- Prompt: `say "yes"`
- Expected: `yes`
- Actual: ``
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:36:46 PM
## Passed Tests
### addition - deepseek/deepseek-chat:free
@ -67,7 +130,7 @@
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: `8`
- Timestamp: 4/1/2025, 1:02:55 PM
- Timestamp: 4/1/2025, 1:39:04 PM
### multiplication - deepseek/deepseek-chat:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
@ -86,7 +149,7 @@
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: `24`
- Timestamp: 4/1/2025, 1:02:57 PM
- Timestamp: 4/1/2025, 1:39:06 PM
### division - deepseek/deepseek-chat:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
@ -105,5 +168,5 @@
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: `5`
- Timestamp: 4/1/2025, 1:03:00 PM
- Timestamp: 4/1/2025, 1:39:08 PM

View File

@ -816,5 +816,281 @@
"router": "openrouter",
"timestamp": "2025-04-01T11:03:00.064Z",
"passed": true
},
{
"test": "hello",
"prompt": "say \"hello\"",
"result": [],
"expected": "hello",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:37.192Z",
"passed": false,
"duration": 1107,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response",
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "Error",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "Model returned empty response",
"config": {
"router": "openrouter",
"apiKey": ""
}
},
{
"test": "hello",
"prompt": "say \"hello\"",
"result": [],
"expected": "hello",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:37.957Z",
"passed": false,
"duration": 762,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response",
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "Error",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "Model returned empty response",
"config": {
"router": "openrouter",
"apiKey": ""
}
},
{
"test": "hello",
"prompt": "say \"hello\"",
"result": [],
"expected": "hello",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:42.078Z",
"passed": false,
"duration": 4120,
"reason": "Unknown error occurred",
"config": {
"router": "openai",
"apiKey": ""
}
},
{
"test": "goodbye",
"prompt": "say \"goodbye\"",
"result": [],
"expected": "goodbye",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:42.695Z",
"passed": false,
"duration": 616,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response",
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "Error",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "Model returned empty response",
"config": {
"router": "openrouter",
"apiKey": ""
}
},
{
"test": "goodbye",
"prompt": "say \"goodbye\"",
"result": [],
"expected": "goodbye",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:43.462Z",
"passed": false,
"duration": 765,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response",
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "Error",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "Model returned empty response",
"config": {
"router": "openrouter",
"apiKey": ""
}
},
{
"test": "goodbye",
"prompt": "say \"goodbye\"",
"result": [],
"expected": "goodbye",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:44.537Z",
"passed": false,
"duration": 1073,
"error": {
"message": "expected 'goodbye.' to deeply equal 'goodbye'",
"code": "UNKNOWN",
"type": "AssertionError",
"details": {
"message": "expected 'goodbye.' to deeply equal 'goodbye'",
"actual": "goodbye.",
"expected": "goodbye",
"showDiff": true,
"operator": "strictEqual",
"stackStr": "AssertionError: expected 'goodbye.' to deeply equal 'goodbye'\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:64:22)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "AssertionError",
"diff": "Expected: \u001b[32m\"goodbye\"\u001b[39m\nReceived: \u001b[31m\"goodbye\u001b[7m.\u001b[27m\"\u001b[39m",
"name": "AssertionError",
"ok": false,
"stack": "AssertionError: expected 'goodbye.' to deeply equal 'goodbye'\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:64:22)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
}
},
"reason": "expected 'goodbye.' to deeply equal 'goodbye'",
"config": {
"router": "openai",
"apiKey": ""
}
},
{
"test": "yes",
"prompt": "say \"yes\"",
"result": [],
"expected": "yes",
"model": "deepseek/deepseek-chat:free",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:45.193Z",
"passed": false,
"duration": 654,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response",
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "Error",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "Model returned empty response",
"config": {
"router": "openrouter",
"apiKey": ""
}
},
{
"test": "yes",
"prompt": "say \"yes\"",
"result": [],
"expected": "yes",
"model": "google/gemini-2.0-flash-exp:free",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:45.954Z",
"passed": false,
"duration": 760,
"error": {
"message": "Model returned empty response",
"code": "UNKNOWN",
"type": "Error",
"details": {
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "Model returned empty response",
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "Error",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "Model returned empty response",
"config": {
"router": "openrouter",
"apiKey": ""
}
},
{
"test": "yes",
"prompt": "say \"yes\"",
"result": [],
"expected": "yes",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T11:36:46.806Z",
"passed": false,
"duration": 851,
"reason": "Unknown error occurred",
"config": {
"router": "openai",
"apiKey": ""
}
},
{
"test": "addition",
"prompt": "add 5 and 3. Return only the number, no explanation.",
"result": [
"8"
],
"expected": "8",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T11:39:04.078Z",
"passed": true
},
{
"test": "multiplication",
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
"result": [
"24"
],
"expected": "24",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T11:39:06.572Z",
"passed": true
},
{
"test": "division",
"prompt": "divide 15 by 3. Return only the number, no explanation.",
"result": [
"5"
],
"expected": "5",
"model": "gpt-4",
"router": "openrouter",
"timestamp": "2025-04-01T11:39:08.868Z",
"passed": true
}
]

View File

@ -1,210 +1,210 @@
import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import {
models,
TEST_BASE_PATH,
TEST_LOGS_PATH,
TEST_PREFERENCES_PATH,
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse,
getRouterForModel,
getApiKeyForRouter
} from './commons'
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
describe('Basic Capabilities', () => {
// Seed the in-memory log with whatever an earlier run persisted; anything
// that is missing or is not an array is treated as "no previous results".
const persistedResults = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
let testResults: TestResult[] = Array.isArray(persistedResults) ? persistedResults : []
/**
 * Sends `prompt` to `modelName` in completion mode, appends exactly one record
 * describing the outcome to `testResults`, rewrites TEST_LOG_PATH, and asserts
 * the answer.
 *
 * @param prompt - Text sent to the model.
 * @param expected - Value compared against the trimmed, lower-cased first response entry.
 * @param testName - Label stored in the `test` field of the log record.
 * @param modelName - Model identifier passed to `run`.
 * @returns The log record of the passing run.
 * @throws Re-throws the timeout, empty-response or assertion error after logging it.
 */
const runBasicTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
  let model = 'unknown'
  let router = 'unknown'
  const startTime = Date.now()
  let result: string[] = []
  let timer: ReturnType<typeof setTimeout> | undefined

  // Builds one log record with the real outcome, appends it and rewrites the JSON log.
  const record = (passed: boolean, error?: TestResult['error']): TestResult => {
    const configuredRouter = getRouterForModel(modelName)
    const entry: TestResult = {
      test: testName,
      prompt,
      result,
      expected,
      model,
      router,
      timestamp: new Date().toISOString(),
      passed,
      duration: Date.now() - startTime,
      error,
      reason: passed ? undefined : error?.message || 'Unknown error occurred',
      config: {
        router: configuredRouter,
        // NOTE(review): this persists whatever getApiKeyForRouter returns into the
        // JSON log on disk - confirm it can never be a real secret.
        apiKey: getApiKeyForRouter(configuredRouter)
      }
    }
    testResults.push(entry)
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
    return entry
  }

  try {
    const response = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: modelName,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        // Capture the model/router that were actually used for this call.
        onRun: async (options) => {
          model = options.model || 'unknown'
          router = options.router || 'unknown'
          return options
        }
      }),
      new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })
    ]) as string[]
    if (isEmptyResponse(response)) {
      throw new Error('Model returned empty response')
    }
    // Keep the raw response so a mismatch is logged with what the model actually said.
    result = Array.isArray(response) ? response : []
    const actual = response?.[0]?.trim()?.toLowerCase() || ''
    expect(actual).toEqual(expected)
  } catch (e) {
    record(false, formatError(e))
    throw e
  } finally {
    // Without this the losing timeout timer stays pending after the call settles.
    clearTimeout(timer)
  }
  // Logged outside the try block so a failing write is not re-recorded as a test failure.
  return record(true)
}
// Every word is at once the thing the model is asked to say, the expected
// answer and the test label; one it.each suite is registered per word.
for (const word of ['hello', 'goodbye', 'yes']) {
  it.each(models)(`should respond to "${word}" with model %s`, async (modelName) => {
    await runBasicTest(`say "${word}"`, word, word, modelName)
  })
}
// Renders the latest result per (test, model) pair into basic-report.md,
// failed tests first, then passed tests.
it('should generate markdown report', () => {
  // Keep only the most recent result for each test+model combination.
  const latestResults = new Map<string, Map<string, TestResult>>()
  for (const result of testResults) {
    let perModel = latestResults.get(result.test)
    if (!perModel) {
      perModel = new Map()
      latestResults.set(result.test, perModel)
    }
    const previous = perModel.get(result.model)
    if (!previous || new Date(result.timestamp) > new Date(previous.timestamp)) {
      perModel.set(result.model, result)
    }
  }
  // Flatten in insertion order (test first, then model) so section order is stable.
  const latest = [...latestResults.values()].flatMap(perModel => [...perModel.values()])

  // Renders one result. Records loaded from older log files may lack `duration`,
  // `reason` or `result`, so those print "n/a" / empty instead of "undefined".
  const renderEntry = (result: TestResult): string => {
    const lines = [
      `### ${result.test} - ${result.model}`,
      `- Prompt: \`${result.prompt}\``,
      `- Expected: \`${result.expected}\``,
      `- Actual: \`${result.result?.[0] || ''}\``,
      `- Duration: ${result.duration == null ? 'n/a' : `${result.duration}ms`}`
    ]
    if (!result.passed) {
      if (result.error) {
        lines.push(`- Error Type: ${result.error.type}`)
        lines.push(`- Error Code: ${result.error.code}`)
        lines.push(`- Error Message: ${result.error.message}`)
      }
      lines.push(`- Reason: ${result.reason ?? 'n/a'}`)
    }
    lines.push(`- Timestamp: ${new Date(result.timestamp).toLocaleString()}`)
    return `${lines.join('\n')}\n\n`
  }

  // Renders a titled section, or the placeholder note when it has no entries.
  const renderSection = (title: string, entries: TestResult[], emptyNote: string): string => {
    const body = entries.length > 0 ? entries.map(renderEntry).join('') : `${emptyNote}\n\n`
    return `## ${title}\n\n${body}`
  }

  let report = '# Basic Test Results\n\n'
  report += renderSection('Failed Tests', latest.filter(result => !result.passed), '*No failed tests*')
  report += renderSection('Passed Tests', latest.filter(result => result.passed), '*No passed tests*')

  // Write report to file
  const reportPath = path.resolve(__dirname, './basic-report.md')
  write(reportPath, report)
  // Verify report was written
  expect(exists(reportPath) === 'file').toBe(true)
})
import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
describe('Basic Operations', () => {
// Seed the in-memory log with whatever an earlier run persisted; anything
// that is missing or is not an array is treated as "no previous results".
const persistedResults = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
let testResults: TestResult[] = Array.isArray(persistedResults) ? persistedResults : []
// Asks each model to add 5 and 3 and logs the outcome before asserting it.
it.each(models)('should add two numbers with model %s', async (modelName) => {
  const prompt = 'add 5 and 3. Return only the number, no explanation.'
  const expected = '8'
  let model = 'unknown'
  let router = 'unknown'
  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    // Capture the model/router that were actually used for this call.
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]
  const actual = result?.[0]?.trim() || ''
  if (!actual) {
    // A model that returns nothing is skipped (test passes, nothing is logged).
    console.log(`Skipping test for model ${modelName} - no result returned`)
    return
  }
  const passed = actual === expected
  // Record BEFORE asserting: expect() throws on a mismatch, which previously
  // meant a wrong answer never reached the log.
  testResults.push({
    test: 'addition',
    prompt,
    result: result || [],
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
  })
  // Write all results to the same file
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  expect(actual).toEqual(expected)
})
it.each(models)('should multiply two numbers with model %s', async (modelName) => {
const prompt = 'multiply 8 and 3. Return only the number, no explanation.'
const expected = '24'
let model = 'unknown'
let router = 'unknown'
const result = await run({
prompt,
mode: 'completion',
model: modelName,
path: TEST_BASE_PATH,
logs: TEST_LOGS_PATH,
preferences: TEST_PREFERENCES_PATH,
onRun: async (options) => {
model = options.model || 'unknown'
router = options.router || 'unknown'
return options
}
}) as string[]
const actual = result?.[0]?.trim() || ''
if (!actual) {
console.log(`Skipping test for model ${modelName} - no result returned`)
return
}
const passed = actual === expected
expect(actual).toEqual(expected)
// Add test result to array
testResults.push({
test: 'multiplication',
prompt,
result: result || [],
expected,
model,
router,
timestamp: new Date().toISOString(),
passed,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
})
// Write all results to the same file
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
})
it.each(models)('should divide two numbers with model %s', async (modelName) => {
const prompt = 'divide 15 by 3. Return only the number, no explanation.'
const expected = '5'
let model = 'unknown'
let router = 'unknown'
const result = await run({
prompt,
mode: 'completion',
model: modelName,
path: TEST_BASE_PATH,
logs: TEST_LOGS_PATH,
preferences: TEST_PREFERENCES_PATH,
onRun: async (options) => {
model = options.model || 'unknown'
router = options.router || 'unknown'
return options
}
}) as string[]
const actual = result?.[0]?.trim() || ''
if (!actual) {
console.log(`Skipping test for model ${modelName} - no result returned`)
return
}
const passed = actual === expected
expect(actual).toEqual(expected)
// Add test result to array
testResults.push({
test: 'division',
prompt,
result: result || [],
expected,
model,
router,
timestamp: new Date().toISOString(),
passed,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
})
// Write all results to the same file
write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
})
it('should generate markdown report', () => {
// Group results by test and model
const latestResults = new Map<string, Map<string, TestResult>>()
// Get only the latest result for each test+model combination
testResults.forEach(result => {
if (!latestResults.has(result.test)) {
latestResults.set(result.test, new Map())
}
const testMap = latestResults.get(result.test)!
const existingResult = testMap.get(result.model)
if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
testMap.set(result.model, result)
}
})
// Generate markdown report
let report = '# Basic Operations Test Results\n\n'
// First list failed tests
report += '## Failed Tests\n\n'
for (const [testName, modelResults] of latestResults) {
for (const [model, result] of modelResults) {
if (!result.passed) {
report += `### ${testName} - ${model}\n`
report += `- Prompt: \`${result.prompt}\`\n`
report += `- Expected: \`${result.expected}\`\n`
report += `- Actual: \`${result.result[0] || ''}\`\n`
report += `- Reason: ${result.reason}\n`
report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
}
}
}
// Then list passed tests
report += '## Passed Tests\n\n'
for (const [testName, modelResults] of latestResults) {
for (const [model, result] of modelResults) {
if (result.passed) {
report += `### ${testName} - ${model}\n`
report += `- Prompt: \`${result.prompt}\`\n`
report += `- Expected: \`${result.expected}\`\n`
report += `- Actual: \`${result.result[0] || ''}\`\n`
report += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
}
}
}
// Write report to file
const reportPath = path.resolve(__dirname, './basic-report.md')
write(reportPath, report)
// Verify report was written
expect(exists(reportPath) === 'file').toBe(true)
})
})

View File

@ -1,88 +1,44 @@
import * as path from 'node:path'
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
// Test configuration: API credentials for each router, read from the
// environment once when this module is loaded. An unset variable becomes an
// empty string rather than undefined.
export const TEST_CONFIG = {
openrouter: {
key: process.env.OPENROUTER_API_KEY || '',
org: process.env.OPENROUTER_ORG_ID || ''
},
openai: {
key: process.env.OPENAI_API_KEY || '',
org: process.env.OPENAI_ORG_ID || ''
}
}
// Model identifiers the test suites iterate over (passed to it.each).
export const models = [
E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
E_OPENAI_MODEL.MODEL_GPT_4
]
// Locations handed to run(): all resolved two directories above this file.
export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
export const TEST_TIMEOUT = 30000 // 30 seconds timeout for API calls
// One recorded outcome of a single prompt sent to a single model. Instances are
// serialised to the per-suite JSON log and read back to build the markdown report.
export interface TestResult {
// Logical test name used as the grouping key in reports.
test: string;
// Prompt text that was sent to the model.
prompt: string;
// Raw output array of the run; the report shows only the first element.
result: string[];
// Value the output is compared against.
expected: string;
// Model and router used for the run.
model: string;
router: string;
// Creation time of the record; parsed with new Date() when picking the latest result.
timestamp: string;
passed: boolean;
// Human-readable explanation for a failed run.
reason?: string;
// Normalised error details, in the shape produced by formatError.
error?: {
message: string;
code?: string;
type?: string;
details?: any;
};
// Elapsed time; the reports print it with an "ms" suffix.
duration?: number;
// Connection settings associated with the run.
// NOTE(review): apiKey ends up in the JSON log if populated; confirm that is intended.
config?: {
apiKey?: string;
baseURL?: string;
router?: string;
};
}
/**
 * Normalises an arbitrary thrown value into the `error` shape of a TestResult.
 * Every field falls back to a generic placeholder when the thrown value does
 * not provide it; `details` prefers the response payload, then the response,
 * then the thrown value itself.
 */
export const formatError = (error: any): TestResult['error'] => {
  const message = error?.message || 'Unknown error'
  const code = error?.code || 'UNKNOWN'
  const type = error?.type || error?.constructor?.name || 'Error'
  const details = error?.response?.data || error?.response || error
  return { message, code, type, details }
}
/**
 * Reports whether a model response carries no usable text: the array is
 * missing, has no elements, or contains only blank/whitespace entries.
 */
export const isEmptyResponse = (result: string[] | null | undefined): boolean => {
  if (!result || result.length === 0) {
    return true
  }
  // Non-empty as soon as one entry has visible characters
  return !result.some(entry => Boolean(entry) && entry.trim() !== '')
}
/**
 * Maps a model identifier to the router that serves it: identifiers starting
 * with "gpt-" go to 'openai', everything else to 'openrouter'.
 */
export const getRouterForModel = (model: string): string =>
  model.startsWith('gpt-') ? 'openai' : 'openrouter'
/**
 * Looks up the API key configured in TEST_CONFIG for the given router name.
 * Unrecognised router names yield an empty string.
 */
export const getApiKeyForRouter = (router: string): string => {
  if (router === 'openai') {
    return TEST_CONFIG.openai.key
  }
  if (router === 'openrouter') {
    return TEST_CONFIG.openrouter.key
  }
  return ''
}
/**
 * Verifies that both router API keys are present in TEST_CONFIG.
 *
 * @throws Error naming every environment variable whose key is empty
 */
export const validateConfig = () => {
  // Pair each environment variable name with the value read for it
  const required: Array<[string, string]> = [
    ['OPENROUTER_API_KEY', TEST_CONFIG.openrouter.key],
    ['OPENAI_API_KEY', TEST_CONFIG.openai.key]
  ]
  const missingKeys: string[] = required
    .filter(([, value]) => !value)
    .map(([name]) => name)
  if (missingKeys.length > 0) {
    throw new Error(`Missing required environment variables: ${missingKeys.join(', ')}`)
  }
}
import * as path from 'node:path'
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL, E_OPENROUTER_MODEL } from '../../src/index'
// Model identifiers the test suites iterate over (passed to it.each).
// The free DeepSeek entry is currently disabled.
export const models = [
//E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_2_0
]
// Locations handed to run(): all resolved two directories above this file.
export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
export const TEST_TIMEOUT = 30000 // 30 seconds timeout for API calls
// One recorded outcome of a single prompt sent to a single model. Instances are
// serialised to the per-suite JSON log and read back to build the markdown report.
export interface TestResult {
// Logical test name used as the grouping key in reports.
test: string;
// Prompt text that was sent to the model.
prompt: string;
// Raw output array of the run; the report shows only the first element.
result: string[];
// Value the output is compared against.
expected: string;
// Model and router used for the run.
model: string;
router: string;
// Creation time of the record; parsed with new Date() when picking the latest result.
timestamp: string;
passed: boolean;
// Human-readable explanation for a failed run.
reason?: string;
// Normalised error details, in the shape produced by formatError.
error?: {
message: string;
code?: string;
type?: string;
details?: any;
};
// Elapsed time; the reports print it with an "ms" suffix.
duration?: number
}
/**
 * Converts whatever was thrown into the `error` record stored on a TestResult.
 * Missing fields are replaced by generic placeholders; `details` is the
 * response payload when there is one, otherwise the response, otherwise the
 * thrown value as-is.
 */
export const formatError = (error: any): TestResult['error'] => {
  const response = error?.response
  return {
    message: error?.message || 'Unknown error',
    code: error?.code || 'UNKNOWN',
    type: error?.type || error?.constructor?.name || 'Error',
    details: response?.data || response || error
  }
}
/**
 * Tells whether a model response contains no usable text, i.e. it is
 * null/undefined, an empty array, or an array of blank strings only.
 */
export const isEmptyResponse = (result: string[] | null | undefined): boolean => {
  if (!result) {
    return true
  }
  for (const entry of result) {
    // The first entry with visible characters makes the response non-empty
    if (entry && entry.trim() !== '') {
      return false
    }
  }
  return true
}

View File

@ -0,0 +1,547 @@
# Format Test Results
## Failed Tests
### basic_structure - deepseek/deepseek-chat:free
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 885ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:36 PM
### basic_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 757ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:36 PM
### basic_structure - gpt-4
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 1043ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:37 PM
### basic_structure - anthropic/claude-3.7-sonnet
- Prompt: `return a greeting "hello" with count 42`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 1790ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token 'h', "hello 42" is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token 'h', "hello 42" is not valid JSON
- Timestamp: 4/1/2025, 1:23:05 PM
### basic_structure - openai/gpt-4
- Prompt: `Return a JSON object with a greeting "hello" and count 42. The response must be valid JSON with exactly these fields: { "greeting": string, "count": number }`
- Expected: `{"greeting":"hello","count":42}`
- Actual: `""`
- Duration: 1258ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:43 PM
### nested_structure - deepseek/deepseek-chat:free
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 655ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:38 PM
### nested_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 790ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:39 PM
### nested_structure - gpt-4
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 717ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:40 PM
### nested_structure - anthropic/claude-3.7-sonnet
- Prompt: `return user John age 30 with dark theme and notifications enabled`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 1189ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token '#', "# John's U"... is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token '#', "# John's U"... is not valid JSON
- Timestamp: 4/1/2025, 1:23:06 PM
### nested_structure - openai/gpt-4
- Prompt: `Return a JSON object with user John age 30, dark theme and notifications enabled. The response must be valid JSON with this structure: { "user": { "name": string, "age": number }, "settings": { "theme": string, "notifications": boolean } }`
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
- Actual: `""`
- Duration: 716ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:44 PM
### array_structure - deepseek/deepseek-chat:free
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 617ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:40 PM
### array_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 756ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:41 PM
### array_structure - gpt-4
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 1026ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:42 PM
### array_structure - anthropic/claude-3.7-sonnet
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 1190ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:23:08 PM
### array_structure - openai/gpt-4
- Prompt: `Return a JSON object with a list of 2 items. The response must be valid JSON with this structure: { "items": [{ "id": number, "name": string }] }. The first item should have id 1 and name "first", the second item should have id 2 and name "second".`
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
- Actual: `""`
- Duration: 703ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:44 PM
### enum_structure - deepseek/deepseek-chat:free
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 647ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:43 PM
### enum_structure - google/gemini-2.0-flash-exp:free
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 813ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:43 PM
### enum_structure - gpt-4
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 1138ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:45 PM
### enum_structure - anthropic/claude-3.7-sonnet
- Prompt: `return status success with message "Operation completed"`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 1728ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token '`', "```json
{
"... is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token '`', "```json
{
"... is not valid JSON
- Timestamp: 4/1/2025, 1:23:09 PM
### enum_structure - openai/gpt-4
- Prompt: `Return a JSON object with status "success" and message "Operation completed". The response must be valid JSON with this structure: { "status": "success" | "error" | "pending", "message": string }`
- Expected: `{"status":"success","message":"Operation completed"}`
- Actual: `""`
- Duration: 688ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:45 PM
### optional_fields - deepseek/deepseek-chat:free
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 676ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:45 PM
### optional_fields - google/gemini-2.0-flash-exp:free
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 884ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:46 PM
### optional_fields - gpt-4
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 669ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Reason: Failed to parse or validate response: [
{
"code": "invalid_type",
"expected": "object",
"received": "null",
"path": [],
"message": "Expected object, received null"
}
]
- Timestamp: 4/1/2025, 1:21:47 PM
### optional_fields - anthropic/claude-3.7-sonnet
- Prompt: `return name "John" with age 30 and email "john@example.com"`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 1576ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Failed to parse or validate response: Unexpected token '`', "```json
{
"... is not valid JSON
- Reason: Failed to parse or validate response: Unexpected token '`', "```json
{
"... is not valid JSON
- Timestamp: 4/1/2025, 1:23:11 PM
### optional_fields - openai/gpt-4
- Prompt: `Return a JSON object with name "John", age 30, and email "john@example.com". The response must be valid JSON with this structure: { "name": string, "age"?: number, "email"?: string }`
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
- Actual: `""`
- Duration: 682ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Invalid response from API
- Reason: Invalid response from API
- Timestamp: 4/1/2025, 1:32:46 PM
## Passed Tests
*No passed tests*

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,295 @@
import { describe, it, expect } from 'vitest'
import { run } from '../../src/index'
import * as path from 'node:path'
import { sync as write } from "@polymech/fs/write"
import { sync as read } from "@polymech/fs/read"
import { sync as exists } from "@polymech/fs/exists"
import { z } from 'zod'
import {
models_premium as models,
TEST_BASE_PATH,
TEST_LOGS_PATH,
TEST_PREFERENCES_PATH,
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse,
getRouterForModel,
getApiKeyForRouter
} from './commons'
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
describe('Structured Output Format', () => {
// Results shared by every test in this suite. Seeded from the JSON log of
// earlier runs when it exists and holds an array; otherwise starts empty.
const persisted = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
const testResults: TestResult[] = Array.isArray(persisted) ? persisted : []
/**
 * Runs one structured-output prompt against `modelName`, validates the first
 * returned item against `format`, compares it with `expected`, and appends
 * exactly one record describing the outcome to the JSON log.
 *
 * The record reflects the real outcome: `passed` is true only when the
 * assertion succeeded, `result` holds the raw model output, and `error` /
 * `reason` are filled in only on failure.
 *
 * @param prompt    text sent to the model
 * @param format    zod schema the parsed response must satisfy
 * @param expected  value the validated response must equal
 * @param testName  logical test name stored in the log
 * @param modelName model identifier passed to run()
 * @returns the recorded TestResult when the test passes
 * @throws the original error (timeout, empty response, parse/validation
 *         failure or assertion failure) after it has been recorded
 */
const runFormatTest = async (prompt: string, format: z.ZodType<any>, expected: any, testName: string, modelName: string) => {
  let model = 'unknown'
  let router = 'unknown'
  const startTime = Date.now()
  let timer: ReturnType<typeof setTimeout> | undefined
  // Start pessimistic; fields are filled in as the run progresses.
  // No `config`/API key is stored: credentials must never be written to the log,
  // and the router is already captured through onRun.
  const testResult: TestResult = {
    test: testName,
    prompt,
    result: [],
    expected,
    model,
    router,
    timestamp: '',
    passed: false
  }
  try {
    const result = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: modelName,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        format,
        onRun: async (options) => {
          model = options.model || 'unknown'
          router = options.router || 'unknown'
          return options
        }
      }),
      new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })
    ]) as any[]
    testResult.result = result || []

    // Detect an empty response before parsing, so the log says "empty response"
    // instead of a schema error about null. Checked inline because items may be
    // already-parsed objects, which have no trim().
    const isEmpty = !Array.isArray(result) ||
      result.every(item => item == null || (typeof item === 'string' && item.trim() === ''))
    if (isEmpty) {
      throw new Error('Model returned empty response')
    }

    const actual = result[0]
    let parsed: any
    try {
      parsed = typeof actual === 'string' ? JSON.parse(actual) : actual
      // Validate against the format schema
      parsed = format.parse(parsed)
    } catch (parseError) {
      throw new Error(`Failed to parse or validate response: ${(parseError as Error)?.message}`)
    }

    // Throws with a readable diff when the validated value differs from `expected`
    expect(parsed).toEqual(expected)
    testResult.passed = true
    return testResult
  } catch (e) {
    testResult.error = formatError(e)
    testResult.reason = testResult.error?.message || 'Unknown error occurred'
    throw e
  } finally {
    // Stop the timeout timer so it cannot fire after the race has settled
    clearTimeout(timer)
    // model/router are only known once onRun has fired
    testResult.model = model
    testResult.router = router
    testResult.timestamp = new Date().toISOString()
    testResult.duration = Date.now() - startTime
    testResults.push(testResult)
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  }
}
// Flat object with one string field and one number field
it.each(models)('should return basic structured output with model %s', async (modelName) => {
  const schema = z.object({ greeting: z.string(), count: z.number() })
  const expected = { greeting: 'hello', count: 42 }
  await runFormatTest('return a greeting "hello" with count 42', schema, expected, 'basic_structure', modelName)
})
// Two nested objects: a user record and a settings record
it.each(models)('should handle nested structures with model %s', async (modelName) => {
  const userSchema = z.object({ name: z.string(), age: z.number() })
  const settingsSchema = z.object({ theme: z.string(), notifications: z.boolean() })
  const schema = z.object({ user: userSchema, settings: settingsSchema })
  const expected = {
    user: { name: 'John', age: 30 },
    settings: { theme: 'dark', notifications: true }
  }
  await runFormatTest(
    'return user John age 30 with dark theme and notifications enabled',
    schema,
    expected,
    'nested_structure',
    modelName
  )
})
// Object wrapping an array of { id, name } items
it.each(models)('should handle arrays with model %s', async (modelName) => {
  const itemSchema = z.object({ id: z.number(), name: z.string() })
  const schema = z.object({ items: z.array(itemSchema) })
  const expected = {
    items: [
      { id: 1, name: 'first' },
      { id: 2, name: 'second' }
    ]
  }
  await runFormatTest(
    'return a list of 2 items with ids 1 and 2, names "first" and "second"',
    schema,
    expected,
    'array_structure',
    modelName
  )
})
// Status restricted to a fixed set of values, plus a free-text message
it.each(models)('should handle enums with model %s', async (modelName) => {
  const schema = z.object({
    status: z.enum(['success', 'error', 'pending']),
    message: z.string()
  })
  const expected = { status: 'success', message: 'Operation completed' }
  await runFormatTest(
    'return status success with message "Operation completed"',
    schema,
    expected,
    'enum_structure',
    modelName
  )
})
// Required name with optional age and optional, email-validated address
it.each(models)('should handle optional fields with model %s', async (modelName) => {
  const schema = z.object({
    name: z.string(),
    age: z.number().optional(),
    email: z.string().email().optional()
  })
  const expected = { name: 'John', age: 30, email: 'john@example.com' }
  await runFormatTest(
    'return name "John" with age 30 and email "john@example.com"',
    schema,
    expected,
    'optional_fields',
    modelName
  )
})
// Renders the newest result per (test, model) pair into format-report.md,
// failed runs first, and checks that the file exists afterwards.
it('should generate markdown report', () => {
  // Newest record per test name, then per model, in first-seen order
  const newestByTest = new Map<string, Map<string, TestResult>>()
  for (const entry of testResults) {
    let perModel = newestByTest.get(entry.test)
    if (!perModel) {
      perModel = new Map()
      newestByTest.set(entry.test, perModel)
    }
    const known = perModel.get(entry.model)
    if (!known || new Date(entry.timestamp) > new Date(known.timestamp)) {
      perModel.set(entry.model, entry)
    }
  }

  // Split the newest records into the two report sections, preserving order
  const failed: TestResult[] = []
  const succeeded: TestResult[] = []
  for (const perModel of newestByTest.values()) {
    for (const entry of perModel.values()) {
      const bucket = entry.passed ? succeeded : failed
      bucket.push(entry)
    }
  }

  // Lines common to both sections
  const headerLines = (entry: TestResult): string[] => [
    `### ${entry.test} - ${entry.model}`,
    `- Prompt: \`${entry.prompt}\``,
    `- Expected: \`${JSON.stringify(entry.expected)}\``,
    `- Actual: \`${JSON.stringify(entry.result[0] || '')}\``,
    `- Duration: ${entry.duration}ms`
  ]
  const timestampLine = (entry: TestResult): string =>
    `- Timestamp: ${new Date(entry.timestamp).toLocaleString()}`

  let failedSection = ''
  for (const entry of failed) {
    const lines = headerLines(entry)
    if (entry.error) {
      lines.push(`- Error Type: ${entry.error.type}`)
      lines.push(`- Error Code: ${entry.error.code}`)
      lines.push(`- Error Message: ${entry.error.message}`)
    }
    lines.push(`- Reason: ${entry.reason}`)
    lines.push(timestampLine(entry))
    // Every entry ends with a blank line
    failedSection += lines.join('\n') + '\n\n'
  }

  let passedSection = ''
  for (const entry of succeeded) {
    const lines = headerLines(entry)
    lines.push(timestampLine(entry))
    passedSection += lines.join('\n') + '\n\n'
  }

  // Assemble the document; an empty section gets a placeholder line
  const report = [
    '# Format Test Results\n\n',
    '## Failed Tests\n\n',
    failedSection || '*No failed tests*\n\n',
    '## Passed Tests\n\n',
    passedSection || '*No passed tests*\n\n'
  ].join('')

  // Write report to file
  const reportPath = path.resolve(__dirname, './format-report.md')
  write(reportPath, report)
  // Verify report was written
  expect(exists(reportPath) === 'file').toBe(true)
})
})

View File

@ -32,6 +32,14 @@
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:05:52 PM
### german - anthropic/claude-2.0
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
- Expected: `hallo`
- Actual: ``
- Duration: 1253ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:47:26 PM
### spanish - deepseek/deepseek-chat:free
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
@ -62,6 +70,14 @@
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:05:55 PM
### spanish - anthropic/claude-2.0
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
- Expected: `sí`
- Actual: ``
- Duration: 932ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:47:27 PM
### french - deepseek/deepseek-chat:free
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
@ -92,6 +108,14 @@
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:05:56 PM
### french - anthropic/claude-2.0
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
- Expected: `non`
- Actual: ``
- Duration: 864ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:47:28 PM
## Passed Tests
### german_translation - deepseek/deepseek-chat:free

View File

@ -700,5 +700,112 @@
"passed": false,
"duration": 1341,
"reason": "Unknown error occurred"
},
{
"test": "german",
"prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
"result": [],
"expected": "hallo",
"model": "anthropic/claude-2.0",
"router": "openrouter",
"timestamp": "2025-04-01T11:47:02.933Z",
"passed": false,
"duration": 1416,
"error": {
"message": "getRouterForModel is not defined",
"code": "UNKNOWN",
"type": "ReferenceError",
"details": {
"stack": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:106:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "getRouterForModel is not defined",
"stackStr": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:106:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "ReferenceError",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "getRouterForModel is not defined"
},
{
"test": "spanish",
"prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
"result": [],
"expected": "sí",
"model": "anthropic/claude-2.0",
"router": "openrouter",
"timestamp": "2025-04-01T11:47:04.007Z",
"passed": false,
"duration": 1071,
"error": {
"message": "getRouterForModel is not defined",
"code": "UNKNOWN",
"type": "ReferenceError",
"details": {
"stack": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:115:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "getRouterForModel is not defined",
"stackStr": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:115:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"nameStr": "ReferenceError",
"expected": "undefined",
"actual": "undefined"
}
},
"reason": "getRouterForModel is not defined"
},
{
"test": "french",
"prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
"result": [],
"expected": "non",
"model": "anthropic/claude-2.0",
"router": "openrouter",
"timestamp": "2025-04-01T11:47:05.064Z",
"passed": false,
"duration": 1056,
"error": {
"message": "getRouterForModel is not defined",
"code": "UNKNOWN",
"type": "ReferenceError",
"details": {
"stack": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:124:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
"message": "getRouterForModel is not defined"
}
},
"reason": "getRouterForModel is not defined"
},
{
"test": "german",
"prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
"result": [],
"expected": "hallo",
"model": "anthropic/claude-2.0",
"router": "openrouter",
"timestamp": "2025-04-01T11:47:26.798Z",
"passed": false,
"duration": 1253,
"reason": "Unknown error occurred"
},
{
"test": "spanish",
"prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
"result": [],
"expected": "sí",
"model": "anthropic/claude-2.0",
"router": "openrouter",
"timestamp": "2025-04-01T11:47:27.731Z",
"passed": false,
"duration": 932,
"reason": "Unknown error occurred"
},
{
"test": "french",
"prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
"result": [],
"expected": "non",
"model": "anthropic/claude-2.0",
"router": "openrouter",
"timestamp": "2025-04-01T11:47:28.596Z",
"passed": false,
"duration": 864,
"reason": "Unknown error occurred"
}
]

View File

@ -12,9 +12,7 @@ import {
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse,
getRouterForModel,
getApiKeyForRouter
isEmptyResponse
} from './commons'
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
@ -75,11 +73,7 @@ describe('Language Capabilities', () => {
timestamp: new Date().toISOString(),
passed,
duration: Date.now() - startTime,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
config: {
router: getRouterForModel(modelName),
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
}
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
}
} catch (e) {
error = formatError(e)
@ -96,11 +90,7 @@ describe('Language Capabilities', () => {
passed: false,
duration: Date.now() - startTime,
error,
reason: error?.message || 'Unknown error occurred',
config: {
router: getRouterForModel(modelName),
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
}
reason: error?.message || 'Unknown error occurred'
}
testResults.push(testResult)

View File

@ -2,12 +2,127 @@
## Failed Tests
### addition - deepseek/deepseek-chat:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 913ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:42:47 PM
### addition - google/gemini-2.0-flash-exp:free
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 1112ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:45:50 PM
### addition - gpt-4
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 1038ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:42:26 PM
### addition - anthropic/claude-2.0
- Prompt: `add 5 and 3. Return only the number, no explanation.`
- Expected: `8`
- Actual: ``
- Duration: 1218ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:46:27 PM
### multiplication - deepseek/deepseek-chat:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 636ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:42:48 PM
### multiplication - google/gemini-2.0-flash-exp:free
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 764ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:45:51 PM
### multiplication - gpt-4
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 1052ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:42:28 PM
### multiplication - anthropic/claude-2.0
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
- Expected: `24`
- Actual: ``
- Duration: 911ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:46:27 PM
### division - deepseek/deepseek-chat:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 648ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:42:50 PM
### division - google/gemini-2.0-flash-exp:free
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 829ms
- Error Type: Error
- Error Code: UNKNOWN
- Error Message: Model returned empty response
- Reason: Model returned empty response
- Timestamp: 4/1/2025, 1:45:52 PM
### division - gpt-4
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 959ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:42:31 PM
### division - anthropic/claude-2.0
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
- Expected: `5`
- Actual: ``
- Duration: 1485ms
- Reason: Unknown error occurred
- Timestamp: 4/1/2025, 1:46:29 PM
## Passed Tests
### factorial - deepseek/deepseek-chat:free
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:09 PM
### factorial - google/gemini-2.0-flash-exp:free
@ -15,18 +130,21 @@
- Expected: `120`
- Actual: `120
`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:10 PM
### factorial - gpt-4
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
- Expected: `120`
- Actual: `120`
- Duration: undefinedms
- Timestamp: 4/1/2025, 1:03:25 PM
### fibonacci - deepseek/deepseek-chat:free
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0,1,1,2,3`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:13 PM
### fibonacci - google/gemini-2.0-flash-exp:free
@ -34,29 +152,34 @@
- Expected: `0,1,1,2,3`
- Actual: `0,1,1,2,3
`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:14 PM
### fibonacci - gpt-4
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
- Expected: `0,1,1,2,3`
- Actual: `0, 1, 1, 2, 3`
- Duration: undefinedms
- Timestamp: 4/1/2025, 1:03:27 PM
### quadratic - deepseek/deepseek-chat:free
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:59:19 PM
### quadratic - google/gemini-2.0-flash-exp:free
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Duration: undefinedms
- Timestamp: 4/1/2025, 12:46:13 PM
### quadratic - gpt-4
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
- Expected: `[-3,-2]`
- Actual: `[-2, -3]`
- Duration: undefinedms
- Timestamp: 4/1/2025, 1:03:30 PM

File diff suppressed because it is too large Load Diff

View File

@ -12,12 +12,9 @@ import {
TEST_TIMEOUT,
TestResult,
formatError,
isEmptyResponse,
getRouterForModel,
getApiKeyForRouter
isEmptyResponse
} from './commons'
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
const TEST_LOG_PATH = path.resolve(__dirname, './math.json')
@ -76,10 +73,6 @@ describe('Math Capabilities', () => {
passed,
duration: Date.now() - startTime,
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
config: {
router: getRouterForModel(modelName),
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
}
}
} catch (e) {
error = formatError(e)
@ -96,11 +89,7 @@ describe('Math Capabilities', () => {
passed: false,
duration: Date.now() - startTime,
error,
reason: error?.message || 'Unknown error occurred',
config: {
router: getRouterForModel(modelName),
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
}
reason: error?.message || 'Unknown error occurred'
}
testResults.push(testResult)