tests:language - merci, habsch selbstgemacht :)
This commit is contained in:
parent
75af5d1a26
commit
32469204ed
@ -1,5 +1,5 @@
|
||||
{
|
||||
"model": "gpt-4",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
|
||||
@ -48,6 +48,69 @@
|
||||
- Reason: undefined
|
||||
- Timestamp: 4/1/2025, 12:26:35 PM
|
||||
|
||||
### hello - deepseek/deepseek-chat:free
|
||||
- Prompt: `say "hello"`
|
||||
- Expected: `hello`
|
||||
- Actual: ``
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:36:37 PM
|
||||
|
||||
### hello - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `say "hello"`
|
||||
- Expected: `hello`
|
||||
- Actual: ``
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:36:37 PM
|
||||
|
||||
### hello - gpt-4
|
||||
- Prompt: `say "hello"`
|
||||
- Expected: `hello`
|
||||
- Actual: ``
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:36:42 PM
|
||||
|
||||
### goodbye - deepseek/deepseek-chat:free
|
||||
- Prompt: `say "goodbye"`
|
||||
- Expected: `goodbye`
|
||||
- Actual: ``
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:36:42 PM
|
||||
|
||||
### goodbye - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `say "goodbye"`
|
||||
- Expected: `goodbye`
|
||||
- Actual: ``
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:36:43 PM
|
||||
|
||||
### goodbye - gpt-4
|
||||
- Prompt: `say "goodbye"`
|
||||
- Expected: `goodbye`
|
||||
- Actual: ``
|
||||
- Reason: expected 'goodbye.' to deeply equal 'goodbye'
|
||||
- Timestamp: 4/1/2025, 1:36:44 PM
|
||||
|
||||
### yes - deepseek/deepseek-chat:free
|
||||
- Prompt: `say "yes"`
|
||||
- Expected: `yes`
|
||||
- Actual: ``
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:36:45 PM
|
||||
|
||||
### yes - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `say "yes"`
|
||||
- Expected: `yes`
|
||||
- Actual: ``
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:36:45 PM
|
||||
|
||||
### yes - gpt-4
|
||||
- Prompt: `say "yes"`
|
||||
- Expected: `yes`
|
||||
- Actual: ``
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:36:46 PM
|
||||
|
||||
## Passed Tests
|
||||
|
||||
### addition - deepseek/deepseek-chat:free
|
||||
@ -67,7 +130,7 @@
|
||||
- Prompt: `add 5 and 3. Return only the number, no explanation.`
|
||||
- Expected: `8`
|
||||
- Actual: `8`
|
||||
- Timestamp: 4/1/2025, 1:02:55 PM
|
||||
- Timestamp: 4/1/2025, 1:39:04 PM
|
||||
|
||||
### multiplication - deepseek/deepseek-chat:free
|
||||
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
|
||||
@ -86,7 +149,7 @@
|
||||
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
|
||||
- Expected: `24`
|
||||
- Actual: `24`
|
||||
- Timestamp: 4/1/2025, 1:02:57 PM
|
||||
- Timestamp: 4/1/2025, 1:39:06 PM
|
||||
|
||||
### division - deepseek/deepseek-chat:free
|
||||
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
|
||||
@ -105,5 +168,5 @@
|
||||
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
|
||||
- Expected: `5`
|
||||
- Actual: `5`
|
||||
- Timestamp: 4/1/2025, 1:03:00 PM
|
||||
- Timestamp: 4/1/2025, 1:39:08 PM
|
||||
|
||||
|
||||
@ -816,5 +816,281 @@
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:03:00.064Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "hello",
|
||||
"prompt": "say \"hello\"",
|
||||
"result": [],
|
||||
"expected": "hello",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:37.192Z",
|
||||
"passed": false,
|
||||
"duration": 1107,
|
||||
"error": {
|
||||
"message": "Model returned empty response",
|
||||
"code": "UNKNOWN",
|
||||
"type": "Error",
|
||||
"details": {
|
||||
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "Model returned empty response",
|
||||
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "Error",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "Model returned empty response",
|
||||
"config": {
|
||||
"router": "openrouter",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "hello",
|
||||
"prompt": "say \"hello\"",
|
||||
"result": [],
|
||||
"expected": "hello",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:37.957Z",
|
||||
"passed": false,
|
||||
"duration": 762,
|
||||
"error": {
|
||||
"message": "Model returned empty response",
|
||||
"code": "UNKNOWN",
|
||||
"type": "Error",
|
||||
"details": {
|
||||
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "Model returned empty response",
|
||||
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:110:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "Error",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "Model returned empty response",
|
||||
"config": {
|
||||
"router": "openrouter",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "hello",
|
||||
"prompt": "say \"hello\"",
|
||||
"result": [],
|
||||
"expected": "hello",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:42.078Z",
|
||||
"passed": false,
|
||||
"duration": 4120,
|
||||
"reason": "Unknown error occurred",
|
||||
"config": {
|
||||
"router": "openai",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "goodbye",
|
||||
"prompt": "say \"goodbye\"",
|
||||
"result": [],
|
||||
"expected": "goodbye",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:42.695Z",
|
||||
"passed": false,
|
||||
"duration": 616,
|
||||
"error": {
|
||||
"message": "Model returned empty response",
|
||||
"code": "UNKNOWN",
|
||||
"type": "Error",
|
||||
"details": {
|
||||
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "Model returned empty response",
|
||||
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "Error",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "Model returned empty response",
|
||||
"config": {
|
||||
"router": "openrouter",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "goodbye",
|
||||
"prompt": "say \"goodbye\"",
|
||||
"result": [],
|
||||
"expected": "goodbye",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:43.462Z",
|
||||
"passed": false,
|
||||
"duration": 765,
|
||||
"error": {
|
||||
"message": "Model returned empty response",
|
||||
"code": "UNKNOWN",
|
||||
"type": "Error",
|
||||
"details": {
|
||||
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "Model returned empty response",
|
||||
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "Error",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "Model returned empty response",
|
||||
"config": {
|
||||
"router": "openrouter",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "goodbye",
|
||||
"prompt": "say \"goodbye\"",
|
||||
"result": [],
|
||||
"expected": "goodbye",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:44.537Z",
|
||||
"passed": false,
|
||||
"duration": 1073,
|
||||
"error": {
|
||||
"message": "expected 'goodbye.' to deeply equal 'goodbye'",
|
||||
"code": "UNKNOWN",
|
||||
"type": "AssertionError",
|
||||
"details": {
|
||||
"message": "expected 'goodbye.' to deeply equal 'goodbye'",
|
||||
"actual": "goodbye.",
|
||||
"expected": "goodbye",
|
||||
"showDiff": true,
|
||||
"operator": "strictEqual",
|
||||
"stackStr": "AssertionError: expected 'goodbye.' to deeply equal 'goodbye'\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:64:22)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "AssertionError",
|
||||
"diff": "Expected: \u001b[32m\"goodbye\"\u001b[39m\nReceived: \u001b[31m\"goodbye\u001b[7m.\u001b[27m\"\u001b[39m",
|
||||
"name": "AssertionError",
|
||||
"ok": false,
|
||||
"stack": "AssertionError: expected 'goodbye.' to deeply equal 'goodbye'\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:64:22)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:119:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11"
|
||||
}
|
||||
},
|
||||
"reason": "expected 'goodbye.' to deeply equal 'goodbye'",
|
||||
"config": {
|
||||
"router": "openai",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "yes",
|
||||
"prompt": "say \"yes\"",
|
||||
"result": [],
|
||||
"expected": "yes",
|
||||
"model": "deepseek/deepseek-chat:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:45.193Z",
|
||||
"passed": false,
|
||||
"duration": 654,
|
||||
"error": {
|
||||
"message": "Model returned empty response",
|
||||
"code": "UNKNOWN",
|
||||
"type": "Error",
|
||||
"details": {
|
||||
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "Model returned empty response",
|
||||
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "Error",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "Model returned empty response",
|
||||
"config": {
|
||||
"router": "openrouter",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "yes",
|
||||
"prompt": "say \"yes\"",
|
||||
"result": [],
|
||||
"expected": "yes",
|
||||
"model": "google/gemini-2.0-flash-exp:free",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:45.954Z",
|
||||
"passed": false,
|
||||
"duration": 760,
|
||||
"error": {
|
||||
"message": "Model returned empty response",
|
||||
"code": "UNKNOWN",
|
||||
"type": "Error",
|
||||
"details": {
|
||||
"stack": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "Model returned empty response",
|
||||
"stackStr": "Error: Model returned empty response\n at runBasicTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:61:15)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\basic.test.ts:128:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "Error",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "Model returned empty response",
|
||||
"config": {
|
||||
"router": "openrouter",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "yes",
|
||||
"prompt": "say \"yes\"",
|
||||
"result": [],
|
||||
"expected": "yes",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:36:46.806Z",
|
||||
"passed": false,
|
||||
"duration": 851,
|
||||
"reason": "Unknown error occurred",
|
||||
"config": {
|
||||
"router": "openai",
|
||||
"apiKey": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"test": "addition",
|
||||
"prompt": "add 5 and 3. Return only the number, no explanation.",
|
||||
"result": [
|
||||
"8"
|
||||
],
|
||||
"expected": "8",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:39:04.078Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "multiplication",
|
||||
"prompt": "multiply 8 and 3. Return only the number, no explanation.",
|
||||
"result": [
|
||||
"24"
|
||||
],
|
||||
"expected": "24",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:39:06.572Z",
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"test": "division",
|
||||
"prompt": "divide 15 by 3. Return only the number, no explanation.",
|
||||
"result": [
|
||||
"5"
|
||||
],
|
||||
"expected": "5",
|
||||
"model": "gpt-4",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:39:08.868Z",
|
||||
"passed": true
|
||||
}
|
||||
]
|
||||
@ -1,210 +1,210 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { run } from '../../src/index'
|
||||
import * as path from 'node:path'
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import {
|
||||
models,
|
||||
TEST_BASE_PATH,
|
||||
TEST_LOGS_PATH,
|
||||
TEST_PREFERENCES_PATH,
|
||||
TEST_TIMEOUT,
|
||||
TestResult,
|
||||
formatError,
|
||||
isEmptyResponse,
|
||||
getRouterForModel,
|
||||
getApiKeyForRouter
|
||||
} from './commons'
|
||||
|
||||
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
|
||||
|
||||
describe('Basic Capabilities', () => {
|
||||
// Seed the in-memory log from the results file of earlier runs, if one exists.
// Anything read back that is not an array is ignored and the log starts empty.
const persistedResults = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
let testResults: TestResult[] = Array.isArray(persistedResults) ? persistedResults : []
|
||||
|
||||
/**
 * Sends `prompt` to `modelName` through `run`, compares the first returned
 * entry (trimmed, lower-cased) with `expected`, and appends exactly one
 * record describing the outcome to `testResults` / TEST_LOG_PATH.
 *
 * @param prompt    Text sent to the model.
 * @param expected  Expected answer; compared against the normalised response.
 * @param testName  Label stored in the record's `test` field.
 * @param modelName Model identifier passed to `run`.
 * @returns The record for a passing run.
 * @throws  Re-throws the original error when the call times out, the response
 *          is empty, or the assertion fails, so the surrounding test still fails.
 */
const runBasicTest = async (prompt: string, expected: string, testName: string, modelName: string) => {
  let model = 'unknown'
  let router = 'unknown'
  const startTime = Date.now()
  let error: TestResult['error'] | undefined
  let response: string[] = []
  let passed = false
  let timer: ReturnType<typeof setTimeout> | undefined

  // Builds a record from the state gathered so far; used both for the return
  // value and for the persisted log entry so the two always agree.
  const snapshot = (): TestResult => ({
    test: testName,
    prompt,
    result: response,
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    duration: Date.now() - startTime,
    error,
    reason: passed ? undefined : (error?.message || 'Unknown error occurred'),
    config: {
      router: getRouterForModel(modelName),
      // NOTE(review): this value is written to the log file on disk; presumably
      // a secret, so confirm it is redacted before the log is committed.
      apiKey: getApiKeyForRouter(getRouterForModel(modelName))
    }
  })

  try {
    const raw = await Promise.race([
      run({
        prompt,
        mode: 'completion',
        model: modelName,
        path: TEST_BASE_PATH,
        logs: TEST_LOGS_PATH,
        preferences: TEST_PREFERENCES_PATH,
        // Capture the model/router that `run` reports for this call.
        onRun: async (options) => {
          model = options.model || 'unknown'
          router = options.router || 'unknown'
          return options
        }
      }),
      new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })
    ]) as string[]

    // Keep the response so that a failing record shows what the model actually said.
    response = raw || []
    const actual = raw?.[0]?.trim()?.toLowerCase() || ''

    if (isEmptyResponse(raw)) {
      throw new Error('Model returned empty response')
    }

    expect(actual).toEqual(expected)

    // Only reached when the response is non-empty and the assertion held.
    passed = true
    return snapshot()
  } catch (e) {
    error = formatError(e)
    throw e
  } finally {
    // Stop the timeout timer so it does not stay pending after the call settled.
    clearTimeout(timer)

    // Persist one record per run, reflecting the real outcome (pass or fail).
    testResults.push(snapshot())
    write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
  }
}
|
||||
|
||||
// Creates the per-model test handler: send `prompt` and expect `word` back.
// The word doubles as the test name stored in the result record.
const echoTest = (prompt: string, word: string) => async (modelName: string) => {
  await runBasicTest(prompt, word, word, modelName)
}

it.each(models)('should respond to "hello" with model %s', echoTest('say "hello"', 'hello'))

it.each(models)('should respond to "goodbye" with model %s', echoTest('say "goodbye"', 'goodbye'))

it.each(models)('should respond to "yes" with model %s', echoTest('say "yes"', 'yes'))
|
||||
|
||||
// Renders the newest result per (test, model) pair into a markdown report,
// writes it next to this file and checks that the file exists afterwards.
it('should generate markdown report', () => {
  // test name -> (model name -> newest result recorded for that pair)
  const newestByTest = new Map<string, Map<string, TestResult>>()
  for (const entry of testResults) {
    let perModel = newestByTest.get(entry.test)
    if (!perModel) {
      perModel = new Map()
      newestByTest.set(entry.test, perModel)
    }
    const previous = perModel.get(entry.model)
    if (!previous || new Date(entry.timestamp) > new Date(previous.timestamp)) {
      perModel.set(entry.model, entry)
    }
  }

  // Flatten once so both sections walk the pairs in the same (insertion) order
  const rows: Array<[string, string, TestResult]> = []
  for (const [testName, perModel] of newestByTest) {
    for (const [model, outcome] of perModel) {
      rows.push([testName, model, outcome])
    }
  }

  // Lines that failed and passed entries have in common
  const entryHead = (testName: string, model: string, outcome: TestResult): string[] => [
    `### ${testName} - ${model}\n`,
    `- Prompt: \`${outcome.prompt}\`\n`,
    `- Expected: \`${outcome.expected}\`\n`,
    `- Actual: \`${outcome.result[0] || ''}\`\n`,
    `- Duration: ${outcome.duration}ms\n`
  ]
  const stampLine = (outcome: TestResult): string =>
    `- Timestamp: ${new Date(outcome.timestamp).toLocaleString()}\n\n`

  const chunks: string[] = ['# Basic Test Results\n\n']

  // Failed entries come first and carry the error details and the reason
  chunks.push('## Failed Tests\n\n')
  const failures = rows.filter(([, , outcome]) => !outcome.passed)
  for (const [testName, model, outcome] of failures) {
    chunks.push(...entryHead(testName, model, outcome))
    if (outcome.error) {
      chunks.push(
        `- Error Type: ${outcome.error.type}\n`,
        `- Error Code: ${outcome.error.code}\n`,
        `- Error Message: ${outcome.error.message}\n`
      )
    }
    chunks.push(`- Reason: ${outcome.reason}\n`, stampLine(outcome))
  }
  if (failures.length === 0) {
    chunks.push('*No failed tests*\n\n')
  }

  // Passed entries follow
  chunks.push('## Passed Tests\n\n')
  const successes = rows.filter(([, , outcome]) => outcome.passed)
  for (const [testName, model, outcome] of successes) {
    chunks.push(...entryHead(testName, model, outcome), stampLine(outcome))
  }
  if (successes.length === 0) {
    chunks.push('*No passed tests*\n\n')
  }

  // Write the report, then verify it landed on disk
  const reportPath = path.resolve(__dirname, './basic-report.md')
  write(reportPath, chunks.join(''))
  expect(exists(reportPath) === 'file').toBe(true)
})
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { run } from '../../src/index'
|
||||
import * as path from 'node:path'
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { models, TEST_BASE_PATH, TEST_LOGS_PATH, TEST_PREFERENCES_PATH, TestResult } from './commons'
|
||||
|
||||
// JSON file next to this test file; the suite reads earlier results from it and rewrites it after each recorded test.
const TEST_LOG_PATH = path.resolve(__dirname, './basic.json')
|
||||
|
||||
describe('Basic Operations', () => {
|
||||
// Seed the in-memory log with whatever an earlier run persisted; anything that is not an array is ignored
const persisted = exists(TEST_LOG_PATH) ? read(TEST_LOG_PATH, 'json') : undefined
const testResults: TestResult[] = Array.isArray(persisted) ? persisted : []
|
||||
|
||||
// Asks each model to add two numbers and appends the outcome (pass or fail) to the shared JSON log.
it.each(models)('should add two numbers with model %s', async (modelName) => {
  const prompt = 'add 5 and 3. Return only the number, no explanation.'
  const expected = '8'
  // Overwritten by onRun with the model/router found in the options that run() hands to the callback
  let model = 'unknown'
  let router = 'unknown'

  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]

  const actual = result?.[0]?.trim() || ''
  if (!actual) {
    // Existing best-effort behaviour: an empty reply is treated as a skip, not as a failure
    console.log(`Skipping test for model ${modelName} - no result returned`)
    return
  }
  const passed = actual === expected

  // Persist the outcome BEFORE asserting. expect() throws on a mismatch, so with the
  // assertion first a failing result (passed: false plus reason) was never logged.
  testResults.push({
    test: 'addition',
    prompt,
    result: result || [],
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
  })

  // Write all results to the same file
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

  expect(actual).toEqual(expected)
})
|
||||
|
||||
// Asks each model to multiply two numbers and appends the outcome (pass or fail) to the shared JSON log.
it.each(models)('should multiply two numbers with model %s', async (modelName) => {
  const prompt = 'multiply 8 and 3. Return only the number, no explanation.'
  const expected = '24'
  // Overwritten by onRun with the model/router found in the options that run() hands to the callback
  let model = 'unknown'
  let router = 'unknown'

  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]

  const actual = result?.[0]?.trim() || ''
  if (!actual) {
    // Existing best-effort behaviour: an empty reply is treated as a skip, not as a failure
    console.log(`Skipping test for model ${modelName} - no result returned`)
    return
  }
  const passed = actual === expected

  // Persist the outcome BEFORE asserting. expect() throws on a mismatch, so with the
  // assertion first a failing result (passed: false plus reason) was never logged.
  testResults.push({
    test: 'multiplication',
    prompt,
    result: result || [],
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
  })

  // Write all results to the same file
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

  expect(actual).toEqual(expected)
})
|
||||
|
||||
// Asks each model to divide two numbers and appends the outcome (pass or fail) to the shared JSON log.
it.each(models)('should divide two numbers with model %s', async (modelName) => {
  const prompt = 'divide 15 by 3. Return only the number, no explanation.'
  const expected = '5'
  // Overwritten by onRun with the model/router found in the options that run() hands to the callback
  let model = 'unknown'
  let router = 'unknown'

  const result = await run({
    prompt,
    mode: 'completion',
    model: modelName,
    path: TEST_BASE_PATH,
    logs: TEST_LOGS_PATH,
    preferences: TEST_PREFERENCES_PATH,
    onRun: async (options) => {
      model = options.model || 'unknown'
      router = options.router || 'unknown'
      return options
    }
  }) as string[]

  const actual = result?.[0]?.trim() || ''
  if (!actual) {
    // Existing best-effort behaviour: an empty reply is treated as a skip, not as a failure
    console.log(`Skipping test for model ${modelName} - no result returned`)
    return
  }
  const passed = actual === expected

  // Persist the outcome BEFORE asserting. expect() throws on a mismatch, so with the
  // assertion first a failing result (passed: false plus reason) was never logged.
  testResults.push({
    test: 'division',
    prompt,
    result: result || [],
    expected,
    model,
    router,
    timestamp: new Date().toISOString(),
    passed,
    reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
  })

  // Write all results to the same file
  write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))

  expect(actual).toEqual(expected)
})
|
||||
|
||||
// Renders the newest result per (test, model) pair into a markdown report,
// writes it next to this file and checks that the file exists afterwards.
it('should generate markdown report', () => {
  // test name -> (model name -> newest result recorded for that pair)
  const newestByTest = new Map<string, Map<string, TestResult>>()
  for (const entry of testResults) {
    let perModel = newestByTest.get(entry.test)
    if (!perModel) {
      perModel = new Map()
      newestByTest.set(entry.test, perModel)
    }
    const previous = perModel.get(entry.model)
    if (!previous || new Date(entry.timestamp) > new Date(previous.timestamp)) {
      perModel.set(entry.model, entry)
    }
  }

  // Flatten once so both sections walk the pairs in the same (insertion) order
  const rows: Array<[string, string, TestResult]> = []
  for (const [testName, perModel] of newestByTest) {
    for (const [model, outcome] of perModel) {
      rows.push([testName, model, outcome])
    }
  }

  // Lines that failed and passed entries have in common
  const entryHead = (testName: string, model: string, outcome: TestResult): string[] => [
    `### ${testName} - ${model}\n`,
    `- Prompt: \`${outcome.prompt}\`\n`,
    `- Expected: \`${outcome.expected}\`\n`,
    `- Actual: \`${outcome.result[0] || ''}\`\n`
  ]
  const stampLine = (outcome: TestResult): string =>
    `- Timestamp: ${new Date(outcome.timestamp).toLocaleString()}\n\n`

  const chunks: string[] = ['# Basic Operations Test Results\n\n']

  // Failed entries come first and additionally carry the reason
  chunks.push('## Failed Tests\n\n')
  for (const [testName, model, outcome] of rows) {
    if (outcome.passed) continue
    chunks.push(
      ...entryHead(testName, model, outcome),
      `- Reason: ${outcome.reason}\n`,
      stampLine(outcome)
    )
  }

  // Passed entries follow
  chunks.push('## Passed Tests\n\n')
  for (const [testName, model, outcome] of rows) {
    if (!outcome.passed) continue
    chunks.push(...entryHead(testName, model, outcome), stampLine(outcome))
  }

  // Write the report, then verify it landed on disk
  const reportPath = path.resolve(__dirname, './basic-report.md')
  write(reportPath, chunks.join(''))
  expect(exists(reportPath) === 'file').toBe(true)
})
|
||||
})
|
||||
@ -1,88 +1,44 @@
|
||||
import * as path from 'node:path'
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
||||
|
||||
// Test configuration
|
||||
// Credentials per router, read once from the environment; an unset variable becomes ''.
export const TEST_CONFIG = (() => {
  const env = process.env
  return {
    openrouter: {
      key: env.OPENROUTER_API_KEY || '',
      org: env.OPENROUTER_ORG_ID || ''
    },
    openai: {
      key: env.OPENAI_API_KEY || '',
      org: env.OPENAI_ORG_ID || ''
    }
  }
})()
|
||||
|
||||
// Model identifiers the parameterised tests iterate over (consumed via `it.each(models)`).
export const models = [
|
||||
E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
|
||||
E_OPENROUTER_MODEL_FREE.MODEL_FREE_GOOGLE_GEMINI_2_0_FLASH_EXP_FREE,
|
||||
E_OPENAI_MODEL.MODEL_GPT_4
|
||||
]
|
||||
|
||||
// Locations the tests pass to run() as `path`, `logs` and `preferences`; all resolved two levels above this file.
export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
|
||||
export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
|
||||
export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
|
||||
export const TEST_TIMEOUT = 30000 // 30 seconds timeout for API calls
|
||||
|
||||
/**
 * One recorded outcome of a single test for a single model. The tests append
 * these records to a JSON log and later render them into a markdown report.
 *
 * - test:      test name; used as the grouping key and heading in the report
 * - prompt:    prompt text sent to the model
 * - result:    raw responses; the report shows element 0 as "Actual"
 * - expected:  expected answer
 * - model:     model name, captured in the onRun callback ('unknown' until then)
 * - router:    router name, captured in the onRun callback ('unknown' until then)
 * - timestamp: ISO string from `new Date().toISOString()`; decides which record is newest
 * - passed:    whether the actual answer matched; selects the report section
 * - reason:    failure explanation, left undefined for passing results
 * - error:     normalised error as produced by formatError
 * - duration:  elapsed milliseconds (`Date.now() - startTime`)
 * - config:    router / API-key / base-URL settings recorded with the result
 *
 * NOTE(review): records are serialised to JSON files by the tests, so a real
 * key in `config.apiKey` would be persisted to disk — confirm this is intended.
 */
export interface TestResult {
|
||||
test: string;
|
||||
prompt: string;
|
||||
result: string[];
|
||||
expected: string;
|
||||
model: string;
|
||||
router: string;
|
||||
timestamp: string;
|
||||
passed: boolean;
|
||||
reason?: string;
|
||||
error?: {
|
||||
message: string;
|
||||
code?: string;
|
||||
type?: string;
|
||||
details?: any;
|
||||
};
|
||||
duration?: number;
|
||||
config?: {
|
||||
apiKey?: string;
|
||||
baseURL?: string;
|
||||
router?: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Normalises any thrown value into the `error` shape stored on a TestResult.
 * Missing pieces fall back to 'Unknown error' / 'UNKNOWN' / the constructor
 * name (or 'Error'); `details` prefers response.data, then response, then
 * the thrown value itself.
 */
export const formatError = (error: any): TestResult['error'] => {
  const message = error?.message || 'Unknown error'
  const code = error?.code || 'UNKNOWN'
  const type = error?.type || error?.constructor?.name || 'Error'
  const response = error?.response
  const details = response?.data || response || error
  return { message, code, type, details }
}
|
||||
|
||||
/**
 * Returns true when a model response carries no usable text: the array is
 * missing, has no elements, or every element is falsy or whitespace-only.
 */
export const isEmptyResponse = (result: string[] | null | undefined): boolean => {
  if (!result || result.length === 0) {
    return true
  }
  for (const entry of result) {
    // A single non-blank entry is enough to count as a real response
    if (entry && entry.trim() !== '') {
      return false
    }
  }
  return true
}
|
||||
|
||||
/**
 * Maps a model name to the router used for it: names starting with 'gpt-'
 * go to 'openai', everything else to 'openrouter'.
 */
export const getRouterForModel = (model: string): string =>
  model.startsWith('gpt-') ? 'openai' : 'openrouter'
|
||||
|
||||
/**
 * Looks up the API key configured in TEST_CONFIG for the given router.
 * Any router other than 'openai' or 'openrouter' yields ''.
 */
export const getApiKeyForRouter = (router: string): string => {
  if (router === 'openai') {
    return TEST_CONFIG.openai.key
  }
  if (router === 'openrouter') {
    return TEST_CONFIG.openrouter.key
  }
  return ''
}
|
||||
|
||||
/**
 * Throws when TEST_CONFIG has an empty key for either router, naming every
 * missing environment variable in the error message; otherwise returns nothing.
 */
export const validateConfig = () => {
  // [environment variable name, configured value], in the order they are reported
  const required: Array<[string, string]> = [
    ['OPENROUTER_API_KEY', TEST_CONFIG.openrouter.key],
    ['OPENAI_API_KEY', TEST_CONFIG.openai.key]
  ]
  const missingKeys = required
    .filter(([, value]) => !value)
    .map(([name]) => name)

  if (missingKeys.length === 0) {
    return
  }
  throw new Error(`Missing required environment variables: ${missingKeys.join(', ')}`)
}
|
||||
import * as path from 'node:path'
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL, E_OPENROUTER_MODEL } from '../../src/index'
|
||||
|
||||
// Model identifiers the parameterised tests iterate over (consumed via `it.each(models)`).
export const models = [
|
||||
//E_OPENROUTER_MODEL_FREE.MODEL_FREE_DEEPSEEK_DEEPSEEK_CHAT_FREE,
|
||||
E_OPENROUTER_MODEL.MODEL_ANTHROPIC_CLAUDE_2_0
|
||||
]
|
||||
|
||||
// Locations the tests pass to run() as `path`, `logs` and `preferences`; all resolved two levels above this file.
export const TEST_BASE_PATH = path.resolve(__dirname, '../../')
|
||||
export const TEST_LOGS_PATH = path.resolve(__dirname, '../../logs')
|
||||
export const TEST_PREFERENCES_PATH = path.resolve(__dirname, '../../preferences.md')
|
||||
export const TEST_TIMEOUT = 30000 // 30 seconds timeout for API calls
|
||||
|
||||
/**
 * One recorded outcome of a single test for a single model. The tests append
 * these records to a JSON log and later render them into a markdown report.
 *
 * - test:      test name; used as the grouping key and heading in the report
 * - prompt:    prompt text sent to the model
 * - result:    raw responses; the report shows element 0 as "Actual"
 * - expected:  expected answer
 * - model:     model name, captured in the onRun callback ('unknown' until then)
 * - router:    router name, captured in the onRun callback ('unknown' until then)
 * - timestamp: ISO string from `new Date().toISOString()`; decides which record is newest
 * - passed:    whether the actual answer matched; selects the report section
 * - reason:    failure explanation, left undefined for passing results
 * - error:     normalised error as produced by formatError
 * - duration:  elapsed milliseconds (`Date.now() - startTime`)
 */
export interface TestResult {
|
||||
test: string;
|
||||
prompt: string;
|
||||
result: string[];
|
||||
expected: string;
|
||||
model: string;
|
||||
router: string;
|
||||
timestamp: string;
|
||||
passed: boolean;
|
||||
reason?: string;
|
||||
error?: {
|
||||
message: string;
|
||||
code?: string;
|
||||
type?: string;
|
||||
details?: any;
|
||||
};
|
||||
duration?: number
|
||||
}
|
||||
|
||||
/**
 * Normalises any thrown value into the `error` shape stored on a TestResult.
 * Missing pieces fall back to 'Unknown error' / 'UNKNOWN' / the constructor
 * name (or 'Error'); `details` prefers response.data, then response, then
 * the thrown value itself.
 */
export const formatError = (error: any): TestResult['error'] => {
  const message = error?.message || 'Unknown error'
  const code = error?.code || 'UNKNOWN'
  const type = error?.type || error?.constructor?.name || 'Error'
  const response = error?.response
  const details = response?.data || response || error
  return { message, code, type, details }
}
|
||||
|
||||
/**
 * Returns true when a model response carries no usable text: the array is
 * missing, has no elements, or every element is falsy or whitespace-only.
 */
export const isEmptyResponse = (result: string[] | null | undefined): boolean => {
  if (!result || result.length === 0) {
    return true
  }
  for (const entry of result) {
    // A single non-blank entry is enough to count as a real response
    if (entry && entry.trim() !== '') {
      return false
    }
  }
  return true
}
|
||||
|
||||
547
packages/kbot/tests/unit/format-report.md
Normal file
547
packages/kbot/tests/unit/format-report.md
Normal file
@ -0,0 +1,547 @@
|
||||
# Format Test Results
|
||||
|
||||
## Failed Tests
|
||||
|
||||
### basic_structure - deepseek/deepseek-chat:free
|
||||
- Prompt: `return a greeting "hello" with count 42`
|
||||
- Expected: `{"greeting":"hello","count":42}`
|
||||
- Actual: `""`
|
||||
- Duration: 885ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:36 PM
|
||||
|
||||
### basic_structure - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `return a greeting "hello" with count 42`
|
||||
- Expected: `{"greeting":"hello","count":42}`
|
||||
- Actual: `""`
|
||||
- Duration: 757ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:36 PM
|
||||
|
||||
### basic_structure - gpt-4
|
||||
- Prompt: `return a greeting "hello" with count 42`
|
||||
- Expected: `{"greeting":"hello","count":42}`
|
||||
- Actual: `""`
|
||||
- Duration: 1043ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:37 PM
|
||||
|
||||
### basic_structure - anthropic/claude-3.7-sonnet
|
||||
- Prompt: `return a greeting "hello" with count 42`
|
||||
- Expected: `{"greeting":"hello","count":42}`
|
||||
- Actual: `""`
|
||||
- Duration: 1790ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: Unexpected token 'h', "hello 42" is not valid JSON
|
||||
- Reason: Failed to parse or validate response: Unexpected token 'h', "hello 42" is not valid JSON
|
||||
- Timestamp: 4/1/2025, 1:23:05 PM
|
||||
|
||||
### basic_structure - openai/gpt-4
|
||||
- Prompt: `Return a JSON object with a greeting "hello" and count 42. The response must be valid JSON with exactly these fields: { "greeting": string, "count": number }`
|
||||
- Expected: `{"greeting":"hello","count":42}`
|
||||
- Actual: `""`
|
||||
- Duration: 1258ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Invalid response from API
|
||||
- Reason: Invalid response from API
|
||||
- Timestamp: 4/1/2025, 1:32:43 PM
|
||||
|
||||
### nested_structure - deepseek/deepseek-chat:free
|
||||
- Prompt: `return user John age 30 with dark theme and notifications enabled`
|
||||
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
|
||||
- Actual: `""`
|
||||
- Duration: 655ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:38 PM
|
||||
|
||||
### nested_structure - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `return user John age 30 with dark theme and notifications enabled`
|
||||
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
|
||||
- Actual: `""`
|
||||
- Duration: 790ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:39 PM
|
||||
|
||||
### nested_structure - gpt-4
|
||||
- Prompt: `return user John age 30 with dark theme and notifications enabled`
|
||||
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
|
||||
- Actual: `""`
|
||||
- Duration: 717ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:40 PM
|
||||
|
||||
### nested_structure - anthropic/claude-3.7-sonnet
|
||||
- Prompt: `return user John age 30 with dark theme and notifications enabled`
|
||||
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
|
||||
- Actual: `""`
|
||||
- Duration: 1189ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: Unexpected token '#', "# John's U"... is not valid JSON
|
||||
- Reason: Failed to parse or validate response: Unexpected token '#', "# John's U"... is not valid JSON
|
||||
- Timestamp: 4/1/2025, 1:23:06 PM
|
||||
|
||||
### nested_structure - openai/gpt-4
|
||||
- Prompt: `Return a JSON object with user John age 30, dark theme and notifications enabled. The response must be valid JSON with this structure: { "user": { "name": string, "age": number }, "settings": { "theme": string, "notifications": boolean } }`
|
||||
- Expected: `{"user":{"name":"John","age":30},"settings":{"theme":"dark","notifications":true}}`
|
||||
- Actual: `""`
|
||||
- Duration: 716ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Invalid response from API
|
||||
- Reason: Invalid response from API
|
||||
- Timestamp: 4/1/2025, 1:32:44 PM
|
||||
|
||||
### array_structure - deepseek/deepseek-chat:free
|
||||
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
|
||||
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
|
||||
- Actual: `""`
|
||||
- Duration: 617ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:40 PM
|
||||
|
||||
### array_structure - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
|
||||
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
|
||||
- Actual: `""`
|
||||
- Duration: 756ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:41 PM
|
||||
|
||||
### array_structure - gpt-4
|
||||
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
|
||||
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
|
||||
- Actual: `""`
|
||||
- Duration: 1026ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:42 PM
|
||||
|
||||
### array_structure - anthropic/claude-3.7-sonnet
|
||||
- Prompt: `return a list of 2 items with ids 1 and 2, names "first" and "second"`
|
||||
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
|
||||
- Actual: `""`
|
||||
- Duration: 1190ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:23:08 PM
|
||||
|
||||
### array_structure - openai/gpt-4
|
||||
- Prompt: `Return a JSON object with a list of 2 items. The response must be valid JSON with this structure: { "items": [{ "id": number, "name": string }] }. The first item should have id 1 and name "first", the second item should have id 2 and name "second".`
|
||||
- Expected: `{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"}]}`
|
||||
- Actual: `""`
|
||||
- Duration: 703ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Invalid response from API
|
||||
- Reason: Invalid response from API
|
||||
- Timestamp: 4/1/2025, 1:32:44 PM
|
||||
|
||||
### enum_structure - deepseek/deepseek-chat:free
|
||||
- Prompt: `return status success with message "Operation completed"`
|
||||
- Expected: `{"status":"success","message":"Operation completed"}`
|
||||
- Actual: `""`
|
||||
- Duration: 647ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:43 PM
|
||||
|
||||
### enum_structure - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `return status success with message "Operation completed"`
|
||||
- Expected: `{"status":"success","message":"Operation completed"}`
|
||||
- Actual: `""`
|
||||
- Duration: 813ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:43 PM
|
||||
|
||||
### enum_structure - gpt-4
|
||||
- Prompt: `return status success with message "Operation completed"`
|
||||
- Expected: `{"status":"success","message":"Operation completed"}`
|
||||
- Actual: `""`
|
||||
- Duration: 1138ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:45 PM
|
||||
|
||||
### enum_structure - anthropic/claude-3.7-sonnet
|
||||
- Prompt: `return status success with message "Operation completed"`
|
||||
- Expected: `{"status":"success","message":"Operation completed"}`
|
||||
- Actual: `""`
|
||||
- Duration: 1728ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: Unexpected token '`', "```json
|
||||
{
|
||||
"... is not valid JSON
|
||||
- Reason: Failed to parse or validate response: Unexpected token '`', "```json
|
||||
{
|
||||
"... is not valid JSON
|
||||
- Timestamp: 4/1/2025, 1:23:09 PM
|
||||
|
||||
### enum_structure - openai/gpt-4
|
||||
- Prompt: `Return a JSON object with status "success" and message "Operation completed". The response must be valid JSON with this structure: { "status": "success" | "error" | "pending", "message": string }`
|
||||
- Expected: `{"status":"success","message":"Operation completed"}`
|
||||
- Actual: `""`
|
||||
- Duration: 688ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Invalid response from API
|
||||
- Reason: Invalid response from API
|
||||
- Timestamp: 4/1/2025, 1:32:45 PM
|
||||
|
||||
### optional_fields - deepseek/deepseek-chat:free
|
||||
- Prompt: `return name "John" with age 30 and email "john@example.com"`
|
||||
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
|
||||
- Actual: `""`
|
||||
- Duration: 676ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:45 PM
|
||||
|
||||
### optional_fields - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `return name "John" with age 30 and email "john@example.com"`
|
||||
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
|
||||
- Actual: `""`
|
||||
- Duration: 884ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:46 PM
|
||||
|
||||
### optional_fields - gpt-4
|
||||
- Prompt: `return name "John" with age 30 and email "john@example.com"`
|
||||
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
|
||||
- Actual: `""`
|
||||
- Duration: 669ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Reason: Failed to parse or validate response: [
|
||||
{
|
||||
"code": "invalid_type",
|
||||
"expected": "object",
|
||||
"received": "null",
|
||||
"path": [],
|
||||
"message": "Expected object, received null"
|
||||
}
|
||||
]
|
||||
- Timestamp: 4/1/2025, 1:21:47 PM
|
||||
|
||||
### optional_fields - anthropic/claude-3.7-sonnet
|
||||
- Prompt: `return name "John" with age 30 and email "john@example.com"`
|
||||
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
|
||||
- Actual: `""`
|
||||
- Duration: 1576ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Failed to parse or validate response: Unexpected token '`', "```json
|
||||
{
|
||||
"... is not valid JSON
|
||||
- Reason: Failed to parse or validate response: Unexpected token '`', "```json
|
||||
{
|
||||
"... is not valid JSON
|
||||
- Timestamp: 4/1/2025, 1:23:11 PM
|
||||
|
||||
### optional_fields - openai/gpt-4
|
||||
- Prompt: `Return a JSON object with name "John", age 30, and email "john@example.com". The response must be valid JSON with this structure: { "name": string, "age"?: number, "email"?: string }`
|
||||
- Expected: `{"name":"John","age":30,"email":"john@example.com"}`
|
||||
- Actual: `""`
|
||||
- Duration: 682ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Invalid response from API
|
||||
- Reason: Invalid response from API
|
||||
- Timestamp: 4/1/2025, 1:32:46 PM
|
||||
|
||||
## Passed Tests
|
||||
|
||||
*No passed tests*
|
||||
|
||||
2241
packages/kbot/tests/unit/format.json
Normal file
2241
packages/kbot/tests/unit/format.json
Normal file
File diff suppressed because it is too large
Load Diff
295
packages/kbot/tests/unit/format.test.ts
Normal file
295
packages/kbot/tests/unit/format.test.ts
Normal file
@ -0,0 +1,295 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { run } from '../../src/index'
|
||||
import * as path from 'node:path'
|
||||
import { sync as write } from "@polymech/fs/write"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { z } from 'zod'
|
||||
|
||||
|
||||
import {
|
||||
models_premium as models,
|
||||
TEST_BASE_PATH,
|
||||
TEST_LOGS_PATH,
|
||||
TEST_PREFERENCES_PATH,
|
||||
TEST_TIMEOUT,
|
||||
TestResult,
|
||||
formatError,
|
||||
isEmptyResponse,
|
||||
getRouterForModel,
|
||||
getApiKeyForRouter
|
||||
} from './commons'
|
||||
|
||||
const TEST_LOG_PATH = path.resolve(__dirname, './format.json')
|
||||
|
||||
/**
 * Structured-output ("format") test suite.
 *
 * Each case sends a prompt together with a zod schema to `run`, validates the
 * first response element against that schema and compares it with the expected
 * value. Every run (passed or failed) is appended to the JSON log at
 * TEST_LOG_PATH; the final test renders the newest record per test + model
 * combination into `format-report.md` next to this file.
 */
describe('Structured Output Format', () => {
  let testResults: TestResult[] = []

  // Load existing results if any, so the log accumulates across runs
  if (exists(TEST_LOG_PATH)) {
    const data = read(TEST_LOG_PATH, 'json')
    testResults = Array.isArray(data) ? data : []
  }

  /**
   * Runs one structured-output request and records its outcome.
   *
   * @param prompt    Prompt passed to `run`.
   * @param format    Zod schema passed to `run` and used to validate the response.
   * @param expected  Value the validated response must deeply equal.
   * @param testName  Identifier stored in the log and used as report heading.
   * @param modelName Model requested from `run`.
   * @returns The record that was appended to the log (successful runs only).
   * @throws Rethrows any failure (timeout, empty response, parse/validation
   *         error, assertion mismatch) after the failed record has been logged.
   */
  const runFormatTest = async (prompt: string, format: z.ZodType<any>, expected: any, testName: string, modelName: string) => {
    const startTime = Date.now()
    // Default to the requested model so that runs failing before `onRun` fires
    // are still attributed to a model instead of collapsing into one "unknown" key.
    let model = modelName
    let router = 'unknown'
    let result: any[] = []
    let passed = false
    let error: TestResult['error'] | undefined
    let timeoutId: ReturnType<typeof setTimeout> | undefined
    let testResult: TestResult | undefined

    try {
      const timeout = new Promise<never>((_, reject) => {
        timeoutId = setTimeout(() => reject(new Error('API call timed out')), TEST_TIMEOUT)
      })

      const response = await Promise.race([
        run({
          prompt,
          mode: 'completion',
          model: modelName,
          path: TEST_BASE_PATH,
          logs: TEST_LOGS_PATH,
          preferences: TEST_PREFERENCES_PATH,
          format,
          onRun: async (options) => {
            // Capture what the run actually resolved to, for the log
            model = options.model || modelName
            router = options.router || 'unknown'
            return options
          }
        }),
        timeout
      ]) as any[]

      result = response || []

      // Check for emptiness before parsing; otherwise an empty response is
      // reported as a JSON parse error instead of as an empty response.
      if (isEmptyResponse(response)) {
        throw new Error('Model returned empty response')
      }

      const actual = response?.[0]
      let parsed: any

      try {
        parsed = typeof actual === 'string' ? JSON.parse(actual) : actual
        // Validate against the format schema
        parsed = format.parse(parsed)
      } catch (parseError) {
        const detail = parseError instanceof Error ? parseError.message : String(parseError)
        throw new Error(`Failed to parse or validate response: ${detail}`)
      }

      expect(parsed).toEqual(expected)
      passed = true
    } catch (e) {
      error = formatError(e)
      throw e
    } finally {
      // Do not leave the timeout timer pending once the race has settled
      if (timeoutId !== undefined) {
        clearTimeout(timeoutId)
      }

      // Record the real outcome. The API key is deliberately not written to the
      // log: the log file lives in the repository.
      // NOTE(review): assumes `apiKey` is optional in TestResult['config'] - confirm in ./commons
      testResult = {
        test: testName,
        prompt,
        result,
        expected,
        model,
        router,
        timestamp: new Date().toISOString(),
        passed,
        duration: Date.now() - startTime,
        error,
        reason: passed ? undefined : (error?.message || 'Unknown error occurred'),
        config: {
          router: getRouterForModel(modelName)
        }
      }

      testResults.push(testResult)
      write(TEST_LOG_PATH, JSON.stringify(testResults, null, 2))
    }

    return testResult
  }

  it.each(models)('should return basic structured output with model %s', async (modelName) => {
    const format = z.object({
      greeting: z.string(),
      count: z.number()
    })

    await runFormatTest(
      'return a greeting "hello" with count 42',
      format,
      { greeting: 'hello', count: 42 },
      'basic_structure',
      modelName
    )
  })

  it.each(models)('should handle nested structures with model %s', async (modelName) => {
    const format = z.object({
      user: z.object({
        name: z.string(),
        age: z.number()
      }),
      settings: z.object({
        theme: z.string(),
        notifications: z.boolean()
      })
    })

    await runFormatTest(
      'return user John age 30 with dark theme and notifications enabled',
      format,
      {
        user: { name: 'John', age: 30 },
        settings: { theme: 'dark', notifications: true }
      },
      'nested_structure',
      modelName
    )
  })

  it.each(models)('should handle arrays with model %s', async (modelName) => {
    const format = z.object({
      items: z.array(z.object({
        id: z.number(),
        name: z.string()
      }))
    })

    await runFormatTest(
      'return a list of 2 items with ids 1 and 2, names "first" and "second"',
      format,
      {
        items: [
          { id: 1, name: 'first' },
          { id: 2, name: 'second' }
        ]
      },
      'array_structure',
      modelName
    )
  })

  it.each(models)('should handle enums with model %s', async (modelName) => {
    const format = z.object({
      status: z.enum(['success', 'error', 'pending']),
      message: z.string()
    })

    await runFormatTest(
      'return status success with message "Operation completed"',
      format,
      {
        status: 'success',
        message: 'Operation completed'
      },
      'enum_structure',
      modelName
    )
  })

  it.each(models)('should handle optional fields with model %s', async (modelName) => {
    const format = z.object({
      name: z.string(),
      age: z.number().optional(),
      email: z.string().email().optional()
    })

    await runFormatTest(
      'return name "John" with age 30 and email "john@example.com"',
      format,
      {
        name: 'John',
        age: 30,
        email: 'john@example.com'
      },
      'optional_fields',
      modelName
    )
  })

  it('should generate markdown report', () => {
    // Group results by test and model, keeping only the latest record
    // for each test + model combination
    const latestResults = new Map<string, Map<string, TestResult>>()

    testResults.forEach(result => {
      if (!latestResults.has(result.test)) {
        latestResults.set(result.test, new Map())
      }
      const testMap = latestResults.get(result.test)!
      const existingResult = testMap.get(result.model)
      if (!existingResult || new Date(result.timestamp) > new Date(existingResult.timestamp)) {
        testMap.set(result.model, result)
      }
    })

    // Renders a single "###" entry. Error details and the reason are only
    // listed for failed records.
    const renderEntry = (testName: string, model: string, result: TestResult): string => {
      let entry = `### ${testName} - ${model}\n`
      entry += `- Prompt: \`${result.prompt}\`\n`
      entry += `- Expected: \`${JSON.stringify(result.expected)}\`\n`
      entry += `- Actual: \`${JSON.stringify(result.result?.[0] || '')}\`\n`
      entry += `- Duration: ${result.duration}ms\n`
      if (!result.passed) {
        if (result.error) {
          entry += `- Error Type: ${result.error.type}\n`
          entry += `- Error Code: ${result.error.code}\n`
          entry += `- Error Message: ${result.error.message}\n`
        }
        entry += `- Reason: ${result.reason}\n`
      }
      entry += `- Timestamp: ${new Date(result.timestamp).toLocaleString()}\n\n`
      return entry
    }

    // Renders one "##" section with every latest record whose pass state
    // matches `wantPassed`, or the placeholder note when there is none.
    const renderSection = (title: string, wantPassed: boolean, emptyNote: string): string => {
      let section = `## ${title}\n\n`
      let hasEntries = false
      for (const [testName, modelResults] of latestResults) {
        for (const [model, result] of modelResults) {
          if (Boolean(result.passed) !== wantPassed) {
            continue
          }
          hasEntries = true
          section += renderEntry(testName, model, result)
        }
      }
      if (!hasEntries) {
        section += `${emptyNote}\n\n`
      }
      return section
    }

    // Failed tests are listed first, then passed tests
    const report = `# Format Test Results\n\n${renderSection('Failed Tests', false, '*No failed tests*')}${renderSection('Passed Tests', true, '*No passed tests*')}`

    // Write report to file
    const reportPath = path.resolve(__dirname, './format-report.md')
    write(reportPath, report)

    // Verify report was written
    expect(exists(reportPath) === 'file').toBe(true)
  })
})
|
||||
@ -32,6 +32,14 @@
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:05:52 PM
|
||||
|
||||
### german - anthropic/claude-2.0
|
||||
- Prompt: `translate "hello" to German. Return only the translated word, no explanation.`
|
||||
- Expected: `hallo`
|
||||
- Actual: ``
|
||||
- Duration: 1253ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:47:26 PM
|
||||
|
||||
### spanish - deepseek/deepseek-chat:free
|
||||
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
|
||||
- Expected: `sí`
|
||||
@ -62,6 +70,14 @@
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:05:55 PM
|
||||
|
||||
### spanish - anthropic/claude-2.0
|
||||
- Prompt: `translate "yes" to Spanish. Return only the translated word, no explanation.`
|
||||
- Expected: `sí`
|
||||
- Actual: ``
|
||||
- Duration: 932ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:47:27 PM
|
||||
|
||||
### french - deepseek/deepseek-chat:free
|
||||
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
|
||||
- Expected: `non`
|
||||
@ -92,6 +108,14 @@
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:05:56 PM
|
||||
|
||||
### french - anthropic/claude-2.0
|
||||
- Prompt: `translate "no" to French. Return only the translated word, no explanation.`
|
||||
- Expected: `non`
|
||||
- Actual: ``
|
||||
- Duration: 864ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:47:28 PM
|
||||
|
||||
## Passed Tests
|
||||
|
||||
### german_translation - deepseek/deepseek-chat:free
|
||||
|
||||
@ -700,5 +700,112 @@
|
||||
"passed": false,
|
||||
"duration": 1341,
|
||||
"reason": "Unknown error occurred"
|
||||
},
|
||||
{
|
||||
"test": "german",
|
||||
"prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
|
||||
"result": [],
|
||||
"expected": "hallo",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:47:02.933Z",
|
||||
"passed": false,
|
||||
"duration": 1416,
|
||||
"error": {
|
||||
"message": "getRouterForModel is not defined",
|
||||
"code": "UNKNOWN",
|
||||
"type": "ReferenceError",
|
||||
"details": {
|
||||
"stack": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:106:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "getRouterForModel is not defined",
|
||||
"stackStr": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:106:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "ReferenceError",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "getRouterForModel is not defined"
|
||||
},
|
||||
{
|
||||
"test": "spanish",
|
||||
"prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
|
||||
"result": [],
|
||||
"expected": "sí",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:47:04.007Z",
|
||||
"passed": false,
|
||||
"duration": 1071,
|
||||
"error": {
|
||||
"message": "getRouterForModel is not defined",
|
||||
"code": "UNKNOWN",
|
||||
"type": "ReferenceError",
|
||||
"details": {
|
||||
"stack": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:115:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "getRouterForModel is not defined",
|
||||
"stackStr": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:115:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"nameStr": "ReferenceError",
|
||||
"expected": "undefined",
|
||||
"actual": "undefined"
|
||||
}
|
||||
},
|
||||
"reason": "getRouterForModel is not defined"
|
||||
},
|
||||
{
|
||||
"test": "french",
|
||||
"prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
|
||||
"result": [],
|
||||
"expected": "non",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:47:05.064Z",
|
||||
"passed": false,
|
||||
"duration": 1056,
|
||||
"error": {
|
||||
"message": "getRouterForModel is not defined",
|
||||
"code": "UNKNOWN",
|
||||
"type": "ReferenceError",
|
||||
"details": {
|
||||
"stack": "ReferenceError: getRouterForModel is not defined\n at runTranslationTest (C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:78:19)\n at processTicksAndRejections (node:internal/process/task_queues:105:5)\n at C:\\Users\\zx\\Desktop\\polymech\\polymech-mono\\packages\\kbot\\tests\\unit\\language.test.ts:124:5\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:533:5\n at runTest (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1056:11)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runSuite (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1205:15)\n at runFiles (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1262:5)\n at startTests (file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/@vitest/runner/dist/index.js:1271:3)\n at file:///C:/Users/zx/Desktop/polymech/polymech-mono/packages/kbot/node_modules/vitest/dist/chunks/runBaseTests.3qpJUEJM.js:126:11",
|
||||
"message": "getRouterForModel is not defined"
|
||||
}
|
||||
},
|
||||
"reason": "getRouterForModel is not defined"
|
||||
},
|
||||
{
|
||||
"test": "german",
|
||||
"prompt": "translate \"hello\" to German. Return only the translated word, no explanation.",
|
||||
"result": [],
|
||||
"expected": "hallo",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:47:26.798Z",
|
||||
"passed": false,
|
||||
"duration": 1253,
|
||||
"reason": "Unknown error occurred"
|
||||
},
|
||||
{
|
||||
"test": "spanish",
|
||||
"prompt": "translate \"yes\" to Spanish. Return only the translated word, no explanation.",
|
||||
"result": [],
|
||||
"expected": "sí",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:47:27.731Z",
|
||||
"passed": false,
|
||||
"duration": 932,
|
||||
"reason": "Unknown error occurred"
|
||||
},
|
||||
{
|
||||
"test": "french",
|
||||
"prompt": "translate \"no\" to French. Return only the translated word, no explanation.",
|
||||
"result": [],
|
||||
"expected": "non",
|
||||
"model": "anthropic/claude-2.0",
|
||||
"router": "openrouter",
|
||||
"timestamp": "2025-04-01T11:47:28.596Z",
|
||||
"passed": false,
|
||||
"duration": 864,
|
||||
"reason": "Unknown error occurred"
|
||||
}
|
||||
]
|
||||
@ -12,9 +12,7 @@ import {
|
||||
TEST_TIMEOUT,
|
||||
TestResult,
|
||||
formatError,
|
||||
isEmptyResponse,
|
||||
getRouterForModel,
|
||||
getApiKeyForRouter
|
||||
isEmptyResponse
|
||||
} from './commons'
|
||||
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
||||
@ -75,11 +73,7 @@ describe('Language Capabilities', () => {
|
||||
timestamp: new Date().toISOString(),
|
||||
passed,
|
||||
duration: Date.now() - startTime,
|
||||
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
|
||||
config: {
|
||||
router: getRouterForModel(modelName),
|
||||
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
|
||||
}
|
||||
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`
|
||||
}
|
||||
} catch (e) {
|
||||
error = formatError(e)
|
||||
@ -96,11 +90,7 @@ describe('Language Capabilities', () => {
|
||||
passed: false,
|
||||
duration: Date.now() - startTime,
|
||||
error,
|
||||
reason: error?.message || 'Unknown error occurred',
|
||||
config: {
|
||||
router: getRouterForModel(modelName),
|
||||
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
|
||||
}
|
||||
reason: error?.message || 'Unknown error occurred'
|
||||
}
|
||||
|
||||
testResults.push(testResult)
|
||||
|
||||
@ -2,12 +2,127 @@
|
||||
|
||||
## Failed Tests
|
||||
|
||||
### addition - deepseek/deepseek-chat:free
|
||||
- Prompt: `add 5 and 3. Return only the number, no explanation.`
|
||||
- Expected: `8`
|
||||
- Actual: ``
|
||||
- Duration: 913ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Model returned empty response
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:42:47 PM
|
||||
|
||||
### addition - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `add 5 and 3. Return only the number, no explanation.`
|
||||
- Expected: `8`
|
||||
- Actual: ``
|
||||
- Duration: 1112ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Model returned empty response
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:45:50 PM
|
||||
|
||||
### addition - gpt-4
|
||||
- Prompt: `add 5 and 3. Return only the number, no explanation.`
|
||||
- Expected: `8`
|
||||
- Actual: ``
|
||||
- Duration: 1038ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:42:26 PM
|
||||
|
||||
### addition - anthropic/claude-2.0
|
||||
- Prompt: `add 5 and 3. Return only the number, no explanation.`
|
||||
- Expected: `8`
|
||||
- Actual: ``
|
||||
- Duration: 1218ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:46:27 PM
|
||||
|
||||
### multiplication - deepseek/deepseek-chat:free
|
||||
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
|
||||
- Expected: `24`
|
||||
- Actual: ``
|
||||
- Duration: 636ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Model returned empty response
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:42:48 PM
|
||||
|
||||
### multiplication - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
|
||||
- Expected: `24`
|
||||
- Actual: ``
|
||||
- Duration: 764ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Model returned empty response
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:45:51 PM
|
||||
|
||||
### multiplication - gpt-4
|
||||
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
|
||||
- Expected: `24`
|
||||
- Actual: ``
|
||||
- Duration: 1052ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:42:28 PM
|
||||
|
||||
### multiplication - anthropic/claude-2.0
|
||||
- Prompt: `multiply 8 and 3. Return only the number, no explanation.`
|
||||
- Expected: `24`
|
||||
- Actual: ``
|
||||
- Duration: 911ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:46:27 PM
|
||||
|
||||
### division - deepseek/deepseek-chat:free
|
||||
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
|
||||
- Expected: `5`
|
||||
- Actual: ``
|
||||
- Duration: 648ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Model returned empty response
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:42:50 PM
|
||||
|
||||
### division - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
|
||||
- Expected: `5`
|
||||
- Actual: ``
|
||||
- Duration: 829ms
|
||||
- Error Type: Error
|
||||
- Error Code: UNKNOWN
|
||||
- Error Message: Model returned empty response
|
||||
- Reason: Model returned empty response
|
||||
- Timestamp: 4/1/2025, 1:45:52 PM
|
||||
|
||||
### division - gpt-4
|
||||
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
|
||||
- Expected: `5`
|
||||
- Actual: ``
|
||||
- Duration: 959ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:42:31 PM
|
||||
|
||||
### division - anthropic/claude-2.0
|
||||
- Prompt: `divide 15 by 3. Return only the number, no explanation.`
|
||||
- Expected: `5`
|
||||
- Actual: ``
|
||||
- Duration: 1485ms
|
||||
- Reason: Unknown error occurred
|
||||
- Timestamp: 4/1/2025, 1:46:29 PM
|
||||
|
||||
## Passed Tests
|
||||
|
||||
### factorial - deepseek/deepseek-chat:free
|
||||
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
|
||||
- Expected: `120`
|
||||
- Actual: `120`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 12:59:09 PM
|
||||
|
||||
### factorial - google/gemini-2.0-flash-exp:free
|
||||
@ -15,18 +130,21 @@
|
||||
- Expected: `120`
|
||||
- Actual: `120
|
||||
`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 12:59:10 PM
|
||||
|
||||
### factorial - gpt-4
|
||||
- Prompt: `calculate the factorial of 5 (5!). Return only the number, no explanation.`
|
||||
- Expected: `120`
|
||||
- Actual: `120`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 1:03:25 PM
|
||||
|
||||
### fibonacci - deepseek/deepseek-chat:free
|
||||
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
|
||||
- Expected: `0,1,1,2,3`
|
||||
- Actual: `0,1,1,2,3`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 12:59:13 PM
|
||||
|
||||
### fibonacci - google/gemini-2.0-flash-exp:free
|
||||
@ -34,29 +152,34 @@
|
||||
- Expected: `0,1,1,2,3`
|
||||
- Actual: `0,1,1,2,3
|
||||
`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 12:59:14 PM
|
||||
|
||||
### fibonacci - gpt-4
|
||||
- Prompt: `calculate the first 5 numbers of the fibonacci sequence. Return only the numbers separated by commas, no explanation.`
|
||||
- Expected: `0,1,1,2,3`
|
||||
- Actual: `0, 1, 1, 2, 3`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 1:03:27 PM
|
||||
|
||||
### quadratic - deepseek/deepseek-chat:free
|
||||
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
|
||||
- Expected: `[-3,-2]`
|
||||
- Actual: `[-2, -3]`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 12:59:19 PM
|
||||
|
||||
### quadratic - google/gemini-2.0-flash-exp:free
|
||||
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
|
||||
- Expected: `[-3,-2]`
|
||||
- Actual: `[-2, -3]`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 12:46:13 PM
|
||||
|
||||
### quadratic - gpt-4
|
||||
- Prompt: `solve the quadratic equation x² + 5x + 6 = 0. Return only the roots as a JSON array, no explanation.`
|
||||
- Expected: `[-3,-2]`
|
||||
- Actual: `[-2, -3]`
|
||||
- Duration: undefinedms
|
||||
- Timestamp: 4/1/2025, 1:03:30 PM
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -12,12 +12,9 @@ import {
|
||||
TEST_TIMEOUT,
|
||||
TestResult,
|
||||
formatError,
|
||||
isEmptyResponse,
|
||||
getRouterForModel,
|
||||
getApiKeyForRouter
|
||||
isEmptyResponse
|
||||
} from './commons'
|
||||
|
||||
import { E_OPENROUTER_MODEL_FREE, E_OPENAI_MODEL } from '../../src/index'
|
||||
|
||||
const TEST_LOG_PATH = path.resolve(__dirname, './math.json')
|
||||
|
||||
@ -76,10 +73,6 @@ describe('Math Capabilities', () => {
|
||||
passed,
|
||||
duration: Date.now() - startTime,
|
||||
reason: passed ? undefined : `Expected ${expected}, but got ${actual}`,
|
||||
config: {
|
||||
router: getRouterForModel(modelName),
|
||||
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
error = formatError(e)
|
||||
@ -96,11 +89,7 @@ describe('Math Capabilities', () => {
|
||||
passed: false,
|
||||
duration: Date.now() - startTime,
|
||||
error,
|
||||
reason: error?.message || 'Unknown error occurred',
|
||||
config: {
|
||||
router: getRouterForModel(modelName),
|
||||
apiKey: getApiKeyForRouter(getRouterForModel(modelName))
|
||||
}
|
||||
reason: error?.message || 'Unknown error occurred'
|
||||
}
|
||||
|
||||
testResults.push(testResult)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user