From 2c69a898dc1a6ed383f194a32119083a6946d49a Mon Sep 17 00:00:00 2001 From: Babayaga Date: Thu, 19 Mar 2026 17:44:22 +0100 Subject: [PATCH] grid search --- .../kbot/tests/unit/ollama-basics.test.ts | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/packages/kbot/tests/unit/ollama-basics.test.ts b/packages/kbot/tests/unit/ollama-basics.test.ts index 92bfc19c..42bb9de3 100644 --- a/packages/kbot/tests/unit/ollama-basics.test.ts +++ b/packages/kbot/tests/unit/ollama-basics.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from 'vitest' import { sync as exists } from "@polymech/fs/exists" import { z } from 'zod' -import { +import { TEST_TIMEOUT, TestResult, runTest, @@ -71,10 +71,10 @@ describe('Ollama Basic Operations', () => { const TEST_LOG_PATH = getReportPaths('ollama-basics', 'json') const TEST_REPORT_PATH = getReportPaths('ollama-basics', 'md') - it.each(models)('should add two numbers with model %s', async (modelName) => { + it.each(models)('should add two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'add 5 and 3. Return only the number, no explanation.', - '8', + '8', 'addition', modelName, TEST_LOG_PATH, @@ -83,9 +83,9 @@ describe('Ollama Basic Operations', () => { ) testResults.push(result) expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8') - }, { timeout: TEST_TIMEOUT }) + }) - it.each(models)('should multiply two numbers with model %s', async (modelName) => { + it.each(models)('should multiply two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'multiply 8 and 3. Return only the number, no explanation.', '24', @@ -97,9 +97,9 @@ describe('Ollama Basic Operations', () => { ) testResults.push(result) expect(result.result[0]?.trim()?.toLowerCase()).toEqual('24') - }, { timeout: TEST_TIMEOUT }) + }) - it.each(models)('should divide two numbers with model %s', async (modelName) => { + it.each(models)('should divide two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'divide 15 by 3. Return only the number, no explanation.', '5', @@ -111,7 +111,7 @@ describe('Ollama Basic Operations', () => { ) testResults.push(result) expect(result.result[0]?.trim()?.toLowerCase()).toEqual('5') - }, { timeout: TEST_TIMEOUT }) + }) it('should generate markdown report', () => { generateTestReport(testResults, 'Ollama Basic Operations Test Results', TEST_REPORT_PATH) @@ -130,6 +130,7 @@ describe('Ollama Custom Tool Call Quality', () => { it.each(models)( 'should call add tool and return correct sum [%s]', + { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'Use the add tool to add 17 and 25. Report back the result.', @@ -145,14 +146,13 @@ describe('Ollama Custom Tool Call Quality', () => { } ) testResults.push(result) - // Result must contain 42 expect(result.result[0]).toMatch(/42/) - }, - { timeout: TEST_TIMEOUT } + } ) it.each(models)( 'should call multiply tool and return correct product [%s]', + { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'Use the multiply tool to compute 6 times 7. Tell me the answer.', @@ -169,12 +169,12 @@ describe('Ollama Custom Tool Call Quality', () => { ) testResults.push(result) expect(result.result[0]).toMatch(/42/) - }, - { timeout: TEST_TIMEOUT } + } ) it.each(models)( 'should call get_weather tool with correct city argument [%s]', + { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( "What's the weather like in Paris? Use the get_weather tool.", @@ -190,15 +190,14 @@ describe('Ollama Custom Tool Call Quality', () => { } ) testResults.push(result) - // Response must mention the mocked condition "sunny" and/or 22°C const lower = result.result[0]?.toLowerCase() ?? '' expect(lower).toMatch(/sunny|22/) - }, - { timeout: TEST_TIMEOUT } + } ) it.each(models)( 'should select the correct tool from multiple available tools [%s]', + { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'Use the appropriate tool to add 100 and 200.', @@ -209,19 +208,18 @@ describe('Ollama Custom Tool Call Quality', () => { 'tools', { router: 'ollama', - // Both tools available — model must pick add, not multiply customTools: [addTool, multiplyTool, getWeatherTool], equalityCheck: 'llm_equal', } ) testResults.push(result) expect(result.result[0]).toMatch(/300/) - }, - { timeout: TEST_TIMEOUT } + } ) it.each(models)( 'should chain two tool calls: multiply then format [%s]', + { timeout: TEST_TIMEOUT }, async (modelName) => { const result = await runTest( 'First multiply 123 by 456, then format the result with 2 decimal places.', @@ -239,8 +237,7 @@ describe('Ollama Custom Tool Call Quality', () => { testResults.push(result) // 123 * 456 = 56088 → formatted as 56,088.00 expect(result.result[0]).toMatch(/56[,.]?088/) - }, - { timeout: TEST_TIMEOUT } + } ) it('should generate tool quality markdown report', () => { @@ -248,4 +245,3 @@ describe('Ollama Custom Tool Call Quality', () => { expect(exists(TEST_REPORT_PATH) === 'file').toBe(true) }) }) -