grid search

2026-03-19 17:44:22 +01:00 · 2026-03-19 17:44:22 +01:00 · 2c69a898dc
commit 2c69a898dc
parent 561ec84eef
1 changed file with 18 additions and 22 deletions
--- a/packages/kbot/tests/unit/ollama-basics.test.ts
+++ b/packages/kbot/tests/unit/ollama-basics.test.ts
@@ -2,7 +2,7 @@ import { describe, it, expect } from 'vitest'
 import { sync as exists } from "@polymech/fs/exists"
 import { z } from 'zod'

-import { 
+import {
  TEST_TIMEOUT,
  TestResult,
  runTest,
@@ -71,10 +71,10 @@ describe('Ollama Basic Operations', () => {
  const TEST_LOG_PATH = getReportPaths('ollama-basics', 'json')
  const TEST_REPORT_PATH = getReportPaths('ollama-basics', 'md')

-  it.each(models)('should add two numbers with model %s', async (modelName) => {
+  it.each(models)('should add two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => {
    const result = await runTest(
      'add 5 and 3. Return only the number, no explanation.',
-      '8',  
+      '8',
      'addition',
      modelName,
      TEST_LOG_PATH,
@@ -83,9 +83,9 @@ describe('Ollama Basic Operations', () => {
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8')
-  }, { timeout: TEST_TIMEOUT })
+  })

-  it.each(models)('should multiply two numbers with model %s', async (modelName) => {
+  it.each(models)('should multiply two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => {
    const result = await runTest(
      'multiply 8 and 3. Return only the number, no explanation.',
      '24',
@@ -97,9 +97,9 @@ describe('Ollama Basic Operations', () => {
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('24')
-  }, { timeout: TEST_TIMEOUT })
+  })

-  it.each(models)('should divide two numbers with model %s', async (modelName) => {
+  it.each(models)('should divide two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => {
    const result = await runTest(
      'divide 15 by 3. Return only the number, no explanation.',
      '5',
@@ -111,7 +111,7 @@ describe('Ollama Basic Operations', () => {
    )
    testResults.push(result)
    expect(result.result[0]?.trim()?.toLowerCase()).toEqual('5')
-  }, { timeout: TEST_TIMEOUT })
+  })

  it('should generate markdown report', () => {
    generateTestReport(testResults, 'Ollama Basic Operations Test Results', TEST_REPORT_PATH)
@@ -130,6 +130,7 @@ describe('Ollama Custom Tool Call Quality', () => {

  it.each(models)(
    'should call add tool and return correct sum [%s]',
+    { timeout: TEST_TIMEOUT },
    async (modelName) => {
      const result = await runTest(
        'Use the add tool to add 17 and 25. Report back the result.',
@@ -145,14 +146,13 @@ describe('Ollama Custom Tool Call Quality', () => {
        }
      )
      testResults.push(result)
-      // Result must contain 42
      expect(result.result[0]).toMatch(/42/)
-    },
-    { timeout: TEST_TIMEOUT }
+    }
  )

  it.each(models)(
    'should call multiply tool and return correct product [%s]',
+    { timeout: TEST_TIMEOUT },
    async (modelName) => {
      const result = await runTest(
        'Use the multiply tool to compute 6 times 7. Tell me the answer.',
@@ -169,12 +169,12 @@ describe('Ollama Custom Tool Call Quality', () => {
      )
      testResults.push(result)
      expect(result.result[0]).toMatch(/42/)
-    },
-    { timeout: TEST_TIMEOUT }
+    }
  )

  it.each(models)(
    'should call get_weather tool with correct city argument [%s]',
+    { timeout: TEST_TIMEOUT },
    async (modelName) => {
      const result = await runTest(
        "What's the weather like in Paris? Use the get_weather tool.",
@@ -190,15 +190,14 @@ describe('Ollama Custom Tool Call Quality', () => {
        }
      )
      testResults.push(result)
-      // Response must mention the mocked condition "sunny" and/or 22°C
      const lower = result.result[0]?.toLowerCase() ?? ''
      expect(lower).toMatch(/sunny|22/)
-    },
-    { timeout: TEST_TIMEOUT }
+    }
  )

  it.each(models)(
    'should select the correct tool from multiple available tools [%s]',
+    { timeout: TEST_TIMEOUT },
    async (modelName) => {
      const result = await runTest(
        'Use the appropriate tool to add 100 and 200.',
@@ -209,19 +208,18 @@ describe('Ollama Custom Tool Call Quality', () => {
        'tools',
        {
          router: 'ollama',
-          // Both tools available — model must pick add, not multiply
          customTools: [addTool, multiplyTool, getWeatherTool],
          equalityCheck: 'llm_equal',
        }
      )
      testResults.push(result)
      expect(result.result[0]).toMatch(/300/)
-    },
-    { timeout: TEST_TIMEOUT }
+    }
  )

  it.each(models)(
    'should chain two tool calls: multiply then format [%s]',
+    { timeout: TEST_TIMEOUT },
    async (modelName) => {
      const result = await runTest(
        'First multiply 123 by 456, then format the result with 2 decimal places.',
@@ -239,8 +237,7 @@ describe('Ollama Custom Tool Call Quality', () => {
      testResults.push(result)
      // 123 * 456 = 56088 → formatted as 56,088.00
      expect(result.result[0]).toMatch(/56[,.]?088/)
-    },
-    { timeout: TEST_TIMEOUT }
+    }
  )

  it('should generate tool quality markdown report', () => {
@@ -248,4 +245,3 @@ describe('Ollama Custom Tool Call Quality', () => {
    expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
  })
 })
-