grid search
This commit is contained in:
parent
561ec84eef
commit
2c69a898dc
@ -2,7 +2,7 @@ import { describe, it, expect } from 'vitest'
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { z } from 'zod'
|
||||
|
||||
import {
|
||||
import {
|
||||
TEST_TIMEOUT,
|
||||
TestResult,
|
||||
runTest,
|
||||
@ -71,10 +71,10 @@ describe('Ollama Basic Operations', () => {
|
||||
const TEST_LOG_PATH = getReportPaths('ollama-basics', 'json')
|
||||
const TEST_REPORT_PATH = getReportPaths('ollama-basics', 'md')
|
||||
|
||||
it.each(models)('should add two numbers with model %s', async (modelName) => {
|
||||
it.each(models)('should add two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => {
|
||||
const result = await runTest(
|
||||
'add 5 and 3. Return only the number, no explanation.',
|
||||
'8',
|
||||
'8',
|
||||
'addition',
|
||||
modelName,
|
||||
TEST_LOG_PATH,
|
||||
@ -83,9 +83,9 @@ describe('Ollama Basic Operations', () => {
|
||||
)
|
||||
testResults.push(result)
|
||||
expect(result.result[0]?.trim()?.toLowerCase()).toEqual('8')
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
})
|
||||
|
||||
it.each(models)('should multiply two numbers with model %s', async (modelName) => {
|
||||
it.each(models)('should multiply two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => {
|
||||
const result = await runTest(
|
||||
'multiply 8 and 3. Return only the number, no explanation.',
|
||||
'24',
|
||||
@ -97,9 +97,9 @@ describe('Ollama Basic Operations', () => {
|
||||
)
|
||||
testResults.push(result)
|
||||
expect(result.result[0]?.trim()?.toLowerCase()).toEqual('24')
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
})
|
||||
|
||||
it.each(models)('should divide two numbers with model %s', async (modelName) => {
|
||||
it.each(models)('should divide two numbers with model %s', { timeout: TEST_TIMEOUT }, async (modelName) => {
|
||||
const result = await runTest(
|
||||
'divide 15 by 3. Return only the number, no explanation.',
|
||||
'5',
|
||||
@ -111,7 +111,7 @@ describe('Ollama Basic Operations', () => {
|
||||
)
|
||||
testResults.push(result)
|
||||
expect(result.result[0]?.trim()?.toLowerCase()).toEqual('5')
|
||||
}, { timeout: TEST_TIMEOUT })
|
||||
})
|
||||
|
||||
it('should generate markdown report', () => {
|
||||
generateTestReport(testResults, 'Ollama Basic Operations Test Results', TEST_REPORT_PATH)
|
||||
@ -130,6 +130,7 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
|
||||
it.each(models)(
|
||||
'should call add tool and return correct sum [%s]',
|
||||
{ timeout: TEST_TIMEOUT },
|
||||
async (modelName) => {
|
||||
const result = await runTest(
|
||||
'Use the add tool to add 17 and 25. Report back the result.',
|
||||
@ -145,14 +146,13 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
}
|
||||
)
|
||||
testResults.push(result)
|
||||
// Result must contain 42
|
||||
expect(result.result[0]).toMatch(/42/)
|
||||
},
|
||||
{ timeout: TEST_TIMEOUT }
|
||||
}
|
||||
)
|
||||
|
||||
it.each(models)(
|
||||
'should call multiply tool and return correct product [%s]',
|
||||
{ timeout: TEST_TIMEOUT },
|
||||
async (modelName) => {
|
||||
const result = await runTest(
|
||||
'Use the multiply tool to compute 6 times 7. Tell me the answer.',
|
||||
@ -169,12 +169,12 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
)
|
||||
testResults.push(result)
|
||||
expect(result.result[0]).toMatch(/42/)
|
||||
},
|
||||
{ timeout: TEST_TIMEOUT }
|
||||
}
|
||||
)
|
||||
|
||||
it.each(models)(
|
||||
'should call get_weather tool with correct city argument [%s]',
|
||||
{ timeout: TEST_TIMEOUT },
|
||||
async (modelName) => {
|
||||
const result = await runTest(
|
||||
"What's the weather like in Paris? Use the get_weather tool.",
|
||||
@ -190,15 +190,14 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
}
|
||||
)
|
||||
testResults.push(result)
|
||||
// Response must mention the mocked condition "sunny" and/or 22°C
|
||||
const lower = result.result[0]?.toLowerCase() ?? ''
|
||||
expect(lower).toMatch(/sunny|22/)
|
||||
},
|
||||
{ timeout: TEST_TIMEOUT }
|
||||
}
|
||||
)
|
||||
|
||||
it.each(models)(
|
||||
'should select the correct tool from multiple available tools [%s]',
|
||||
{ timeout: TEST_TIMEOUT },
|
||||
async (modelName) => {
|
||||
const result = await runTest(
|
||||
'Use the appropriate tool to add 100 and 200.',
|
||||
@ -209,19 +208,18 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
'tools',
|
||||
{
|
||||
router: 'ollama',
|
||||
// Both tools available — model must pick add, not multiply
|
||||
customTools: [addTool, multiplyTool, getWeatherTool],
|
||||
equalityCheck: 'llm_equal',
|
||||
}
|
||||
)
|
||||
testResults.push(result)
|
||||
expect(result.result[0]).toMatch(/300/)
|
||||
},
|
||||
{ timeout: TEST_TIMEOUT }
|
||||
}
|
||||
)
|
||||
|
||||
it.each(models)(
|
||||
'should chain two tool calls: multiply then format [%s]',
|
||||
{ timeout: TEST_TIMEOUT },
|
||||
async (modelName) => {
|
||||
const result = await runTest(
|
||||
'First multiply 123 by 456, then format the result with 2 decimal places.',
|
||||
@ -239,8 +237,7 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
testResults.push(result)
|
||||
// 123 * 456 = 56088 → formatted as 56,088.00
|
||||
expect(result.result[0]).toMatch(/56[,.]?088/)
|
||||
},
|
||||
{ timeout: TEST_TIMEOUT }
|
||||
}
|
||||
)
|
||||
|
||||
it('should generate tool quality markdown report', () => {
|
||||
@ -248,4 +245,3 @@ describe('Ollama Custom Tool Call Quality', () => {
|
||||
expect(exists(TEST_REPORT_PATH) === 'file').toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user