mono/packages/kbot/tests/unit/reports/llama-tools.json
2026-03-19 18:40:35 +01:00

219 lines
6.4 KiB
JSON

{
"results": [
{
"test": "tool-add",
"prompt": "Use the add tool to calculate 15 plus 27. Return the result.",
"result": [
"{\"name\": \"add\", \"arguments\": {\"a\": 15, \"b\": 27}}\n</tool_call>\n"
],
"expected": "42",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:29:19.843Z",
"passed": false,
"duration": 5223,
"reason": "Expected 42, but got {\"name\": \"add\", \"arguments\": {\"a\": 15, \"b\": 27}}\n</tool_call>",
"category": "llama-tools"
},
{
"test": "tool-multiply",
"prompt": "Use the multiply tool to calculate 8 times 9. Return the result.",
"result": [
"{\"name\": \"multiply\", \"arguments\": {\"a\": 8, \"b\": 9}}\n</tool_call>\n"
],
"expected": "72",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:29:24.204Z",
"passed": false,
"duration": 4354,
"reason": "Expected 72, but got {\"name\": \"multiply\", \"arguments\": {\"a\": 8, \"b\": 9}}\n</tool_call>",
"category": "llama-tools"
},
{
"test": "tool-weather",
"prompt": "What is the weather like in Berlin? Use the get_weather tool.",
"result": [
"{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Berlin\"}}\n</tool_call>"
],
"expected": "berlin",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:29:27.300Z",
"passed": true,
"duration": 3091,
"category": "llama-tools"
},
{
"test": "tool-selection",
"prompt": "Use the add tool to calculate 100 plus 200. Do not use any other tool.",
"result": [
"{{\"name\": \"add\", \"arguments\": {\"a\": 100, \"b\": 200}}}\n</tool_call>\n"
],
"expected": "300",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:29:33.302Z",
"passed": false,
"duration": 5996,
"reason": "Expected 300, but got {{\"name\": \"add\", \"arguments\": {\"a\": 100, \"b\": 200}}}\n</tool_call>",
"category": "llama-tools"
},
{
"test": "tool-add",
"prompt": "Use the add tool to calculate 15 plus 27. Return the result.",
"result": [
"{\"name\": \"add\", \"arguments\": {\"a\": 15, \"b\": 27}}\n</tool_call>\n"
],
"expected": "42",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:30:23.715Z",
"passed": true,
"duration": 2949,
"category": "llama-tools"
},
{
"test": "tool-multiply",
"prompt": "Use the multiply tool to calculate 8 times 9. Return the result.",
"result": [
"{\"name\": \"multiply\", \"arguments\": {\"a\": 8, \"b\": 9}}\n</tool_call>\n"
],
"expected": "72",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:30:25.848Z",
"passed": true,
"duration": 2127,
"category": "llama-tools"
},
{
"test": "tool-weather",
"prompt": "What is the weather like in Berlin? Use the get_weather tool.",
"result": [
"{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Berlin\"}}\n</tool_call>"
],
"expected": "berlin",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:30:27.436Z",
"passed": true,
"duration": 1582,
"category": "llama-tools"
},
{
"test": "tool-selection",
"prompt": "Use the add tool to calculate 100 plus 200. Do not use any other tool.",
"result": [
"{{\"name\": \"add\", \"arguments\": {\"a\": 100, \"b\": 200}}}\n</tool_call>"
],
"expected": "300",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:30:31.362Z",
"passed": false,
"duration": 3920,
"reason": "Expected 300, but got {{\"name\": \"add\", \"arguments\": {\"a\": 100, \"b\": 200}}}\n</tool_call>",
"category": "llama-tools"
},
{
"test": "tool-add",
"prompt": "Use the add tool to calculate 15 plus 27. Return the result.",
"result": [
"The sum of 15 and 27 is 42."
],
"expected": "42",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:39:17.199Z",
"passed": true,
"duration": 12126,
"category": "llama-tools"
},
{
"test": "tool-multiply",
"prompt": "Use the multiply tool to calculate 8 times 9. Return the result.",
"result": [
"The result of 8 times 9 is 72."
],
"expected": "72",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:39:27.885Z",
"passed": true,
"duration": 10678,
"category": "llama-tools"
},
{
"test": "tool-weather",
"prompt": "What is the weather like in Berlin? Use the get_weather tool.",
"result": [
"The current temperature in Berlin is 18 degrees Celsius and the weather condition is cloudy."
],
"expected": "berlin",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:39:38.036Z",
"passed": true,
"duration": 10144,
"category": "llama-tools"
},
{
"test": "tool-selection",
"prompt": "Use the add tool to calculate 100 plus 200. Do not use any other tool.",
"result": [
"The sum of 100 and 200 is 300."
],
"expected": "300",
"model": "default",
"router": "default",
"timestamp": "2026-03-19T17:39:53.564Z",
"passed": true,
"duration": 15522,
"category": "llama-tools"
}
],
"highscores": [
{
"test": "tool-add",
"rankings": [
{
"model": "default",
"duration": 12126,
"duration_secs": 12.126
}
]
},
{
"test": "tool-multiply",
"rankings": [
{
"model": "default",
"duration": 10678,
"duration_secs": 10.678
}
]
},
{
"test": "tool-weather",
"rankings": [
{
"model": "default",
"duration": 10144,
"duration_secs": 10.144
}
]
},
{
"test": "tool-selection",
"rankings": [
{
"model": "default",
"duration": 15522,
"duration_secs": 15.522
}
]
}
],
"lastUpdated": "2026-03-19T17:39:53.564Z"
}