From 1201cfac47935437e21303e6912e6bf63d2745be Mon Sep 17 00:00:00 2001 From: Babayaga Date: Mon, 30 Mar 2026 12:47:18 +0200 Subject: [PATCH] kbot cpp testing - classifiers --- .../cpp/orchestrator/test-ipc-classifier.mjs | 324 ++++++++++++++++++ packages/kbot/cpp/package.json | 1 + packages/kbot/cpp/packages/kbot/kbot.h | 4 +- .../kbot/cpp/packages/kbot/llm_client.cpp | 8 +- packages/kbot/cpp/packages/kbot/llm_client.h | 1 + packages/kbot/cpp/src/cmd_kbot.cpp | 17 + 6 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 packages/kbot/cpp/orchestrator/test-ipc-classifier.mjs diff --git a/packages/kbot/cpp/orchestrator/test-ipc-classifier.mjs b/packages/kbot/cpp/orchestrator/test-ipc-classifier.mjs new file mode 100644 index 00000000..667d5de6 --- /dev/null +++ b/packages/kbot/cpp/orchestrator/test-ipc-classifier.mjs @@ -0,0 +1,324 @@ +/** + * orchestrator/test-ipc-classifier.mjs + * + * IPC + local llama: one kbot-ai call — semantic distance from anchor "machine workshop" + * to every business label (JobViewer.tsx ~205). Output is a single JSON array (+ meta). + * + * Run: npm run test:ipc:classifier + * + * Env: + * KBOT_IPC_LLAMA_AUTOSTART — 0 to skip spawning run-7b.sh + * KBOT_CLASSIFIER_LIMIT — max labels in the batch (default: all) + * KBOT_CLASSIFIER_TIMEOUT_MS — single batched kbot-ai call (default: 300000) + */ + +import { spawn } from 'node:child_process'; +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import net from 'node:net'; +import { existsSync, unlinkSync } from 'node:fs'; + +import { + distExePath, + platform, + uds, + timeouts, + kbotAiPayloadLlamaLocal, + ensureLlamaLocalServer, + llama, +} from './presets.js'; +import { + createAssert, + payloadObj, + llamaAutostartEnabled, + createIpcClient, + pipeWorkerStderr, +} from './test-commons.js'; +import { reportFilePathWithExt, timeParts } from './reports.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const EXE = distExePath(__dirname); +const stats = createAssert(); +const { assert } = stats; + +/** @see packages/kbot/.../JobViewer.tsx — business type options */ +export const JOB_VIEWER_MACHINE_LABELS = [ + '3D printing service', + 'Drafting service', + 'Engraver', + 'Furniture maker', + 'Industrial engineer', + 'Industrial equipment supplier', + 'Laser cutting service', + 'Machine construction', + 'Machine repair service', + 'Machine shop', + 'Machine workshop', + 'Machinery parts manufacturer', + 'Machining manufacturer', + 'Manufacturer', + 'Mechanic', + 'Mechanical engineer', + 'Mechanical plant', + 'Metal fabricator', + 'Metal heat treating service', + 'Metal machinery supplier', + 'Metal working shop', + 'Metal workshop', + 'Novelty store', + 'Plywood supplier', + 'Sign shop', + 'Tool manufacturer', + 'Trophy shop', +]; + +const ANCHOR = 'machine workshop'; + +/** Build one prompt: model returns a JSON array only. */ +function classifierBatchPrompt(labels) { + const numbered = labels.map((l, i) => `${i + 1}. ${JSON.stringify(l)}`).join('\n'); + return `You classify business types against one anchor. Output ONLY a JSON array, no markdown fences, no commentary. + +Rules for each element: +- Use shape: {"label": , "distance": } +- "distance" is semantic distance from 0 (same as anchor or direct synonym) to 10 (unrelated). One decimal allowed. +- Include EXACTLY one object per line item below, in the SAME ORDER, with "label" copied character-for-character from the list. + +Anchor business type: ${ANCHOR} + +Candidate labels (in order): +${numbered} + +Output: one JSON array, e.g. [{"label":"...","distance":2.5},...]`; +} + +function extractJsonArray(text) { + if (!text || typeof text !== 'string') return null; + let s = text.trim().replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/u, '').trim(); + try { + const v = JSON.parse(s); + return Array.isArray(v) ? v : null; + } catch { + /* fall through */ + } + const i = s.indexOf('['); + const j = s.lastIndexOf(']'); + if (i < 0 || j <= i) return null; + try { + const v = JSON.parse(s.slice(i, j + 1)); + return Array.isArray(v) ? v : null; + } catch { + return null; + } +} + +/** + * @param {unknown[]} arr + * @param {string[]} expectedLabels — ordered + */ +function normalizeBatchArray(arr, expectedLabels) { + const expectedSet = new Set(expectedLabels); + const byLabel = new Map(); + + for (const item of arr) { + if (!item || typeof item !== 'object') continue; + const label = item.label; + let d = item.distance; + if (typeof d === 'string') d = parseFloat(d); + if (typeof label !== 'string' || typeof d !== 'number' || !Number.isFinite(d)) continue; + if (!expectedSet.has(label)) continue; + byLabel.set(label, d); + } + + const distances = expectedLabels.map((label) => ({ + label, + distance: byLabel.has(label) ? byLabel.get(label) : null, + })); + + const missing = distances.filter((r) => r.distance == null).map((r) => r.label); + return { distances, missing }; +} + +function batchTimeoutMs() { + const raw = process.env.KBOT_CLASSIFIER_TIMEOUT_MS; + if (raw === undefined || raw === '') return 300_000; + const n = Number.parseInt(raw, 10); + return Number.isFinite(n) && n > 0 ? n : 300_000; +} + +/** Log progress while awaiting a long LLM call (no silent hang). */ +function withHeartbeat(promise, ipcTimeoutMs) { + const every = 15_000; + let n = 0; + const id = setInterval(() => { + n += 1; + const sec = (n * every) / 1000; + console.log( + ` … still waiting on llama (batch is large; ${sec}s elapsed, IPC deadline ${Math.round(ipcTimeoutMs / 1000)}s)…` + ); + }, every); + return promise.finally(() => clearInterval(id)); +} + +async function run() { + const startedAt = new Date().toISOString(); + console.log('\n📐 IPC classifier (llama @ :8888) — one batch, distance vs "machine workshop"\n'); + + if (!existsSync(EXE)) { + console.error(`❌ Binary not found at ${EXE}`); + process.exit(1); + } + + await ensureLlamaLocalServer({ + autostart: llamaAutostartEnabled(), + startTimeoutMs: timeouts.llamaServerStart, + }); + + const limitRaw = process.env.KBOT_CLASSIFIER_LIMIT; + let labels = [...JOB_VIEWER_MACHINE_LABELS]; + if (limitRaw !== undefined && limitRaw !== '') { + const lim = Number.parseInt(limitRaw, 10); + if (Number.isFinite(lim) && lim > 0) labels = labels.slice(0, lim); + } + + const CPP_UDS_ARG = uds.workerArg(); + if (!platform.isWin && existsSync(CPP_UDS_ARG)) { + unlinkSync(CPP_UDS_ARG); + } + + const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' }); + pipeWorkerStderr(workerProc); + + let socket; + for (let i = 0; i < timeouts.connectAttempts; i++) { + try { + await new Promise((res, rej) => { + socket = net.connect(uds.connectOpts(CPP_UDS_ARG)); + socket.once('connect', res); + socket.once('error', rej); + }); + break; + } catch (e) { + if (i === timeouts.connectAttempts - 1) throw e; + await new Promise((r) => setTimeout(r, timeouts.connectRetryMs)); + } + } + + const ipc = createIpcClient(socket); + ipc.attach(); + await ipc.readyPromise; + + const tmo = batchTimeoutMs(); + const ipcDeadlineMs = tmo + 60_000; + console.log(` Single kbot-ai batch: ${labels.length} labels`); + console.log(` liboai HTTP timeout: ${tmo} ms (llm_timeout_ms) — rebuild kbot if this was stuck at ~30s before`); + console.log(` IPC wait deadline: ${ipcDeadlineMs} ms (HTTP + margin)`); + console.log(` (Large batches can take many minutes; heartbeat every 15s…)\n`); + + const payload = { + ...kbotAiPayloadLlamaLocal({ prompt: classifierBatchPrompt(labels) }), + llm_timeout_ms: tmo, + }; + const msg = await withHeartbeat( + ipc.request({ type: 'kbot-ai', payload }, ipcDeadlineMs), + ipcDeadlineMs + ); + const p = payloadObj(msg); + + let rawText = null; + let distances = []; + let parseError = null; + let missing = []; + + if (p?.status === 'success' && typeof p?.text === 'string') { + rawText = p.text; + const arr = extractJsonArray(p.text); + if (arr) { + const norm = normalizeBatchArray(arr, labels); + distances = norm.distances; + missing = norm.missing; + if (missing.length === 0) { + assert(true, 'batch JSON array: all labels have distance'); + } else { + assert(false, `batch array complete (${missing.length} missing labels)`); + parseError = `missing: ${missing.join('; ')}`; + } + } else { + assert(false, 'batch response parses as JSON array'); + parseError = 'could not parse JSON array from model text'; + } + } else { + assert(false, 'kbot-ai success'); + parseError = p?.error ?? 'not success'; + } + + const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault); + assert(shutdownRes.type === 'shutdown_ack', 'shutdown ack'); + await new Promise((r) => setTimeout(r, timeouts.postShutdownMs)); + socket.destroy(); + assert(workerProc.exitCode === 0, 'worker exit 0'); + + const finishedAt = new Date().toISOString(); + + /** Final array: sorted by distance (nulls last). */ + const byDistance = [...distances].sort((a, b) => { + if (a.distance == null && b.distance == null) return 0; + if (a.distance == null) return 1; + if (b.distance == null) return -1; + return a.distance - b.distance; + }); + + const out = { + meta: { + anchor: ANCHOR, + source: 'JobViewer.tsx:205', + batch: true, + llama: { + baseURL: llama.baseURL, + port: llama.port, + router: llama.router, + model: llama.model, + }, + labelCount: labels.length, + startedAt, + finishedAt, + }, + /** Ordered like input labels — primary result. */ + distances, + /** Same rows sorted by ascending distance. */ + byDistance, + rawText, + parseError: parseError ?? null, + summary: { + passed: stats.passed, + failed: stats.failed, + ok: stats.failed === 0, + }, + }; + + const now = new Date(); + const cwd = process.cwd(); + const jsonPath = reportFilePathWithExt('test-ipc-classifier', '.json', { cwd, now }); + /** Array-only artifact (parse → JSON array). */ + const arrayPath = reportFilePathWithExt('test-ipc-classifier-distances', '.json', { cwd, now }); + await mkdir(dirname(jsonPath), { recursive: true }); + await writeFile(jsonPath, JSON.stringify(out, null, 2), 'utf8'); + await writeFile(arrayPath, `${JSON.stringify(distances, null, 2)}\n`, 'utf8'); + + const { label: timeLabel } = timeParts(now); + console.log(`\n────────────────────────────────`); + console.log(` Passed: ${stats.passed} Failed: ${stats.failed}`); + console.log(` Full report: ${jsonPath}`); + console.log(` Array only: ${arrayPath}`); + console.log(` Run id: test-ipc-classifier::${timeLabel}`); + console.log(` distances.length: ${distances.length}`); + console.log(`────────────────────────────────\n`); + + process.exit(stats.failed > 0 ? 1 : 0); +} + +run().catch((err) => { + console.error('Classifier error:', err); + process.exit(1); +}); diff --git a/packages/kbot/cpp/package.json b/packages/kbot/cpp/package.json index f76e68bd..cff773eb 100644 --- a/packages/kbot/cpp/package.json +++ b/packages/kbot/cpp/package.json @@ -22,6 +22,7 @@ "kbot:ai": ".\\dist\\kbot.exe kbot ai --prompt \"hi\"", "kbot:run": ".\\dist\\kbot.exe kbot run --list", "test:ipc": "node orchestrator/test-ipc.mjs", + "test:ipc:classifier": "node orchestrator/test-ipc-classifier.mjs", "test:html": "cmake --preset release && cmake --build --preset release --target test_html && .\\dist\\test_html.exe" }, "repository": { diff --git a/packages/kbot/cpp/packages/kbot/kbot.h b/packages/kbot/cpp/packages/kbot/kbot.h index b4c3c0f2..6d78374a 100644 --- a/packages/kbot/cpp/packages/kbot/kbot.h +++ b/packages/kbot/cpp/packages/kbot/kbot.h @@ -41,7 +41,9 @@ struct KBotOptions { std::string query; bool dry_run = false; std::string format; - + /** liboai HTTP timeout (ms). 0 = library default (~30s). IPC may set for long prompts. */ + int llm_timeout_ms = 0; + // Internal std::string job_id; std::shared_ptr> cancel_token; diff --git a/packages/kbot/cpp/packages/kbot/llm_client.cpp b/packages/kbot/cpp/packages/kbot/llm_client.cpp index 7ba7fe79..ed174b93 100644 --- a/packages/kbot/cpp/packages/kbot/llm_client.cpp +++ b/packages/kbot/cpp/packages/kbot/llm_client.cpp @@ -8,7 +8,7 @@ namespace polymech { namespace kbot { LLMClient::LLMClient(const KBotOptions& opts) - : api_key_(opts.api_key), model_(opts.model), router_(opts.router) { + : api_key_(opts.api_key), model_(opts.model), router_(opts.router), llm_timeout_ms_(opts.llm_timeout_ms) { // Set default base_url_ according to client.ts mappings if (opts.base_url.empty()) { @@ -62,9 +62,15 @@ LLMResponse LLMClient::execute_chat(const std::string& prompt) { return res; } + if (llm_timeout_ms_ > 0) { + oai_impl.auth.SetMaxTimeout(llm_timeout_ms_); + logger::info("LLMClient: HTTP timeout set to " + std::to_string(llm_timeout_ms_) + " ms"); + } + std::string target_model = model_.empty() ? "gpt-4o" : model_; logger::debug("LLMClient::execute_chat: Target model: " + target_model); + logger::info("LLMClient: calling ChatCompletion (prompt chars=" + std::to_string(prompt.size()) + ")"); logger::debug("LLMClient::execute_chat: Init Conversation"); liboai::Conversation convo; convo.AddUserData(prompt); diff --git a/packages/kbot/cpp/packages/kbot/llm_client.h b/packages/kbot/cpp/packages/kbot/llm_client.h index 392fd71c..feedb63c 100644 --- a/packages/kbot/cpp/packages/kbot/llm_client.h +++ b/packages/kbot/cpp/packages/kbot/llm_client.h @@ -26,6 +26,7 @@ private: std::string model_; std::string router_; std::string base_url_; + int llm_timeout_ms_ = 0; }; } // namespace kbot diff --git a/packages/kbot/cpp/src/cmd_kbot.cpp b/packages/kbot/cpp/src/cmd_kbot.cpp index 9d12c4c6..9b7c863f 100644 --- a/packages/kbot/cpp/src/cmd_kbot.cpp +++ b/packages/kbot/cpp/src/cmd_kbot.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace polymech { @@ -104,6 +106,18 @@ int run_kbot_ai_ipc(const std::string& payload, const std::string& jobId, const if (doc.HasMember("model") && doc["model"].IsString()) opts.model = doc["model"].GetString(); if (doc.HasMember("base_url") && doc["base_url"].IsString()) opts.base_url = doc["base_url"].GetString(); else if (doc.HasMember("baseURL") && doc["baseURL"].IsString()) opts.base_url = doc["baseURL"].GetString(); + /* Single numeric path: RapidJSON may store JSON integers as int, uint64, or double — GetDouble() is consistent. */ + if (doc.HasMember("llm_timeout_ms") && doc["llm_timeout_ms"].IsNumber() && !doc["llm_timeout_ms"].IsNull()) { + const rapidjson::Value& v = doc["llm_timeout_ms"]; + const double d = v.GetDouble(); + if (d > 0.0 && std::isfinite(d)) { + const long long ms = static_cast(std::llround(d)); + const int cap = std::numeric_limits::max(); + if (ms > 0 && ms <= static_cast(cap)) { + opts.llm_timeout_ms = static_cast(ms); + } + } + } } if (opts.api_key.empty()) { @@ -114,6 +128,9 @@ int run_kbot_ai_ipc(const std::string& payload, const std::string& jobId, const } logger::info("Receiving AI task over IPC... job: " + jobId); + if (opts.llm_timeout_ms > 0) { + logger::info("kbot-ai IPC: llm_timeout_ms=" + std::to_string(opts.llm_timeout_ms)); + } return kbot::run_kbot_ai_pipeline(opts, cb); }