325 lines
10 KiB
JavaScript
325 lines
10 KiB
JavaScript
/**
 * orchestrator/test-ipc-classifier.mjs
 *
 * IPC + local llama: one kbot-ai call — semantic distance from anchor "machine workshop"
 * to every business label (JobViewer.tsx ~205). Output is a single JSON array (+ meta).
 *
 * Run: npm run test:ipc:classifier
 *
 * Env:
 *   KBOT_IPC_LLAMA_AUTOSTART   — 0 to skip spawning run-7b.sh
 *   KBOT_CLASSIFIER_LIMIT      — max labels in the batch (default: all)
 *   KBOT_CLASSIFIER_TIMEOUT_MS — timeout in ms for the single batched kbot-ai call (default: 300000)
 */
|
|
|
|
import { spawn } from 'node:child_process';
|
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
import { dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import net from 'node:net';
|
|
import { existsSync, unlinkSync } from 'node:fs';
|
|
|
|
import {
|
|
distExePath,
|
|
platform,
|
|
uds,
|
|
timeouts,
|
|
kbotAiPayloadLlamaLocal,
|
|
ensureLlamaLocalServer,
|
|
llama,
|
|
} from './presets.js';
|
|
import {
|
|
createAssert,
|
|
payloadObj,
|
|
llamaAutostartEnabled,
|
|
createIpcClient,
|
|
pipeWorkerStderr,
|
|
} from './test-commons.js';
|
|
import { reportFilePathWithExt, timeParts } from './reports.js';
|
|
|
|
// ES modules have no built-in __filename/__dirname; derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Path handed to spawn() in run(); presumably the built kbot binary — see presets.js.
const EXE = distExePath(__dirname);

// Shared assertion tracker: run() reads stats.passed / stats.failed for the summary.
const stats = createAssert();
const assert = stats.assert;
|
|
|
|
/**
 * Candidate business-type labels scored against {@link ANCHOR}.
 * Order matters: the prompt numbers the labels in this order and the
 * `distances` report keeps it.
 *
 * @see packages/kbot/.../JobViewer.tsx — business type options
 * @type {string[]}
 */
export const JOB_VIEWER_MACHINE_LABELS = [
  '3D printing service',
  'Drafting service',
  'Engraver',
  'Furniture maker',
  'Industrial engineer',
  'Industrial equipment supplier',
  'Laser cutting service',
  'Machine construction',
  'Machine repair service',
  'Machine shop',
  'Machine workshop',
  'Machinery parts manufacturer',
  'Machining manufacturer',
  'Manufacturer',
  'Mechanic',
  'Mechanical engineer',
  'Mechanical plant',
  'Metal fabricator',
  'Metal heat treating service',
  'Metal machinery supplier',
  'Metal working shop',
  'Metal workshop',
  'Novelty store',
  'Plywood supplier',
  'Sign shop',
  'Tool manufacturer',
  'Trophy shop',
];

/** Reference business type every label is compared with. */
const ANCHOR = 'machine workshop';
|
|
|
|
/**
 * Build one prompt asking the model to score every label against an anchor
 * and to answer with a JSON array only.
 *
 * Each label is rendered as a numbered, JSON-quoted line so the model can copy
 * it back character-for-character.
 *
 * @param {string[]} labels — candidate business labels, in the order the answer should keep
 * @param {string} [anchor=ANCHOR] — anchor business type; defaults to the module-level anchor
 * @returns {string} the full prompt text
 */
function classifierBatchPrompt(labels, anchor = ANCHOR) {
  const numbered = labels.map((label, index) => `${index + 1}. ${JSON.stringify(label)}`).join('\n');
  return `You classify business types against one anchor. Output ONLY a JSON array, no markdown fences, no commentary.

Rules for each element:
- Use shape: {"label": <exact string from the list below>, "distance": <number>}
- "distance" is semantic distance from 0 (same as anchor or direct synonym) to 10 (unrelated). One decimal allowed.
- Include EXACTLY one object per line item below, in the SAME ORDER, with "label" copied character-for-character from the list.

Anchor business type: ${anchor}

Candidate labels (in order):
${numbered}

Output: one JSON array, e.g. [{"label":"...","distance":2.5},...]`;
}
|
|
|
|
/**
 * Pull a JSON array out of free-form model text.
 *
 * Strategy:
 *  1. Strip one leading/trailing markdown code fence and parse the whole text.
 *  2. If that does not yield an array (invalid JSON, or valid JSON of another
 *     type such as a wrapping object), parse the span from the first `[` to
 *     the last `]`.
 *
 * @param {unknown} text — raw model output
 * @returns {unknown[] | null} the parsed array, or null when none can be recovered
 */
function extractJsonArray(text) {
  if (!text || typeof text !== 'string') return null;

  /** Parse `candidate`; return it only when it is a JSON array, else null. */
  const parseArray = (candidate) => {
    try {
      const value = JSON.parse(candidate);
      return Array.isArray(value) ? value : null;
    } catch {
      return null;
    }
  };

  const stripped = text.trim().replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/u, '').trim();

  const whole = parseArray(stripped);
  if (whole !== null) return whole;

  // Fall back to the outermost bracket span. This also covers valid JSON that
  // merely wraps the array, e.g. {"items":[...]}, which used to return null.
  const start = stripped.indexOf('[');
  const end = stripped.lastIndexOf(']');
  if (start < 0 || end <= start) return null;
  return parseArray(stripped.slice(start, end + 1));
}
|
|
|
|
/**
 * Align the model's array with the expected labels.
 *
 * Entries are kept only when they are objects whose `label` is one of the
 * expected strings and whose `distance` is a finite number (numeric strings
 * are run through parseFloat first). When a label appears more than once the
 * last usable entry wins.
 *
 * @param {unknown[]} arr — parsed model output
 * @param {string[]} expectedLabels — ordered
 * @returns {{ distances: { label: string, distance: number | null }[], missing: string[] }}
 *   `distances` follows `expectedLabels` order (null where nothing usable was
 *   returned); `missing` lists the labels that ended up null.
 */
function normalizeBatchArray(arr, expectedLabels) {
  const known = new Set(expectedLabels);
  const found = new Map();

  for (const entry of arr) {
    if (!entry || typeof entry !== 'object') continue;

    const { label, distance } = entry;
    const value = typeof distance === 'string' ? Number.parseFloat(distance) : distance;
    const usable =
      typeof label === 'string' &&
      typeof value === 'number' &&
      Number.isFinite(value) &&
      known.has(label);

    if (usable) found.set(label, value);
  }

  const distances = [];
  const missing = [];
  for (const label of expectedLabels) {
    if (found.has(label)) {
      distances.push({ label, distance: found.get(label) });
    } else {
      distances.push({ label, distance: null });
      missing.push(label);
    }
  }

  return { distances, missing };
}
|
|
|
|
/**
 * Timeout for the single batched kbot-ai call, in milliseconds.
 *
 * Reads KBOT_CLASSIFIER_TIMEOUT_MS and parses it as a base-10 integer; an
 * unset, empty, non-numeric or non-positive value yields the 300000 ms default.
 *
 * @returns {number} positive timeout in ms
 */
function batchTimeoutMs() {
  const FALLBACK_MS = 300_000;
  const { KBOT_CLASSIFIER_TIMEOUT_MS: configured } = process.env;

  if (configured === undefined || configured === '') {
    return FALLBACK_MS;
  }

  const parsed = Number.parseInt(configured, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return FALLBACK_MS;
  }
  return parsed;
}
|
|
|
|
/**
 * Print a progress line every 15 s while a long LLM call is pending, so the
 * run never looks hung.
 *
 * The reported elapsed time is derived from the tick count, not a clock.
 *
 * @param {Promise<T>} promise — the pending call
 * @param {number} ipcTimeoutMs — IPC deadline in ms; only shown in the log line
 * @returns {Promise<T>} settles like `promise`, after the timer has been stopped
 * @template T
 */
function withHeartbeat(promise, ipcTimeoutMs) {
  const intervalMs = 15_000;
  let ticks = 0;

  const reportProgress = () => {
    ticks += 1;
    const sec = (ticks * intervalMs) / 1000;
    console.log(
      ` … still waiting on llama (batch is large; ${sec}s elapsed, IPC deadline ${Math.round(ipcTimeoutMs / 1000)}s)…`
    );
  };

  const timer = setInterval(reportProgress, intervalMs);
  const stopTimer = () => clearInterval(timer);

  // Stop the timer on success and on failure alike.
  return promise.finally(stopTimer);
}
|
|
|
|
/**
 * End-to-end classifier run.
 *
 * Steps:
 *  1. Check the binary exists and make sure the local llama server is up.
 *  2. Spawn the worker with `worker --uds <arg>` and connect to it, retrying.
 *  3. Send one `kbot-ai` request carrying the batched prompt and parse the
 *     returned text into per-label distances.
 *  4. Ask the worker to shut down and check its exit code.
 *  5. Write the full report plus an array-only artifact, print a summary, and
 *     exit with code 1 when any assertion failed (0 otherwise).
 *
 * If anything between connect and shutdown throws, the socket is destroyed and
 * a still-running worker is killed before the error is rethrown, so a failed
 * run does not leave the child process behind.
 *
 * @returns {Promise<void>} in practice the process exits before this resolves
 */
async function run() {
  const startedAt = new Date().toISOString();
  console.log('\n📐 IPC classifier (llama @ :8888) — one batch, distance vs "machine workshop"\n');

  if (!existsSync(EXE)) {
    console.error(`❌ Binary not found at ${EXE}`);
    process.exit(1);
  }

  await ensureLlamaLocalServer({
    autostart: llamaAutostartEnabled(),
    startTimeoutMs: timeouts.llamaServerStart,
  });

  // Optional cap on the batch size; invalid or non-positive values are ignored.
  const limitRaw = process.env.KBOT_CLASSIFIER_LIMIT;
  let labels = [...JOB_VIEWER_MACHINE_LABELS];
  if (limitRaw !== undefined && limitRaw !== '') {
    const lim = Number.parseInt(limitRaw, 10);
    if (Number.isFinite(lim) && lim > 0) labels = labels.slice(0, lim);
  }

  // On non-Windows platforms remove a leftover file at the UDS path before the
  // worker starts (presumably a stale socket from an earlier run).
  const CPP_UDS_ARG = uds.workerArg();
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }

  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);

  let socket;
  let rawText = null;
  let distances = [];
  let parseError = null;
  let missing = [];

  try {
    // The worker needs time to start listening: retry the connect a bounded number of times.
    for (let i = 0; i < timeouts.connectAttempts; i++) {
      try {
        await new Promise((res, rej) => {
          socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
          socket.once('connect', res);
          socket.once('error', rej);
        });
        break;
      } catch (e) {
        // Release the failed attempt's socket before retrying or giving up.
        socket?.destroy();
        if (i === timeouts.connectAttempts - 1) throw e;
        await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
      }
    }

    const ipc = createIpcClient(socket);
    ipc.attach();
    await ipc.readyPromise;

    // The IPC wait gets 60 s on top of the HTTP timeout passed as llm_timeout_ms.
    const tmo = batchTimeoutMs();
    const ipcDeadlineMs = tmo + 60_000;
    console.log(` Single kbot-ai batch: ${labels.length} labels`);
    console.log(` liboai HTTP timeout: ${tmo} ms (llm_timeout_ms) — rebuild kbot if this was stuck at ~30s before`);
    console.log(` IPC wait deadline: ${ipcDeadlineMs} ms (HTTP + margin)`);
    console.log(` (Large batches can take many minutes; heartbeat every 15s…)\n`);

    const payload = {
      ...kbotAiPayloadLlamaLocal({ prompt: classifierBatchPrompt(labels) }),
      llm_timeout_ms: tmo,
    };
    const msg = await withHeartbeat(
      ipc.request({ type: 'kbot-ai', payload }, ipcDeadlineMs),
      ipcDeadlineMs
    );
    const p = payloadObj(msg);

    if (p?.status === 'success' && typeof p?.text === 'string') {
      rawText = p.text;
      const arr = extractJsonArray(p.text);
      if (arr) {
        const norm = normalizeBatchArray(arr, labels);
        distances = norm.distances;
        missing = norm.missing;
        if (missing.length === 0) {
          assert(true, 'batch JSON array: all labels have distance');
        } else {
          assert(false, `batch array complete (${missing.length} missing labels)`);
          parseError = `missing: ${missing.join('; ')}`;
        }
      } else {
        assert(false, 'batch response parses as JSON array');
        parseError = 'could not parse JSON array from model text';
      }
    } else {
      assert(false, 'kbot-ai success');
      parseError = p?.error ?? 'not success';
    }

    const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
    assert(shutdownRes.type === 'shutdown_ack', 'shutdown ack');
    // Give the worker a moment to exit before its exit code is checked.
    await new Promise((r) => setTimeout(r, timeouts.postShutdownMs));
    socket.destroy();
    assert(workerProc.exitCode === 0, 'worker exit 0');
  } catch (err) {
    // Failure path: do not leave the socket open or the worker running.
    socket?.destroy();
    if (workerProc.exitCode === null && workerProc.signalCode === null) {
      workerProc.kill();
    }
    throw err;
  }

  const finishedAt = new Date().toISOString();

  /** Final array: sorted by distance (nulls last). */
  const byDistance = [...distances].sort((a, b) => {
    if (a.distance == null && b.distance == null) return 0;
    if (a.distance == null) return 1;
    if (b.distance == null) return -1;
    return a.distance - b.distance;
  });

  const out = {
    meta: {
      anchor: ANCHOR,
      source: 'JobViewer.tsx:205',
      batch: true,
      llama: {
        baseURL: llama.baseURL,
        port: llama.port,
        router: llama.router,
        model: llama.model,
      },
      labelCount: labels.length,
      startedAt,
      finishedAt,
    },
    /** Ordered like input labels — primary result. */
    distances,
    /** Same rows sorted by ascending distance. */
    byDistance,
    rawText,
    parseError,
    summary: {
      passed: stats.passed,
      failed: stats.failed,
      ok: stats.failed === 0,
    },
  };

  const now = new Date();
  const cwd = process.cwd();
  const jsonPath = reportFilePathWithExt('test-ipc-classifier', '.json', { cwd, now });
  /** Array-only artifact (parse → JSON array). */
  const arrayPath = reportFilePathWithExt('test-ipc-classifier-distances', '.json', { cwd, now });
  await mkdir(dirname(jsonPath), { recursive: true });
  await writeFile(jsonPath, JSON.stringify(out, null, 2), 'utf8');
  await writeFile(arrayPath, `${JSON.stringify(distances, null, 2)}\n`, 'utf8');

  const { label: timeLabel } = timeParts(now);
  console.log(`\n────────────────────────────────`);
  console.log(` Passed: ${stats.passed} Failed: ${stats.failed}`);
  console.log(` Full report: ${jsonPath}`);
  console.log(` Array only: ${arrayPath}`);
  console.log(` Run id: test-ipc-classifier::${timeLabel}`);
  console.log(` distances.length: ${distances.length}`);
  console.log(`────────────────────────────────\n`);

  process.exit(stats.failed > 0 ? 1 : 0);
}

// Any unexpected failure is reported and turned into a non-zero exit code.
run().catch((err) => {
  console.error('Classifier error:', err);
  process.exit(1);
});
|