media cpp 1/3

This commit is contained in:
lovebird 2026-04-12 22:38:43 +02:00
parent 426b66828f
commit e4b4ff4bea
259 changed files with 26223 additions and 0 deletions

View File

@ -14,6 +14,9 @@
},
{
"path": "../xblox"
},
{
"path": "../media"
}
],
"settings": {}

37
packages/media/cpp/.gitignore vendored Normal file
View File

@ -0,0 +1,37 @@
# Build output
/build/
# Compiled objects
*.o
*.obj
*.exe
*.out
*.app
# CMake generated
CMakeCache.txt
CMakeFiles/
cmake_install.cmake
Makefile
# IDE / Editor
.vscode/
.idea/
*.swp
*.swo
*~
.env*
# OS
.DS_Store
Thumbs.db
# Logs
*.log
cache/
config/postgres.toml
dist
# Orchestrator reports (cwd/tests/*)
tests/*.json
tests/*.md
src/cmd_grid*.cpp

View File

@ -0,0 +1,177 @@
cmake_minimum_required(VERSION 3.20)
project(kbot-cli
  VERSION 0.1.0
  DESCRIPTION "KBot C++ CLI"
  LANGUAGES CXX C
)

# Place all runtime artifacts (kbot / kbot.exe, test tools) in <this dir>/dist.
# Use CMAKE_CURRENT_SOURCE_DIR rather than CMAKE_SOURCE_DIR so the layout stays
# correct when this project is consumed via add_subdirectory() from a superbuild
# (with CMAKE_SOURCE_DIR the dist/ tree would land in the parent project's root).
set(KBOT_DIST_DIR "${CMAKE_CURRENT_SOURCE_DIR}/dist")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${KBOT_DIST_DIR}")
# Multi-config generators (MSVC, Ninja Multi-Config) consult the per-config
# variables; set them all so every configuration lands in the same dist/.
foreach(_kbot_cfg DEBUG RELEASE RELWITHDEBINFO MINSIZEREL)
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${_kbot_cfg} "${KBOT_DIST_DIR}")
endforeach()

# C++ standard: strict C++17 (-std=c++17, not gnu++17), required for all targets.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# Dependencies
# All third-party code is fetched at configure time via FetchContent; nothing
# needs to be pre-installed beyond a compiler, CMake >= 3.20 and git.
include(FetchContent)
FetchContent_Declare(
cli11
GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git
GIT_TAG v2.4.2
GIT_SHALLOW TRUE
)
FetchContent_Declare(
tomlplusplus
GIT_REPOSITORY https://github.com/marzer/tomlplusplus.git
GIT_TAG v3.4.0
GIT_SHALLOW TRUE
)
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v3.7.1
GIT_SHALLOW TRUE
)
# Standalone (non-Boost) ASIO; consumed header-only via target_include_directories below.
FetchContent_Declare(
asio
GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git
GIT_TAG asio-1-28-0
GIT_SHALLOW TRUE
)
FetchContent_Declare(
concurrentqueue
GIT_REPOSITORY https://github.com/cameron314/concurrentqueue.git
GIT_TAG v1.0.4
GIT_SHALLOW TRUE
)
FetchContent_Declare(
taskflow
GIT_REPOSITORY https://github.com/taskflow/taskflow.git
GIT_TAG v3.6.0
GIT_SHALLOW TRUE
)
FetchContent_Declare(
nlohmann_json
GIT_REPOSITORY https://github.com/nlohmann/json.git
GIT_TAG v3.11.3
GIT_SHALLOW TRUE
)
# NOTE(review): this declaration is never passed to FetchContent_MakeAvailable;
# the build uses add_subdirectory(packages/liboai/liboai) instead (see below).
# Confirm whether this block is dead and can be removed. Pinning the moving
# `main` branch is also not reproducible — prefer a tag or commit hash.
FetchContent_Declare(
liboai
GIT_REPOSITORY https://github.com/jasonduncan/liboai.git
GIT_TAG main
GIT_SHALLOW TRUE
SOURCE_SUBDIR liboai
)
# p-ranav/glob Unix-style glob / rglob (C++17); avoid upstream CMake (CPM + gtest).
# NOTE(review): `master` is a moving target — pin a commit hash for reproducible builds.
FetchContent_Declare(
pranav_glob
GIT_REPOSITORY https://github.com/p-ranav/glob.git
GIT_TAG master
GIT_SHALLOW TRUE
)
# Populate without add_subdirectory() so upstream's CMakeLists (CPM + gtest) is
# skipped; the library target is assembled by hand just below.
# NOTE(review): FetchContent_Populate(<name>) is deprecated as of CMake 3.30
# (policy CMP0169); revisit when the minimum CMake version is raised.
FetchContent_GetProperties(pranav_glob)
if(NOT pranav_glob_POPULATED)
FetchContent_Populate(pranav_glob)
endif()
add_library(pranav_glob STATIC ${pranav_glob_SOURCE_DIR}/source/glob.cpp)
target_include_directories(pranav_glob PUBLIC ${pranav_glob_SOURCE_DIR}/include)
target_compile_features(pranav_glob PUBLIC cxx_std_17)
if(MSVC)
target_compile_options(pranav_glob PRIVATE /permissive-)
endif()
# laserpants/dotenv-cpp load .env into the process environment (header-only).
FetchContent_Declare(
laserpants_dotenv
GIT_REPOSITORY https://github.com/laserpants/dotenv-cpp.git
GIT_TAG master
GIT_SHALLOW TRUE
)
# Header-only: populate and wrap in an INTERFACE target (same Populate caveat as above).
FetchContent_GetProperties(laserpants_dotenv)
if(NOT laserpants_dotenv_POPULATED)
FetchContent_Populate(laserpants_dotenv)
endif()
add_library(laserpants_dotenv INTERFACE)
target_include_directories(laserpants_dotenv INTERFACE ${laserpants_dotenv_SOURCE_DIR}/include)
add_library(laserpants::dotenv ALIAS laserpants_dotenv)
# Disable dependency tests/examples before MakeAvailable configures them.
# NOTE(review): CACHE ... FORCE overrides any user-provided value for these options.
set(TF_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(TF_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
set(JSON_BuildTests OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(cli11 tomlplusplus Catch2 asio concurrentqueue taskflow nlohmann_json)
# Packages (in-tree libraries built as subdirectories)
add_subdirectory(packages/logger)
add_subdirectory(packages/html)
add_subdirectory(packages/postgres)
add_subdirectory(packages/http)
add_subdirectory(packages/json)
add_subdirectory(packages/polymech)
add_subdirectory(packages/ipc)
# Vendored liboai: the nested liboai/ directory holds the actual CMakeLists.
add_subdirectory(packages/liboai/liboai)
add_subdirectory(packages/kbot)
# Sources
add_executable(${PROJECT_NAME}
src/main.cpp
src/cmd_kbot.cpp
src/cmd_kbot_uds.cpp
src/sys_metrics.cpp
)
# Output file name is kbot.exe / kbot (not kbot-cli)
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "kbot")
target_link_libraries(${PROJECT_NAME} PRIVATE CLI11::CLI11 tomlplusplus::tomlplusplus logger html postgres http json polymech ipc kbot laserpants::dotenv)
# Header-only deps consumed straight from their FetchContent source trees.
target_include_directories(${PROJECT_NAME} PRIVATE
${asio_SOURCE_DIR}/asio/include
${taskflow_SOURCE_DIR}
${concurrentqueue_SOURCE_DIR}
)
# Define standalone ASIO (since it's not boost) — see the
# target_compile_definitions call after the if(WIN32) block.
if(WIN32)
# Enable math constants like M_PI
# NOTE(review): add_compile_definitions is directory-scoped — it applies to every
# target created after this point, including those under tests/. If that leak is
# intentional (tests also need M_PI / NOMINMAX), keep it; otherwise prefer
# target_compile_definitions(${PROJECT_NAME} PRIVATE ...).
add_compile_definitions(_USE_MATH_DEFINES)
add_compile_definitions(NOMINMAX)
endif()
target_compile_definitions(${PROJECT_NAME} PRIVATE ASIO_STANDALONE=1 ASIO_NO_DEPRECATED=1)
# Compiler warnings
if(MSVC)
target_compile_options(${PROJECT_NAME} PRIVATE /W4 /permissive-)
else()
target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wextra -Wpedantic)
endif()
# Install
# Library + headers: see packages/kbot/CMakeLists.txt and packages/ipc/CMakeLists.txt
# Optional DLL/so: configure with -DIPC_BUILD_SHARED=ON -DPOLYMECH_KBOT_SHARED=ON
install(TARGETS ${PROJECT_NAME}
RUNTIME DESTINATION bin
)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/cmd_kbot.h
DESTINATION include/polymech
)
# Tests
enable_testing()
add_subdirectory(tests)

View File

@ -0,0 +1,50 @@
{
"version": 6,
"cmakeMinimumRequired": {
"major": 3,
"minor": 20,
"patch": 0
},
"configurePresets": [
{
"name": "dev",
"displayName": "Dev (Debug)",
"binaryDir": "${sourceDir}/build/dev",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug"
}
},
{
"name": "release",
"displayName": "Release",
"binaryDir": "${sourceDir}/build/release",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "dev-dll",
"displayName": "Dev (Debug, ipc + kbot as DLL)",
"binaryDir": "${sourceDir}/build/dev-dll",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"IPC_BUILD_SHARED": "ON",
"POLYMECH_KBOT_SHARED": "ON"
}
}
],
"buildPresets": [
{
"name": "dev",
"configurePreset": "dev"
},
{
"name": "release",
"configurePreset": "release"
},
{
"name": "dev-dll",
"configurePreset": "dev-dll"
}
]
}

View File

@ -0,0 +1,9 @@
Copyright (c) <year> <owner> All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,266 @@
# kbot (C++)
CMake-based C++ toolchain for **kbot**: HTML/HTTP/JSON utilities, **length-prefixed JSON IPC**, optional **UDS/TCP worker** for Node orchestrators, and **LLM chat** via liboai (OpenRouter, OpenAI, Ollama-compatible servers, etc.). The main binary is **`kbot`** (`kbot.exe` on Windows).
## Prerequisites
| Requirement | Notes |
|-------------|--------|
| CMake | ≥ 3.20 |
| C++ compiler | C++17 (MSVC, GCC, Clang) |
| Git | For `FetchContent` dependencies |
| Node.js | Optional; for `orchestrator/` IPC integration tests (`npm run test:ipc`) |
On Windows, use a **Developer Command Prompt** or **PowerShell** with MSVC in `PATH`. **Git Bash** helps if you use shell scripts under `scripts/`.
## Quick start (build)
From this directory (`packages/kbot/cpp`):
```bash
npm install # optional; only needed if you use npm scripts
npm run build
```
Artifacts go to **`dist/`** (e.g. `dist/kbot.exe`, test tools).
Equivalent CMake:
```bash
cmake --preset dev
cmake --build --preset dev
```
### Presets
| Preset | Role |
|--------|------|
| `dev` | Debug, static `ipc` + `kbot` libraries (default) |
| `release` | Release build |
| `dev-dll` | Debug with **`ipc.dll`** and **`kbot.dll`** (`IPC_BUILD_SHARED=ON`, `POLYMECH_KBOT_SHARED=ON`) |
```bash
cmake --preset dev-dll
cmake --build --preset dev-dll --config Debug
```
Place **`ipc.dll`** and **`kbot.dll`** next to **`kbot.exe`** (or on `PATH`) when using the DLL configuration.
### npm scripts (reference)
| Script | Purpose |
|--------|---------|
| `npm run build` | Configure `dev` + build |
| `npm run build:release` | Release preset |
| `npm run test` | `ctest` in `build/dev` |
| `npm run clean` | Remove `build/` and `dist/` |
| `npm run test:ipc` | Node UDS IPC integration test |
| `npm run worker` | Run worker (stdio IPC) |
## Installation
Install the CLI and headers into a prefix (e.g. local tree or system root):
```bash
cmake --install build/dev --prefix "C:/path/to/install"
```
This installs:
- **`bin/kbot`** (runtime)
- **`include/polymech/`** — `kbot.h`, `llm_client.h`, `polymech_export.h`, `cmd_kbot.h`
- **`include/ipc/`** — `ipc.h`, `ipc_export.h`
- **`lib/`** — import libraries / archives (depending on static vs shared)
Library layout is defined in `packages/kbot/CMakeLists.txt` and `packages/ipc/CMakeLists.txt`.
### CMake options (libraries)
| Cache variable | Effect |
|----------------|--------|
| `IPC_BUILD_SHARED` | Build **`ipc`** as a shared library (`OFF` default) |
| `POLYMECH_KBOT_SHARED` | Build **`kbot`** as a shared library (`OFF` default) |
Static builds define `IPC_STATIC_BUILD` / `POLYMECH_STATIC_BUILD` for consumers via `INTERFACE` compile definitions. Shared builds export **`IPC_API`** / **`POLYMECH_API`** (see `ipc_export.h`, `polymech_export.h`).
## CLI overview
Top-level:
```bash
kbot --help
kbot -v,--version
kbot --log-level debug|info|warn|error
```
### Subcommands
| Command | Description |
|---------|-------------|
| `parse <html>` | Parse HTML and list elements |
| `select <html> <selector>` | CSS-select elements |
| `config <file>` | Load and print a TOML file |
| `fetch <url>` | HTTP GET |
| `json <input>` | Prettify JSON |
| `db [-c config] [table] [-l limit]` | Supabase / DB helper (uses `config/postgres.toml` by default) |
| `worker [--uds <arg>]` | IPC worker (see below) |
| `kbot ai ...` / `kbot run ...` | AI and run pipelines (`setup_cmd_kbot` — use `kbot kbot ai --help`) |
### Worker mode (`kbot worker`)
Used by orchestrators and tests.
- **Stdio IPC** (length-prefixed JSON frames on stdin/stdout):
```bash
kbot worker
```
- **UDS / TCP** (Windows: TCP port string, e.g. `4001`; Unix: socket path):
```bash
kbot worker --uds 4001
```
Framing: `[uint32 LE length][UTF-8 JSON object with id, type, payload]`. Message types include `ping`, `job`, `kbot-ai`, `kbot-run`, `shutdown`, etc. See `src/main.cpp` and `orchestrator/test-ipc.mjs`.
### `kbot kbot` (nested)
CLI for AI tasks and run configurations:
```bash
kbot kbot ai --help
kbot kbot run --help
```
Example:
```bash
kbot kbot ai --prompt "Hello" --config config/postgres.toml
```
API keys are typically resolved from **`config/postgres.toml`** (`[services]`).
## Using in other CMake projects
There is no single `find_package(kbot)` config yet. Practical options:
### 1. Same repository / superbuild (recommended)
Add this repo's `cpp` tree as a subdirectory from a parent `CMakeLists.txt` so `FetchContent` and internal targets (`logger`, `json`, `ipc`, `oai`, `kbot`, …) resolve once. Then:
```cmake
target_link_libraries(your_app PRIVATE ipc kbot)
```
`kbot` pulls in `logger`, `json`, `liboai` (`oai`) per `packages/kbot/CMakeLists.txt`.
### 2. Install prefix + explicit `IMPORTED` libraries
After `cmake --install`, link import libraries under `lib/` and add `include/` for **`ipc`** and **`polymech`**. You must still satisfy **transitive** dependencies (`oai`, `logger`, `json`, …) from the **same** build/install of this project, or duplicate their build—usually easier to use option 1.
### 3. Minimal example: IPC framing only
If you only need **`ipc::encode` / `ipc::decode`** (and can build `logger` + `json` the same way this project does), mirror `packages/ipc/CMakeLists.txt`:
```cmake
cmake_minimum_required(VERSION 3.20)
project(myapp CXX)
set(CMAKE_CXX_STANDARD 17)
add_subdirectory(path/to/polymech-mono/packages/kbot/cpp/packages/logger)
add_subdirectory(path/to/polymech-mono/packages/kbot/cpp/packages/json)
add_subdirectory(path/to/polymech-mono/packages/kbot/cpp/packages/ipc)
add_executable(myapp main.cpp)
target_link_libraries(myapp PRIVATE ipc)
```
**`main.cpp`** (stdio-style framing helpers):
```cpp
#include <iostream>
#include <ipc/ipc.h>
int main() {
ipc::Message msg{"1", "ping", "{}"};
auto frame = ipc::encode(msg);
// frame: 4-byte LE length + JSON object bytes
ipc::Message roundtrip;
if (frame.size() > 4 &&
ipc::decode(frame.data() + 4, frame.size() - 4, roundtrip)) {
std::cout << roundtrip.type << "\n"; // ping
}
return 0;
}
```
### 4. Example: LLM pipeline API (`kbot` library)
Headers: `kbot.h`, `llm_client.h`, `polymech_export.h`. You need a valid API key and options (see `KBotOptions` in `kbot.h`).
```cpp
#include <iostream>
#include "kbot.h"
#include "llm_client.h"
int main() {
polymech::kbot::KBotOptions opts;
opts.prompt = "Say hello in one sentence.";
opts.api_key = "YOUR_KEY";
opts.router = "openrouter";
opts.model = "openai/gpt-4o-mini";
polymech::kbot::LLMClient client(opts);
polymech::kbot::LLMResponse r = client.execute_chat(opts.prompt);
if (r.success) {
std::cout << r.text << "\n";
} else {
std::cerr << r.error << "\n";
return 1;
}
return 0;
}
```
Or use the callback-based pipeline:
```cpp
polymech::kbot::KBotCallbacks cb;
cb.onEvent = [](const std::string& type, const std::string& json) {
std::cout << type << ": " << json << "\n";
};
return polymech::kbot::run_kbot_ai_pipeline(opts, cb);
```
Link **`kbot`** (and its public dependencies). **`cmd_kbot.h`** entry points (`run_kbot_ai_ipc`, `run_cmd_kbot_uds`, …) are implemented in **`src/cmd_kbot*.cpp`** in this project; to reuse them, compile those sources into your binary or vendor the logic.
## Node / IPC tests
Integration tests live under **`orchestrator/`** (see comments in `orchestrator/test-ipc.mjs`). Typical run from `cpp/`:
```bash
npm run test:ipc
```
Classifier batch (semantic distances vs JobViewer labels):
```bash
npm run test:ipc:classifier
npm run test:ipc:classifier:openrouter
```
Stress: repeat the **same** batched `kbot-ai` call **N** times on **one** worker; prints per-run wall time, token usage (when present), then **min / max / avg / p50 / p95** and Σ tokens. Default **N = 5** for the OpenRouter stress script:
```bash
npm run test:ipc:classifier:openrouter:stress
npm run test:ipc:classifier -- -r openrouter -m openai/gpt-4o-mini --backend remote -n 3
KBOT_CLASSIFIER_STRESS_RUNS=10 npm run test:ipc:classifier:openrouter:stress
```
Requires a built **`dist/kbot.exe`** (or `kbot` on Unix). Set API keys via `config/postgres.toml` for OpenRouter.
## License
See [LICENSE](LICENSE) in this directory.

112
packages/media/cpp/a.json Normal file
View File

@ -0,0 +1,112 @@
{
"items": [
{
"label": "3D printing service",
"distance": 6.0
},
{
"label": "Drafting service",
"distance": 7.0
},
{
"label": "Engraver",
"distance": 6.5
},
{
"label": "Furniture maker",
"distance": 7.5
},
{
"label": "Industrial engineer",
"distance": 7.0
},
{
"label": "Industrial equipment supplier",
"distance": 5.5
},
{
"label": "Laser cutting service",
"distance": 4.5
},
{
"label": "Machine construction",
"distance": 3.0
},
{
"label": "Machine repair service",
"distance": 2.5
},
{
"label": "Machine shop",
"distance": 0.2
},
{
"label": "Machine workshop",
"distance": 0.0
},
{
"label": "Machinery parts manufacturer",
"distance": 2.0
},
{
"label": "Machining manufacturer",
"distance": 1.5
},
{
"label": "Manufacturer",
"distance": 6.0
},
{
"label": "Mechanic",
"distance": 5.0
},
{
"label": "Mechanical engineer",
"distance": 6.5
},
{
"label": "Mechanical plant",
"distance": 3.5
},
{
"label": "Metal fabricator",
"distance": 2.0
},
{
"label": "Metal heat treating service",
"distance": 3.5
},
{
"label": "Metal machinery supplier",
"distance": 5.0
},
{
"label": "Metal working shop",
"distance": 1.0
},
{
"label": "Metal workshop",
"distance": 1.2
},
{
"label": "Novelty store",
"distance": 10.0
},
{
"label": "Plywood supplier",
"distance": 9.5
},
{
"label": "Sign shop",
"distance": 7.5
},
{
"label": "Tool manufacturer",
"distance": 3.0
},
{
"label": "Trophy shop",
"distance": 8.0
}
]
}

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Configure and build the project out-of-tree in /tmp/polymech-build (Release).
# Abort on the first failing command, unset variable, or failed pipeline stage —
# previously a failed configure step still ran the build step.
set -euo pipefail

# Uncomment to force a clean reconfigure from scratch:
#rm -rf /tmp/polymech-build
mkdir -p /tmp/polymech-build

# Prefer the snap-installed cmake (>= 3.20) over a possibly older distro cmake.
export PATH="/snap/bin:$PATH"

cmake -S ./ -B /tmp/polymech-build -DCMAKE_BUILD_TYPE=Release
cmake --build /tmp/polymech-build

View File

@ -0,0 +1,12 @@
[project]
name = "polymech"
version = "0.1.0"
description = "Polymech C++ CLI"
[database]
host = "localhost"
port = 5432
name = "polymech"
[logging]
level = "debug"

View File

@ -0,0 +1,43 @@
{
"guided": {
"areas": [
{
"gid": "ESP.6.1.10.14_1",
"name": "Sabadell",
"level": 4,
"raw": {
"level": 3,
"gadmName": "Sabadell",
"gid": "ESP.6.1.10.14_1"
}
}
],
"settings": {
"gridMode": "centers",
"pathOrder": "snake",
"groupByRegion": false,
"cellSize": 5,
"cellOverlap": 0,
"centroidOverlap": 0,
"ghsFilterMode": "OR",
"maxCellsLimit": 50000,
"maxElevation": 1000,
"minDensity": 0,
"minGhsPop": 0,
"minGhsBuilt": 0,
"allowMissingGhs": false,
"bypassFilters": false
}
},
"search": {
"types": [
"university"
],
"filterCountry": "",
"googleDomain": "google.com",
"limitPerArea": 20,
"zoom": 15,
"language": "en"
},
"filterTypes": []
}

View File

@ -0,0 +1,49 @@
{
"guided": {
"areas": [
{
"gid": "KEN.21_1",
"name": "Lamu",
"level": 1,
"raw": {
"gid": "KEN.21_1",
"gadmName": "Lamu",
"level": 1
}
}
],
"settings": {
"gridMode": "centers",
"pathOrder": "snake",
"groupByRegion": true,
"cellSize": 5,
"cellOverlap": 0,
"centroidOverlap": 50,
"ghsFilterMode": "OR",
"maxCellsLimit": 50000,
"maxElevation": 1000,
"minDensity": 10,
"minGhsPop": 26,
"minGhsBuilt": 154,
"enableElevation": false,
"enableDensity": false,
"enableGhsPop": false,
"enableGhsBuilt": false,
"allowMissingGhs": false,
"bypassFilters": true
}
},
"search": {
"types": [
"plastic"
],
"filterCountry": "",
"googleDomain": "google.com",
"limitPerArea": 20,
"zoom": 15,
"language": "en"
},
"filterTypes": [
"Recycling center"
]
}

View File

@ -0,0 +1,40 @@
{
"guided": {
"areas": [
{
"gid": "ABW",
"name": "Aruba",
"level": 0
}
],
"settings": {
"gridMode": "centers",
"pathOrder": "snake",
"groupByRegion": false,
"cellSize": 5,
"cellOverlap": 0,
"centroidOverlap": 0,
"ghsFilterMode": "OR",
"maxCellsLimit": 50000,
"maxElevation": 1000,
"minDensity": 0,
"minGhsPop": 0,
"minGhsBuilt": 0,
"allowMissingGhs": false,
"bypassFilters": false
}
},
"search": {
"types": [
"recycling"
],
"filterCountry": "",
"googleDomain": "google.com",
"limitPerArea": 20,
"zoom": 15,
"language": "en"
},
"filterTypes": [
"Recycling center"
]
}

View File

@ -0,0 +1,45 @@
{
"guided": {
"areas": [
{
"gid": "ESP.6.1_1",
"name": "Barcelona",
"level": 3,
"raw": {
"level": 2,
"gadmName": "Barcelona",
"gid": "ESP.6.1_1"
}
}
],
"settings": {
"gridMode": "centers",
"pathOrder": "snake",
"groupByRegion": true,
"cellSize": 5,
"cellOverlap": 0,
"centroidOverlap": 0,
"ghsFilterMode": "OR",
"maxCellsLimit": 50000,
"maxElevation": 1000,
"minDensity": 10,
"minGhsPop": 26,
"minGhsBuilt": 154,
"enableElevation": false,
"enableDensity": false,
"enableGhsPop": false,
"enableGhsBuilt": false,
"allowMissingGhs": false,
"bypassFilters": true
}
},
"search": {
"types": [
"marketing"
],
"filterCountry": "Spain",
"googleDomain": "google.es",
"limitPerArea": 10,
"useCache": true
}
}

View File

@ -0,0 +1,85 @@
{
"guided": {
"areas": [
{
"gid": "ESP.6.1.10.2_1",
"name": "Barberà del Vallès",
"level": 4,
"raw": {
"level": 4,
"gadmName": "Barberà del Vallès",
"gid": "ESP.6.1.10.2_1"
}
},
{
"gid": "ESP.6.1.10.14_1",
"name": "Sabadell",
"level": 4,
"raw": {
"level": 4,
"gadmName": "Sabadell",
"gid": "ESP.6.1.10.14_1"
}
},
{
"gid": "ESP.6.1.10.11_1",
"name": "Polinyà",
"level": 4,
"raw": {
"level": 4,
"gadmName": "Polinyà",
"gid": "ESP.6.1.10.11_1"
}
},
{
"gid": "ESP.6.1.10.4_1",
"name": "Castellar del Vallès",
"level": 4,
"raw": {
"level": 4,
"gadmName": "Castellar del Vallès",
"gid": "ESP.6.1.10.4_1"
}
},
{
"gid": "ESP.6.1.10.19_1",
"name": "Sentmenat",
"level": 4,
"raw": {
"level": 4,
"gadmName": "Sentmenat",
"gid": "ESP.6.1.10.19_1"
}
}
],
"settings": {
"gridMode": "centers",
"pathOrder": "snake",
"groupByRegion": true,
"cellSize": 10,
"cellOverlap": 0,
"centroidOverlap": 0,
"ghsFilterMode": "OR",
"maxCellsLimit": 50000,
"maxElevation": 1000,
"minDensity": 10,
"minGhsPop": 26,
"minGhsBuilt": 154,
"enableElevation": false,
"enableDensity": false,
"enableGhsPop": false,
"enableGhsBuilt": false,
"allowMissingGhs": false,
"bypassFilters": true
}
},
"search": {
"types": [
"mecanizado cnc"
],
"filterCountry": "Spain",
"googleDomain": "google.es",
"limitPerArea": 10,
"useCache": true
}
}

View File

@ -0,0 +1,37 @@
{
"guided": {
"areas": [
{
"gid": "ABW",
"name": "Aruba",
"level": 0
}
],
"settings": {
"gridMode": "centers",
"pathOrder": "snake",
"groupByRegion": false,
"cellSize": 5,
"cellOverlap": 0,
"centroidOverlap": 0,
"ghsFilterMode": "OR",
"maxCellsLimit": 50000,
"maxElevation": 1000,
"minDensity": 0,
"minGhsPop": 0,
"minGhsBuilt": 0,
"allowMissingGhs": false,
"bypassFilters": false
}
},
"search": {
"types": [
"recycling"
],
"filterCountry": "",
"googleDomain": "google.com",
"limitPerArea": 1,
"zoom": 15,
"language": "en"
}
}

View File

@ -0,0 +1,60 @@
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# install-lnx.sh Install build dependencies for polymech-cli on Linux
#
# Tested on: Ubuntu 20.04+ / Debian 11+
# Usage: sudo bash install-lnx.sh
# ─────────────────────────────────────────────────────────────────────────────
# Fail fast: -e abort on error, -u error on unset variables, pipefail on pipes.
set -euo pipefail
echo "── polymech-cli Linux dependency installer ──"
# ── 1. System packages (apt) ─────────────────────────────────────────────────
echo ""
echo "[1/3] Installing system packages via apt …"
# Requires root (run via sudo — see usage above); apt-get fails otherwise.
apt-get update -qq
apt-get install -y --no-install-recommends \
build-essential \
gcc \
g++ \
git \
libssl-dev \
pkg-config \
snapd
# ── 2. CMake ≥ 3.20 via snap ────────────────────────────────────────────────
# The project requires cmake_minimum_required(VERSION 3.20).
# Ubuntu 20.04 ships cmake 3.16, so we use the snap package instead.
echo ""
echo "[2/3] Installing CMake via snap (≥ 3.20 required) …"
# Probe the snap-installed cmake explicitly (absolute path), not whatever is on PATH.
if command -v /snap/bin/cmake &>/dev/null; then
echo " cmake snap already installed: $(/snap/bin/cmake --version | head -1)"
else
snap install cmake --classic
echo " Installed: $(/snap/bin/cmake --version | head -1)"
fi
# ── 3. Node.js (for npm run build:linux) ──────────────────────────────────────
echo ""
echo "[3/3] Checking for Node.js / npm …"
# Node is optional; only print setup instructions instead of installing it.
if command -v node &>/dev/null; then
echo " node $(node --version) already installed"
else
echo " Node.js not found. Install via nvm or nodesource, e.g.:"
echo " curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -"
echo " sudo apt-get install -y nodejs"
fi
# ── Summary ──────────────────────────────────────────────────────────────────
echo ""
echo "── Done! ──"
echo ""
echo "All C++ dependencies (CLI11, tomlplusplus, Catch2, asio, concurrentqueue,"
echo "taskflow, curl, lexbor, rapidjson) are fetched automatically by CMake"
echo "FetchContent at build time — no manual installation needed."
echo ""
echo "To build:"
echo " cd $(dirname "$0")"
echo " npm run build:linux"
echo ""
echo "The binary will be placed in: dist/polymech-cli"

View File

@ -0,0 +1,8 @@
/**
 * OpenRouter classifier + stress defaults: remote router, N batch iterations
 * (see KBOT_CLASSIFIER_STRESS_RUNS; defaults to 5 when unset or empty).
 */
process.env.KBOT_IPC_CLASSIFIER_LLAMA = '0';
const stressRuns = process.env.KBOT_CLASSIFIER_STRESS_RUNS;
if (stressRuns === undefined || stressRuns === '') {
  process.env.KBOT_CLASSIFIER_STRESS_RUNS = '5';
}
// Dynamic import so the env vars above are set before the test module evaluates.
await import('./test-ipc-classifier.mjs');

View File

@ -0,0 +1,6 @@
/**
* Sets KBOT_IPC_CLASSIFIER_LLAMA=0 then runs the classifier IPC test against
* KBOT_ROUTER / KBOT_IPC_MODEL (default router: openrouter — see presets.js).
*/
process.env.KBOT_IPC_CLASSIFIER_LLAMA = '0';
// Dynamic import so the env var above is set before the test module evaluates.
await import('./test-ipc-classifier.mjs');

View File

@ -0,0 +1,186 @@
/**
* orchestrator/presets.js defaults for IPC integration tests (extend here as suites grow).
*
* Llama local runner (llama-basics.test.ts): OpenAI-compatible API at http://localhost:8888/v1,
* router `ollama` + `base_url` override, model `default` (server picks loaded GGUF).
*/
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { spawn } from 'node:child_process';
import { existsSync } from 'node:fs';
import { probeTcpPort } from './test-commons.js';
// ESM modules have no __dirname; reconstruct it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Platform switch used by worker/connect helpers below.
export const platform = {
isWin: process.platform === 'win32',
};
/** kbot/cpp root (parent of orchestrator/). */
export const paths = {
// Directory containing this file (and the orchestrator test drivers).
orchestratorDir: __dirname,
cppRoot: resolve(__dirname, '..'),
/** Same as packages/kbot/cpp/scripts/run-7b.sh — llama-server on :8888 */
run7bScript: resolve(__dirname, '../scripts/run-7b.sh'),
};
/** Dist binary name for the current OS. */
export function exeName() {
  const suffix = platform.isWin ? '.exe' : '';
  return `kbot${suffix}`;
}
/** Absolute path to kbot binary given orchestrator/ directory (where test-ipc.mjs lives). */
export function distExePath(orchestratorDir) {
  const distDir = resolve(orchestratorDir, '..', 'dist');
  return resolve(distDir, exeName());
}
/** UDS / TCP listen argument passed to `kbot worker --uds <arg>`. */
export const uds = {
// TCP port used on Windows (see workerArg / connectOpts below).
tcpPort: 4001,
// Unix domain socket path used on non-Windows platforms.
unixPath: '/tmp/kbot-test-ipc.sock',
/** Value for `--uds` on this OS (Windows: port string; Unix: socket path). */
workerArg() {
return platform.isWin ? String(this.tcpPort) : this.unixPath;
},
/** Options for `net.connect` to reach the worker. */
connectOpts(cppUdsArg) {
// On Unix, net.connect accepts the socket path directly as its first argument.
return platform.isWin
? { port: this.tcpPort, host: '127.0.0.1' }
: cppUdsArg;
},
};
/** Millisecond timeouts — tune per step in new tests. */
export const timeouts = {
// Default per-request IPC timeout.
ipcDefault: 5000,
// Remote kbot-ai call (network + model latency).
kbotAi: 180_000,
/** Llama local arithmetic (same order of magnitude as kbot-ai). */
llamaKbotAi: 180_000,
/** Max wait for :8888 after spawning run-7b.sh (model load can be slow). */
llamaServerStart: Number(process.env.KBOT_LLAMA_START_TIMEOUT_MS || 600_000),
// Socket connect retry policy: up to `connectAttempts` tries, `connectRetryMs` apart.
connectAttempts: 15,
connectRetryMs: 400,
// Delay after shutdown — presumably lets the worker exit cleanly; confirm in test-ipc.mjs.
postShutdownMs: 200,
};
/** Router selection: KBOT_ROUTER env var wins; otherwise the `openrouter` default. */
export const router = {
  default: 'openrouter',
  fromEnv() {
    const fromEnvVar = process.env.KBOT_ROUTER;
    return fromEnvVar ? fromEnvVar : this.default;
  },
};
/**
* Local llama.cpp HTTP server mirrors tests/unit/llama-basics.test.ts (LLAMA_OPTS).
* Uses router `ollama` so api_key resolves to dummy `ollama`; `base_url` points at :8888/v1.
*/
export const llama = {
// Getters so env overrides are re-read on every access instead of frozen at import time.
get port() {
return Number(process.env.KBOT_LLAMA_PORT || 8888);
},
get host() {
return process.env.KBOT_LLAMA_HOST || '127.0.0.1';
},
get baseURL() {
// NOTE(review): default uses `localhost` while `host` defaults to 127.0.0.1 —
// confirm both resolve to the same interface on IPv6-enabled machines.
return process.env.KBOT_LLAMA_BASE_URL || `http://localhost:${this.port}/v1`;
},
router: 'ollama',
get model() {
// 'default' lets the server pick whichever GGUF it has loaded.
return process.env.KBOT_LLAMA_MODEL || 'default';
},
prompts: {
/** Same idea as llama-basics completion tests. */
add5_3: 'What is 5 + 3? Reply with just the number, nothing else.',
},
};
/**
 * IPC payload for kbot-ai against the local llama-server (OpenAI-compatible).
 * Passes `base_url` so LLMClient targets port 8888 instead of the default ollama :11434.
 */
export function kbotAiPayloadLlamaLocal(overrides = {}) {
  const payload = {
    prompt: llama.prompts.add5_3,
    router: llama.router,
    model: llama.model,
    base_url: llama.baseURL,
    ...overrides,
  };
  // Accept the legacy `baseURL` spelling from overrides, normalizing to `base_url`.
  if (payload.base_url === undefined || payload.base_url === null) {
    payload.base_url = payload.baseURL ?? llama.baseURL;
  }
  delete payload.baseURL;
  return payload;
}
/** Stock prompts and assertion helpers for LLM smoke tests. */
export const prompts = {
// Deterministic factual question; used by the optional Berlin assertion (see usingDefaultGermanyPrompt).
germanyCapital: 'What is the capital of Germany? Reply in one short sentence.',
};
/** Build a `kbot-ai` IPC payload from environment + presets (OpenRouter-friendly defaults). */
export function kbotAiPayloadFromEnv() {
  const prompt = process.env.KBOT_IPC_PROMPT || prompts.germanyCapital;
  const model = process.env.KBOT_IPC_MODEL;
  const payload = { prompt, router: router.fromEnv() };
  // Only attach `model` when the env var is set and non-empty.
  return model ? { ...payload, model } : payload;
}
/** True when the default Germany prompt is in effect (KBOT_IPC_PROMPT unset or empty). */
export function usingDefaultGermanyPrompt() {
  const override = process.env.KBOT_IPC_PROMPT;
  return override === undefined || override === '';
}
/**
* If nothing listens on llama.port, optionally spawn `scripts/run-7b.sh` (requires `sh` on PATH, e.g. Git Bash on Windows).
*
* @param {{ autostart?: boolean, startTimeoutMs?: number }} [opts]
* @returns {Promise<{ ok: boolean, alreadyRunning: boolean, started?: boolean, pid?: number }>}
* @throws when autostart is disabled, the script is missing, or the port never opens in time.
*/
export async function ensureLlamaLocalServer(opts = {}) {
const autostart = opts.autostart ?? true;
const startTimeoutMs = opts.startTimeoutMs ?? timeouts.llamaServerStart;
const host = llama.host;
const port = llama.port;
const scriptPath = paths.run7bScript;
// Fast path: a server is already up (1.5 s probe timeout).
if (await probeTcpPort(host, port, 1500)) {
return { ok: true, alreadyRunning: true };
}
if (!autostart) {
throw new Error(
`[llama] Nothing listening on ${host}:${port}. Start the server (e.g. sh scripts/run-7b.sh), or remove KBOT_IPC_LLAMA_AUTOSTART=0 to allow autostart`
);
}
if (!existsSync(scriptPath)) {
throw new Error(`[llama] Script missing: ${scriptPath}`);
}
console.log(`[llama] Port ${port} closed — starting ${scriptPath} (timeout ${startTimeoutMs}ms) …`);
// Detach + unref so the spawned llama server can outlive this test process.
const child = spawn('sh', [scriptPath], {
detached: true,
stdio: 'ignore',
cwd: dirname(scriptPath),
env: { ...process.env },
});
child.unref();
// Poll the port every ~1.5 s until the model finishes loading or the deadline passes.
const deadline = Date.now() + startTimeoutMs;
while (Date.now() < deadline) {
if (await probeTcpPort(host, port, 1500)) {
return { ok: true, alreadyRunning: false, started: true, pid: child.pid };
}
await new Promise((r) => setTimeout(r, 1500));
}
throw new Error(
`[llama] Server did not open ${host}:${port} within ${startTimeoutMs}ms — check llama-server / GPU / model path`
);
}

View File

@ -0,0 +1,397 @@
/**
* orchestrator/reports.js JSON + Markdown reports under cwd/tests/
*
* File pattern (logical): test-name::hh:mm
* On-disk: test-name__HH-mm.json / .md (Windows: no `:` in filenames)
*/
import { mkdir, writeFile } from 'node:fs/promises';
import { join, dirname } from 'node:path';
import os from 'node:os';
import { performance } from 'node:perf_hooks';
import { resourceUsage } from 'node:process';
// Characters that are invalid in Windows filenames (also covers control chars).
const WIN_BAD = /[<>:"/\\|?*\x00-\x1f]/g;
/** Strip characters invalid in Windows / POSIX filenames; collapse whitespace runs to `_`. */
export function sanitizeTestName(name) {
  const cleaned = String(name)
    .trim()
    .replace(WIN_BAD, '_')
    .replace(/\s+/g, '_');
  return cleaned.length > 0 ? cleaned : 'test';
}
/**
 * Zero-padded local hour/minute parts plus a `HH:MM` label.
 *
 * @param {Date} [now]
 * @returns {{ hh: string, mm: string, label: string }}
 */
export function timeParts(now = new Date()) {
  const pad2 = (value) => String(value).padStart(2, '0');
  const hh = pad2(now.getHours());
  const mm = pad2(now.getMinutes());
  return { hh, mm, label: `${hh}:${mm}` };
}
/**
 * On-disk report path: `<cwd>/tests/<sanitized-name>__HH-mm<ext>`.
 *
 * @param {string} testName
 * @param {string} ext including dot, e.g. '.json'
 * @param {{ cwd?: string, now?: Date }} [options]
 */
export function reportFilePathWithExt(testName, ext, options = {}) {
  const cwd = options.cwd ?? process.cwd();
  const now = options.now ?? new Date();
  const { hh, mm } = timeParts(now);
  const fileName = `${sanitizeTestName(testName)}__${hh}-${mm}${ext}`;
  return join(cwd, 'tests', fileName);
}
export function reportFilePath(testName, options = {}) {
return reportFilePathWithExt(testName, '.json', options);
}
/** Markdown report path: tests/<name>__HH-mm.md. */
export function reportMarkdownPath(testName, options = {}) {
  return reportFilePathWithExt(testName, '.md', options);
}
/**
 * Human-readable byte size (B/KB/MB/GB, base 1024).
 * Non-numeric / NaN input is stringified unchanged.
 */
function formatBytes(n) {
  if (typeof n !== 'number' || Number.isNaN(n)) return String(n);
  const units = ['B', 'KB', 'MB', 'GB'];
  let idx = 0;
  let value = n;
  while (value >= 1024 && idx < units.length - 1) {
    value /= 1024;
    idx += 1;
  }
  // One decimal for small scaled values (e.g. "1.5 KB"), integers otherwise.
  const shown = idx > 0 && value < 10 ? value.toFixed(1) : Math.round(value);
  return `${shown} ${units[idx]}`;
}
/** Snapshot of host / OS (cheap; call anytime). */
export function hostSnapshot() {
  const cpuList = os.cpus();
  // Sample total/free once so usedMemBytes is internally consistent.
  const totalMem = os.totalmem();
  const freeMem = os.freemem();
  return {
    hostname: os.hostname(),
    platform: os.platform(),
    arch: os.arch(),
    release: os.release(),
    cpuCount: cpuList.length,
    cpuModel: cpuList[0]?.model?.trim() ?? '',
    totalMemBytes: totalMem,
    freeMemBytes: freeMem,
    usedMemBytes: totalMem - freeMem,
    loadAvg: os.loadavg(),
    osUptimeSec: os.uptime(),
  };
}
/**
 * Call at test start; then call `.finalize()` at end for wall + CPU delta + memory.
 * Captures baseline CPU and clocks at creation; finalize() reports deltas.
 */
export function createMetricsCollector() {
  const cpuBaseline = process.cpuUsage();
  const perfBaseline = performance.now();
  const clockBaseline = Date.now();
  return {
    hostSnapshot,
    finalize() {
      const cpuDelta = process.cpuUsage(cpuBaseline);
      const perfNow = performance.now();
      // resourceUsage() may be unavailable on older runtimes — tolerate that.
      let ru = null;
      try {
        ru = resourceUsage();
      } catch {
        /* older runtimes */
      }
      return {
        // perf-based wall time, rounded to microsecond precision (ms).
        durationWallMs: Math.round((perfNow - perfBaseline) * 1000) / 1000,
        durationClockMs: Date.now() - clockBaseline,
        cpuUserUs: cpuDelta.user,
        cpuSystemUs: cpuDelta.system,
        cpuUserMs: cpuDelta.user / 1000,
        cpuSystemMs: cpuDelta.system / 1000,
        memory: process.memoryUsage(),
        resourceUsage: ru,
        pid: process.pid,
        node: process.version,
        processUptimeSec: process.uptime(),
      };
    },
  };
}
/**
 * Render a Markdown report document from a report payload.
 *
 * Sections are emitted conditionally based on which payload fields are
 * present: Summary, Timing, Process (Node), Host, LLM API JSON,
 * Classifier batch, Classifier stress, Environment, Error, footer.
 * @param {Record<string, unknown>} payload
 * @returns {string} full Markdown text (newline-joined)
 */
export function renderMarkdownReport(payload) {
  const meta = payload.meta ?? {};
  const m = payload.metrics ?? {};
  const host = m.host ?? {};
  const timing = m.timing ?? {};
  const proc = m.process ?? {};
  // Timing may live under metrics.timing or at the payload top level.
  const tStart = timing.startedAt ?? payload.startedAt;
  const tEnd = timing.finishedAt ?? payload.finishedAt;
  const lines = [];
  lines.push(`# Test report: ${meta.displayName ?? meta.testName ?? 'run'}`);
  lines.push('');
  // ── Summary ────────────────────────────────────────────────────────────
  lines.push('## Summary');
  lines.push('');
  lines.push(`| Key | Value |`);
  lines.push(`| --- | --- |`);
  // payload.ok may be undefined (no verdict) — render an em dash then.
  lines.push(`| Result | ${payload.ok === true ? 'PASS' : payload.ok === false ? 'FAIL' : '—'} |`);
  if (payload.passed != null) lines.push(`| Assertions passed | ${payload.passed} |`);
  if (payload.failed != null) lines.push(`| Assertions failed | ${payload.failed} |`);
  if (payload.ipcLlm != null) lines.push(`| IPC LLM step | ${payload.ipcLlm ? 'enabled' : 'skipped'} |`);
  if (payload.ipcLlama != null) {
    lines.push(`| IPC llama :8888 step | ${payload.ipcLlama ? 'enabled' : 'skipped'} |`);
  }
  if (payload.ipcClassifierLlama != null) {
    lines.push(
      `| IPC classifier | ${payload.ipcClassifierLlama ? 'local llama :8888' : 'remote (KBOT_ROUTER / KBOT_IPC_MODEL)'} |`
    );
  }
  // Backticks in the cwd would break the inline-code span — replace them.
  lines.push(`| CWD | \`${String(meta.cwd ?? '').replace(/`/g, "'")}\` |`);
  lines.push('');
  // ── Timing ─────────────────────────────────────────────────────────────
  lines.push('## Timing');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`| --- | --- |`);
  if (tStart) lines.push(`| Started (ISO) | ${tStart} |`);
  if (tEnd) lines.push(`| Finished (ISO) | ${tEnd} |`);
  if (proc.durationWallMs != null) lines.push(`| Wall time (perf) | ${proc.durationWallMs} ms |`);
  if (proc.durationClockMs != null) lines.push(`| Wall time (clock) | ${proc.durationClockMs} ms |`);
  lines.push('');
  // ── Process (Node) — CPU deltas, resourceUsage, memoryUsage ───────────
  lines.push('## Process (Node)');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`| --- | --- |`);
  if (proc.pid != null) lines.push(`| PID | ${proc.pid} |`);
  if (proc.node) lines.push(`| Node | ${proc.node} |`);
  if (proc.processUptimeSec != null) lines.push(`| process.uptime() | ${proc.processUptimeSec.toFixed(3)} s |`);
  if (proc.cpuUserMs != null && proc.cpuSystemMs != null) {
    lines.push(`| CPU user (process.cpuUsage Δ) | ${proc.cpuUserMs.toFixed(3)} ms (${proc.cpuUserUs ?? '—'} µs) |`);
    lines.push(`| CPU system (process.cpuUsage Δ) | ${proc.cpuSystemMs.toFixed(3)} ms (${proc.cpuSystemUs ?? '—'} µs) |`);
  }
  const ru = proc.resourceUsage;
  if (ru && typeof ru === 'object') {
    if (ru.userCPUTime != null) {
      lines.push(`| CPU user (resourceUsage) | ${(ru.userCPUTime / 1000).toFixed(3)} ms |`);
    }
    if (ru.systemCPUTime != null) {
      lines.push(`| CPU system (resourceUsage) | ${(ru.systemCPUTime / 1000).toFixed(3)} ms |`);
    }
    if (ru.maxRSS != null) {
      // maxRSS is reported in kilobytes — scale to bytes for formatBytes.
      lines.push(`| Max RSS (resourceUsage) | ${formatBytes(ru.maxRSS * 1024)} |`);
    }
  }
  const mem = proc.memory;
  if (mem && typeof mem === 'object') {
    lines.push(`| RSS | ${formatBytes(mem.rss)} (${mem.rss} B) |`);
    lines.push(`| Heap used | ${formatBytes(mem.heapUsed)} |`);
    lines.push(`| Heap total | ${formatBytes(mem.heapTotal)} |`);
    lines.push(`| External | ${formatBytes(mem.external)} |`);
    if (mem.arrayBuffers != null) lines.push(`| Array buffers | ${formatBytes(mem.arrayBuffers)} |`);
  }
  lines.push('');
  // ── Host ───────────────────────────────────────────────────────────────
  lines.push('## Host');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`| --- | --- |`);
  if (host.hostname) lines.push(`| Hostname | ${host.hostname} |`);
  if (host.platform) lines.push(`| OS | ${host.platform} ${host.release ?? ''} |`);
  if (host.arch) lines.push(`| Arch | ${host.arch} |`);
  if (host.cpuCount != null) lines.push(`| CPUs | ${host.cpuCount} |`);
  if (host.cpuModel) lines.push(`| CPU model | ${host.cpuModel} |`);
  if (host.totalMemBytes != null) {
    lines.push(`| RAM total | ${formatBytes(host.totalMemBytes)} |`);
    lines.push(`| RAM free | ${formatBytes(host.freeMemBytes)} |`);
    lines.push(`| RAM used | ${formatBytes(host.usedMemBytes)} |`);
  }
  if (host.loadAvg && host.loadAvg.length) {
    lines.push(`| Load avg (1/5/15) | ${host.loadAvg.map((x) => x.toFixed(2)).join(' / ')} |`);
  }
  if (host.osUptimeSec != null) lines.push(`| OS uptime | ${(host.osUptimeSec / 3600).toFixed(2)} h |`);
  lines.push('');
  // ── LLM API (provider JSON) — raw provider responses, pretty-printed ───
  const kbotAi = payload.kbotAi;
  const hasKbotAiMeta =
    kbotAi &&
    typeof kbotAi === 'object' &&
    (kbotAi.routerStep != null || kbotAi.llamaStep != null);
  const hasClassifierLlm = payload.llm != null && typeof payload.llm === 'object';
  if (hasKbotAiMeta || hasClassifierLlm) {
    lines.push('## LLM API (provider JSON)');
    lines.push('');
    lines.push(
      'Fields from the chat completion response except assistant message bodies (`usage`, `model`, `id`, provider-specific).'
    );
    lines.push('');
    if (hasKbotAiMeta) {
      if (kbotAi.routerStep != null) {
        lines.push('### IPC step 6 — router / main kbot-ai');
        lines.push('');
        lines.push('```json');
        lines.push(JSON.stringify(kbotAi.routerStep, null, 2));
        lines.push('```');
        lines.push('');
      }
      if (kbotAi.llamaStep != null) {
        lines.push('### IPC step 7 — local llama :8888');
        lines.push('');
        lines.push('```json');
        lines.push(JSON.stringify(kbotAi.llamaStep, null, 2));
        lines.push('```');
        lines.push('');
      }
    }
    if (hasClassifierLlm) {
      lines.push('### Classifier — batched kbot-ai');
      lines.push('');
      lines.push('```json');
      lines.push(JSON.stringify(payload.llm, null, 2));
      lines.push('```');
      lines.push('');
    }
  }
  // ── Classifier batch — anchor, parse status, nearest-label preview ─────
  if (payload.anchor != null || (Array.isArray(payload.distances) && payload.distances.length > 0)) {
    lines.push('## Classifier batch');
    lines.push('');
    lines.push(`| Key | Value |`);
    lines.push(`| --- | --- |`);
    if (payload.anchor != null) lines.push(`| Anchor | ${payload.anchor} |`);
    if (payload.labelCount != null) lines.push(`| Label count | ${payload.labelCount} |`);
    if (payload.backend != null) lines.push(`| Backend | ${payload.backend} |`);
    const pe = payload.parseError;
    if (pe != null && String(pe).length) {
      // Escape `|` so the error text doesn't break the table; cap at 500 chars.
      lines.push(`| Parse | Failed: ${String(pe).replace(/\|/g, '\\|').slice(0, 500)}${String(pe).length > 500 ? '…' : ''} |`);
    } else {
      lines.push(`| Parse | OK |`);
    }
    lines.push('');
    // Preview the 12 nearest labels (rows missing a distance are skipped).
    const sorted = Array.isArray(payload.byDistance) ? payload.byDistance : [];
    const preview = sorted.filter((r) => r && r.distance != null).slice(0, 12);
    if (preview.length > 0) {
      lines.push('### Nearest labels (by distance)');
      lines.push('');
      lines.push(`| Label | Distance |`);
      lines.push(`| --- | ---: |`);
      for (const row of preview) {
        const lab = String(row.label ?? '').replace(/\|/g, '\\|');
        lines.push(`| ${lab} | ${row.distance} |`);
      }
      lines.push('');
    }
  }
  // ── Classifier stress summary (batch repeats) ──────────────────────────
  if (payload.stress?.summary && typeof payload.stress.summary === 'object') {
    const s = payload.stress.summary;
    const w = s.wallMs;
    lines.push('## Classifier stress (batch repeats)');
    lines.push('');
    lines.push(`| Metric | Value |`);
    lines.push(`| --- | --- |`);
    lines.push(`| Requested runs | ${s.requestedRuns ?? '—'} |`);
    if (w && typeof w === 'object') {
      lines.push(
        `| Wall time (ms) | min ${w.min} · max ${w.max} · avg ${w.avg} · p50 ${w.p50} · p95 ${w.p95} |`
      );
    }
    lines.push(`| Batch OK / fail | ${s.successCount ?? '—'} / ${s.failCount ?? '—'} |`);
    if (s.totalTokens > 0 || s.totalPromptTokens > 0 || s.totalCompletionTokens > 0) {
      lines.push(
        `| Σ tokens (prompt / completion / total) | ${s.totalPromptTokens} / ${s.totalCompletionTokens} / ${s.totalTokens} |`
      );
    }
    lines.push('');
  }
  // ── Selected environment variables ─────────────────────────────────────
  if (payload.env && typeof payload.env === 'object') {
    lines.push('## Environment (selected)');
    lines.push('');
    lines.push(`| Variable | Value |`);
    lines.push(`| --- | --- |`);
    for (const [k, v] of Object.entries(payload.env)) {
      lines.push(`| \`${k}\` | ${v === null || v === undefined ? '—' : String(v)} |`);
    }
    lines.push('');
  }
  // ── Error block (fenced, verbatim) ─────────────────────────────────────
  if (payload.error) {
    lines.push('## Error');
    lines.push('');
    lines.push('```');
    lines.push(String(payload.error));
    lines.push('```');
    lines.push('');
  }
  lines.push('---');
  lines.push(`*Written ${meta.writtenAt ?? new Date().toISOString()}*`);
  lines.push('');
  return lines.join('\n');
}
/**
 * Build the metrics block for JSON + MD reports (host snapshot + process finalize).
 * @param {{ hostSnapshot: () => object, finalize: () => object }} collector
 * @param {string} startedAtIso
 * @param {string} finishedAtIso
 */
export function buildMetricsBundle(collector, startedAtIso, finishedAtIso) {
  return {
    timing: { startedAt: startedAtIso, finishedAt: finishedAtIso },
    host: collector.hostSnapshot(),
    process: collector.finalize(),
  };
}
/**
 * Write the JSON and Markdown reports for one test run under <cwd>/tests/.
 * @param {string} testName
 * @param {Record<string, unknown>} data merged into payload (meta + metrics added);
 *   a `meta` key in data overrides the generated meta block.
 * @param {{ cwd?: string, now?: Date, metrics?: object }} [options]
 * @returns {Promise<{ jsonPath: string, mdPath: string }>}
 */
export async function writeTestReports(testName, data, options = {}) {
  const cwd = options.cwd ?? process.cwd();
  const now = options.now ?? new Date();
  const jsonPath = reportFilePath(testName, { cwd, now });
  const mdPath = reportMarkdownPath(testName, { cwd, now });
  const { label } = timeParts(now);
  const base = sanitizeTestName(testName);
  const payload = {
    meta: {
      testName: base,
      displayName: `${base}::${label}`,
      cwd,
      writtenAt: now.toISOString(),
      jsonFile: jsonPath,
      mdFile: mdPath,
    },
    ...data,
  };
  // Ensure the tests/ directory exists, then write JSON first, Markdown second.
  await mkdir(dirname(jsonPath), { recursive: true });
  await writeFile(jsonPath, JSON.stringify(payload, null, 2), 'utf8');
  await writeFile(mdPath, renderMarkdownReport(payload), 'utf8');
  return { jsonPath, mdPath };
}
/** @deprecated Prefer writeTestReports — this writes both files but returns only the JSON path. */
export async function writeJsonReport(testName, data, options = {}) {
  const result = await writeTestReports(testName, data, options);
  return result.jsonPath;
}

View File

@ -0,0 +1,159 @@
/**
* orchestrator/spawn.mjs
*
* Spawn a C++ worker as a child process, send/receive length-prefixed
* JSON messages over stdin/stdout.
*
* Usage:
* import { spawnWorker } from './spawn.mjs';
 *   const w = spawnWorker('./dist/polymech-cli.exe');
 *   await w.ready;
 *   const res = await w.request({ type: 'ping' });
 *   console.log(res); // { id: '...', type: 'pong', payload: {} }
 *   await w.shutdown();
*/
import { spawn } from 'node:child_process';
import { randomUUID } from 'node:crypto';
// ── frame helpers ────────────────────────────────────────────────────────────
/** Write a 4-byte LE length + JSON body to a writable stream. */
function writeFrame(stream, msg) {
  const bodyBuf = Buffer.from(JSON.stringify(msg), 'utf8');
  const header = Buffer.alloc(4);
  header.writeUInt32LE(bodyBuf.length, 0);
  // Single write keeps header + body contiguous on the wire.
  stream.write(Buffer.concat([header, bodyBuf]));
}
/**
 * Creates a streaming frame parser for 4-byte-LE-length-prefixed JSON.
 * Returns a chunk handler; calls `onMessage(parsed)` for each complete frame.
 * Partial frames are buffered until the rest arrives; a frame whose body is
 * not valid JSON is logged and skipped without breaking the stream.
 */
function createFrameReader(onMessage) {
  let pendingBytes = Buffer.alloc(0);
  return (chunk) => {
    pendingBytes = Buffer.concat([pendingBytes, chunk]);
    // Drain every complete frame currently in the buffer.
    for (;;) {
      if (pendingBytes.length < 4) break;
      const bodyLen = pendingBytes.readUInt32LE(0);
      if (pendingBytes.length < 4 + bodyLen) break; // need more data
      const body = pendingBytes.subarray(4, 4 + bodyLen);
      pendingBytes = pendingBytes.subarray(4 + bodyLen);
      try {
        onMessage(JSON.parse(body.toString('utf8')));
      } catch (e) {
        console.error('[orchestrator] failed to parse frame:', e.message);
      }
    }
  };
}
// ── spawnWorker ──────────────────────────────────────────────────────────────
/**
 * Spawn the C++ binary in `worker` mode.
 * Returns: { send, request, shutdown, kill, process, ready }
 *
 * `ready` is a Promise that resolves when the worker sends `{ type: 'ready' }`.
 * Responses are matched to requests via the message `id`; anything without a
 * matching pending id is routed to the `onEvent` handler (progress events).
 *
 * NOTE(review): pending requests are not rejected if the child exits early —
 * callers are only released by the per-request timeout. Confirm this is the
 * intended failure mode.
 */
export function spawnWorker(exePath, args = ['worker']) {
  const proc = spawn(exePath, args, {
    stdio: ['pipe', 'pipe', 'pipe'],
  });
  // Pending request map: id → { resolve, reject, timer }
  const pending = new Map();
  // Event handler for unmatched messages (progress events, etc.)
  let eventHandler = null;
  let readyResolve;
  const ready = new Promise((resolve) => { readyResolve = resolve; });
  // stderr → console (worker logs via spdlog go to stderr)
  proc.stderr.on('data', (chunk) => {
    const text = chunk.toString().trim();
    if (text) console.error(`[worker:stderr] ${text}`);
  });
  // stdout → frame parser → route by id / type
  const feedData = createFrameReader((msg) => {
    // Handle the initial "ready" signal
    if (msg.type === 'ready') {
      readyResolve(msg);
      return;
    }
    // Route response to pending request
    if (msg.id && pending.has(msg.id)) {
      const { resolve, timer } = pending.get(msg.id);
      clearTimeout(timer);
      pending.delete(msg.id);
      resolve(msg);
      return;
    }
    // Unmatched message (progress event, broadcast, etc.)
    if (eventHandler) {
      eventHandler(msg);
    } else {
      console.log('[orchestrator] unmatched message:', msg);
    }
  });
  proc.stdout.on('data', feedData);
  // ── public API ──────────────────────────────────────────────────────────
  /** Fire-and-forget send. An id is assigned if the message has none. */
  function send(msg) {
    if (!msg.id) msg.id = randomUUID();
    writeFrame(proc.stdin, msg);
  }
  /** Send a message and wait for the response with matching `id`. */
  function request(msg, timeoutMs = 5000) {
    return new Promise((resolve, reject) => {
      const id = msg.id || randomUUID();
      msg.id = id;
      // Timeout removes the pending entry so a late reply is treated as unmatched.
      const timer = setTimeout(() => {
        pending.delete(id);
        reject(new Error(`IPC request timed out after ${timeoutMs}ms (id=${id}, type=${msg.type})`));
      }, timeoutMs);
      pending.set(id, { resolve, reject, timer });
      writeFrame(proc.stdin, msg);
    });
  }
  /** Graceful shutdown: send shutdown message & wait for process exit. */
  async function shutdown(timeoutMs = 3000) {
    const res = await request({ type: 'shutdown' }, timeoutMs);
    // Wait for process to exit; hard-kill if it outlives the timeout.
    await new Promise((resolve) => {
      const timer = setTimeout(() => {
        proc.kill();
        resolve();
      }, timeoutMs);
      proc.on('exit', () => { clearTimeout(timer); resolve(); });
    });
    return res;
  }
  return {
    send,
    request,
    shutdown,
    kill: () => proc.kill(),
    process: proc,
    ready,
    // Register the handler for unmatched (event-style) messages; last one wins.
    onEvent: (handler) => { eventHandler = handler; },
  };
}

View File

@ -0,0 +1,237 @@
/**
 * orchestrator/test-commons.js — shared helpers for IPC orchestrator tests.
*/
import { randomUUID } from 'node:crypto';
import net from 'node:net';
/** kbot-ai live call runs unless KBOT_IPC_LLM is explicitly disabled (0/false/no/off). */
export function ipcLlmEnabled() {
  const raw = process.env.KBOT_IPC_LLM;
  if (raw === undefined || raw === '') return true;
  const normalized = String(raw).trim().toLowerCase();
  return !['0', 'false', 'no', 'off'].includes(normalized);
}
/** Llama local (:8888) IPC block — on by default; set KBOT_IPC_LLAMA=0 to skip (CI / no server). */
export function ipcLlamaEnabled() {
  const raw = process.env.KBOT_IPC_LLAMA;
  if (raw === undefined || raw === '') return true;
  switch (String(raw).trim().toLowerCase()) {
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return true;
  }
}
/**
 * Classifier batch test (`test-ipc-classifier.mjs`): local llama :8888 by default.
 * Set KBOT_IPC_CLASSIFIER_LLAMA=0 to use KBOT_ROUTER / KBOT_IPC_MODEL (e.g. OpenRouter) instead.
 */
export function ipcClassifierLlamaEnabled() {
  const raw = process.env.KBOT_IPC_CLASSIFIER_LLAMA;
  if (raw === undefined || raw === '') return true;
  const off = new Set(['0', 'false', 'no', 'off']);
  return !off.has(String(raw).trim().toLowerCase());
}
/** Auto-start scripts/run-7b.sh when :8888 is closed (default on; 0/false/no/off disables). */
export function llamaAutostartEnabled() {
  const raw = process.env.KBOT_IPC_LLAMA_AUTOSTART;
  if (raw === undefined || raw === '') return true;
  const normalized = String(raw).trim().toLowerCase();
  const disabled =
    normalized === '0' || normalized === 'false' || normalized === 'no' || normalized === 'off';
  return !disabled;
}
/** TCP connect probe — resolves true if something accepts connections on host:port. */
export function probeTcpPort(host, port, timeoutMs = 2000) {
  return new Promise((resolve) => {
    const socket = net.connect({ port, host });
    let settled = false;
    const finish = (ok) => {
      if (settled) return;
      settled = true;
      // Drop listeners first so destroy() cannot re-enter via 'error'.
      socket.removeAllListeners();
      try {
        socket.destroy();
      } catch {
        /* ignore */
      }
      resolve(ok);
    };
    const timer = setTimeout(() => finish(false), timeoutMs);
    socket.once('connect', () => {
      clearTimeout(timer);
      finish(true);
    });
    socket.once('error', () => {
      clearTimeout(timer);
      finish(false);
    });
  });
}
/** Counters for a test run (create one per process / suite). */
export function createAssert() {
  const counters = { passed: 0, failed: 0 };
  function assert(condition, label) {
    if (!condition) {
      console.error(`${label}`);
      counters.failed += 1;
      return;
    }
    console.log(`${label}`);
    counters.passed += 1;
  }
  return {
    assert,
    get passed() {
      return counters.passed;
    },
    get failed() {
      return counters.failed;
    },
  };
}
/** Normalize an IPC payload: pass objects through, parse JSON strings, wrap unparseable strings. */
export function payloadObj(msg) {
  const payload = msg?.payload;
  if (payload == null) return null;
  if (typeof payload !== 'string') return payload;
  try {
    return JSON.parse(payload);
  } catch {
    return { raw: payload };
  }
}
/**
 * Print LLM job_result so it is easy to spot (stdout, not mixed with worker stderr).
 * Renders a boxed block with status/mode/router/model, the error (if any),
 * the provider `llm` JSON (capped at 4000 chars), and the assistant text.
 * KBOT_IPC_LLM_LOG_MAX caps error/text length; unset means unlimited.
 */
export function logKbotAiResponse(stepLabel, msg) {
  const p = payloadObj(msg);
  const text = p?.text != null ? String(p.text) : '';
  const err = p?.error != null ? String(p.error) : '';
  const maxRaw = process.env.KBOT_IPC_LLM_LOG_MAX;
  // Empty/unset → no truncation; otherwise parse as an integer char cap.
  const max =
    maxRaw === undefined || maxRaw === ''
      ? Infinity
      : Number.parseInt(maxRaw, 10);
  console.log('');
  console.log(` ┌── ${stepLabel} ──────────────────────────────────────────`);
  console.log(` │ type: ${msg?.type ?? '?'}`);
  if (p && typeof p === 'object') {
    console.log(` │ status: ${p.status ?? '?'}`);
    if (p.mode != null) console.log(` │ mode: ${p.mode}`);
    if (p.router != null) console.log(` │ router: ${p.router}`);
    if (p.model != null) console.log(` │ model: ${p.model}`);
  }
  if (err) {
    const showErr =
      Number.isFinite(max) && err.length > max
        ? `${err.slice(0, max)}… [truncated, ${err.length} chars]`
        : err;
    // Keep multi-line errors inside the box by re-prefixing each line.
    console.log(` │ error: ${showErr.replace(/\n/g, '\n │ ')}`);
  }
  if (p?.llm != null && typeof p.llm === 'object') {
    const raw = JSON.stringify(p.llm);
    const cap = 4000; // hard cap independent of KBOT_IPC_LLM_LOG_MAX
    const shown = raw.length > cap ? `${raw.slice(0, cap)}… [+${raw.length - cap} chars]` : raw;
    console.log(` │ llm (usage / provider JSON): ${shown}`);
  }
  if (text) {
    let body = text;
    let note = '';
    if (Number.isFinite(max) && text.length > max) {
      body = text.slice(0, max);
      note = `\n │ … [truncated: ${text.length} chars total; set KBOT_IPC_LLM_LOG_MAX= to adjust]`;
    }
    console.log(' │ text:');
    for (const line of body.split('\n')) {
      console.log(`${line}`);
    }
    if (note) console.log(note);
  } else if (!err) {
    console.log(' │ (no text in payload)');
  }
  console.log(' └────────────────────────────────────────────────────────────');
  console.log('');
}
/**
* Length-prefixed JSON framing used by the C++ UDS worker.
* Call `attach()` once to wire `socket.on('data', ...)`.
*/
export function createIpcClient(socket) {
const pending = new Map();
let readyResolve;
const readyPromise = new Promise((res) => {
readyResolve = res;
});
let buffer = Buffer.alloc(0);
function onData(chunk) {
buffer = Buffer.concat([buffer, chunk]);
while (buffer.length >= 4) {
const len = buffer.readUInt32LE(0);
if (buffer.length >= 4 + len) {
const payload = buffer.toString('utf8', 4, 4 + len);
buffer = buffer.subarray(4 + len);
try {
const msg = JSON.parse(payload);
if (msg.type === 'ready') {
readyResolve(msg);
} else if (msg.id && pending.has(msg.id)) {
const p = pending.get(msg.id);
clearTimeout(p.timer);
pending.delete(msg.id);
p.resolve(msg);
}
} catch (e) {
console.error('[orchestrator] frame parse error', e);
}
} else {
break;
}
}
}
function request(msg, timeoutMs = 5000) {
return new Promise((resolve, reject) => {
const id = msg.id || randomUUID();
msg.id = id;
const timer = setTimeout(() => {
pending.delete(id);
reject(new Error(`IPC request timed out`));
}, timeoutMs);
pending.set(id, { resolve, reject, timer });
const str = JSON.stringify(msg);
const lenBuf = Buffer.alloc(4);
lenBuf.writeUInt32LE(Buffer.byteLength(str));
socket.write(lenBuf);
socket.write(str);
});
}
return {
pending,
readyPromise,
request,
attach() {
socket.on('data', onData);
},
};
}
/** Forward worker stderr lines to console (prefixed); blank chunks are dropped. */
export function pipeWorkerStderr(workerProc, label = '[worker:stderr]') {
  const forward = (chunk) => {
    const line = String(chunk).trim();
    if (line.length > 0) console.error(`${label} ${line}`);
  };
  workerProc.stderr.on('data', forward);
}

View File

@ -0,0 +1,204 @@
/**
* orchestrator/test-files.mjs
*
* IPC + CLI parity for text file sources (port of kbot/src/source.ts text slice only; images later).
* Fixtures: packages/kbot/tests/test-data/files (path below is resolved from orchestrator/).
*
* Run: npm run test:files
*
* Env (optional live LLM step):
* KBOT_IPC_LLM set 0/false/off to skip live kbot-ai (default: run when key available)
* KBOT_ROUTER, KBOT_IPC_MODEL same as test-ipc
*
* CLI (npm run test:files -- --help):
* --fixtures <dir> Override fixture root (default: ../../tests/test-data/files)
*/
import { spawn } from 'node:child_process';
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import net from 'node:net';
import { existsSync, unlinkSync } from 'node:fs';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import {
distExePath,
platform,
uds,
timeouts,
kbotAiPayloadFromEnv,
} from './presets.js';
import {
createAssert,
payloadObj,
ipcLlmEnabled,
createIpcClient,
pipeWorkerStderr,
} from './test-commons.js';
// Resolve this module's directory (ESM has no __dirname) and the worker binary.
const __dirname = dirname(fileURLToPath(import.meta.url));
const EXE = distExePath(__dirname);
// Shared pass/fail counters for this whole script run.
const stats = createAssert();
const { assert } = stats;
// Default fixture root: packages/kbot/tests/test-data/files (relative to orchestrator/).
const defaultFixtures = resolve(__dirname, '../../tests/test-data/files');
/** Parse CLI options (yargs): --fixtures <dir> overrides the fixture root. */
function parseArgv() {
  return yargs(hideBin(process.argv))
    .scriptName('test-files')
    .usage('$0 [options]\n\nText file source IPC tests (fixtures under packages/kbot/tests/test-data/files).')
    .option('fixtures', {
      type: 'string',
      default: defaultFixtures,
      describe: 'Directory used as kbot-ai `path` (project root for includes)',
    })
    .strict()
    .help()
    .alias('h', 'help')
    .parseSync();
}
/**
 * Run the text-file-source IPC suite against a connected worker socket:
 * dry-run source attachment checks (no LLM), then an optional live LLM step,
 * then a graceful shutdown. Assertion results go to the module-level `stats`.
 * @param {import('node:net').Socket} socket
 * @param {string} fixturesDir directory used as kbot-ai `path`
 */
async function runFileSuite(socket, fixturesDir) {
  const ipc = createIpcClient(socket);
  ipc.attach();
  const readyMsg = await ipc.readyPromise;
  assert(readyMsg.type === 'ready', 'worker ready');
  console.log('\n── Dry-run source attachment (no LLM) ──\n');
  /**
   * Send a dry-run kbot-ai request and assert the common response shape.
   * @param {Record<string, unknown>} payload
   */
  async function dry(payload) {
    const msg = await ipc.request({ type: 'kbot-ai', payload }, timeouts.ipcDefault);
    assert(msg.type === 'job_result', `job_result (got ${msg.type})`);
    const p = payloadObj(msg);
    assert(p?.dry_run === true, 'dry_run flag');
    assert(p?.status === 'success', 'status success');
    assert(Array.isArray(p?.sources), 'sources array');
    return p;
  }
  // Single explicit file include.
  let p = await dry({
    dry_run: true,
    path: fixturesDir,
    include: ['bubblesort.js'],
    prompt: 'What function is defined? Reply one word.',
  });
  assert(
    p.sources.some((s) => String(s).includes('bubblesort')),
    'sources lists bubblesort.js',
  );
  assert(
    /bubbleSort/i.test(String(p.prompt_preview || '')),
    'prompt_preview contains bubbleSort',
  );
  // Glob include — expects at least bubblesort.js and factorial.js fixtures.
  p = await dry({
    dry_run: true,
    path: fixturesDir,
    include: ['*.js'],
    prompt: 'List algorithms.',
  });
  assert(p.sources.length >= 2, 'glob *.js yields at least 2 files');
  const names = p.sources.map((s) => String(s).toLowerCase());
  assert(names.some((n) => n.includes('bubblesort')), 'glob includes bubblesort.js');
  assert(names.some((n) => n.includes('factorial')), 'glob includes factorial.js');
  // Non-JS fixture: JSON content must land in the prompt preview.
  p = await dry({
    dry_run: true,
    path: fixturesDir,
    include: ['glob/data.json'],
    prompt: 'What is the title?',
  });
  assert(
    String(p.prompt_preview || '').includes('Injection Barrel'),
    'JSON fixture content in preview',
  );
  if (ipcLlmEnabled()) {
    console.log('\n── Live LLM — single file prompt ──\n');
    const base = kbotAiPayloadFromEnv();
    const payload = {
      ...base,
      path: fixturesDir,
      include: ['bubblesort.js'],
      prompt:
        process.env.KBOT_FILES_LIVE_PROMPT ||
        'What is the name of the sorting algorithm in the code? Reply with two words: bubble sort',
    };
    const msg = await ipc.request({ type: 'kbot-ai', payload }, timeouts.kbotAi);
    assert(msg.type === 'job_result', 'live job_result');
    const lp = payloadObj(msg);
    assert(lp?.status === 'success', 'live status success');
    const text = String(lp?.text || '');
    // Loose match: any mention of "bubble" proves file context reached the model.
    assert(/bubble/i.test(text), 'assistant mentions bubble (file context worked)');
  } else {
    console.log('\n── Live LLM — skipped (KBOT_IPC_LLM off) ──\n');
  }
  const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
  assert(shutdownRes.type === 'shutdown_ack', 'shutdown ack');
}
/**
 * Entry point: validate binary + fixtures, spawn the worker with a UDS
 * endpoint, connect with retries, run the suite, then tear down.
 * Exits 1 on any failed assertion or missing prerequisite.
 */
async function run() {
  const argv = parseArgv();
  const fixturesDir = resolve(argv.fixtures);
  if (!existsSync(EXE)) {
    console.error(`Binary not found: ${EXE}`);
    process.exit(1);
  }
  if (!existsSync(fixturesDir)) {
    console.error(`Fixtures directory not found: ${fixturesDir}`);
    process.exit(1);
  }
  console.log(`\n📁 test:files — fixtures: ${fixturesDir}\n`);
  const CPP_UDS_ARG = uds.workerArg();
  // Remove a stale socket file so bind() doesn't fail (POSIX only).
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }
  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);
  // Connect with retries — the worker needs a moment to create the socket.
  let socket;
  for (let i = 0; i < timeouts.connectAttempts; i++) {
    try {
      await new Promise((res, rej) => {
        socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
        socket.once('connect', res);
        socket.once('error', rej);
      });
      break;
    } catch {
      if (i === timeouts.connectAttempts - 1) throw new Error('connect failed');
      await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
    }
  }
  try {
    await runFileSuite(socket, fixturesDir);
  } finally {
    // Always tear down the socket and the worker, even on suite failure.
    try {
      socket?.destroy();
    } catch {
      /* ignore */
    }
    workerProc.kill();
  }
  console.log(`\nDone. Passed: ${stats.passed} Failed: ${stats.failed}\n`);
  process.exit(stats.failed > 0 ? 1 : 0);
}
// Top-level launcher: any unhandled error fails the process with exit code 1.
run().catch((e) => {
  console.error(e);
  process.exit(1);
});

View File

@ -0,0 +1,204 @@
/**
* orchestrator/test-gridsearch-ipc.mjs
*
* E2E test: spawn the C++ worker, send a gridsearch request
* matching `npm run gridsearch:enrich` defaults, collect IPC events,
* and verify the full event sequence.
*
* Run: node orchestrator/test-gridsearch-ipc.mjs
* Needs: npm run build-debug (or npm run build)
*/
import { spawnWorker } from './spawn.mjs';
import { resolve, dirname } from 'node:path';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import fs from 'node:fs';
// Resolve the worker binary next to this script (platform-specific name).
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
// Fail fast if the binary has not been built yet.
if (!fs.existsSync(EXE)) {
  console.error(`❌ No ${EXE_NAME} found in dist. Run npm run build first.`);
  process.exit(1);
}
console.log(`Binary: ${EXE}\n`);
// Load the sample settings (same as gridsearch:enrich)
const sampleConfig = JSON.parse(
  readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-sample.json'), 'utf8')
);
// Script-global pass/fail counters, printed in the summary at the end.
let passed = 0;
let failed = 0;
/** Log a labelled check and bump the matching counter. */
function assert(condition, label) {
  if (!condition) {
    console.error(`${label}`);
    failed += 1;
    return;
  }
  console.log(`${label}`);
  passed += 1;
}
// ── Event collector ─────────────────────────────────────────────────────────
// Reference list of the event types a full gridsearch run is expected to emit,
// in rough order. NOTE(review): not referenced in the visible code — appears to
// be documentation / reserved for future sequence checks; confirm before removal.
const EXPECTED_EVENTS = [
  'grid-ready',
  'waypoint-start',
  'area',
  'location',
  'enrich-start',
  'node',
  'nodePage',
  // 'node-error' — may or may not occur, depends on network
];
/**
 * Collects worker IPC events by type and prints a live progress line.
 * `events` pre-seeds empty arrays for the known types so counts can be read
 * without null checks; unknown types get an array lazily in `handler`.
 */
function createCollector() {
  const events = {};
  for (const t of ['grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage']) {
    events[t] = [];
  }
  return {
    events,
    handler(msg) {
      const t = msg.type;
      if (events[t]) {
        events[t].push(msg);
      } else {
        events[t] = [msg];
      }
      // Live progress indicator (carriage-return overwrite, no newline).
      const d = msg.payload ?? {};
      if (t === 'waypoint-start') {
        process.stdout.write(`\r 🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
      } else if (t === 'node') {
        process.stdout.write(`\r 📧 Enriched: ${d.title?.substring(0, 40) ?? ''} `);
      } else if (t === 'node-error') {
        process.stdout.write(`\r ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''} `);
      }
    },
  };
}
// ── Main test ───────────────────────────────────────────────────────────────
async function run() {
console.log('🧪 Gridsearch IPC E2E Test\n');
// ── 1. Spawn worker ───────────────────────────────────────────────────
console.log('1. Spawn worker in daemon mode');
const worker = spawnWorker(EXE, ['worker', '--daemon', '--user-uid', '3bb4cfbf-318b-44d3-a9d3-35680e738421']);
const readyMsg = await worker.ready;
assert(readyMsg.type === 'ready', 'Worker sends ready signal');
// ── 2. Register event collector ───────────────────────────────────────
const collector = createCollector();
worker.onEvent(collector.handler);
// ── 3. Send gridsearch request (matching gridsearch:enrich) ────────────
console.log('2. Send gridsearch request (Aruba / recycling / --enrich)');
const t0 = Date.now();
// Very long timeout — enrichment can take minutes
const result = await worker.request(
{
type: 'gridsearch',
payload: {
...sampleConfig,
enrich: true,
},
},
5 * 60 * 1000 // 5 min timeout
);
const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
console.log(`\n\n ⏱️ Completed in ${elapsed}s\n`);
// ── 4. Verify final result ────────────────────────────────────────────
console.log('3. Verify job_result');
assert(result.type === 'job_result', `Response type is "job_result" (got "${result.type}")`);
const summary = result.payload ?? null;
assert(summary !== null, 'job_result payload is present');
if (summary) {
assert(typeof summary.totalMs === 'number', `totalMs is number (${summary.totalMs})`);
assert(typeof summary.searchMs === 'number', `searchMs is number (${summary.searchMs})`);
assert(typeof summary.enrichMs === 'number', `enrichMs is number (${summary.enrichMs})`);
assert(typeof summary.freshApiCalls === 'number', `freshApiCalls is number (${summary.freshApiCalls})`);
assert(typeof summary.waypointCount === 'number', `waypointCount is number (${summary.waypointCount})`);
assert(summary.gridStats && typeof summary.gridStats.validCells === 'number', 'gridStats.validCells present');
assert(summary.searchStats && typeof summary.searchStats.totalResults === 'number', 'searchStats.totalResults present');
assert(typeof summary.enrichedOk === 'number', `enrichedOk is number (${summary.enrichedOk})`);
assert(typeof summary.enrichedTotal === 'number', `enrichedTotal is number (${summary.enrichedTotal})`);
}
// ── 5. Verify event sequence ──────────────────────────────────────────
console.log('4. Verify event stream');
const e = collector.events;
assert(e['grid-ready'].length === 1, `Exactly 1 grid-ready event (got ${e['grid-ready'].length})`);
assert(e['waypoint-start'].length > 0, `At least 1 waypoint-start event (got ${e['waypoint-start'].length})`);
assert(e['area'].length > 0, `At least 1 area event (got ${e['area'].length})`);
assert(e['waypoint-start'].length === e['area'].length, `waypoint-start count (${e['waypoint-start'].length}) === area count (${e['area'].length})`);
assert(e['enrich-start'].length === 1, `Exactly 1 enrich-start event (got ${e['enrich-start'].length})`);
const totalNodes = e['node'].length + e['node-error'].length;
assert(totalNodes > 0, `At least 1 node event (got ${totalNodes}: ${e['node'].length} ok, ${e['node-error'].length} errors)`);
// Validate grid-ready payload
if (e['grid-ready'].length > 0) {
const gr = e['grid-ready'][0].payload ?? {};
assert(Array.isArray(gr.areas), 'grid-ready.areas is array');
assert(typeof gr.total === 'number' && gr.total > 0, `grid-ready.total > 0 (${gr.total})`);
}
// Validate location events have required fields
if (e['location'].length > 0) {
const loc = e['location'][0].payload ?? {};
assert(loc.location && typeof loc.location.title === 'string', 'location event has location.title');
assert(loc.location && typeof loc.location.place_id === 'string', 'location event has location.place_id');
assert(typeof loc.areaName === 'string', 'location event has areaName');
}
assert(e['location'].length > 0, `At least 1 location event (got ${e['location'].length})`);
// Validate node payloads
if (e['node'].length > 0) {
const nd = e['node'][0].payload ?? {};
assert(typeof nd.placeId === 'string', 'node event has placeId');
assert(typeof nd.title === 'string', 'node event has title');
assert(Array.isArray(nd.emails), 'node event has emails array');
assert(typeof nd.status === 'string', 'node event has status');
}
// ── 6. Print event summary ────────────────────────────────────────────
console.log('\n5. Event summary');
for (const [type, arr] of Object.entries(e)) {
if (arr.length > 0) console.log(` ${type}: ${arr.length}`);
}
// ── 7. Shutdown ───────────────────────────────────────────────────────
console.log('\n6. Graceful shutdown');
const shutdownRes = await worker.shutdown();
assert(shutdownRes.type === 'shutdown_ack', 'Shutdown acknowledged');
await new Promise(r => setTimeout(r, 500));
assert(worker.process.exitCode === 0, `Worker exited with code 0 (got ${worker.process.exitCode})`);
// ── Summary ───────────────────────────────────────────────────────────
console.log(`\n────────────────────────────────`);
console.log(` Passed: ${passed} Failed: ${failed}`);
console.log(`────────────────────────────────\n`);
process.exit(failed > 0 ? 1 : 0);
}
// Entry point: surface any runner-level failure and exit non-zero.
run().catch(function (err) {
  console.error('Test runner error:', err);
  process.exit(1);
});

View File

@ -0,0 +1,218 @@
/**
* orchestrator/test-gridsearch-ipc-uds-meta.mjs
*
* E2E test for Unix Domain Sockets / Windows Named Pipes (Meta Enrichment)!
* Spawns the worker in `--uds` mode and tests direct high-throughput
* lock-free JSON binary framing over a net.Socket.
*/
import { spawn } from 'node:child_process';
import { resolve, dirname, join } from 'node:path';
import { readFileSync, existsSync, unlinkSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import net from 'node:net';
import { tmpdir } from 'node:os';
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
// Compiled CLI binary is expected in ../dist relative to this test file.
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
// NOTE(review): TEST_CANCEL is declared but never used in this script
// (the cancel path lives in test-gridsearch-ipc-uds.mjs) — confirm and remove.
const TEST_CANCEL = false;
if (!existsSync(EXE)) {
  console.error(`❌ Binary not found at ${EXE}`);
  process.exit(1);
}
const PIPE_NAME = 'polymech-test-uds-meta';
// Windows: the worker's "--uds" argument is a TCP port string; POSIX: a socket path in tmpdir.
const CPP_UDS_ARG = IS_WIN ? '4001' : join(tmpdir(), `${PIPE_NAME}.sock`);
// Remove a stale socket file left over from a previous run.
if (!IS_WIN && existsSync(CPP_UDS_ARG)) {
  unlinkSync(CPP_UDS_ARG);
}
console.log(`Binary: ${EXE}`);
console.log(`C++ Arg: ${CPP_UDS_ARG}\n`);
// ── Event collector ─────────────────────────────────────────────────────────
/**
 * Create a collector for the worker's IPC event stream.
 * Messages are bucketed by `type` (unknown types get a bucket on demand),
 * a single-line progress indicator is painted for the noisy phases, and
 * `onComplete` — when assigned — fires on the final `job_result` frame.
 */
function createCollector() {
  /** @type {Record<string, any[]>} */
  const events = {};
  const seeded = [
    'grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage', 'job_result',
  ];
  seeded.forEach((name) => { events[name] = []; });
  return {
    events,
    onComplete: null,
    handler(msg) {
      const kind = msg.type;
      const bucket = events[kind] ?? (events[kind] = []);
      bucket.push(msg);
      const d = msg.data ?? {};
      switch (kind) {
        case 'waypoint-start':
          process.stdout.write(`\r  🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
          break;
        case 'node':
          process.stdout.write(`\r  📧 Enriched: ${d.title?.substring(0, 40) ?? ''}   `);
          break;
        case 'node-error':
          process.stdout.write(`\r  ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''}   `);
          break;
        case 'job_result':
          console.log(`\n  🏁 Pipeline complete!`);
          if (this.onComplete) this.onComplete(msg);
          break;
        default:
          break;
      }
    },
  };
}
// Running tallies for the final pass/fail summary.
let passed = 0;
let failed = 0;
/** Print a ✅/❌ line for `label` and bump the matching counter. */
function assert(condition, label) {
  if (!condition) {
    console.error(`  ❌ ${label}`);
    failed += 1;
    return;
  }
  console.log(`  ✅ ${label}`);
  passed += 1;
}
/**
 * E2E driver: spawn the worker daemon in UDS/named-pipe mode, stream one
 * gridsearch job over a length-prefixed JSON socket, then assert on the
 * collected event stream (meta-enrichment variant: socials + site markdown).
 */
async function run() {
  console.log('🧪 Gridsearch UDS Meta E2E Test\n');
  // 1. Spawn worker in UDS mode
  console.log('1. Spawning remote C++ Taskflow Daemon');
  const worker = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG, '--daemon'], { stdio: 'inherit' });
  // Give the daemon a moment to boot
  console.log('2. Connecting net.Socket with retries...');
  let socket;
  // Retry for up to ~7.5s (15 × 500ms) while the daemon binds its endpoint.
  // NOTE(review): sockets from failed attempts are never destroy()ed and the
  // 'connect' listener lingers on them — harmless for a short-lived test, but
  // worth confirming.
  for (let i = 0; i < 15; i++) {
    try {
      await new Promise((resolve, reject) => {
        if (IS_WIN) {
          // Windows build listens on TCP rather than a filesystem socket.
          socket = net.connect({ port: 4001, host: '127.0.0.1' });
        } else {
          socket = net.connect(CPP_UDS_ARG);
        }
        socket.once('connect', resolve);
        socket.once('error', reject);
      });
      console.log('  ✅ Socket Connected to UDS!');
      break;
    } catch (e) {
      if (i === 14) throw e; // out of retries — surface the last error
      await new Promise(r => setTimeout(r, 500));
    }
  }
  const collector = createCollector();
  let buffer = Buffer.alloc(0);
  // Buffer framing logic (length-prefixed streaming)
  // Frame layout: 4-byte little-endian length prefix + UTF-8 JSON payload.
  socket.on('data', (chunk) => {
    buffer = Buffer.concat([buffer, chunk]);
    while (buffer.length >= 4) {
      const len = buffer.readUInt32LE(0);
      if (buffer.length >= 4 + len) {
        const payload = buffer.toString('utf8', 4, 4 + len);
        buffer = buffer.subarray(4 + len);
        try {
          const msg = JSON.parse(payload);
          collector.handler(msg);
        } catch (e) {
          console.error("JSON PARSE ERROR:", e, payload);
        }
      } else {
        break; // Wait for more chunks
      }
    }
  });
  // 3. Send Gridsearch payload
  // USE gridsearch-sample.json instead of gridsearch-bcn-universities.json
  const sampleConfig = JSON.parse(
    readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-sample.json'), 'utf8')
  );
  sampleConfig.configPath = resolve(__dirname, '..', 'config', 'postgres.toml');
  sampleConfig.jobId = 'uds-meta-test-abc';
  sampleConfig.noCache = true; // force re-enrichment even if cached
  console.log('3. Writing serialized IPC Payload over pipe...');
  // Outbound request uses the same 4-byte length-prefix framing as inbound.
  const jsonStr = JSON.stringify(sampleConfig);
  const lenBuf = Buffer.alloc(4);
  lenBuf.writeUInt32LE(Buffer.byteLength(jsonStr));
  socket.write(lenBuf);
  socket.write(jsonStr);
  // 4. Wait for pipeline completion (job_result event) or timeout
  console.log('\n4. Awaiting multi-threaded Execution Pipeline (can take minutes)...\n');
  await new Promise((resolve) => {
    collector.onComplete = () => {
      // Send stop command to gracefully shut down the daemon
      console.log('  📤 Sending stop command to daemon...');
      const stopPayload = JSON.stringify({ action: 'stop' });
      const stopLen = Buffer.alloc(4);
      stopLen.writeUInt32LE(Buffer.byteLength(stopPayload));
      socket.write(stopLen);
      socket.write(stopPayload);
      setTimeout(resolve, 1000); // Give daemon a moment to ack
    };
    // Safety timeout
    // NOTE(review): this timer is never cleared on success; the process.exit()
    // at the end of run() makes the lingering handle moot.
    setTimeout(() => {
      console.log('\n  ⏰ Timeout reached (300s) — forcing shutdown.');
      resolve();
    }, 300000); // Wait up to 5 minutes
  });
  console.log('\n\n5. Event summary');
  for (const [k, v] of Object.entries(collector.events)) {
    console.log(`  ${k}: ${v.length}`);
  }
  // Assertions
  const ev = collector.events;
  assert(ev['grid-ready'].length === 1, 'grid-ready emitted once');
  assert(ev['waypoint-start'].length > 0, 'waypoint-start events received');
  assert(ev['location'].length > 0, 'location events received');
  assert(ev['enrich-start'].length === 1, 'enrich-start emitted once');
  assert(ev['job_result'].length === 1, 'job_result emitted once');
  // Verify social profiles and md body
  const nodes = ev['node'];
  let foundSocial = false;
  let foundSiteMd = false;
  for (const n of nodes) {
    const d = n.data;
    if (!d) continue;
    if (d.socials && d.socials.length > 0) {
      foundSocial = true;
    }
    if (d.sites && Array.isArray(d.sites) && d.sites.length > 0) {
      foundSiteMd = true;
    }
  }
  // Socials are data-dependent, so their absence is only warned about;
  // markdown site mapping is asserted unconditionally.
  if (foundSocial) {
    assert(foundSocial, 'At least one enriched node has social media profiles discovered');
  } else {
    console.log('  ⚠️ No social media profiles discovered in this run (data-dependent), but pipeline completed.');
  }
  assert(foundSiteMd, 'At least one enriched node has markdown sites mapped');
  console.log('6. Cleanup');
  socket.destroy();
  worker.kill('SIGTERM');
  console.log(`\n────────────────────────────────`);
  console.log(`  Passed: ${passed}  Failed: ${failed}`);
  console.log(`────────────────────────────────`);
  process.exit(failed > 0 ? 1 : 0);
}
// Entry point: any unhandled failure exits non-zero.
run().catch(function (e) {
  console.error(e);
  process.exit(1);
});

View File

@ -0,0 +1,255 @@
/**
* orchestrator/test-gridsearch-ipc-uds.mjs
*
* E2E test for Unix Domain Sockets / Windows Named Pipes!
* Spawns the worker in `--uds` mode and tests direct high-throughput
* lock-free JSON binary framing over a net.Socket.
*/
import { spawn } from 'node:child_process';
import { resolve, dirname, join } from 'node:path';
import { readFileSync, existsSync, unlinkSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import net from 'node:net';
import { tmpdir } from 'node:os';
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
// Compiled CLI binary is expected in ../dist relative to this test file.
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
// Flip to true to exercise the mid-run { action: "cancel" } path in run().
const TEST_CANCEL = false;
if (!existsSync(EXE)) {
  console.error(`❌ Binary not found at ${EXE}`);
  process.exit(1);
}
const PIPE_NAME = 'polymech-test-uds';
// Windows: the worker's "--uds" argument is a TCP port string; POSIX: a socket path in tmpdir.
const CPP_UDS_ARG = IS_WIN ? '4000' : join(tmpdir(), `${PIPE_NAME}.sock`);
// Remove a stale socket file left over from a previous run.
if (!IS_WIN && existsSync(CPP_UDS_ARG)) {
  unlinkSync(CPP_UDS_ARG);
}
console.log(`Binary: ${EXE}`);
console.log(`C++ Arg: ${CPP_UDS_ARG}\n`);
// ── Event collector ─────────────────────────────────────────────────────────
/**
 * Build the IPC event collector: buckets every message by `type`
 * (lazily creating buckets for unknown types), renders a one-line
 * progress indicator, and invokes `onComplete` on `job_result`.
 */
function createCollector() {
  /** @type {Record<string, any[]>} */
  const events = {};
  for (const known of [
    'grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage', 'job_result',
  ]) {
    events[known] = [];
  }
  return {
    events,
    onComplete: null,
    handler(msg) {
      const kind = msg.type;
      if (!events[kind]) events[kind] = [];
      events[kind].push(msg);
      const d = msg.data ?? {};
      if (kind === 'waypoint-start') {
        process.stdout.write(`\r  🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
      } else if (kind === 'node') {
        process.stdout.write(`\r  📧 Enriched: ${d.title?.substring(0, 40) ?? ''}   `);
      } else if (kind === 'node-error') {
        process.stdout.write(`\r  ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''}   `);
      } else if (kind === 'job_result') {
        console.log(`\n  🏁 Pipeline complete!`);
        if (this.onComplete) this.onComplete(msg);
      }
    },
  };
}
// Running tallies for the final pass/fail summary.
let passed = 0;
let failed = 0;
/** Print a ✅/❌ line for `label` and bump the matching counter. */
function assert(condition, label) {
  if (!condition) {
    console.error(`  ❌ ${label}`);
    failed += 1;
    return;
  }
  console.log(`  ✅ ${label}`);
  passed += 1;
}
/**
 * E2E driver: spawn the worker daemon in UDS/named-pipe mode, stream one
 * gridsearch job over a length-prefixed JSON socket, optionally exercise
 * mid-run cancellation (TEST_CANCEL), then assert on the event stream,
 * enrichment skips, and filterTypes behavior.
 */
async function run() {
  console.log('🧪 Gridsearch UDS / Named Pipe E2E Test\n');
  // 1. Spawn worker in UDS mode
  console.log('1. Spawning remote C++ Taskflow Daemon');
  const worker = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG, '--daemon'], { stdio: 'inherit' });
  // Give the daemon a moment to boot
  console.log('2. Connecting net.Socket with retries...');
  let socket;
  // Retry for up to ~7.5s (15 × 500ms) while the daemon binds its endpoint.
  for (let i = 0; i < 15; i++) {
    try {
      await new Promise((resolve, reject) => {
        if (IS_WIN) {
          // Windows build listens on TCP rather than a filesystem socket.
          socket = net.connect({ port: 4000, host: '127.0.0.1' });
        } else {
          socket = net.connect(CPP_UDS_ARG);
        }
        socket.once('connect', resolve);
        socket.once('error', reject);
      });
      console.log('  ✅ Socket Connected to UDS!');
      break;
    } catch (e) {
      if (i === 14) throw e; // out of retries — surface the last error
      await new Promise(r => setTimeout(r, 500));
    }
  }
  const collector = createCollector();
  let buffer = Buffer.alloc(0);
  // Buffer framing logic (length-prefixed streaming)
  // Frame layout: 4-byte little-endian length prefix + UTF-8 JSON payload.
  socket.on('data', (chunk) => {
    buffer = Buffer.concat([buffer, chunk]);
    while (buffer.length >= 4) {
      const len = buffer.readUInt32LE(0);
      if (buffer.length >= 4 + len) {
        const payload = buffer.toString('utf8', 4, 4 + len);
        buffer = buffer.subarray(4 + len);
        try {
          const msg = JSON.parse(payload);
          collector.handler(msg);
        } catch (e) {
          console.error("JSON PARSE ERROR:", e, payload);
        }
      } else {
        break; // Wait for more chunks
      }
    }
  });
  // 3. Send Gridsearch payload
  const sampleConfig = JSON.parse(
    readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-bcn-universities.json'), 'utf8')
  );
  sampleConfig.configPath = resolve(__dirname, '..', 'config', 'postgres.toml');
  sampleConfig.jobId = 'uds-test-cancel-abc';
  console.log('3. Writing serialized IPC Payload over pipe...');
  // Outbound request uses the same 4-byte length-prefix framing as inbound.
  const jsonStr = JSON.stringify(sampleConfig);
  const lenBuf = Buffer.alloc(4);
  lenBuf.writeUInt32LE(Buffer.byteLength(jsonStr));
  socket.write(lenBuf);
  socket.write(jsonStr);
  // Send cancellation after 5 seconds
  if (TEST_CANCEL) {
    setTimeout(() => {
      console.log('\n\n--> Testing Dynamic Cancellation (Sending cancel event for uds-test-cancel-abc)...');
      const cancelPayload = JSON.stringify({ action: "cancel", jobId: "uds-test-cancel-abc" });
      const cancelLenBuf = Buffer.alloc(4);
      cancelLenBuf.writeUInt32LE(Buffer.byteLength(cancelPayload));
      socket.write(cancelLenBuf);
      socket.write(cancelPayload);
    }, 5000);
  }
  // 4. Wait for pipeline completion (job_result event) or timeout
  console.log('\n4. Awaiting multi-threaded Execution Pipeline (can take minutes)...\n');
  await new Promise((resolve) => {
    collector.onComplete = () => {
      // Send stop command to gracefully shut down the daemon
      console.log('  📤 Sending stop command to daemon...');
      const stopPayload = JSON.stringify({ action: 'stop' });
      const stopLen = Buffer.alloc(4);
      stopLen.writeUInt32LE(Buffer.byteLength(stopPayload));
      socket.write(stopLen);
      socket.write(stopPayload);
      setTimeout(resolve, 1000); // Give daemon a moment to ack
    };
    // Safety timeout
    // NOTE(review): this timer is never cleared on success; the process.exit()
    // at the end of run() makes the lingering handle moot.
    setTimeout(() => {
      console.log('\n  ⏰ Timeout reached (120s) — forcing shutdown.');
      resolve();
    }, 120000);
  });
  console.log('\n\n5. Event summary');
  for (const [k, v] of Object.entries(collector.events)) {
    console.log(`  ${k}: ${v.length}`);
  }
  // Assertions
  const ev = collector.events;
  assert(ev['grid-ready'].length === 1, 'grid-ready emitted once');
  assert(ev['waypoint-start'].length > 0, 'waypoint-start events received');
  assert(ev['location'].length > 0, 'location events received');
  assert(ev['enrich-start'].length === 1, 'enrich-start emitted once');
  assert(ev['job_result'].length === 1, 'job_result emitted once');
  // Check enrichment skip log (if present in log events)
  // NOTE(review): 'log' is not a pre-seeded bucket; it only exists if the
  // worker actually emitted log-type frames — confirm the daemon does.
  const logEvents = ev['log'] ?? [];
  const skipLog = logEvents.find(l =>
    typeof l.data === 'string' && l.data.includes('already enriched')
  );
  const nodeCount = ev['node'].length + ev['node-error'].length;
  if (skipLog) {
    console.log(`  Pre-enrich skip detected: ${skipLog.data}`);
    assert(nodeCount === 0, 'no enrichment needed (all skipped)');
  } else {
    console.log('  No pre-enrich skips (all locations are new or unenriched)');
    assert(nodeCount > 0, 'enrichment node events received');
  }
  // Check filterTypes assertions: all locations must have website + matching type
  const FILTER_TYPE = 'Recycling center';
  const locations = ev['location'];
  const badWebsite = locations.filter(l => {
    const loc = l.data?.location;
    return !loc?.website;
  });
  assert(badWebsite.length === 0, `all locations have website (${badWebsite.length} missing)`);
  // A location matches if FILTER_TYPE appears in its `types` array OR equals
  // its primary `type` string.
  const badType = locations.filter(l => {
    const loc = l.data?.location;
    const types = loc?.types ?? [];
    const type = loc?.type ?? '';
    return !types.includes(FILTER_TYPE) && type !== FILTER_TYPE;
  });
  if (badType.length > 0) {
    console.log(`  ❌ Mismatched locations:`);
    badType.slice(0, 3).forEach(l => console.log(JSON.stringify(l.data?.location, null, 2)));
  }
  assert(badType.length === 0, `all locations match type "${FILTER_TYPE}" (${badType.length} mismatched)`);
  const filterLog = logEvents.find(l =>
    typeof l.data === 'string' && l.data.includes('locations removed')
  );
  if (filterLog) {
    console.log(`  Filter applied: ${filterLog.data}`);
  }
  const filterTypesLog = logEvents.filter(l =>
    typeof l.data === 'string' && (l.data.includes('filterTypes:') || l.data.includes(' - '))
  );
  if (filterTypesLog.length > 0) {
    console.log(`  Parsed filterTypes in C++:`);
    filterTypesLog.forEach(l => console.log(`    ${l.data}`));
  }
  console.log(`  Locations after filter: ${locations.length}`);
  console.log('6. Cleanup');
  socket.destroy();
  worker.kill('SIGTERM');
  console.log(`\n────────────────────────────────`);
  console.log(`  Passed: ${passed}  Failed: ${failed}`);
  console.log(`────────────────────────────────`);
  process.exit(failed > 0 ? 1 : 0);
}
// Entry point: any unhandled failure exits non-zero.
run().catch(function (e) {
  console.error(e);
  process.exit(1);
});

View File

@ -0,0 +1,204 @@
/**
* orchestrator/test-gridsearch-ipc.mjs
*
* E2E test: spawn the C++ worker, send a gridsearch request
* matching `npm run gridsearch:enrich` defaults, collect IPC events,
* and verify the full event sequence.
*
* Run: node orchestrator/test-gridsearch-ipc.mjs
* Needs: npm run build-debug (or npm run build)
*/
import { spawnWorker } from './spawn.mjs';
import { resolve, dirname } from 'node:path';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import fs from 'node:fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
// Compiled CLI binary is expected in ../dist relative to this test file.
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
if (!fs.existsSync(EXE)) {
  console.error(`❌ No ${EXE_NAME} found in dist. Run npm run build first.`);
  process.exit(1);
}
console.log(`Binary: ${EXE}\n`);
// Load the sample settings (same as gridsearch:enrich)
const sampleConfig = JSON.parse(
  readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-sample.json'), 'utf8')
);
// Running tallies for the final pass/fail summary.
let passed = 0;
let failed = 0;
/** Print a ✅/❌ line for `label` and bump the matching counter. */
function assert(condition, label) {
  // Node's console methods are pre-bound, so they can be selected like this.
  const sink = condition ? console.log : console.error;
  sink(condition ? `  ✅ ${label}` : `  ❌ ${label}`);
  if (condition) passed++;
  else failed++;
}
// ── Event collector ─────────────────────────────────────────────────────────
// Event types the happy path is expected to emit at least once.
// NOTE(review): EXPECTED_EVENTS is not referenced anywhere in this file's
// visible code — it may be dead, or consumed by an external reporter;
// confirm before removing.
const EXPECTED_EVENTS = [
  'grid-ready',
  'waypoint-start',
  'area',
  'location',
  'enrich-start',
  'node',
  'nodePage',
  // 'node-error' — may or may not occur, depends on network
];
/**
 * Collect worker IPC messages into per-type arrays (creating buckets on
 * demand for unknown types) and paint a single-line progress indicator
 * for the noisy search/enrichment phases. Payload-based variant: progress
 * fields come from `msg.payload`, not `msg.data`.
 */
function createCollector() {
  /** @type {Record<string, any[]>} */
  const events = {};
  [
    'grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage',
  ].forEach((known) => { events[known] = []; });
  const progress = (text) => process.stdout.write(text);
  return {
    events,
    handler(msg) {
      const kind = msg.type;
      if (!events[kind]) events[kind] = [];
      events[kind].push(msg);
      // Live progress indicator
      const d = msg.payload ?? {};
      if (kind === 'waypoint-start') {
        progress(`\r  🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
      } else if (kind === 'node') {
        progress(`\r  📧 Enriched: ${d.title?.substring(0, 40) ?? ''}   `);
      } else if (kind === 'node-error') {
        progress(`\r  ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''}   `);
      }
    },
  };
}
// ── Main test ───────────────────────────────────────────────────────────────
/**
 * E2E driver: spawn the worker over stdio IPC (spawnWorker), send one
 * gridsearch+enrich request, then verify both the final job_result payload
 * and the shape/ordering of the streamed event sequence.
 */
async function run() {
  console.log('🧪 Gridsearch IPC E2E Test\n');
  // ── 1. Spawn worker ───────────────────────────────────────────────────
  console.log('1. Spawn worker');
  const worker = spawnWorker(EXE);
  const readyMsg = await worker.ready;
  assert(readyMsg.type === 'ready', 'Worker sends ready signal');
  // ── 2. Register event collector ───────────────────────────────────────
  const collector = createCollector();
  // Handler is passed detached; safe because this collector's handler never
  // references `this`.
  worker.onEvent(collector.handler);
  // ── 3. Send gridsearch request (matching gridsearch:enrich) ────────────
  console.log('2. Send gridsearch request (Aruba / recycling / --enrich)');
  const t0 = Date.now();
  // Very long timeout — enrichment can take minutes
  const result = await worker.request(
    {
      type: 'gridsearch',
      payload: {
        ...sampleConfig,
        enrich: true,
      },
    },
    5 * 60 * 1000 // 5 min timeout
  );
  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
  console.log(`\n\n  ⏱️ Completed in ${elapsed}s\n`);
  // ── 4. Verify final result ────────────────────────────────────────────
  // The worker's reply is the job summary: timings, API-call counts, and
  // grid/search/enrichment statistics.
  console.log('3. Verify job_result');
  assert(result.type === 'job_result', `Response type is "job_result" (got "${result.type}")`);
  const summary = result.payload ?? null;
  assert(summary !== null, 'job_result payload is present');
  if (summary) {
    assert(typeof summary.totalMs === 'number', `totalMs is number (${summary.totalMs})`);
    assert(typeof summary.searchMs === 'number', `searchMs is number (${summary.searchMs})`);
    assert(typeof summary.enrichMs === 'number', `enrichMs is number (${summary.enrichMs})`);
    assert(typeof summary.freshApiCalls === 'number', `freshApiCalls is number (${summary.freshApiCalls})`);
    assert(typeof summary.waypointCount === 'number', `waypointCount is number (${summary.waypointCount})`);
    assert(summary.gridStats && typeof summary.gridStats.validCells === 'number', 'gridStats.validCells present');
    assert(summary.searchStats && typeof summary.searchStats.totalResults === 'number', 'searchStats.totalResults present');
    assert(typeof summary.enrichedOk === 'number', `enrichedOk is number (${summary.enrichedOk})`);
    assert(typeof summary.enrichedTotal === 'number', `enrichedTotal is number (${summary.enrichedTotal})`);
  }
  // ── 5. Verify event sequence ──────────────────────────────────────────
  console.log('4. Verify event stream');
  const e = collector.events;
  assert(e['grid-ready'].length === 1, `Exactly 1 grid-ready event (got ${e['grid-ready'].length})`);
  assert(e['waypoint-start'].length > 0, `At least 1 waypoint-start event (got ${e['waypoint-start'].length})`);
  assert(e['area'].length > 0, `At least 1 area event (got ${e['area'].length})`);
  // Each searched waypoint must produce exactly one area summary.
  assert(e['waypoint-start'].length === e['area'].length, `waypoint-start count (${e['waypoint-start'].length}) === area count (${e['area'].length})`);
  assert(e['enrich-start'].length === 1, `Exactly 1 enrich-start event (got ${e['enrich-start'].length})`);
  const totalNodes = e['node'].length + e['node-error'].length;
  assert(totalNodes > 0, `At least 1 node event (got ${totalNodes}: ${e['node'].length} ok, ${e['node-error'].length} errors)`);
  // Validate grid-ready payload
  if (e['grid-ready'].length > 0) {
    const gr = e['grid-ready'][0].payload ?? {};
    assert(Array.isArray(gr.areas), 'grid-ready.areas is array');
    assert(typeof gr.total === 'number' && gr.total > 0, `grid-ready.total > 0 (${gr.total})`);
  }
  // Validate location events have required fields
  if (e['location'].length > 0) {
    const loc = e['location'][0].payload ?? {};
    assert(loc.location && typeof loc.location.title === 'string', 'location event has location.title');
    assert(loc.location && typeof loc.location.place_id === 'string', 'location event has location.place_id');
    assert(typeof loc.areaName === 'string', 'location event has areaName');
  }
  assert(e['location'].length > 0, `At least 1 location event (got ${e['location'].length})`);
  // Validate node payloads
  if (e['node'].length > 0) {
    const nd = e['node'][0].payload ?? {};
    assert(typeof nd.placeId === 'string', 'node event has placeId');
    assert(typeof nd.title === 'string', 'node event has title');
    assert(Array.isArray(nd.emails), 'node event has emails array');
    assert(typeof nd.status === 'string', 'node event has status');
  }
  // ── 6. Print event summary ────────────────────────────────────────────
  console.log('\n5. Event summary');
  for (const [type, arr] of Object.entries(e)) {
    if (arr.length > 0) console.log(`  ${type}: ${arr.length}`);
  }
  // ── 7. Shutdown ───────────────────────────────────────────────────────
  console.log('\n6. Graceful shutdown');
  const shutdownRes = await worker.shutdown();
  assert(shutdownRes.type === 'shutdown_ack', 'Shutdown acknowledged');
  // Short grace period so the child can actually exit before we read exitCode.
  await new Promise(r => setTimeout(r, 500));
  assert(worker.process.exitCode === 0, `Worker exited with code 0 (got ${worker.process.exitCode})`);
  // ── Summary ───────────────────────────────────────────────────────────
  console.log(`\n────────────────────────────────`);
  console.log(`  Passed: ${passed}  Failed: ${failed}`);
  console.log(`────────────────────────────────\n`);
  process.exit(failed > 0 ? 1 : 0);
}
// Entry point: surface any runner-level failure and exit non-zero.
run().catch(function (err) {
  console.error('Test runner error:', err);
  process.exit(1);
});

View File

@ -0,0 +1,802 @@
/**
* orchestrator/test-ipc-classifier.mjs
*
* IPC + local llama: one kbot-ai call semantic distance from anchor "machine workshop"
* to every business label (JobViewer.tsx ~205). Output is a single JSON array (+ meta).
*
* Run: npm run test:ipc:classifier
* CLI (overrides env): yargs see parseClassifierArgv()
* npm run test:ipc:classifier -- --help
* npm run test:ipc:classifier -- --provider openrouter --model openai/gpt-4o-mini --backend remote -n 3
* npm run test:ipc:classifier -- -r openrouter -m openai/gpt-4o-mini --backend remote -n 3 -F structured
* npm run test:ipc:classifier -- -r openrouter -m x -F stress,no-heartbeat
* npm run test:ipc:classifier -- -r openrouter -m x --backend remote -n 3 -F stress,structured
* npm run test:ipc:classifier -- -r openrouter -m x --backend remote -F structured --dst ./out.json
*
* Env:
* KBOT_IPC_CLASSIFIER_LLAMA set 0 to use OpenRouter (KBOT_ROUTER, KBOT_IPC_MODEL) instead of local llama :8888
* KBOT_IPC_LLAMA_AUTOSTART 0 to skip spawning run-7b.sh (llama mode only)
* KBOT_ROUTER / KBOT_IPC_MODEL when classifier llama is off (same as test-ipc step 6)
* KBOT_CLASSIFIER_LIMIT max labels in the batch (default: all)
* KBOT_CLASSIFIER_TIMEOUT_MS single batched kbot-ai call (default: 300000)
*
* OpenRouter: npm run test:ipc:classifier:openrouter (sets KBOT_IPC_CLASSIFIER_LLAMA=0)
* Stress (batch repeats, one worker): KBOT_CLASSIFIER_STRESS_RUNS=N (default 1)
* npm run test:ipc:classifier:openrouter:stress OpenRouter + 5 runs (override N via env)
*
* Reports (reports.js): cwd/tests/test-ipc-classifier__HH-mm.{json,md}; distances in
* test-ipc-classifier-distances__HH-mm.json (same timestamp as the main JSON).
* With -F structured, the prompt asks for {"items":[...]} to match json_object APIs.
*/
import { spawn } from 'node:child_process';
import { mkdir, writeFile } from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import net from 'node:net';
import { existsSync, unlinkSync } from 'node:fs';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import {
distExePath,
platform,
uds,
timeouts,
kbotAiPayloadLlamaLocal,
kbotAiPayloadFromEnv,
ensureLlamaLocalServer,
llama,
router,
} from './presets.js';
import {
createAssert,
payloadObj,
llamaAutostartEnabled,
ipcClassifierLlamaEnabled,
createIpcClient,
pipeWorkerStderr,
} from './test-commons.js';
import {
reportFilePathWithExt,
timeParts,
createMetricsCollector,
buildMetricsBundle,
writeTestReports,
} from './reports.js';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Module-scoped state shared between run and the top-level error handler.
/** Set at run start; used by catch for error reports */
let classifierMetricsCollector = null;
let classifierRunStartedAt = null;
/** Feature flags from `-F` / `--feature` (stress, structured, no-heartbeat, no-report, quiet) */
let classifierFeatures = /** @type {Set<string>} */ (new Set());
/** Parsed argv (after yargs); set in parseClassifierArgv */
let classifierArgv = /** @type {Record<string, unknown> | null} */ (null);
/**
 * Normalize the raw `-F` / `--feature` option into a flag set.
 * Accepts repeated flags and comma-separated lists; entries are trimmed,
 * empties dropped, and non-string / non-array inputs ignored.
 * @param {unknown} featureOpt
 * @returns {Set<string>}
 */
function parseFeatureList(featureOpt) {
  const collected = new Set();
  if (Array.isArray(featureOpt)) {
    for (const entry of featureOpt) {
      if (typeof entry !== 'string') continue;
      for (const piece of entry.split(',')) {
        const flag = piece.trim();
        if (flag) collected.add(flag);
      }
    }
  }
  return collected;
}
/**
 * Parse CLI and apply to `process.env` (CLI wins over prior env).
 * Side effects: mutates process.env, and sets the module-level
 * `classifierFeatures` and `classifierArgv`.
 * @returns {Record<string, unknown> & { featuresSet: Set<string> }}
 */
export function parseClassifierArgv() {
  const y = yargs(hideBin(process.argv))
    .scriptName('test-ipc-classifier')
    .usage('$0 [options]\n\nIPC classifier batch test. Flags override env vars for this process.')
    .option('provider', {
      alias: 'r',
      type: 'string',
      describe: 'Router / provider → KBOT_ROUTER (e.g. openrouter, ollama, openai)',
    })
    .option('model', {
      alias: 'm',
      type: 'string',
      describe: 'Model id → KBOT_IPC_MODEL',
    })
    .option('runs', {
      alias: 'n',
      type: 'number',
      describe: 'Batch repeats (stress) → KBOT_CLASSIFIER_STRESS_RUNS',
    })
    .option('limit', {
      alias: 'l',
      type: 'number',
      describe: 'Max labels → KBOT_CLASSIFIER_LIMIT',
    })
    .option('timeout', {
      alias: 't',
      type: 'number',
      describe: 'LLM HTTP timeout ms → KBOT_CLASSIFIER_TIMEOUT_MS',
    })
    .option('backend', {
      type: 'string',
      choices: ['local', 'remote'],
      describe: 'local = llama :8888; remote = router (sets KBOT_IPC_CLASSIFIER_LLAMA=0)',
    })
    .option('no-autostart', {
      type: 'boolean',
      default: false,
      describe: 'Do not spawn run-7b.sh → KBOT_IPC_LLAMA_AUTOSTART=0',
    })
    .option('feature', {
      alias: 'F',
      type: 'array',
      default: [],
      describe:
        'Feature flags (repeat or comma-separated): stress, structured, no-heartbeat, no-report, quiet',
    })
    .option('dst', {
      type: 'string',
      describe:
        'Forwarded to kbot-ai IPC `dst` (worker writes completion text here; path resolved from cwd). Same as C++ --dst.',
    })
    .option('output', {
      type: 'string',
      describe:
        'Forwarded to IPC if --dst omitted (C++ `output` field). Prefer --dst when both are set.',
    })
    .strict()
    .help()
    .alias('h', 'help');
  const argv = y.parseSync();
  const featuresSet = parseFeatureList(argv.feature);
  // Map validated flags onto env vars; each guard ignores missing/blank/
  // non-finite values so prior env settings survive when a flag is omitted.
  if (argv.provider != null && String(argv.provider).trim() !== '') {
    process.env.KBOT_ROUTER = String(argv.provider).trim();
  }
  if (argv.model != null && String(argv.model).trim() !== '') {
    process.env.KBOT_IPC_MODEL = String(argv.model).trim();
  }
  if (argv.runs != null && Number.isFinite(argv.runs) && argv.runs >= 1) {
    // Stress runs are clamped to 500.
    process.env.KBOT_CLASSIFIER_STRESS_RUNS = String(Math.min(500, Math.floor(Number(argv.runs))));
  }
  if (argv.limit != null && Number.isFinite(argv.limit) && argv.limit >= 1) {
    process.env.KBOT_CLASSIFIER_LIMIT = String(Math.floor(Number(argv.limit)));
  }
  if (argv.timeout != null && Number.isFinite(argv.timeout) && argv.timeout > 0) {
    process.env.KBOT_CLASSIFIER_TIMEOUT_MS = String(Math.floor(Number(argv.timeout)));
  }
  if (argv['no-autostart'] === true) {
    process.env.KBOT_IPC_LLAMA_AUTOSTART = '0';
  }
  // Backend selection: "remote" disables the local-llama path; "local"
  // clears the override so the env default applies.
  if (argv.backend === 'remote') {
    process.env.KBOT_IPC_CLASSIFIER_LLAMA = '0';
  } else if (argv.backend === 'local') {
    delete process.env.KBOT_IPC_CLASSIFIER_LLAMA;
  }
  // `-F stress` without an explicit -n defaults to 5 runs, but never
  // overrides a value already present in the environment.
  if (featuresSet.has('stress') && (argv.runs == null || !Number.isFinite(argv.runs))) {
    if (!process.env.KBOT_CLASSIFIER_STRESS_RUNS) {
      process.env.KBOT_CLASSIFIER_STRESS_RUNS = '5';
    }
  }
  classifierFeatures = featuresSet;
  const out = { ...argv, featuresSet };
  classifierArgv = out;
  return out;
}
const EXE = distExePath(__dirname);
// Shared assert helper from test-commons; `stats` also carries the tallies.
const stats = createAssert();
const { assert } = stats;
// The label set under test — mirrored from the UI's business-type options.
/** @see packages/kbot/.../JobViewer.tsx — business type options */
export const JOB_VIEWER_MACHINE_LABELS = [
  '3D printing service',
  'Drafting service',
  'Engraver',
  'Furniture maker',
  'Industrial engineer',
  'Industrial equipment supplier',
  'Laser cutting service',
  'Machine construction',
  'Machine repair service',
  'Machine shop',
  'Machine workshop',
  'Machinery parts manufacturer',
  'Machining manufacturer',
  'Manufacturer',
  'Mechanic',
  'Mechanical engineer',
  'Mechanical plant',
  'Metal fabricator',
  'Metal heat treating service',
  'Metal machinery supplier',
  'Metal working shop',
  'Metal workshop',
  'Novelty store',
  'Plywood supplier',
  'Sign shop',
  'Tool manufacturer',
  'Trophy shop',
];
// Anchor label every candidate is scored against (distance 0 = identical).
const ANCHOR = 'machine workshop';
/** Keys we accept for the batch array when API forces a JSON object (e.g. response_format json_object). */
const BATCH_ARRAY_OBJECT_KEYS = ['items', 'distances', 'results', 'data', 'labels', 'rows'];
/**
 * Build the single batch prompt sent to the model.
 * Plain mode: the model must answer with a JSON array at the root.
 * Structured mode (-F structured): the model must answer with a JSON object
 * whose "items" key holds the array (for APIs enforcing response_format json_object).
 * @param {string[]} labels candidate labels, in the order distances must come back
 * @returns {string} the complete prompt text
 */
function classifierBatchPrompt(labels) {
  // One numbered line per candidate so the model can echo labels verbatim.
  const labelLines = labels.map((l, i) => `${i + 1}. ${JSON.stringify(l)}`).join('\n');
  const wantsObjectRoot = classifierFeatures.has('structured');
  const sharedRules = `Rules for each element:
- Use shape: {"label": <exact string from the list below>, "distance": <number>}
- "distance" is semantic distance from 0 (same as anchor or direct synonym) to 10 (unrelated). One decimal allowed.
- Include EXACTLY one object per line item below, in the SAME ORDER, with "label" copied character-for-character from the list.
Anchor business type: ${ANCHOR}
Candidate labels (in order):
${labelLines}`;
  if (wantsObjectRoot) {
    return `You classify business types against one anchor. Output ONLY valid JSON: one object, no markdown fences, no commentary.
The API requires a JSON object (not a top-level array). Use exactly one top-level key "items" whose value is the array.
${sharedRules}
Example: {"items":[{"label":"Example","distance":2.5},...]}`;
  }
  return `You classify business types against one anchor. Output ONLY a JSON array, no markdown fences, no commentary.
${sharedRules}
Output: one JSON array, e.g. [{"label":"...","distance":2.5},...]`;
}
/**
 * Parse model text into the batch array: root [...] or {"items":[...]} (json_object).
 * Strategy: strip markdown fences, try parsing the whole text, then fall back to
 * the outermost [...] slice, then the outermost {...} slice.
 * @param {unknown} text raw assistant text
 * @returns {unknown[] | null} the batch array, or null when nothing parseable is found
 */
function extractJsonArray(text) {
  if (!text || typeof text !== 'string') return null;
  const cleaned = text
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/\s*```$/u, '')
    .trim();
  /**
   * Coerce a parsed JSON value into the batch array.
   * @param {unknown} parsed
   */
  const coerceToArray = (parsed) => {
    if (Array.isArray(parsed)) return parsed;
    if (!parsed || typeof parsed !== 'object') return null;
    const obj = /** @type {Record<string, unknown>} */ (parsed);
    // Known wrapper keys first (e.g. {"items":[...]}).
    for (const key of BATCH_ARRAY_OBJECT_KEYS) {
      const candidate = obj[key];
      if (Array.isArray(candidate)) return candidate;
    }
    // Otherwise accept any value that looks like a batch: non-empty array of
    // objects carrying a "label" field.
    for (const value of Object.values(obj)) {
      const looksLikeBatch =
        Array.isArray(value) &&
        value.length > 0 &&
        value[0] &&
        typeof value[0] === 'object' &&
        value[0] !== null &&
        'label' in value[0];
      if (looksLikeBatch) return value;
    }
    return null;
  };
  /** @param {string} chunk */
  const tryParse = (chunk) => {
    try {
      return { ok: true, value: JSON.parse(chunk) };
    } catch {
      return { ok: false, value: null };
    }
  };
  const whole = tryParse(cleaned);
  if (whole.ok) return coerceToArray(whole.value);
  const arrStart = cleaned.indexOf('[');
  const arrEnd = cleaned.lastIndexOf(']');
  if (arrStart >= 0 && arrEnd > arrStart) {
    const sliced = tryParse(cleaned.slice(arrStart, arrEnd + 1));
    if (sliced.ok && Array.isArray(sliced.value)) return sliced.value;
  }
  const objStart = cleaned.indexOf('{');
  const objEnd = cleaned.lastIndexOf('}');
  if (objStart >= 0 && objEnd > objStart) {
    const sliced = tryParse(cleaned.slice(objStart, objEnd + 1));
    if (sliced.ok) return coerceToArray(sliced.value);
  }
  return null;
}
/**
 * Map a parsed batch array onto the expected labels, preserving their order.
 * Entries with unknown labels, non-numeric distances, or the wrong shape are
 * dropped; a later duplicate label overwrites an earlier one.
 * @param {unknown[]} arr parsed model output
 * @param {string[]} expectedLabels ordered, authoritative label list
 * @returns {{distances: {label:string,distance:number|null}[], missing: string[]}}
 */
function normalizeBatchArray(arr, expectedLabels) {
  const allowed = new Set(expectedLabels);
  const found = new Map();
  for (const entry of arr) {
    if (!entry || typeof entry !== 'object') continue;
    const { label } = entry;
    let dist = entry.distance;
    // Be lenient: accept numeric strings like "2.5".
    if (typeof dist === 'string') dist = parseFloat(dist);
    const usable =
      typeof label === 'string' &&
      typeof dist === 'number' &&
      Number.isFinite(dist) &&
      allowed.has(label);
    if (usable) found.set(label, dist);
  }
  // One row per expected label, in input order; null marks an unanswered label.
  const distances = expectedLabels.map((label) => ({
    label,
    distance: found.has(label) ? found.get(label) : null,
  }));
  const missing = [];
  for (const row of distances) {
    if (row.distance == null) missing.push(row.label);
  }
  return { distances, missing };
}
/**
 * HTTP/LLM timeout for one batch in ms, from KBOT_CLASSIFIER_TIMEOUT_MS.
 * Unset, empty, non-numeric, or non-positive values fall back to 30s.
 */
function batchTimeoutMs() {
  const DEFAULT_MS = 30_000;
  const raw = process.env.KBOT_CLASSIFIER_TIMEOUT_MS;
  if (raw === undefined || raw === '') return DEFAULT_MS;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isFinite(parsed) && parsed > 0) return parsed;
  return DEFAULT_MS;
}
/**
 * Sequential batch iterations on one worker (stress), from
 * KBOT_CLASSIFIER_STRESS_RUNS. Default 1 = single run; invalid or < 1 values
 * also yield 1; the count is capped at 500.
 */
function stressRunCount() {
  const raw = process.env.KBOT_CLASSIFIER_STRESS_RUNS;
  if (raw === undefined || raw === '') return 1;
  const parsed = Number.parseInt(String(raw).trim(), 10);
  if (!Number.isFinite(parsed) || parsed < 1) return 1;
  return parsed > 500 ? 500 : parsed;
}
/**
 * Extract token usage from job_result.llm (kbot-ai), accepting both snake_case
 * (OpenAI style) and camelCase keys. Missing counts come back as null.
 * @param {unknown} llm — job_result.llm from kbot-ai
 * @returns {{prompt:unknown, completion:unknown, total:unknown} | null}
 */
function usageTokens(llm) {
  if (!llm || typeof llm !== 'object') return null;
  const usage = /** @type {Record<string, unknown>} */ (llm).usage;
  if (!usage || typeof usage !== 'object') return null;
  const u = /** @type {Record<string, unknown>} */ (usage);
  const pick = (snake, camel) => u[snake] ?? u[camel] ?? null;
  return {
    prompt: pick('prompt_tokens', 'promptTokens'),
    completion: pick('completion_tokens', 'completionTokens'),
    total: pick('total_tokens', 'totalTokens'),
  };
}
/**
 * Summarize a list of wall-clock durations.
 * @param {number[]} values durations in ms
 * @returns {{min:number,max:number,avg:number,p50:number,p95:number}|null}
 *   null for an empty list; avg is rounded to 2 decimal places.
 */
function summarizeMs(values) {
  if (values.length === 0) return null;
  const sorted = [...values].sort((a, b) => a - b);
  const sum = values.reduce((a, b) => a + b, 0);
  const mid = (a, b) => (a + b) / 2;
  // Upper nearest-rank percentile. The previous Math.floor variant could report
  // a p95 BELOW the median on small samples (e.g. [10, 20] gave p95 = 10, the
  // minimum); Math.ceil picks the rank at-or-above the requested quantile.
  const p = (q) => sorted[Math.min(sorted.length - 1, Math.max(0, Math.ceil(q * (sorted.length - 1))))];
  return {
    min: sorted[0],
    max: sorted[sorted.length - 1],
    avg: Math.round((sum / values.length) * 100) / 100,
    // True median: middle element (odd length) or mean of the two middle elements.
    p50: sorted.length % 2 ? sorted[Math.floor(sorted.length / 2)] : mid(sorted[sorted.length / 2 - 1], sorted[sorted.length / 2]),
    p95: p(0.95),
  };
}
/**
 * Log a progress line every 10s while awaiting a long LLM call (no silent hang).
 * The timer is always cleared when the promise settles, success or failure.
 * @template T
 * @param {Promise<T>} promise the in-flight request
 * @param {number} ipcTimeoutMs IPC deadline, shown in the log line
 * @param {string} backendLabel human-readable backend name for the log line
 * @returns {Promise<T>} the same settlement as `promise`
 */
function withHeartbeat(promise, ipcTimeoutMs, backendLabel) {
  const intervalMs = 10_000;
  let ticks = 0;
  const timer = setInterval(() => {
    ticks += 1;
    const elapsedSec = (ticks * intervalMs) / 1000;
    console.log(
      ` … still waiting on ${backendLabel} (batch is large; ${elapsedSec}s elapsed, IPC deadline ${Math.round(ipcTimeoutMs / 1000)}s)…`
    );
  }, intervalMs);
  return promise.finally(() => clearInterval(timer));
}
/**
 * Assemble the kbot-ai job payload for one classifier batch.
 * Local llama backend: preset llama payload + timeout.
 * Remote router backend: env-derived payload + optional structured-output flag
 * (-F structured → response_format json_object) + optional dst artifact path
 * taken from --dst/--output.
 * @param {string[]} labels candidate labels for this batch
 * @param {number} tmo llm_timeout_ms forwarded to the HTTP client
 */
function buildKbotAiPayload(labels, tmo) {
  const prompt = classifierBatchPrompt(labels);
  if (ipcClassifierLlamaEnabled()) {
    return { ...kbotAiPayloadLlamaLocal({ prompt }), llm_timeout_ms: tmo };
  }
  const payload = { ...kbotAiPayloadFromEnv(), prompt, llm_timeout_ms: tmo };
  // OpenAI-style structured outputs; forwarded by kbot LLMClient → liboai ChatCompletion.
  if (classifierFeatures.has('structured')) {
    payload.response_format = { type: 'json_object' };
  }
  const rawDst = classifierArgv?.dst || classifierArgv?.output;
  if (rawDst != null && String(rawDst).trim() !== '') {
    payload.dst = path.resolve(process.cwd(), String(rawDst).trim());
  }
  return payload;
}
/**
 * Parse kbot-ai job_result; updates assertion stats as a side effect.
 * Guard-clause shape: bail out on a failed job, then on unparseable text, then
 * validate completeness of the parsed batch.
 * @param {Record<string, unknown>|null|undefined} p job_result payload
 * @param {string[]} labels ordered expected labels
 * @returns {{ distances: {label:string,distance:number|null}[], missing: string[], parseError: string|null, rawText: string|null, batchOk: boolean }}
 */
function processBatchResponse(p, labels) {
  const result = { distances: [], missing: [], parseError: null, rawText: null, batchOk: false };
  const succeeded = p?.status === 'success' && typeof p?.text === 'string';
  if (!succeeded) {
    assert(false, 'kbot-ai success');
    result.parseError = p?.error ?? 'not success';
    return result;
  }
  result.rawText = p.text;
  const arr = extractJsonArray(p.text);
  if (!arr) {
    assert(false, 'batch response parses as JSON array or {"items":[...]}');
    result.parseError = 'could not parse batch array from model text';
    return result;
  }
  const norm = normalizeBatchArray(arr, labels);
  result.distances = norm.distances;
  result.missing = norm.missing;
  if (norm.missing.length === 0) {
    assert(true, 'batch JSON array: all labels have distance');
    result.batchOk = true;
  } else {
    assert(false, `batch array complete (${norm.missing.length} missing labels)`);
    result.parseError = `missing: ${norm.missing.join('; ')}`;
  }
  return result;
}
/**
 * Send one kbot-ai batch over IPC, optionally with a heartbeat log, and parse
 * the reply.
 * @returns {Promise<{elapsedMs:number, p:Record<string,unknown>|null} & ReturnType<typeof processBatchResponse>>}
 */
async function runSingleBatch(ipc, labels, tmo, ipcDeadlineMs, waitLabel) {
  // Build the payload before starting the clock so only the round-trip is timed.
  const payload = buildKbotAiPayload(labels, tmo);
  const startedAt = performance.now();
  const pending = ipc.request({ type: 'kbot-ai', payload }, ipcDeadlineMs);
  const skipHeartbeat = classifierFeatures.has('no-heartbeat');
  const msg = skipHeartbeat ? await pending : await withHeartbeat(pending, ipcDeadlineMs, waitLabel);
  const elapsedMs = Math.round(performance.now() - startedAt);
  const p = payloadObj(msg);
  return { elapsedMs, p, ...processBatchResponse(p, labels) };
}
/**
 * Main classifier flow: spawn the C++ worker over UDS, send one (or N stress)
 * kbot-ai batch prompts, sort/aggregate the resulting distances, shut the
 * worker down gracefully, and write JSON/MD reports plus a distances-only
 * artifact. Exits the process: 0 when all assertions passed, 1 otherwise.
 *
 * Fix vs. previous revision: the heartbeat interval log said "15s" while
 * withHeartbeat actually ticks every 10 000 ms — the label now says "10s".
 */
async function run() {
  const quiet = classifierFeatures.has('quiet');
  classifierMetricsCollector = createMetricsCollector();
  classifierRunStartedAt = new Date().toISOString();
  const startedAt = classifierRunStartedAt;
  const useLlama = ipcClassifierLlamaEnabled();
  const backendLabel = useLlama ? `llama @ :${llama.port}` : `router=${router.fromEnv()}`;
  if (!quiet) {
    console.log(`\n📐 IPC classifier (${backendLabel}) — one batch, distance vs "machine workshop"\n`);
    if (classifierFeatures.has('structured')) {
      if (useLlama) {
        console.log(
          ` ⚠️ -F structured: ignored for local llama (use --backend remote for response_format json_object)\n`
        );
      } else {
        console.log(
          ` Structured: response_format json_object + prompt asks for {"items":[...]} (not a top-level array)\n`
        );
      }
    }
  }
  if (!existsSync(EXE)) {
    console.error(`❌ Binary not found at ${EXE}`);
    process.exit(1);
  }
  // Local backend: make sure llama-server is reachable before spawning the worker.
  if (useLlama) {
    await ensureLlamaLocalServer({
      autostart: llamaAutostartEnabled(),
      startTimeoutMs: timeouts.llamaServerStart,
    });
  }
  // Optional label cap (KBOT_CLASSIFIER_LIMIT) for quick/cheap runs.
  const limitRaw = process.env.KBOT_CLASSIFIER_LIMIT;
  let labels = [...JOB_VIEWER_MACHINE_LABELS];
  if (limitRaw !== undefined && limitRaw !== '') {
    const lim = Number.parseInt(limitRaw, 10);
    if (Number.isFinite(lim) && lim > 0) labels = labels.slice(0, lim);
  }
  const CPP_UDS_ARG = uds.workerArg();
  // Remove a stale socket file left by a previous run (POSIX only; Windows uses named pipes).
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }
  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);
  // Retry the UDS connect while the worker is still creating its socket.
  let socket;
  for (let i = 0; i < timeouts.connectAttempts; i++) {
    try {
      await new Promise((res, rej) => {
        socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
        socket.once('connect', res);
        socket.once('error', rej);
      });
      break;
    } catch (e) {
      if (i === timeouts.connectAttempts - 1) throw e;
      await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
    }
  }
  const ipc = createIpcClient(socket);
  ipc.attach();
  await ipc.readyPromise;
  const tmo = batchTimeoutMs();
  // IPC deadline = HTTP timeout plus a fixed margin for worker-side overhead.
  const ipcDeadlineMs = tmo + 60_000;
  const waitLabel = useLlama ? 'llama' : router.fromEnv();
  const nRuns = stressRunCount();
  if (!quiet) {
    console.log(` kbot-ai batch: ${labels.length} labels × ${nRuns} run(s)`);
    console.log(` liboai HTTP timeout: ${tmo} ms (llm_timeout_ms) — rebuild kbot if this was stuck at ~30s before`);
    console.log(` IPC wait deadline: ${ipcDeadlineMs} ms (HTTP + margin)`);
    // withHeartbeat ticks every 10_000 ms — label must match (was wrongly '15s').
    const hb = classifierFeatures.has('no-heartbeat') ? 'off' : '10s';
    console.log(` (Large batches can take many minutes; heartbeat ${hb}…)\n`);
  }
  /** @type {Array<{ index: number, wallMs: number, batchOk: boolean, parseError: string|null, usage: ReturnType<typeof usageTokens>}>} */
  const stressIterations = [];
  // "last*" capture the final iteration's results; they feed the report below.
  let lastP = /** @type {Record<string, unknown>|null} */ (null);
  let lastDistances = [];
  let lastRawText = null;
  let lastParseError = null;
  let lastByDistance = [];
  for (let r = 0; r < nRuns; r++) {
    if (nRuns > 1 && !quiet) {
      console.log(` ── Stress run ${r + 1}/${nRuns} ──`);
    }
    const batch = await runSingleBatch(ipc, labels, tmo, ipcDeadlineMs, waitLabel);
    lastP = batch.p;
    lastDistances = batch.distances;
    lastRawText = batch.rawText;
    lastParseError = batch.parseError;
    // Sort ascending by distance; unanswered (null) labels sink to the end.
    lastByDistance = [...batch.distances].sort((a, b) => {
      if (a.distance == null && b.distance == null) return 0;
      if (a.distance == null) return 1;
      if (b.distance == null) return -1;
      return a.distance - b.distance;
    });
    const u = usageTokens(batch.p?.llm);
    stressIterations.push({
      index: r + 1,
      wallMs: batch.elapsedMs,
      batchOk: batch.batchOk,
      parseError: batch.parseError,
      usage: u,
    });
    if (nRuns > 1 && !quiet) {
      const tok = u
        ? `tokens p/c/t ${u.prompt ?? '—'}/${u.completion ?? '—'}/${u.total ?? '—'}`
        : 'tokens —';
      console.log(` wall: ${batch.elapsedMs} ms ${batch.batchOk ? 'OK' : 'FAIL'} ${tok}`);
    }
  }
  const wallMsList = stressIterations.map((x) => x.wallMs);
  /** @type {null | { requestedRuns: number, wallMs: NonNullable<ReturnType<typeof summarizeMs>>, successCount: number, failCount: number, totalPromptTokens: number, totalCompletionTokens: number, totalTokens: number }} */
  let stressSummary = null;
  if (nRuns > 1) {
    const w = summarizeMs(wallMsList);
    stressSummary = {
      requestedRuns: nRuns,
      wallMs: /** @type {NonNullable<typeof w>} */ (w),
      successCount: stressIterations.filter((x) => x.batchOk).length,
      failCount: stressIterations.filter((x) => !x.batchOk).length,
      totalPromptTokens: stressIterations.reduce((s, x) => s + (Number(x.usage?.prompt) || 0), 0),
      totalCompletionTokens: stressIterations.reduce((s, x) => s + (Number(x.usage?.completion) || 0), 0),
      totalTokens: stressIterations.reduce((s, x) => s + (Number(x.usage?.total) || 0), 0),
    };
    if (quiet) {
      console.log(
        `stress ${nRuns} runs: min=${stressSummary.wallMs.min}ms max=${stressSummary.wallMs.max}ms avg=${stressSummary.wallMs.avg}ms ok=${stressSummary.successCount}/${nRuns} tokensΣ=${stressSummary.totalTokens}`
      );
    } else {
      console.log(`\n ═══════════════ Stress summary (${nRuns} batch runs) ═══════════════`);
      console.log(
        ` Wall time (ms): min ${stressSummary.wallMs.min} max ${stressSummary.wallMs.max} avg ${stressSummary.wallMs.avg} p50 ${stressSummary.wallMs.p50} p95 ${stressSummary.wallMs.p95}`
      );
      console.log(
        ` Batches OK: ${stressSummary.successCount} fail: ${stressSummary.failCount} (assertions: passed ${stats.passed} failed ${stats.failed})`
      );
      if (
        stressSummary.totalPromptTokens > 0 ||
        stressSummary.totalCompletionTokens > 0 ||
        stressSummary.totalTokens > 0
      ) {
        console.log(
          ` Token totals (sum over runs): prompt ${stressSummary.totalPromptTokens} completion ${stressSummary.totalCompletionTokens} total ${stressSummary.totalTokens}`
        );
      }
      console.log(` ═══════════════════════════════════════════════════════════════════\n`);
    }
  }
  const p = lastP;
  const distances = lastDistances;
  const rawText = lastRawText;
  const parseError = lastParseError;
  const byDistance = lastByDistance;
  // Graceful shutdown: ack, short grace period, then drop the socket and check exit code.
  const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
  assert(shutdownRes.type === 'shutdown_ack', 'shutdown ack');
  await new Promise((r) => setTimeout(r, timeouts.postShutdownMs));
  socket.destroy();
  assert(workerProc.exitCode === 0, 'worker exit 0');
  const finishedAt = new Date().toISOString();
  const reportNow = new Date();
  const cwd = process.cwd();
  const reportData = {
    startedAt,
    finishedAt,
    passed: stats.passed,
    failed: stats.failed,
    ok: stats.failed === 0,
    ipcClassifierLlama: useLlama,
    cli: {
      features: [...classifierFeatures],
      provider: process.env.KBOT_ROUTER ?? null,
      model: process.env.KBOT_IPC_MODEL ?? null,
      backend: useLlama ? 'local' : 'remote',
      stressRuns: nRuns,
      structuredOutput: !useLlama && classifierFeatures.has('structured'),
      dst:
        classifierArgv?.dst || classifierArgv?.output
          ? path.resolve(
              process.cwd(),
              String(classifierArgv.dst || classifierArgv.output).trim()
            )
          : null,
    },
    env: {
      KBOT_IPC_CLASSIFIER_LLAMA: process.env.KBOT_IPC_CLASSIFIER_LLAMA ?? null,
      KBOT_IPC_LLAMA_AUTOSTART: process.env.KBOT_IPC_LLAMA_AUTOSTART ?? null,
      KBOT_ROUTER: process.env.KBOT_ROUTER ?? null,
      KBOT_IPC_MODEL: process.env.KBOT_IPC_MODEL ?? null,
      KBOT_CLASSIFIER_LIMIT: process.env.KBOT_CLASSIFIER_LIMIT ?? null,
      KBOT_CLASSIFIER_TIMEOUT_MS: process.env.KBOT_CLASSIFIER_TIMEOUT_MS ?? null,
      KBOT_CLASSIFIER_STRESS_RUNS: process.env.KBOT_CLASSIFIER_STRESS_RUNS ?? null,
      KBOT_LLAMA_PORT: process.env.KBOT_LLAMA_PORT ?? null,
      KBOT_LLAMA_BASE_URL: process.env.KBOT_LLAMA_BASE_URL ?? null,
    },
    metrics: buildMetricsBundle(classifierMetricsCollector, startedAt, finishedAt),
    anchor: ANCHOR,
    source: 'JobViewer.tsx:205',
    batch: true,
    backend: useLlama ? 'llama_local' : 'remote_router',
    ...(useLlama
      ? {
          llama: {
            baseURL: llama.baseURL,
            port: llama.port,
            router: llama.router,
            model: llama.model,
          },
        }
      : {
          router: router.fromEnv(),
          model: process.env.KBOT_IPC_MODEL ?? null,
        }),
    labelCount: labels.length,
    /** Provider metadata from API (usage, model, id, OpenRouter fields) — see LLMClient + kbot `llm` key */
    llm: p?.llm ?? null,
    distances,
    byDistance,
    rawText,
    parseError: parseError ?? null,
    ...(nRuns > 1 && stressSummary
      ? {
          stress: {
            requestedRuns: nRuns,
            iterations: stressIterations,
            summary: stressSummary,
          },
        }
      : {}),
  };
  let jsonPath = '';
  let mdPath = '';
  let arrayPath = '';
  if (!classifierFeatures.has('no-report')) {
    try {
      const written = await writeTestReports('test-ipc-classifier', reportData, { cwd, now: reportNow });
      jsonPath = written.jsonPath;
      mdPath = written.mdPath;
    } catch (e) {
      console.error(' ⚠️ Failed to write report:', e?.message ?? e);
    }
    /** Array-only artifact (same timestamp as main report). */
    arrayPath = reportFilePathWithExt('test-ipc-classifier-distances', '.json', { cwd, now: reportNow });
    await mkdir(path.dirname(arrayPath), { recursive: true });
    await writeFile(arrayPath, `${JSON.stringify(distances, null, 2)}\n`, 'utf8');
  }
  const { label: timeLabel } = timeParts(reportNow);
  if (!classifierFeatures.has('quiet')) {
    console.log(`\n────────────────────────────────`);
    console.log(` Passed: ${stats.passed} Failed: ${stats.failed}`);
    if (jsonPath) console.log(` Report JSON: ${jsonPath}`);
    if (mdPath) console.log(` Report MD: ${mdPath}`);
    if (arrayPath) console.log(` Distances JSON: ${arrayPath}`);
    console.log(` Run id: test-ipc-classifier::${timeLabel}`);
    console.log(` distances.length: ${distances.length}`);
    console.log(`────────────────────────────────\n`);
  } else {
    console.log(
      `done: passed=${stats.passed} failed=${stats.failed} ok=${stats.failed === 0}${jsonPath ? ` json=${jsonPath}` : ''}`
    );
  }
  process.exit(stats.failed > 0 ? 1 : 0);
}
// Entry point: parse CLI flags first (they set env vars / feature flags read by
// run()), then execute; a rejection is funneled into a named fatal handler that
// best-effort persists an error report before exiting non-zero.
parseClassifierArgv();
/** @param {unknown} err */
async function reportFatalClassifierError(err) {
  console.error('Classifier error:', err);
  if (!classifierFeatures.has('no-report')) {
    try {
      const finishedAt = new Date().toISOString();
      const collector = classifierMetricsCollector ?? createMetricsCollector();
      const runStartedAt = classifierRunStartedAt ?? finishedAt;
      await writeTestReports(
        'test-ipc-classifier',
        {
          startedAt: runStartedAt,
          finishedAt,
          error: String(err?.stack ?? err),
          passed: stats.passed,
          failed: stats.failed,
          ok: false,
          ipcClassifierLlama: ipcClassifierLlamaEnabled(),
          metrics: buildMetricsBundle(collector, runStartedAt, finishedAt),
        },
        { cwd: process.cwd() }
      );
    } catch (_) {
      /* ignore — reporting failures must not mask the original error */
    }
  }
  process.exit(1);
}
run().catch(reportFatalClassifierError);

View File

@ -0,0 +1,283 @@
/**
* orchestrator/test-ipc.mjs
*
* Integration test: spawn the C++ worker in UDS mode, exchange messages, verify responses.
*
* Run: npm run test:ipc
*
* Env:
* KBOT_IPC_LLM real LLM step is on by default; set to 0 / false / no / off to skip (CI / offline).
* KBOT_ROUTER router (default: openrouter; same defaults as C++ LLMClient / CLI)
* KBOT_IPC_MODEL optional model id (e.g. openrouter slug); else C++ default for that router
* KBOT_IPC_PROMPT custom prompt (default: capital of Germany; asserts "berlin" in reply)
* KBOT_IPC_LLM_LOG_MAX max chars to print for LLM text (default: unlimited)
* KBOT_IPC_LLAMA llama :8888 step on by default; set 0/false/no/off to skip
* KBOT_IPC_LLAMA_AUTOSTART if 0, do not spawn scripts/run-7b.sh when :8888 is closed
* KBOT_LLAMA_* KBOT_LLAMA_PORT, KBOT_LLAMA_BASE_URL, KBOT_LLAMA_MODEL, KBOT_LLAMA_START_TIMEOUT_MS
*
* Shared: presets.js, test-commons.js, reports.js
* Report: cwd/tests/test-ipc__HH-mm.{json,md} (see reports.js)
*/
import { spawn } from 'node:child_process';
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import net from 'node:net';
import { existsSync, unlinkSync } from 'node:fs';
import {
distExePath,
platform,
uds,
timeouts,
kbotAiPayloadFromEnv,
kbotAiPayloadLlamaLocal,
usingDefaultGermanyPrompt,
ensureLlamaLocalServer,
} from './presets.js';
import {
createAssert,
payloadObj,
logKbotAiResponse,
ipcLlmEnabled,
ipcLlamaEnabled,
llamaAutostartEnabled,
createIpcClient,
pipeWorkerStderr,
} from './test-commons.js';
import {
createMetricsCollector,
buildMetricsBundle,
writeTestReports,
} from './reports.js';
// ESM has no __dirname; derive it from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Worker binary path + shared assertion counter for this test file.
const EXE = distExePath(__dirname);
const stats = createAssert();
const { assert } = stats;
/** Set at run start for error reports */
let ipcRunStartedAt = null;
let ipcMetricsCollector = null;
/** `llm` object from kbot-ai job_result (usage, model, OpenRouter extras) — filled in steps 6–7 */
let ipcKbotAiLlmRouter = null;
let ipcKbotAiLlmLlama = null;
/**
 * IPC integration test: spawn the C++ worker in UDS mode and walk it through
 * eight ordered steps — ready handshake, ping/pong, job echo, unknown-type
 * error, concurrent pings, optional live-LLM call, optional local-llama call,
 * and graceful shutdown — then write JSON/MD reports and exit 0/1 by the
 * assertion tally. Step order matters: shutdown must come last.
 */
async function run() {
  ipcMetricsCollector = createMetricsCollector();
  ipcRunStartedAt = new Date().toISOString();
  ipcKbotAiLlmRouter = null;
  ipcKbotAiLlmLlama = null;
  console.log('\n🔧 IPC [UDS] Integration Tests\n');
  if (!existsSync(EXE)) {
    console.error(`❌ Binary not found at ${EXE}`);
    process.exit(1);
  }
  const CPP_UDS_ARG = uds.workerArg();
  // Remove a stale socket file left by a previous run (POSIX only; Windows uses named pipes).
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }
  // ── 1. Spawn & ready ────────────────────────────────────────────────────
  console.log('1. Spawn worker (UDS mode) and wait for ready signal');
  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);
  // Retry the connect while the worker is still creating its socket.
  let socket;
  for (let i = 0; i < timeouts.connectAttempts; i++) {
    try {
      await new Promise((res, rej) => {
        socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
        socket.once('connect', res);
        socket.once('error', rej);
      });
      break;
    } catch (e) {
      if (i === timeouts.connectAttempts - 1) throw e;
      await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
    }
  }
  assert(true, 'Socket connected successfully');
  const ipc = createIpcClient(socket);
  ipc.attach();
  const readyMsg = await ipc.readyPromise;
  assert(readyMsg.type === 'ready', 'Worker sends ready message on startup');
  // ── 2. Ping / Pong ─────────────────────────────────────────────────────
  console.log('2. Ping → Pong');
  const pong = await ipc.request({ type: 'ping' }, timeouts.ipcDefault);
  assert(pong.type === 'pong', `Response type is "pong" (got "${pong.type}")`);
  // ── 3. Job echo ─────────────────────────────────────────────────────────
  console.log('3. Job → Job Result (echo payload)');
  const payload = { action: 'resize', width: 1024, format: 'webp' };
  const jobResult = await ipc.request({ type: 'job', payload }, timeouts.ipcDefault);
  assert(jobResult.type === 'job_result', `Response type is "job_result" (got "${jobResult.type}")`);
  assert(
    jobResult.payload?.action === 'resize' && jobResult.payload?.width === 1024,
    'Payload echoed back correctly'
  );
  // ── 4. Unknown type → error ─────────────────────────────────────────────
  console.log('4. Unknown type → error response');
  const errResp = await ipc.request({ type: 'nonsense' }, timeouts.ipcDefault);
  assert(errResp.type === 'error', `Response type is "error" (got "${errResp.type}")`);
  // ── 5. Multiple rapid requests ──────────────────────────────────────────
  console.log('5. Multiple concurrent requests');
  const promises = [];
  for (let i = 0; i < 10; i++) {
    promises.push(ipc.request({ type: 'ping', payload: { seq: i } }, timeouts.ipcDefault));
  }
  const results = await Promise.all(promises);
  assert(results.length === 10, `All 10 responses received`);
  assert(results.every((r) => r.type === 'pong'), 'All responses are pong');
  // ── 6. kbot-ai — real LLM (optional via ipcLlmEnabled) ─────────────────
  if (ipcLlmEnabled()) {
    const aiPayload = kbotAiPayloadFromEnv();
    const r = aiPayload.router;
    console.log(`6. kbot-ai → real LLM (router=${r}, timeout 3m)`);
    const live = await ipc.request(
      {
        type: 'kbot-ai',
        payload: aiPayload,
      },
      timeouts.kbotAi
    );
    assert(live.type === 'job_result', `LLM response type job_result (got "${live.type}")`);
    const lp = payloadObj(live);
    assert(lp?.status === 'success', `payload status success (got "${lp?.status}")`);
    assert(
      typeof lp?.text === 'string' && lp.text.trim().length >= 3,
      `assistant text present (length ${(lp?.text || '').length})`
    );
    // Content check only applies when the default "capital of Germany" prompt is in use.
    if (usingDefaultGermanyPrompt()) {
      assert(
        /berlin/i.test(String(lp?.text || '')),
        'assistant text mentions Berlin (capital of Germany)'
      );
    }
    ipcKbotAiLlmRouter = lp?.llm ?? null;
    logKbotAiResponse('kbot-ai response', live);
  } else {
    console.log('6. kbot-ai — skipped (KBOT_IPC_LLM=0/false/no/off; default is to run live LLM)');
  }
  // ── 7. kbot-ai — llama local :8888 (optional; llama-basics parity) ───────
  if (ipcLlamaEnabled()) {
    console.log('7. kbot-ai → llama local runner (OpenAI :8888, presets.llama)');
    let llamaReady = false;
    try {
      await ensureLlamaLocalServer({
        autostart: llamaAutostartEnabled(),
        startTimeoutMs: timeouts.llamaServerStart,
      });
      llamaReady = true;
    } catch (e) {
      console.error(`${e?.message ?? e}`);
    }
    assert(llamaReady, 'llama-server listening on :8888 (or autostart run-7b.sh succeeded)');
    if (llamaReady) {
      const llamaPayload = kbotAiPayloadLlamaLocal();
      const llamaRes = await ipc.request(
        { type: 'kbot-ai', payload: llamaPayload },
        timeouts.llamaKbotAi
      );
      assert(llamaRes.type === 'job_result', `llama IPC response type job_result (got "${llamaRes.type}")`);
      const llp = payloadObj(llamaRes);
      assert(llp?.status === 'success', `llama payload status success (got "${llp?.status}")`);
      assert(
        typeof llp?.text === 'string' && llp.text.trim().length >= 1,
        `llama assistant text present (length ${(llp?.text || '').length})`
      );
      assert(/\b8\b/.test(String(llp?.text || '')), 'llama arithmetic: reply mentions 8 (5+3)');
      ipcKbotAiLlmLlama = llp?.llm ?? null;
      logKbotAiResponse('kbot-ai llama local', llamaRes);
    }
  } else {
    console.log('7. kbot-ai llama local — skipped (KBOT_IPC_LLAMA=0; default is to run)');
  }
  // ── 8. Graceful shutdown ────────────────────────────────────────────────
  console.log('8. Graceful shutdown');
  const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
  assert(shutdownRes.type === 'shutdown_ack', `Shutdown acknowledged (got "${shutdownRes.type}")`);
  await new Promise((r) => setTimeout(r, timeouts.postShutdownMs));
  socket.destroy();
  assert(workerProc.exitCode === 0, `Worker exited with code 0 (got ${workerProc.exitCode})`);
  // ── Summary ─────────────────────────────────────────────────────────────
  console.log(`\n────────────────────────────────`);
  console.log(` Passed: ${stats.passed} Failed: ${stats.failed}`);
  console.log(`────────────────────────────────\n`);
  // Report writing is best-effort: a failed write must not flip the exit code.
  try {
    const finishedAt = new Date().toISOString();
    const { jsonPath, mdPath } = await writeTestReports(
      'test-ipc',
      {
        startedAt: ipcRunStartedAt,
        finishedAt,
        passed: stats.passed,
        failed: stats.failed,
        ok: stats.failed === 0,
        ipcLlm: ipcLlmEnabled(),
        ipcLlama: ipcLlamaEnabled(),
        env: {
          KBOT_IPC_LLM: process.env.KBOT_IPC_LLM ?? null,
          KBOT_IPC_LLAMA: process.env.KBOT_IPC_LLAMA ?? null,
          KBOT_IPC_LLAMA_AUTOSTART: process.env.KBOT_IPC_LLAMA_AUTOSTART ?? null,
          KBOT_ROUTER: process.env.KBOT_ROUTER ?? null,
          KBOT_IPC_MODEL: process.env.KBOT_IPC_MODEL ?? null,
          KBOT_IPC_PROMPT: process.env.KBOT_IPC_PROMPT ?? null,
          KBOT_LLAMA_PORT: process.env.KBOT_LLAMA_PORT ?? null,
          KBOT_LLAMA_BASE_URL: process.env.KBOT_LLAMA_BASE_URL ?? null,
        },
        metrics: buildMetricsBundle(ipcMetricsCollector, ipcRunStartedAt, finishedAt),
        kbotAi: {
          routerStep: ipcKbotAiLlmRouter,
          llamaStep: ipcKbotAiLlmLlama,
        },
      },
      { cwd: process.cwd() }
    );
    console.log(` 📄 Report JSON: ${jsonPath}`);
    console.log(` 📄 Report MD: ${mdPath}\n`);
  } catch (e) {
    console.error(' ⚠️ Failed to write report:', e?.message ?? e);
  }
  process.exit(stats.failed > 0 ? 1 : 0);
}
// Fatal-error path: log the failure, best-effort write an error report (never
// letting reporting problems mask the original error), then exit non-zero.
run().catch(async (error) => {
  console.error('Test runner error:', error);
  try {
    const finishedAt = new Date().toISOString();
    const collector = ipcMetricsCollector ?? createMetricsCollector();
    const runStartedAt = ipcRunStartedAt ?? finishedAt;
    const errorReport = {
      startedAt: runStartedAt,
      finishedAt,
      error: String(error?.stack ?? error),
      passed: stats.passed,
      failed: stats.failed,
      ok: false,
      metrics: buildMetricsBundle(collector, runStartedAt, finishedAt),
    };
    await writeTestReports('test-ipc', errorReport, { cwd: process.cwd() });
  } catch (_) {
    /* ignore */
  }
  process.exit(1);
});

193
packages/media/cpp/package-lock.json generated Normal file
View File

@ -0,0 +1,193 @@
{
"name": "kbot-cpp",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "kbot-cpp",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"yargs": "^17.7.2"
}
},
"node_modules/ansi-regex": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/ansi-styles": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
"license": "MIT",
"dependencies": {
"color-convert": "^2.0.1"
},
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/cliui": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
"license": "ISC",
"dependencies": {
"string-width": "^4.2.0",
"strip-ansi": "^6.0.1",
"wrap-ansi": "^7.0.0"
},
"engines": {
"node": ">=12"
}
},
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
"license": "MIT",
"dependencies": {
"color-name": "~1.1.4"
},
"engines": {
"node": ">=7.0.0"
}
},
"node_modules/color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
"license": "MIT"
},
"node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
"license": "MIT"
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/get-caller-file": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"license": "ISC",
"engines": {
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/is-fullwidth-code-point": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"license": "MIT",
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/strip-ansi": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
"license": "MIT",
"dependencies": {
"ansi-regex": "^5.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/wrap-ansi": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
"license": "MIT",
"dependencies": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
"strip-ansi": "^6.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
"license": "ISC",
"engines": {
"node": ">=10"
}
},
"node_modules/yargs": {
"version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
"license": "MIT",
"dependencies": {
"cliui": "^8.0.1",
"escalade": "^3.1.1",
"get-caller-file": "^2.0.5",
"require-directory": "^2.1.1",
"string-width": "^4.2.3",
"y18n": "^5.0.5",
"yargs-parser": "^21.1.1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/yargs-parser": {
"version": "21.1.1",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
"license": "ISC",
"engines": {
"node": ">=12"
}
}
}
}

View File

@ -0,0 +1,41 @@
{
"name": "kbot-cpp",
"version": "1.0.0",
"type": "module",
"description": "KBot C++ CLI built with CMake.",
"directories": {
"test": "tests"
},
"dependencies": {
"yargs": "^17.7.2"
},
"scripts": {
"config": "cmake --preset dev",
"config:release": "cmake --preset release",
"build": "cmake --preset dev && cmake --build --preset dev",
"build:release": "cmake --preset release && cmake --build --preset release",
"build:linux": "bash build-linux.sh",
"test": "ctest --test-dir build/dev -C Debug --output-on-failure",
"test:release": "ctest --test-dir build/release -C Release --output-on-failure",
"clean": "cmake -E rm -rf build dist",
"rebuild": "npm run clean && npm run build",
"run": ".\\dist\\kbot.exe --help",
"worker": ".\\dist\\kbot.exe worker",
"worker:uds": ".\\dist\\kbot.exe worker --uds \\\\.\\pipe\\kbot-worker",
"kbot:ai": ".\\dist\\kbot.exe kbot ai --prompt \"hi\"",
"kbot:run": ".\\dist\\kbot.exe kbot run --list",
"test:ipc": "node orchestrator/test-ipc.mjs",
"test:ipc:classifier": "node orchestrator/test-ipc-classifier.mjs",
"test:files": "node orchestrator/test-files.mjs",
"test:ipc:classifier:openrouter": "node orchestrator/classifier-openrouter.mjs",
"test:ipc:classifier:openrouter:stress": "node orchestrator/classifier-openrouter-stress.mjs",
"test:html": "cmake --preset release && cmake --build --preset release --target test_html && .\\dist\\test_html.exe"
},
"repository": {
"type": "git",
"url": "https://git.polymech.info/polymech/mono-cpp.git"
},
"keywords": [],
"author": "",
"license": "ISC"
}

View File

@ -0,0 +1,33 @@
include(FetchContent)

FetchContent_Declare(
  lexbor
  GIT_REPOSITORY https://github.com/lexbor/lexbor.git
  GIT_TAG v2.4.0
  GIT_SHALLOW TRUE
)

# Build lexbor as static only. These cache overrides must be set BEFORE
# FetchContent_MakeAvailable() so the lexbor sub-project picks them up.
set(LEXBOR_BUILD_SHARED OFF CACHE BOOL "" FORCE)
set(LEXBOR_BUILD_STATIC ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(lexbor)

# HTML parsing/extraction helpers plus the html2md converter.
add_library(html STATIC
  src/html.cpp
  src/html2md.cpp
  src/table.cpp
)

# Namespaced alias: consumers can link `html::html`; the `::` makes CMake
# fail at configure time on a typo instead of silently passing a bogus -l flag.
add_library(html::html ALIAS html)

# MSVC: treat source and execution charset as UTF-8
# (fixes \u200b zero-width-space mismatch in html2md tests)
if(MSVC)
  target_compile_options(html PRIVATE /utf-8)
endif()

target_include_directories(html
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)

target_link_libraries(html
  PUBLIC lexbor_static
)

View File

@ -0,0 +1,55 @@
#pragma once
#include <string>
#include <vector>
namespace html {

/// Parsed element — tag name + full text content of that element's subtree.
struct Element {
  std::string tag;  // qualified tag name, e.g. "div"
  std::string text; // text content (may include descendant text)
};

/// Link with href and optional attributes.
struct Link {
  std::string href;
  std::string rel;  // e.g. "canonical", "stylesheet"
  std::string text; // anchor text (for <a> tags)
};

/// Parse an HTML string and return all elements with their text content.
/// Returns an empty vector if the document cannot be parsed.
std::vector<Element> parse(const std::string &html_str);

/// Extract the text content of all elements matching a CSS selector.
/// Returns an empty vector on parse failure or when nothing matches.
std::vector<std::string> select(const std::string &html_str,
                                const std::string &selector);

// ── Enricher extraction helpers ─────────────────────────────────────────────
// All helpers below return an empty string/vector when the HTML fails to
// parse or the requested item is absent.

/// Extract the <title> text.
std::string get_title(const std::string &html_str);

/// Extract a <meta name="X"> or <meta property="X"> content attribute.
std::string get_meta(const std::string &html_str, const std::string &name);

/// Extract <link rel="canonical"> href.
std::string get_canonical(const std::string &html_str);

/// Extract all <a href="..."> values (resolved links as-is from the HTML).
std::vector<Link> get_links(const std::string &html_str);

/// Extract visible body text, stripping script/style/noscript/svg/iframe.
std::string get_body_text(const std::string &html_str);

/// Extract raw JSON strings from <script type="application/ld+json">.
std::vector<std::string> get_json_ld(const std::string &html_str);

/// Extract an attribute value from the first element matching a CSS selector.
std::string get_attr(const std::string &html_str, const std::string &selector,
                     const std::string &attr_name);

/// Convert HTML content to Markdown.
std::string to_markdown(const std::string &html_str);

} // namespace html

View File

@ -0,0 +1,690 @@
// Copyright (c) Tim Gromeyer
// Licensed under the MIT License - https://opensource.org/licenses/MIT
#ifndef HTML2MD_H
#define HTML2MD_H
#include <memory>
#include <string>
#include <unordered_map>
#include <cstdint>
/*!
* \brief html2md namespace
*
* The html2md namespace provides:
* 1. The Converter class
* 2. Static wrapper around Converter class
*
* \note Do NOT try to convert HTML that contains a list in an ordered list or a
* `blockquote` in a list!\n This will be a **total** mess!
*/
namespace html2md {
/*!
* \brief Options for the conversion from HTML to Markdown
* \warning Make sure to pass valid options; otherwise, the output will be
* invalid!
*
* Example from `tests/main.cpp`:
*
* ```cpp
* auto *options = new html2md::Options();
* options->splitLines = false;
*
* html2md::Converter c(html, options);
* auto md = c.convert();
* ```
*/
struct Options {
  /*!
   * \brief Add new line when a certain number of characters is reached
   *
   * \see softBreak
   * \see hardBreak
   */
  bool splitLines = true;

  /*!
   * \brief softBreak Wrap after ... characters when the next space is reached
   * and as long as it's not in a list, table, image or anchor (link).
   */
  int softBreak = 80;

  /*!
   * \brief hardBreak Force a break after ... characters in a line
   */
  int hardBreak = 100;

  /*!
   * \brief The char used for unordered lists
   *
   * Valid:
   * - `-`
   * - `+`
   * - `*`
   *
   * Example:
   *
   * ```markdown
   * - List
   * + Also a list
   * * And this to
   * ```
   */
  char unorderedList = '-';

  /*!
   * \brief The char used after the number of the item
   *
   * Valid:
   * - `.`
   * - `)`
   *
   * Example:
   *
   * ```markdown
   * 1. Hello
   * 2) World!
   * ```
   */
  char orderedList = '.';

  /*!
   * \brief Whether title is added as h1 heading at the very beginning of the
   * markdown
   *
   * Whether title is added as h1 heading at the very beginning of the markdown.
   * Default is true.
   */
  bool includeTitle = true;

  /*!
   * \brief Whether to format Markdown Tables
   *
   * Whether to format Markdown Tables.
   * Default is true.
   */
  bool formatTable = true;

  /*!
   * \brief Whether to force left trim of lines in the final Markdown output
   *
   * Whether to force left trim of lines in the final Markdown output.
   * Default is false.
   */
  bool forceLeftTrim = false;

  /*!
   * \brief Whether to compress whitespace (tabs, multiple spaces) into a single
   * space
   *
   * Whether to compress whitespace (tabs, multiple spaces) into a single space.
   * Default is false.
   */
  bool compressWhitespace = false;

  /*!
   * \brief Whether to escape numbered lists (e.g. "4." -> "4\.") to prevent them
   * from being interpreted as lists in Markdown.
   *
   * Whether to escape numbered lists.
   * Default is true.
   */
  bool escapeNumberedList = true;

  /*!
   * \brief Whether to keep HTML entities (e.g. `&nbsp;`) in the output
   *
   * If true, the converter will not replace HTML entities configured in the
   * internal conversion map. Default is false (current behaviour).
   */
  bool keepHtmlEntities = false;

  /*!
   * \brief Member-wise equality over every option field.
   *
   * Fix: takes the argument by const reference — the original signature took
   * the whole struct by value, copying it on every comparison.
   */
  inline bool operator==(const Options &o) const {
    return splitLines == o.splitLines && unorderedList == o.unorderedList &&
           orderedList == o.orderedList && includeTitle == o.includeTitle &&
           softBreak == o.softBreak && hardBreak == o.hardBreak &&
           formatTable == o.formatTable && forceLeftTrim == o.forceLeftTrim &&
           compressWhitespace == o.compressWhitespace &&
           escapeNumberedList == o.escapeNumberedList &&
           keepHtmlEntities == o.keepHtmlEntities;
  }
};
/*!
* \brief Class for converting HTML to Markdown
*
* This class converts HTML to Markdown.
* There is also a static wrapper for this class (see html2md::Convert).
*
* ## Usage example
*
* Option 1: Use the class:
*
* ```cpp
* std::string html = "<h1>example</h1>";
* html2md::Converter c(html);
* auto md = c.convert();
*
* if (!c.ok()) std::cout << "There was something wrong in the HTML\n";
* std::cout << md; // # example
* ```
*
* Option 2: Use the static wrapper:
*
* ```cpp
* std::string html = "<h1>example</h1>";
*
* auto md = html2md::Convert(html);
* std::cout << md;
* ```
*
* Advanced: use Options:
*
* ```cpp
* std::string html = "<h1>example</h1>";
*
* auto *options = new html2md::Options();
* options->splitLines = false;
* options->unorderedList = '*';
*
* html2md::Converter c(html, options);
* auto md = c.convert();
* if (!c.ok()) std::cout << "There was something wrong in the HTML\n";
* std::cout << md; // # example
* ```
*/
class Converter {
public:
  /*!
   * \brief Standard initializer, takes HTML as parameter. Also prepares
   * everything. \param html The HTML as std::string. \param options Options for
   * the Conversation. See html2md::Options() for more.
   *
   * \note Don't pass anything else than HTML, otherwise the output will be a
   * **mess**!
   *
   * This is the default initializer.<br>
   * You can use appendToMd() to append something to the beginning of the
   * generated output.
   */
  // Delegates to the private pointer-taking constructor below.
  explicit inline Converter(const std::string &html,
                            struct Options *options = nullptr) {
    *this = Converter(&html, options);
  }

  /*!
   * \brief Convert HTML into Markdown.
   * \return Returns the converted Markdown.
   *
   * This function actually converts the HTML into Markdown.
   * It also cleans up the Markdown so you don't have to do anything.
   */
  [[nodiscard]] std::string convert();

  /*!
   * \brief Append a char to the Markdown.
   * \param ch The char to append.
   * \return Returns a copy of the instance with the char appended.
   */
  Converter *appendToMd(char ch);

  /*!
   * \brief Append a char* to the Markdown.
   * \param str The char* to append.
   * \return Returns a copy of the instance with the char* appended.
   */
  Converter *appendToMd(const char *str);

  /*!
   * \brief Append a string to the Markdown.
   * \param s The string to append.
   * \return Returns a copy of the instance with the string appended.
   */
  inline Converter *appendToMd(const std::string &s) {
    return appendToMd(s.c_str());
  }

  /*!
   * \brief Appends a ' ' in certain cases.
   * \return Copy of the instance with(maybe) the appended space.
   *
   * This function appends ' ' if:
   * - md does not end with `*`
   * - md does not end with `\n` aka newline
   */
  Converter *appendBlank();

  /*!
   * \brief Add an HTML symbol conversion
   * \param htmlSymbol The HTML symbol to convert
   * \param replacement The replacement string
   * \note This is useful for converting HTML entities to their Markdown
   * equivalents. For example, you can add a conversion for "&nbsp;" to
   * " " (space) or "&lt;" to "<" (less than).
   * \note This is not a standard feature of the Converter class, but it can
   * be added to the class to allow for more flexibility in the conversion
   * process. You can use this feature to add custom conversions for any HTML
   * symbol that you want to convert to a specific Markdown representation.
   */
  void addHtmlSymbolConversion(const std::string &htmlSymbol,
                               const std::string &replacement) {
    htmlSymbolConversions_[htmlSymbol] = replacement;
  }

  /*!
   * \brief Remove an HTML symbol conversion
   * \param htmlSymbol The HTML symbol to remove
   * \note This is useful for removing custom conversions that you have added
   * previously.
   */
  void removeHtmlSymbolConversion(const std::string &htmlSymbol) {
    htmlSymbolConversions_.erase(htmlSymbol);
  }

  /*!
   * \brief Clear all HTML symbol conversions
   * \note This is useful for clearing the conversion map (it's empty afterwards).
   */
  void clearHtmlSymbolConversions() { htmlSymbolConversions_.clear(); }

  /*!
   * \brief Checks if everything was closed properly(in the HTML).
   * \return Returns false if there is a unclosed tag.
   * \note As long as you have not called convert(), it always returns true.
   */
  [[nodiscard]] bool ok() const;

  /*!
   * \brief Reset the generated Markdown
   */
  void reset();

  /*!
   * \brief Checks if the HTML matches and the options are the same.
   * \param The Converter object to compare with
   * \return true if the HTML and options matches otherwise false
   */
  inline bool operator==(const Converter *c) const { return *this == *c; }

  // Equality means: same input HTML and same conversion options.
  inline bool operator==(const Converter &c) const {
    return html_ == c.html_ && option == c.option;
  }

  /*!
   * \brief Returns ok().
   */
  inline explicit operator bool() const { return ok(); };

private:
  // Attributes
  static constexpr const char *kAttributeHref = "href";
  static constexpr const char *kAttributeAlt = "alt";
  static constexpr const char *kAttributeTitle = "title";
  static constexpr const char *kAttributeClass = "class";
  static constexpr const char *kAttributeSrc = "src";
  // NOTE(review): name is misspelled ("Attrinute") but referenced by the
  // implementation file — renaming requires a coordinated change there.
  static constexpr const char *kAttrinuteAlign = "align";

  static constexpr const char *kTagAnchor = "a";
  static constexpr const char *kTagBreak = "br";
  static constexpr const char *kTagCode = "code";
  static constexpr const char *kTagDiv = "div";
  static constexpr const char *kTagHead = "head";
  static constexpr const char *kTagLink = "link";
  static constexpr const char *kTagListItem = "li";
  static constexpr const char *kTagMeta = "meta";
  static constexpr const char *kTagNav = "nav";
  static constexpr const char *kTagNoScript = "noscript";
  static constexpr const char *kTagOption = "option";
  static constexpr const char *kTagOrderedList = "ol";
  static constexpr const char *kTagParagraph = "p";
  static constexpr const char *kTagPre = "pre";
  static constexpr const char *kTagScript = "script";
  static constexpr const char *kTagSpan = "span";
  static constexpr const char *kTagStyle = "style";
  static constexpr const char *kTagTemplate = "template";
  static constexpr const char *kTagTitle = "title";
  static constexpr const char *kTagUnorderedList = "ul";
  static constexpr const char *kTagImg = "img";
  static constexpr const char *kTagSeperator = "hr";

  // Text format
  static constexpr const char *kTagBold = "b";
  static constexpr const char *kTagStrong = "strong";
  static constexpr const char *kTagItalic = "em";
  static constexpr const char *kTagItalic2 = "i";
  static constexpr const char *kTagCitation = "cite";
  static constexpr const char *kTagDefinition = "dfn";
  static constexpr const char *kTagUnderline = "u";
  static constexpr const char *kTagStrighthrought = "del";
  static constexpr const char *kTagStrighthrought2 = "s";
  static constexpr const char *kTagBlockquote = "blockquote";

  // Header
  static constexpr const char *kTagHeader1 = "h1";
  static constexpr const char *kTagHeader2 = "h2";
  static constexpr const char *kTagHeader3 = "h3";
  static constexpr const char *kTagHeader4 = "h4";
  static constexpr const char *kTagHeader5 = "h5";
  static constexpr const char *kTagHeader6 = "h6";

  // Table
  static constexpr const char *kTagTable = "table";
  static constexpr const char *kTagTableRow = "tr";
  static constexpr const char *kTagTableHeader = "th";
  static constexpr const char *kTagTableData = "td";

  // ── Parser state (mutated while convert() scans the HTML) ──
  size_t index_ch_in_html_ = 0;

  bool is_closing_tag_ = false;
  bool is_in_attribute_value_ = false;
  bool is_in_code_ = false;
  bool is_in_list_ = false;
  bool is_in_p_ = false;
  bool is_in_pre_ = false;
  bool is_in_table_ = false;
  bool is_in_table_row_ = false;
  bool is_in_tag_ = false;
  bool is_self_closing_tag_ = false;
  bool skipping_leading_whitespace_ = true;

  // relevant for <li> only, false = is in unordered list
  bool is_in_ordered_list_ = false;
  uint8_t index_ol = 0;

  // store the table start
  size_t table_start = 0;

  // number of lists
  uint8_t index_li = 0;

  uint8_t index_blockquote = 0;

  // Last and second-to-last characters emitted into the Markdown buffer.
  char prev_ch_in_md_ = 0, prev_prev_ch_in_md_ = 0;
  char prev_ch_in_html_ = 'x';

  std::string html_;

  uint16_t offset_lt_ = 0;
  std::string current_tag_;
  std::string prev_tag_;

  // Line which separates header from data
  std::string tableLine;

  size_t chars_in_curr_line_ = 0;

  // Accumulated Markdown output.
  std::string md_;

  Options option;

  // Entity → replacement map applied unless Options::keepHtmlEntities is set.
  std::unordered_map<std::string, std::string> htmlSymbolConversions_ = {
      {"&quot;", "\""}, {"&lt;", "<"}, {"&gt;", ">"},
      {"&amp;", "&"}, {"&nbsp;", " "}, {"&rarr;", ""}};

  // Tag: base class for tag types
  struct Tag {
    virtual void OnHasLeftOpeningTag(Converter *c) = 0;
    virtual void OnHasLeftClosingTag(Converter *c) = 0;
  };

  // Tag types

  // tags that are not printed (nav, script, noscript, ...)
  struct TagIgnored : Tag {
    void OnHasLeftOpeningTag(Converter *c) override {};
    void OnHasLeftClosingTag(Converter *c) override {};
  };

  struct TagAnchor : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
    // href/title captured from the opening tag, emitted on the closing tag.
    std::string current_href_;
    std::string current_title_;
  };

  struct TagBold : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagItalic : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagUnderline : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagStrikethrought : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagBreak : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagDiv : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagHeader1 : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagHeader2 : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagHeader3 : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagHeader4 : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagHeader5 : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagHeader6 : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagListItem : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagOption : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagOrderedList : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagParagraph : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagPre : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagCode : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagSpan : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagTitle : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagUnorderedList : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagImage : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagSeperator : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagTable : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagTableRow : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagTableHeader : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagTableData : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  struct TagBlockquote : Tag {
    void OnHasLeftOpeningTag(Converter *c) override;
    void OnHasLeftClosingTag(Converter *c) override;
  };

  // Registry: tag name → handler dispatched on tag open/close.
  std::unordered_map<std::string, std::shared_ptr<Tag>> tags_;

  // Real constructor; the public one delegates here.
  explicit Converter(const std::string *html, struct Options *options);

  void CleanUpMarkdown();

  // Trim from start (in place)
  static void LTrim(std::string *s);

  // Trim from end (in place)
  Converter *RTrim(std::string *s, bool trim_only_blank = false);

  // Trim from both ends (in place)
  Converter *Trim(std::string *s);

  // 1. trim all lines
  // 2. reduce consecutive newlines to maximum 3
  void TidyAllLines(std::string *str);

  std::string ExtractAttributeFromTagLeftOf(const std::string &attr);

  void TurnLineIntoHeader1();

  void TurnLineIntoHeader2();

  // Current char: '<'
  void OnHasEnteredTag();

  Converter *UpdatePrevChFromMd();

  /**
   * Handle next char within <...> tag
   *
   * @param ch current character
   * @return continue surrounding iteration?
   */
  bool ParseCharInTag(char ch);

  // Current char: '>'
  bool OnHasLeftTag();

  // True if the raw tag text carries attributes/styles that hide the element.
  inline static bool TagContainsAttributesToHide(std::string *tag) {
    using std::string;

    return (*tag).find(" aria=\"hidden\"") != string::npos ||
           (*tag).find("display:none") != string::npos ||
           (*tag).find("visibility:hidden") != string::npos ||
           (*tag).find("opacity:0") != string::npos ||
           (*tag).find("Details-content--hidden-not-important") != string::npos;
  }

  Converter *ShortenMarkdown(size_t chars = 1);

  // If the last emitted Markdown char equals `prev`, remove it and report true.
  inline bool shortIfPrevCh(char prev) {
    if (prev_ch_in_md_ == prev) {
      ShortenMarkdown();
      return true;
    }
    return false;
  };

  /**
   * @param ch
   * @return continue iteration surrounding this method's invocation?
   */
  bool ParseCharInTagContent(char ch);

  // Replace previous space (if any) in current markdown line by newline
  bool ReplacePreviousSpaceInLineByNewline();

  static inline bool IsIgnoredTag(const std::string &tag) {
    return (tag[0] == '-' || kTagTemplate == tag || kTagStyle == tag ||
            kTagScript == tag || kTagNoScript == tag || kTagNav == tag);

    // meta: not ignored to tolerate if closing is omitted
  }

  [[nodiscard]] bool IsInIgnoredTag() const;
}; // Converter
/*!
* \brief Static wrapper around the Converter class
* \param html The HTML passed to Converter
* \param ok Optional: Pass a reference to a local bool to store the output of
* Converter::ok() \return Returns the by Converter generated Markdown
*/
inline std::string Convert(const std::string &html, bool *ok = nullptr) {
Converter c(html);
auto md = c.convert();
if (ok != nullptr)
*ok = c.ok();
return md;
}
#ifndef PYTHON_BINDINGS
inline std::string Convert(const std::string &&html, bool *ok = nullptr) {
return Convert(html, ok);
}
#endif
} // namespace html2md
#endif // HTML2MD_H

View File

@ -0,0 +1,11 @@
// Copyright (c) Tim Gromeyer
// Licensed under the MIT License - https://opensource.org/licenses/MIT

#ifndef TABLE_H
#define TABLE_H

#include <string>

/// Format a raw Markdown table given as a single string and return the
/// formatted table (the input is taken by const reference and not modified).
/// NOTE(review): implementation lives in table.cpp — presumably it aligns
/// column widths; confirm against the implementation.
[[nodiscard]] std::string formatMarkdownTable(const std::string &inputTable);

#endif // TABLE_H

View File

@ -0,0 +1,101 @@
# Scraper Request
## OpenAPI Specification
```yaml
openapi: 3.0.1
info:
title: ''
description: ''
version: 1.0.0
paths:
/api/v1/scraper/request:
post:
summary: Scraper Request
deprecated: false
description: ''
tags:
- Scraping API
parameters: []
requestBody:
content:
application/json:
schema:
type: object
properties:
actor:
type: string
input:
type: object
properties:
url:
type: string
required:
- url
x-apidog-orders:
- url
proxy:
type: object
properties:
country:
type: string
required:
- country
x-apidog-orders:
- country
async:
type: boolean
description: |-
If true, the task will be executed asynchronously.
If false, the task will be executed synchronously.
required:
- actor
- input
- proxy
x-apidog-orders:
- actor
- input
- proxy
- async
example:
actor: scraper.xxx
input:
url: >-
https://www.***.com/shop/us/products/stmicroelectronics/tda7265a-3074457345625542393/
proxy:
country: US
async: false
responses:
'200':
description: ''
content:
application/json:
schema:
type: object
properties: {}
x-apidog-orders: []
headers: {}
x-apidog-name: Success
security:
- apikey-header-x-api-token: []
x-apidog-folder: Scraping API
x-apidog-status: released
x-run-in-apidog: https://app.apidog.com/web/project/745098/apis/api-11949852-run
components:
schemas: {}
securitySchemes:
bearer:
type: http
scheme: bearer
description: Bearer token authentication using your Scrapeless API key
apikey-header-x-api-token:
type: apiKey
in: header
name: x-api-token
servers:
- url: https://api.scrapeless.com
description: Prod Env
security:
- apikey-header-x-api-token: []
```

View File

@ -0,0 +1,403 @@
#include "html/html.h"
#include <lexbor/css/css.h>
#include <lexbor/html/html.h>
#include <lexbor/selectors/selectors.h>
#include <html/html2md.h>
#include <algorithm>
#include <cstring>
namespace html {
// ── helpers ─────────────────────────────────────────────────────────────────
// Copy a node's text content into a std::string; empty string on failure.
// Frees the lexbor-allocated buffer before returning.
static std::string node_text(lxb_dom_node_t *node) {
  size_t length = 0;
  lxb_char_t *raw = lxb_dom_node_text_content(node, &length);
  if (raw == nullptr)
    return {};
  std::string value(reinterpret_cast<const char *>(raw), length);
  lxb_dom_document_destroy_text(node->owner_document, raw);
  return value;
}
// Qualified tag name of an element (e.g. "div"); empty string on failure.
static std::string tag_name(lxb_dom_element_t *el) {
  size_t length = 0;
  const lxb_char_t *qualified = lxb_dom_element_qualified_name(el, &length);
  return qualified
             ? std::string(reinterpret_cast<const char *>(qualified), length)
             : std::string{};
}
// Value of attribute `attr` on `el`; empty string when the attribute is absent.
static std::string get_element_attr(lxb_dom_element_t *el, const char *attr) {
  size_t length = 0;
  const auto *key = reinterpret_cast<const lxb_char_t *>(attr);
  const lxb_char_t *value =
      lxb_dom_element_get_attribute(el, key, strlen(attr), &length);
  return value ? std::string(reinterpret_cast<const char *>(value), length)
               : std::string{};
}
// Parse HTML into a new lexbor document. Returns nullptr on allocation or
// parse failure; the caller owns the document and must destroy it.
static lxb_html_document_t *parse_doc(const std::string &html_str) {
  lxb_html_document_t *document = lxb_html_document_create();
  if (document == nullptr)
    return nullptr;
  const auto *input = reinterpret_cast<const lxb_char_t *>(html_str.c_str());
  if (lxb_html_document_parse(document, input, html_str.size()) !=
      LXB_STATUS_OK) {
    lxb_html_document_destroy(document);
    return nullptr;
  }
  return document;
}
// ── Helper: check if a tag name matches a noise element ─────────────────────
// True for tags whose subtrees carry no visible text (scripts, styles, ...).
static bool is_noise_tag(const std::string &name) {
  static const char *const kNoise[] = {"script", "style", "noscript", "svg",
                                       "iframe"};
  for (const char *candidate : kNoise) {
    if (name == candidate)
      return true;
  }
  return false;
}
// ── walk tree recursively ───────────────────────────────────────────────────
// Depth-first walk: record every element that has non-empty text content.
static void walk(lxb_dom_node_t *node, std::vector<Element> &out) {
  if (node == nullptr)
    return;
  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *element = lxb_dom_interface_element(node);
    std::string text = node_text(node);
    if (!text.empty())
      out.push_back(Element{tag_name(element), std::move(text)});
  }
  for (auto *child = node->first_child; child != nullptr; child = child->next)
    walk(child, out);
}
// ── Walk for visible text only (skip noise tags) ────────────────────────────
// ── Walk for visible text only (skip noise tags) ────────────────────────────
// Appends whitespace-collapsed visible text to `out`, skipping entire
// script/style/noscript/svg/iframe subtrees.
static void walk_text(lxb_dom_node_t *node, std::string &out) {
  if (!node) return;
  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *el = lxb_dom_interface_element(node);
    auto name = tag_name(el);
    if (is_noise_tag(name)) return; // Skip noise subtrees entirely
  }
  if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
    size_t len = 0;
    lxb_char_t *data = lxb_dom_node_text_content(node, &len);
    if (data) {
      if (len > 0) {
        std::string chunk(reinterpret_cast<const char *>(data), len);
        // Collapse whitespace
        bool needSpace = !out.empty() && out.back() != ' ' && out.back() != '\n';
        // Trim leading/trailing whitespace from chunk
        size_t start = chunk.find_first_not_of(" \t\n\r");
        size_t end = chunk.find_last_not_of(" \t\n\r");
        if (start != std::string::npos) {
          if (needSpace) out += ' ';
          out += chunk.substr(start, end - start + 1);
        }
      }
      // Fix: release the buffer lexbor allocated for text_content — the
      // sibling node_text() helper does this, but it was leaked here on
      // every text node.
      lxb_dom_document_destroy_text(node->owner_document, data);
    }
  }
  for (auto *child = node->first_child; child; child = child->next)
    walk_text(child, out);
}
// ── Walk <head> for meta/title/link ─────────────────────────────────────────
// Aggregated results of a walk_head() pass over a (sub)tree.
struct HeadData {
  std::string title;                                       // <title> text
  std::string canonical;                                   // <link rel="canonical"> href
  std::vector<std::pair<std::string, std::string>> metas;  // name/property → content
  std::vector<std::string> json_ld;                        // raw <script type="application/ld+json"> bodies
};
// Recursively collect title / meta / canonical link / JSON-LD scripts from a
// subtree into `data`. Safe to run over the whole document, not just <head>.
static void walk_head(lxb_dom_node_t *node, HeadData &data) {
  if (node == nullptr)
    return;
  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *element = lxb_dom_interface_element(node);
    const std::string tag = tag_name(element);
    if (tag == "title") {
      data.title = node_text(node);
    } else if (tag == "meta") {
      const std::string name_attr = get_element_attr(element, "name");
      const std::string prop_attr = get_element_attr(element, "property");
      const std::string content = get_element_attr(element, "content");
      if (!content.empty()) {
        if (!name_attr.empty())
          data.metas.emplace_back(name_attr, content);
        if (!prop_attr.empty())
          data.metas.emplace_back(prop_attr, content);
      }
    } else if (tag == "link") {
      if (get_element_attr(element, "rel") == "canonical")
        data.canonical = get_element_attr(element, "href");
    } else if (tag == "script") {
      if (get_element_attr(element, "type") == "application/ld+json") {
        std::string payload = node_text(node);
        if (!payload.empty())
          data.json_ld.push_back(std::move(payload));
      }
    }
  }
  for (auto *child = node->first_child; child != nullptr; child = child->next)
    walk_head(child, data);
}
// ── Walk <body> for <a> links ───────────────────────────────────────────────
// Recursively gather every <a> that has a non-empty href into `out`.
static void walk_links(lxb_dom_node_t *node, std::vector<Link> &out) {
  if (node == nullptr)
    return;
  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *element = lxb_dom_interface_element(node);
    if (tag_name(element) == "a") {
      std::string href = get_element_attr(element, "href");
      if (!href.empty()) {
        Link link;
        link.href = std::move(href);
        link.rel = get_element_attr(element, "rel");
        link.text = node_text(node);
        out.push_back(std::move(link));
      }
    }
  }
  for (auto *child = node->first_child; child != nullptr; child = child->next)
    walk_links(child, out);
}
// ── public API ──────────────────────────────────────────────────────────────
// Parse HTML and list every body element that has text content.
std::vector<Element> parse(const std::string &html_str) {
  std::vector<Element> elements;
  if (auto *doc = parse_doc(html_str)) {
    walk(lxb_dom_interface_node(lxb_html_document_body_element(doc)), elements);
    lxb_html_document_destroy(doc);
  }
  return elements;
}
// ── CSS selector callback ───────────────────────────────────────────────────
// Context handed to select_cb(): collects matched elements' text.
struct SelectCtx {
  std::vector<std::string> *out; // destination for matched text (never null)
};
// Selector-match callback: push the matched node's text (if any) into the
// SelectCtx vector. Always tells lexbor to keep searching.
static lxb_status_t select_cb(lxb_dom_node_t *node,
                              lxb_css_selector_specificity_t spec, void *ctx) {
  (void)spec;
  auto &texts = *static_cast<SelectCtx *>(ctx)->out;
  std::string text = node_text(node);
  if (!text.empty())
    texts.push_back(std::move(text));
  return LXB_STATUS_OK;
}
// Run a CSS selector over the document body and return the text of every
// matching element. Empty result on parse or selector failure.
std::vector<std::string> select(const std::string &html_str,
                                const std::string &selector) {
  std::vector<std::string> matches;
  auto *doc = parse_doc(html_str);
  if (doc == nullptr)
    return matches;

  auto *parser = lxb_css_parser_create();
  lxb_css_parser_init(parser, nullptr);
  auto *engine = lxb_selectors_create();
  lxb_selectors_init(engine);

  auto *selector_list = lxb_css_selectors_parse(
      parser, reinterpret_cast<const lxb_char_t *>(selector.c_str()),
      selector.size());
  if (selector_list != nullptr) {
    SelectCtx ctx{&matches};
    auto *body = lxb_dom_interface_node(lxb_html_document_body_element(doc));
    lxb_selectors_find(engine, body, selector_list, select_cb, &ctx);
    lxb_css_selector_list_destroy_memory(selector_list);
  }

  lxb_selectors_destroy(engine, true);
  lxb_css_parser_destroy(parser, true);
  lxb_html_document_destroy(doc);
  return matches;
}
// ── Enricher extraction helpers ─────────────────────────────────────────────
std::string get_title(const std::string &html_str) {
auto *doc = parse_doc(html_str);
if (!doc) return {};
HeadData data;
auto *head = lxb_dom_interface_node(lxb_html_document_head_element(doc));
walk_head(head, data);
lxb_html_document_destroy(doc);
return data.title;
}
std::string get_meta(const std::string &html_str, const std::string &name) {
auto *doc = parse_doc(html_str);
if (!doc) return {};
HeadData data;
auto *head = lxb_dom_interface_node(lxb_html_document_head_element(doc));
walk_head(head, data);
lxb_html_document_destroy(doc);
for (auto &[key, val] : data.metas) {
if (key == name) return val;
}
return {};
}
std::string get_canonical(const std::string &html_str) {
auto *doc = parse_doc(html_str);
if (!doc) return {};
HeadData data;
auto *head = lxb_dom_interface_node(lxb_html_document_head_element(doc));
walk_head(head, data);
lxb_html_document_destroy(doc);
return data.canonical;
}
// Every <a href="..."> in the body, hrefs verbatim from the HTML.
std::vector<Link> get_links(const std::string &html_str) {
  std::vector<Link> links;
  if (auto *doc = parse_doc(html_str)) {
    walk_links(lxb_dom_interface_node(lxb_html_document_body_element(doc)),
               links);
    lxb_html_document_destroy(doc);
  }
  return links;
}
// Visible body text with noise tags stripped and whitespace collapsed.
std::string get_body_text(const std::string &html_str) {
  std::string text;
  if (auto *doc = parse_doc(html_str)) {
    walk_text(lxb_dom_interface_node(lxb_html_document_body_element(doc)),
              text);
    lxb_html_document_destroy(doc);
  }
  return text;
}
std::vector<std::string> get_json_ld(const std::string &html_str) {
auto *doc = parse_doc(html_str);
if (!doc) return {};
HeadData data;
// JSON-LD can be in head or body — walk entire document
auto *root = lxb_dom_interface_node(
lxb_dom_document_element(&doc->dom_document));
walk_head(root, data);
lxb_html_document_destroy(doc);
return data.json_ld;
}
// ── get_attr via CSS selector ───────────────────────────────────────────────
// Context threaded through lxb_selectors_find via attr_cb: records the
// first non-empty value of `attr_name` seen on a matching element.
struct AttrCtx {
  std::string attr_name; // attribute to read on each matched element
  std::string result;    // first non-empty value found (set by attr_cb)
  bool found;            // true once `result` has been captured
};
// Selector-match callback: capture the first non-empty attribute value into
// the AttrCtx. Always returns LXB_STATUS_OK so traversal continues; matches
// after the first hit are skipped via ctx->found.
static lxb_status_t attr_cb(lxb_dom_node_t *node,
                            lxb_css_selector_specificity_t spec, void *ctx) {
  (void)spec;
  auto *state = static_cast<AttrCtx *>(ctx);
  if (state->found || node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
    return LXB_STATUS_OK;
  }
  auto value = get_element_attr(lxb_dom_interface_element(node),
                                state->attr_name.c_str());
  if (!value.empty()) {
    state->result = value;
    state->found = true;
  }
  return LXB_STATUS_OK;
}
/// Return the value of attribute `attr_name` on the first CSS-selector match,
/// searching the whole document (head and body). Returns "" on parse failure,
/// no match, or when the matched element has an empty/missing attribute.
std::string get_attr(const std::string &html_str, const std::string &selector,
                     const std::string &attr_name) {
  auto *doc = parse_doc(html_str);
  if (!doc) return {};
  // Selector engine setup: the parser compiles the selector list, the
  // `selectors` object drives the tree search.
  auto *css_parser = lxb_css_parser_create();
  lxb_css_parser_init(css_parser, nullptr);
  auto *selectors = lxb_selectors_create();
  lxb_selectors_init(selectors);
  auto *list = lxb_css_selectors_parse(
      css_parser, reinterpret_cast<const lxb_char_t *>(selector.c_str()),
      selector.size());
  std::string result;
  // list is null when the selector string failed to compile — return "".
  if (list) {
    AttrCtx ctx{attr_name, {}, false};
    // Search from the document element so matches in <head> are found too.
    auto *root = lxb_dom_interface_node(
        lxb_dom_document_element(&doc->dom_document));
    lxb_selectors_find(selectors, root, list, attr_cb, &ctx);
    result = ctx.result;
    lxb_css_selector_list_destroy_memory(list);
  }
  // Tear down in reverse order of creation; doc is destroyed last.
  lxb_selectors_destroy(selectors, true);
  lxb_css_parser_destroy(css_parser, true);
  lxb_html_document_destroy(doc);
  return result;
}
// Convert HTML to markdown via html2md, with a hard input-size cap.
// Defense-in-depth: the enricher pipeline already caps at 512 KB, but future
// callers may not — prevent OOM / multi-second hangs from html2md.
std::string to_markdown(const std::string &html_str) {
  static constexpr size_t MAX_HTML2MD_INPUT = 2 * 1024 * 1024; // 2 MB
  if (html_str.size() <= MAX_HTML2MD_INPUT) {
    return html2md::Convert(html_str);
  }
  return "*[Content truncated: HTML too large for markdown conversion ("
      + std::to_string(html_str.size() / 1024) + " KB)]*\n";
}
} // namespace html

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
// Copyright (c) Tim Gromeyer
// Licensed under the MIT License - https://opensource.org/licenses/MIT
#include "html/table.h"
#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>
using std::string;
using std::vector;
const size_t MIN_LINE_LENGTH = 3; // Minimum length of line
// Trim ASCII space characters from both ends of `str`, in place.
// A string of only spaces becomes empty.
void removeLeadingTrailingSpaces(std::string &str) {
  const size_t begin = str.find_first_not_of(' ');
  if (begin == std::string::npos) {
    str.clear(); // nothing but spaces
    return;
  }
  const size_t end = str.find_last_not_of(' ');
  str = str.substr(begin, end - begin + 1);
}
// Produce a markdown header-separator line of `length` dashes, preserving
// any leading/trailing ':' alignment markers from the original separator.
// Returns "" for an empty input or a target length below MIN_LINE_LENGTH.
string enlargeTableHeaderLine(const string &str, size_t length) {
  if (str.empty() || length < MIN_LINE_LENGTH)
    return "";
  size_t first = str.find_first_of(':');
  size_t last = str.find_last_of(':');
  // A lone ':' at position 0 is a left-align marker only — cancel `last`
  // so the same colon is not also treated as a right marker.
  if (first == 0 && first == last)
    last = string::npos;
  string line = string(length, '-');
  if (first == 0)
    line[0] = ':';
  if (last == str.length() - 1)
    line[length - 1] = ':';
  return line;
}
// Re-format a pipe-delimited markdown table so every column is padded to the
// width of its widest cell; the separator row (row index 1) is stretched to
// match, preserving ':' alignment markers.
// NOTE(review): empty cells are dropped during parsing, so a row with a blank
// cell shifts its remaining cells left — confirm upstream never emits blanks.
string formatMarkdownTable(const string &inputTable) {
  std::istringstream iss(inputTable);
  string line;
  vector<vector<string>> tableData;
  // Parse the input table into a 2D vector of trimmed, non-empty cells.
  while (std::getline(iss, line)) {
    std::istringstream lineStream(line);
    string cell;
    vector<string> rowData;
    while (std::getline(lineStream, cell, '|')) {
      removeLeadingTrailingSpaces(cell); // Trim first
      if (!cell.empty()) { // Then check if empty
        rowData.push_back(cell);
      }
    }
    if (!rowData.empty()) {
      tableData.push_back(std::move(rowData)); // Move rowData to avoid copying
    }
  }
  if (tableData.empty()) {
    return "";
  }
  // Determine maximum width of each column; grow the width vector when a
  // later row has more columns than the header row.
  vector<size_t> columnWidths(tableData[0].size(), 0);
  for (const auto &row : tableData) {
    if (columnWidths.size() < row.size()) {
      columnWidths.resize(row.size(), 0);
    }
    for (size_t i = 0; i < row.size(); ++i) {
      columnWidths[i] = std::max(columnWidths[i], row[i].size());
    }
  }
  // Build the formatted table: left-pad each cell; row 1 gets a regenerated
  // separator (+2 accounts for the single space on each side of a cell).
  std::ostringstream formattedTable;
  for (size_t rowNumber = 0; rowNumber < tableData.size(); ++rowNumber) {
    const auto &row = tableData[rowNumber];
    formattedTable << "|";
    for (size_t i = 0; i < row.size(); ++i) {
      if (rowNumber == 1) {
        formattedTable << enlargeTableHeaderLine(row[i], columnWidths[i] + 2)
                       << "|";
        continue;
      }
      formattedTable << " " << std::setw(columnWidths[i]) << std::left << row[i]
                     << " |";
    }
    formattedTable << "\n";
  }
  return formattedTable.str();
}

View File

@ -0,0 +1,48 @@
include(FetchContent)
# Work around curl's old cmake_minimum_required for CMake 4.x.
# NOTE(review): FORCE-setting CMAKE_POLICY_VERSION_MINIMUM is global and
# affects every other FetchContent project — confirm this is intended.
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
FetchContent_Declare(
  CURL
  URL https://github.com/curl/curl/releases/download/curl-8_12_1/curl-8.12.1.tar.xz
  DOWNLOAD_EXTRACT_TIMESTAMP TRUE
)
# Minimal curl build — static, platform TLS backend, no optional deps.
set(BUILD_CURL_EXE OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
# TLS backend: SChannel on Windows (no extra dependency), OpenSSL elsewhere.
if(WIN32)
set(CURL_USE_OPENSSL OFF CACHE BOOL "" FORCE)
set(CURL_USE_SCHANNEL ON CACHE BOOL "" FORCE)
else()
set(CURL_USE_SCHANNEL OFF CACHE BOOL "" FORCE)
set(CURL_USE_OPENSSL ON CACHE BOOL "" FORCE)
endif()
# Disable optional compression/protocol deps to keep the build hermetic.
set(CURL_ZLIB OFF CACHE BOOL "" FORCE)
set(CURL_BROTLI OFF CACHE BOOL "" FORCE)
set(CURL_ZSTD OFF CACHE BOOL "" FORCE)
set(USE_NGHTTP2 OFF CACHE BOOL "" FORCE)
set(CURL_USE_LIBSSH2 OFF CACHE BOOL "" FORCE)
set(CURL_USE_LIBPSL OFF CACHE BOOL "" FORCE)
set(CURL_DISABLE_LDAP ON CACHE BOOL "" FORCE)
set(CURL_DISABLE_LDAPS ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(CURL)
# Thin wrapper library around libcurl (see src/http.cpp).
add_library(http STATIC
src/http.cpp
)
target_include_directories(http
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)
# CURL::libcurl is an imported target that carries includes and transitive deps.
target_link_libraries(http
PUBLIC CURL::libcurl
)

View File

@ -0,0 +1,40 @@
#pragma once
#include <string>
namespace http {
/// Result of an HTTP request. On success `status_code` is the HTTP response
/// code and `body` the payload; on transport failure `status_code` is -1 and
/// `body` holds the curl error string.
struct Response {
  long status_code;
  std::string body;
};
/// Options for customisable HTTP GET requests.
struct GetOptions {
  std::string user_agent = "Mozilla/5.0 (compatible; PolymechBot/1.0)";
  int timeout_ms = 10000;       // whole-transfer timeout; <= 0 disables it
  bool follow_redirects = true; // CURLOPT_FOLLOWLOCATION
};
/// Perform an HTTP GET request with default options (10 s timeout,
/// redirects followed). Returns the response body and status code.
Response get(const std::string &url);
/// Perform an HTTP GET request with custom options.
Response get(const std::string &url, const GetOptions &opts);
/// Perform an HTTP POST request with a body and a fixed 10 s timeout.
/// Returns the response and status.
Response post(const std::string &url, const std::string &body,
              const std::string &content_type = "application/json");
/// Options for customisable HTTP POST requests.
struct PostOptions {
  std::string content_type = "application/json";
  std::string bearer_token; // sent as both Authorization: Bearer and x-api-token
  int timeout_ms = 30000;   // whole-transfer timeout; <= 0 disables it
};
/// Perform an HTTP POST request with custom options.
Response post(const std::string &url, const std::string &body,
              const PostOptions &opts);
} // namespace http

View File

@ -0,0 +1,216 @@
#include "http/http.h"
#include <curl/curl.h>
#include <mutex>
#include <chrono>
namespace http {
// curl_global_init must run exactly once per process before any easy handle
// is created; std::call_once makes that safe across threads.
static std::once_flag curl_init_flag;
static void ensure_curl_init() {
  std::call_once(curl_init_flag, []() {
    curl_global_init(CURL_GLOBAL_ALL);
  });
}
// One lazily-created CURL easy handle per thread, cleaned up when the thread
// exits. get() resets all options so a request never inherits settings from
// a previous call on the same thread.
struct ThreadLocalCurl {
  CURL *handle; // may be nullptr if curl_easy_init failed
  ThreadLocalCurl() {
    ensure_curl_init();
    handle = curl_easy_init();
  }
  ~ThreadLocalCurl() {
    if (handle) curl_easy_cleanup(handle);
  }
  // Return the handle with all options reset, or nullptr on init failure.
  CURL *get() {
    if (handle) curl_easy_reset(handle);
    return handle;
  }
};
thread_local ThreadLocalCurl tl_curl;
// Wall-clock deadline passed to the XFERINFO callback below.
struct ProgressData {
  std::chrono::steady_clock::time_point start_time; // when the transfer began
  int timeout_ms; // elapsed-time budget in ms; <= 0 disables the check
};
// libcurl XFERINFO callback: returns non-zero to abort the transfer once the
// wall-clock budget is exhausted. Used as a backstop in addition to
// CURLOPT_TIMEOUT_MS (callers set timeout_ms slightly above the curl timeout).
static int progress_cb(void *clientp, curl_off_t dltotal, curl_off_t dlnow,
                       curl_off_t ultotal, curl_off_t ulnow) {
  // Transfer-amount arguments are unused; cast to void to silence
  // -Wunused-parameter without changing the required curl signature.
  (void)dltotal;
  (void)dlnow;
  (void)ultotal;
  (void)ulnow;
  auto *pd = static_cast<ProgressData *>(clientp);
  if (pd->timeout_ms <= 0) return 0;
  const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
                           std::chrono::steady_clock::now() - pd->start_time)
                           .count();
  return elapsed > pd->timeout_ms ? 1 : 0; // non-zero aborts the transfer
}
// libcurl write callback: append the received chunk to the std::string
// pointed to by userp. Returning the full byte count signals success to curl.
static size_t write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
  const size_t total = size * nmemb;
  static_cast<std::string *>(userp)->append(static_cast<char *>(contents), total);
  return total;
}
// GET with default options (10 s timeout, redirects followed, default UA).
Response get(const std::string &url) {
  return get(url, GetOptions{});
}
/// Perform an HTTP GET with the given options. On transport failure returns
/// {status_code = -1, body = curl error string}; otherwise status_code is the
/// HTTP response code and body the (decompressed) payload.
Response get(const std::string &url, const GetOptions &opts) {
  Response resp{};
  CURL *curl = tl_curl.get();
  if (!curl) {
    resp.status_code = -1;
    resp.body = "curl_easy_init (thread_local) failed";
    return resp;
  }
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp.body);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, opts.follow_redirects ? 1L : 0L);
  // Must outlive curl_easy_perform — curl keeps a raw pointer to it.
  ProgressData prog_data;
  if (opts.timeout_ms > 0) {
    curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, static_cast<long>(opts.timeout_ms));
    prog_data.start_time = std::chrono::steady_clock::now();
    // +1 s grace: the progress backstop only fires if CURLOPT_TIMEOUT_MS missed.
    prog_data.timeout_ms = opts.timeout_ms + 1000;
    curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_cb);
    curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &prog_data);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  }
  // Fail fast on dead sites (TCP SYN timeout)
  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS, 5000L);
  // Prevent stalling: abort if transfer speed is less than 1 byte/sec for 10 seconds
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 10L);
  // Prevent signal handlers from breaking in multithreaded environments
  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  if (!opts.user_agent.empty()) {
    curl_easy_setopt(curl, CURLOPT_USERAGENT, opts.user_agent.c_str());
  }
  // Empty string = advertise every encoding curl supports; curl decompresses.
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    resp.status_code = -1;
    resp.body = curl_easy_strerror(res);
  } else {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp.status_code);
  }
  return resp;
}
/// POST `body` to `url` with a single Content-Type header. Fixed policy:
/// follow redirects, 10 s transfer timeout (+1 s progress backstop),
/// low-speed abort. Returns {-1, curl error string} on transport failure,
/// otherwise the HTTP status and response body.
Response post(const std::string &url, const std::string &body,
              const std::string &content_type) {
  Response resp{};
  CURL *curl = tl_curl.get();
  if (!curl) {
    resp.status_code = -1;
    resp.body = "curl_easy_init failed";
    return resp;
  }
  struct curl_slist *headers = nullptr;
  headers =
      curl_slist_append(headers, ("Content-Type: " + content_type).c_str());
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  // Set the size explicitly so bodies containing NUL bytes are sent whole;
  // CURLOPT_POSTFIELDS alone would strlen() and truncate at the first '\0'.
  curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(body.size()));
  curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp.body);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10L);
  // Must outlive curl_easy_perform — curl keeps a raw pointer to it.
  ProgressData prog_data;
  prog_data.start_time = std::chrono::steady_clock::now();
  prog_data.timeout_ms = 11000; // 10 s curl timeout + 1 s grace
  curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_cb);
  curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &prog_data);
  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  // Prevent stalling: abort if transfer speed is less than 1 byte/sec for 10 seconds
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 10L);
  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    resp.status_code = -1;
    resp.body = curl_easy_strerror(res);
  } else {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp.status_code);
  }
  curl_slist_free_all(headers);
  return resp;
}
/// POST with custom options: content type, optional bearer token (sent as
/// both Authorization: Bearer and x-api-token), and a configurable timeout
/// (<= 0 disables both the curl timeout and the progress backstop).
Response post(const std::string &url, const std::string &body,
              const PostOptions &opts) {
  Response resp{};
  CURL *curl = tl_curl.get();
  if (!curl) {
    resp.status_code = -1;
    resp.body = "curl_easy_init failed";
    return resp;
  }
  struct curl_slist *headers = nullptr;
  headers =
      curl_slist_append(headers, ("Content-Type: " + opts.content_type).c_str());
  if (!opts.bearer_token.empty()) {
    headers = curl_slist_append(
        headers, ("Authorization: Bearer " + opts.bearer_token).c_str());
    headers = curl_slist_append(
        headers, ("x-api-token: " + opts.bearer_token).c_str());
  }
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  // Set the size explicitly so bodies containing NUL bytes are sent whole;
  // CURLOPT_POSTFIELDS alone would strlen() and truncate at the first '\0'.
  curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(body.size()));
  curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp.body);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  // Must outlive curl_easy_perform — curl keeps a raw pointer to it.
  ProgressData prog_data;
  if (opts.timeout_ms > 0) {
    curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, static_cast<long>(opts.timeout_ms));
    prog_data.start_time = std::chrono::steady_clock::now();
    // +1 s grace: the progress backstop only fires if CURLOPT_TIMEOUT_MS missed.
    prog_data.timeout_ms = opts.timeout_ms + 1000;
    curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_cb);
    curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &prog_data);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  }
  // Prevent stalling: abort if transfer speed is less than 1 byte/sec for 10 seconds
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 10L);
  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    resp.status_code = -1;
    resp.body = curl_easy_strerror(res);
  } else {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp.status_code);
  }
  curl_slist_free_all(headers);
  return resp;
}
} // namespace http

View File

@ -0,0 +1,45 @@
cmake_minimum_required(VERSION 3.20)
# Idiomatic project(): declare only the languages actually compiled.
project(ipc LANGUAGES CXX)
option(IPC_BUILD_SHARED "Build ipc as a shared library (DLL/so)" OFF)
set(_ipc_sources src/ipc.cpp)
if(IPC_BUILD_SHARED)
  add_library(ipc SHARED ${_ipc_sources})
  # Enables __declspec(dllexport) / default visibility in ipc_export.h.
  target_compile_definitions(ipc PRIVATE IPC_BUILDING_LIBRARY)
else()
  add_library(ipc STATIC ${_ipc_sources})
  # Static build: IPC_API expands to nothing for the library and consumers.
  target_compile_definitions(ipc PRIVATE IPC_STATIC_BUILD=1)
  target_compile_definitions(ipc INTERFACE IPC_STATIC_BUILD=1)
endif()
target_include_directories(ipc
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)
target_link_libraries(ipc
  PUBLIC json logger
)
if(IPC_BUILD_SHARED)
  # Shared builds land next to the executables in dist/.
  set_target_properties(ipc PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
    RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/dist"
    RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/dist"
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
  )
endif()
install(TARGETS ipc
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION bin
)
install(FILES
  ${CMAKE_CURRENT_SOURCE_DIR}/include/ipc/ipc.h
  ${CMAKE_CURRENT_SOURCE_DIR}/include/ipc/ipc_export.h
  DESTINATION include/ipc
)

View File

@ -0,0 +1,35 @@
#pragma once
#include "ipc/ipc_export.h"
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
namespace ipc {
/// A single IPC message: { id, type, payload (raw JSON string) }.
struct Message {
  std::string id;      // message id (opaque to the framing layer)
  std::string type;    // message kind string
  std::string payload; // opaque JSON string (can be "{}" or any object)
};
/// Encode a Message into a length-prefixed binary frame.
/// Layout: [4-byte LE uint32 length][JSON bytes]
IPC_API std::vector<uint8_t> encode(const Message &msg);
/// Decode a binary frame (without the 4-byte length prefix) into a Message.
/// Returns false if the JSON is invalid or missing required fields.
IPC_API bool decode(const uint8_t *data, size_t len, Message &out);
/// Convenience overload over a whole frame buffer (no length prefix).
IPC_API bool decode(const std::vector<uint8_t> &frame, Message &out);
/// Blocking: read exactly one length-prefixed message from a FILE*.
/// Returns false on EOF, read error, or an implausible length (> 10 MB).
IPC_API bool read_message(Message &out, FILE *in = stdin);
/// Write one length-prefixed message to a FILE*. Flushes after write.
/// Returns false on write error.
IPC_API bool write_message(const Message &msg, FILE *out = stdout);
} // namespace ipc

View File

@ -0,0 +1,25 @@
#pragma once
/**
* DLL / shared-object exports for the length-prefixed JSON IPC framing library.
*
* CMake:
* - Building libipc: IPC_BUILDING_LIBRARY (PRIVATE)
* - Linking static ipc: IPC_STATIC_BUILD=1 (INTERFACE)
*/
#if defined(IPC_STATIC_BUILD)
# define IPC_API  // static archive: no import/export decoration needed
#elif defined(_WIN32)
# if defined(IPC_BUILDING_LIBRARY)
# define IPC_API __declspec(dllexport)  // building the DLL: export symbols
# else
# define IPC_API __declspec(dllimport)  // consuming the DLL: import symbols
# endif
#else
# if defined(IPC_BUILDING_LIBRARY)
# define IPC_API __attribute__((visibility("default")))  // ELF: export
# else
# define IPC_API  // ELF consumers need no decoration
# endif
#endif

View File

@ -0,0 +1,158 @@
#include "ipc/ipc.h"
#include <cstring>
#include "json/json.h"
#include "logger/logger.h"
// We use RapidJSON directly for structured serialization
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
namespace ipc {
// ── helpers ──────────────────────────────────────────────────────────────────
// Store val into dst[0..3] in little-endian byte order.
static void write_u32_le(uint8_t *dst, uint32_t val) {
  for (int i = 0; i < 4; ++i) {
    dst[i] = static_cast<uint8_t>((val >> (8 * i)) & 0xFF);
  }
}
// Reassemble a uint32 from src[0..3] stored in little-endian byte order.
static uint32_t read_u32_le(const uint8_t *src) {
  uint32_t value = 0;
  for (int i = 3; i >= 0; --i) {
    value = (value << 8) | src[i];
  }
  return value;
}
// Read exactly n bytes from f into buf, looping over partial reads.
// Returns false if EOF or a stream error occurs before n bytes arrive.
static bool read_exact(FILE *f, uint8_t *buf, size_t n) {
  for (size_t filled = 0; filled < n;) {
    const size_t chunk = std::fread(buf + filled, 1, n - filled, f);
    if (chunk == 0) return false; // EOF or error
    filled += chunk;
  }
  return true;
}
// ── encode ───────────────────────────────────────────────────────────────────
std::vector<uint8_t> encode(const Message &msg) {
// Build JSON: { "id": "...", "type": "...", "payload": ... }
// payload is stored as a raw JSON string, so we parse it first
rapidjson::StringBuffer sb;
rapidjson::Writer<rapidjson::StringBuffer> w(sb);
w.StartObject();
w.Key("id");
w.String(msg.id.c_str(), static_cast<rapidjson::SizeType>(msg.id.size()));
w.Key("type");
w.String(msg.type.c_str(),
static_cast<rapidjson::SizeType>(msg.type.size()));
w.Key("payload");
// If payload is valid JSON, embed it as-is; otherwise embed as string
rapidjson::Document pd;
if (!msg.payload.empty() &&
!pd.Parse(msg.payload.c_str()).HasParseError()) {
pd.Accept(w);
} else {
w.String(msg.payload.c_str(),
static_cast<rapidjson::SizeType>(msg.payload.size()));
}
w.EndObject();
const char *json_str = sb.GetString();
uint32_t json_len = static_cast<uint32_t>(sb.GetSize());
std::vector<uint8_t> frame(4 + json_len);
write_u32_le(frame.data(), json_len);
std::memcpy(frame.data() + 4, json_str, json_len);
return frame;
}
// ── decode ───────────────────────────────────────────────────────────────────
// Parse a JSON frame body into a Message. Requires string "id" and "type"
// members; "payload" defaults to "{}" when absent and is re-serialized to a
// JSON string when it is not already a string value.
bool decode(const uint8_t *data, size_t len, Message &out) {
  rapidjson::Document doc;
  doc.Parse(reinterpret_cast<const char *>(data), len);
  if (doc.HasParseError() || !doc.IsObject()) return false;
  const bool has_id = doc.HasMember("id") && doc["id"].IsString();
  const bool has_type = doc.HasMember("type") && doc["type"].IsString();
  if (!has_id || !has_type) return false;
  out.id = doc["id"].GetString();
  out.type = doc["type"].GetString();
  if (!doc.HasMember("payload")) {
    out.payload = "{}";
    return true;
  }
  const auto &payload = doc["payload"];
  if (payload.IsString()) {
    out.payload = payload.GetString();
  } else {
    rapidjson::StringBuffer sb;
    rapidjson::Writer<rapidjson::StringBuffer> w(sb);
    payload.Accept(w);
    out.payload = sb.GetString();
  }
  return true;
}
// Convenience overload: decode an entire frame buffer (no length prefix).
bool decode(const std::vector<uint8_t> &frame, Message &out) {
  return decode(frame.data(), frame.size(), out);
}
// ── read_message ─────────────────────────────────────────────────────────────
// Read one length-prefixed message from `in`. Blocks until a full frame
// arrives; returns false on EOF, short read, implausible length, or a frame
// body that fails to decode.
bool read_message(Message &out, FILE *in) {
#ifdef _WIN32
  // Ensure binary mode on Windows to prevent \r\n translation
  _setmode(_fileno(in), _O_BINARY);
#endif
  uint8_t len_buf[4];
  if (!read_exact(in, len_buf, 4)) return false;
  uint32_t msg_len = read_u32_le(len_buf);
  // Bound the length so a corrupt prefix can't trigger a giant allocation.
  if (msg_len == 0 || msg_len > 10 * 1024 * 1024) { // sanity: max 10 MB
    logger::error("ipc::read_message: invalid length " +
                  std::to_string(msg_len));
    return false;
  }
  std::vector<uint8_t> buf(msg_len);
  if (!read_exact(in, buf.data(), msg_len)) return false;
  return decode(buf, out);
}
// ── write_message ────────────────────────────────────────────────────────────
// Encode msg and write the full frame to `out`, flushing afterwards.
// Returns false on a short write.
bool write_message(const Message &msg, FILE *out) {
#ifdef _WIN32
  _setmode(_fileno(out), _O_BINARY); // avoid \r\n translation on Windows
#endif
  const std::vector<uint8_t> frame = encode(msg);
  if (std::fwrite(frame.data(), 1, frame.size(), out) != frame.size()) {
    return false;
  }
  std::fflush(out);
  return true;
}
} // namespace ipc

View File

@ -0,0 +1,28 @@
include(FetchContent)
# RapidJSON — use master for CMake 4.x compatibility (v1.1.0 is from 2016).
FetchContent_Declare(
rapidjson
GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
GIT_TAG master
GIT_SHALLOW TRUE
)
set(RAPIDJSON_BUILD_DOC OFF CACHE BOOL "" FORCE)
set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
set(RAPIDJSON_BUILD_TESTS OFF CACHE BOOL "" FORCE)
# RapidJSON is header-only: populate the sources without add_subdirectory.
# NOTE(review): FetchContent_Populate is deprecated since CMake 3.30 —
# consider FetchContent_MakeAvailable with SOURCE_SUBDIR or EXCLUDE_FROM_ALL.
FetchContent_GetProperties(rapidjson)
if(NOT rapidjson_POPULATED)
FetchContent_Populate(rapidjson)
# Don't add_subdirectory — just use the headers
endif()
add_library(json STATIC
src/json.cpp
)
target_include_directories(json
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
PUBLIC ${rapidjson_SOURCE_DIR}/include
)

View File

@ -0,0 +1,23 @@
#pragma once
#include <string>
#include <vector>
namespace json {
/// Parse a JSON string and return a pretty-printed version ("" on parse error).
std::string prettify(const std::string &json_str);
/// Extract a string value by key from a JSON object (top-level only).
/// Returns "" when the input is invalid, the key is absent, or not a string.
std::string get_string(const std::string &json_str, const std::string &key);
/// Extract an int value by key from a JSON object (top-level only).
/// Returns 0 when the input is invalid, the key is absent, or not an int.
int get_int(const std::string &json_str, const std::string &key);
/// Check if a JSON string is valid.
bool is_valid(const std::string &json_str);
/// Get all top-level keys from a JSON object (empty if not a JSON object).
std::vector<std::string> keys(const std::string &json_str);
} // namespace json

View File

@ -0,0 +1,62 @@
#include "json/json.h"
#include <rapidjson/document.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>
namespace json {
std::string prettify(const std::string &json_str) {
rapidjson::Document doc;
doc.Parse(json_str.c_str());
if (doc.HasParseError()) {
return {};
}
rapidjson::StringBuffer buffer;
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
doc.Accept(writer);
return std::string(buffer.GetString(), buffer.GetSize());
}
std::string get_string(const std::string &json_str, const std::string &key) {
rapidjson::Document doc;
doc.Parse(json_str.c_str());
if (doc.HasParseError() || !doc.IsObject())
return {};
auto it = doc.FindMember(key.c_str());
if (it == doc.MemberEnd() || !it->value.IsString())
return {};
return std::string(it->value.GetString(), it->value.GetStringLength());
}
// Return the top-level int member `key`, or 0 if the document is not a JSON
// object, the key is absent, or the value is not an int.
int get_int(const std::string &json_str, const std::string &key) {
  rapidjson::Document doc;
  if (doc.Parse(json_str.c_str()).HasParseError() || !doc.IsObject()) {
    return 0;
  }
  const auto member = doc.FindMember(key.c_str());
  if (member != doc.MemberEnd() && member->value.IsInt()) {
    return member->value.GetInt();
  }
  return 0;
}
// True when the string parses as JSON without error.
bool is_valid(const std::string &json_str) {
  rapidjson::Document doc;
  return !doc.Parse(json_str.c_str()).HasParseError();
}
// Collect the top-level member names of a JSON object, in document order.
// Returns an empty vector when the input is invalid or not an object.
std::vector<std::string> keys(const std::string &json_str) {
  std::vector<std::string> names;
  rapidjson::Document doc;
  if (doc.Parse(json_str.c_str()).HasParseError() || !doc.IsObject()) {
    return names;
  }
  for (const auto &member : doc.GetObject()) {
    names.emplace_back(member.name.GetString(), member.name.GetStringLength());
  }
  return names;
}
} // namespace json

View File

@ -0,0 +1,50 @@
cmake_minimum_required(VERSION 3.20)
project(kbot CXX)
# Static by default; shared builds export symbols via polymech_export.h.
option(POLYMECH_KBOT_SHARED "Build kbot as a shared library (DLL/so)" OFF)
set(_kbot_sources kbot.cpp llm_client.cpp source_files.cpp)
if(POLYMECH_KBOT_SHARED)
add_library(kbot SHARED ${_kbot_sources})
# Enables dllexport / default-visibility in the export header.
target_compile_definitions(kbot PRIVATE POLYMECH_BUILDING_LIBRARY)
else()
add_library(kbot STATIC ${_kbot_sources})
# Static build: the export macro expands to nothing for library and consumers.
target_compile_definitions(kbot PRIVATE POLYMECH_STATIC_BUILD=1)
target_compile_definitions(kbot INTERFACE POLYMECH_STATIC_BUILD=1)
endif()
# taskflow is header-only; its source dir is provided by the parent build.
target_include_directories(kbot PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${taskflow_SOURCE_DIR}
)
target_link_libraries(kbot PUBLIC
logger
json
oai
pranav_glob
)
if(POLYMECH_KBOT_SHARED)
# Shared builds land next to the executables in dist/.
set_target_properties(kbot PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/dist"
RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/dist"
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
)
endif()
install(TARGETS kbot
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)
install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/kbot.h
${CMAKE_CURRENT_SOURCE_DIR}/llm_client.h
${CMAKE_CURRENT_SOURCE_DIR}/polymech_export.h
DESTINATION include/polymech
)

View File

@ -0,0 +1,189 @@
#include "kbot.h"
#include "source_files.h"
#include <fstream>
#include <filesystem>
#include <iostream>
#include "logger/logger.h"
#include "llm_client.h"
#include <nlohmann/json.hpp>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
namespace polymech {
namespace kbot {
namespace {
namespace fs = std::filesystem;
// Replace every occurrence of `from` in `s` with `to`, in place.
// Advancing past the inserted text makes the scan safe when `to`
// contains `from`. An empty `from` is a no-op: without the guard,
// find("") matches at every position and the loop never terminates.
static void replace_all(std::string &s, const std::string &from, const std::string &to) {
  if (from.empty())
    return;
  std::size_t pos = 0;
  while ((pos = s.find(from, pos)) != std::string::npos) {
    s.replace(pos, from.length(), to);
    pos += to.length();
  }
}
// Strip any directory-style prefix ("org/model" or "org\model") from a
// model identifier; an empty id maps to the sentinel "unknown_model".
static std::string model_basename(const std::string &model) {
  if (model.empty())
    return "unknown_model";
  const auto sep = model.find_last_of("/\\");
  return sep == std::string::npos ? model : model.substr(sep + 1);
}
static std::string expand_dst_path(const KBotOptions &opts, std::string raw) {
const std::string m = model_basename(opts.model);
const std::string r = opts.router.empty() ? std::string("unknown_router") : opts.router;
replace_all(raw, "${MODEL}", m);
replace_all(raw, "${MODEL_NAME}", m);
replace_all(raw, "${ROUTER}", r);
return raw;
}
/** Same idea as TS `onCompletion`: pick the completion output path.
 *  `--dst` wins over the legacy `--output` when both are set; "" means
 *  "no file output" (caller prints to stdout instead). */
static std::string effective_completion_dst(const KBotOptions &opts) {
  if (!opts.dst.empty())
    return opts.dst;
  return opts.output;
}
/** @returns true if wrote to file (caller should skip printing body to stdout). */
/** Write the completion text to the configured --dst/--output path.
 *  @returns true if wrote to file (caller should skip printing body to stdout);
 *  false when no destination is configured or any filesystem step fails. */
static bool try_write_completion_to_dst(const KBotOptions &opts, const std::string &text) {
  const std::string raw = effective_completion_dst(opts);
  if (raw.empty())
    return false;
  // Expand ${MODEL}/${MODEL_NAME}/${ROUTER} placeholders before resolving.
  std::string expanded = expand_dst_path(opts, raw);
  fs::path p;
  try {
    p = fs::absolute(expanded);
  } catch (const std::exception &e) {
    logger::error(std::string("Invalid output path: ") + e.what());
    return false;
  }
  std::error_code ec;
  // error_code overload never throws; failure is reported and logged below.
  fs::create_directories(p.parent_path(), ec);
  if (ec) {
    logger::error("Failed to create output directories: " + ec.message());
    return false;
  }
  // Default behaviour appends to an existing file; only --append=replace truncates.
  const bool append_existing = (opts.append != "replace") && fs::exists(p);
  std::ofstream out(p, std::ios::binary | (append_existing ? std::ios::app : std::ios::trunc));
  if (!out) {
    logger::error("Failed to open output file: " + p.string());
    return false;
  }
  out << text;
  // Keep the file newline-terminated so successive appends don't run together.
  if (!text.empty() && text.back() != '\n')
    out.put('\n');
  logger::info(std::string(append_existing ? "Appended completion to " : "Wrote completion to ") + p.string());
  return true;
}
std::string json_job_result_ai(bool success, const std::string &text_or_error, bool is_text,
const std::string &provider_meta_json = {}) {
nlohmann::json o;
o["status"] = success ? "success" : "error";
o["mode"] = "ai";
if (success && is_text) o["text"] = text_or_error;
else if (!success) o["error"] = text_or_error;
if (!provider_meta_json.empty()) {
try {
o["llm"] = nlohmann::json::parse(provider_meta_json);
} catch (...) {
o["llm"] = nlohmann::json{{"_parse_error", true}, {"raw", provider_meta_json}};
}
}
return o.dump();
}
} // namespace
/// Execute one `kbot ai` job: build the prompt (optionally attaching glob'd
/// source files), call the LLM, write/print the completion, and emit a
/// terminal "job_result" event through cb.onEvent. Returns 0 on success,
/// 1 when the LLM call fails.
int run_kbot_ai_pipeline(const KBotOptions &opts, const KBotCallbacks &cb) {
  logger::debug("Starting kbot ai pipeline");
  std::vector<std::string> source_rel_paths;
  // Only collect the attached-file list when include globs were requested.
  const std::string full_prompt = build_prompt_with_sources(
      opts, opts.include_globs.empty() ? nullptr : &source_rel_paths);
  if (!opts.include_globs.empty()) {
    logger::info("kbot ai: attached " + std::to_string(source_rel_paths.size()) + " text source file(s)");
  }
  // Dry run: report what would be sent without calling the LLM.
  if (opts.dry_run) {
    logger::info("Dry run triggered for kbot ai");
    if (cb.onEvent) {
      if (!opts.include_globs.empty()) {
        cb.onEvent("job_result", make_dry_run_ai_result(opts, full_prompt, source_rel_paths).dump());
      } else {
        cb.onEvent("job_result", json_job_result_ai(true, "[dry-run] no LLM call", true));
      }
    }
    return 0;
  }
  LLMClient client(opts);
  std::string target_prompt = full_prompt;
  if (target_prompt.empty()) {
    // Fallback smoke-test prompt when the caller supplied nothing.
    target_prompt = "Respond with 'Hello from KBot C++ AI Pipeline!'";
  }
  logger::debug("Executing kbot ai completion via LLMClient...");
  LLMResponse res = client.execute_chat(target_prompt);
  if (res.success) {
    // File output wins; stdout only when no --dst/--output was configured.
    if (!try_write_completion_to_dst(opts, res.text))
      std::cout << res.text << "\n";
    if (cb.onEvent) {
      cb.onEvent("ai_progress",
                 "{\"message\":\"Task completion received\",\"has_text\":true}");
    }
  } else {
    logger::error("AI Task Failed: " + res.error);
    if (cb.onEvent) {
      // Serialize {"error": "..."} with RapidJSON so the message is escaped.
      rapidjson::StringBuffer ebuf;
      rapidjson::Writer<rapidjson::StringBuffer> ew(ebuf);
      ew.StartObject();
      ew.Key("error");
      ew.String(res.error.c_str(),
                static_cast<rapidjson::SizeType>(res.error.size()));
      ew.EndObject();
      cb.onEvent("ai_error",
                 std::string(ebuf.GetString(), ebuf.GetSize()));
    }
  }
  // Terminal event: always emitted (when a callback exists), success or not.
  if (cb.onEvent) {
    if (res.success)
      cb.onEvent("job_result", json_job_result_ai(true, res.text, true, res.provider_meta_json));
    else
      cb.onEvent("job_result", json_job_result_ai(false, res.error, false));
  }
  return res.success ? 0 : 1;
}
// Stub pipeline for `kbot run`: logs what a real run would do and always
// reports success via a terminal "job_result" event.
int run_kbot_run_pipeline(const KBotRunOptions &opts, const KBotCallbacks &cb) {
  logger::info("Starting kbot run pipeline (stub) for config: " + opts.config);
  if (opts.dry)
    logger::info("Dry run triggered for kbot run");
  if (opts.list)
    logger::info("List configs mode enabled");
  const bool simulate_launch = !opts.dry && !opts.list;
  if (simulate_launch)
    logger::info("Simulating launching: .vscode/launch.json targeting " + opts.config);
  if (cb.onEvent)
    cb.onEvent("job_result", "{\"status\":\"success\",\"mode\":\"run\"}");
  return 0;
}
} // namespace kbot
} // namespace polymech

View File

@ -0,0 +1,79 @@
#pragma once
#include "polymech_export.h"
#include <string>
#include <vector>
#include <memory>
#include <atomic>
#include <functional>
namespace polymech {
namespace kbot {
/// Options for the `kbot ai` pipeline (mirrors the TS CLI flags).
struct KBotOptions {
  std::string path = ".";
  std::string prompt;
  std::string output;            // legacy output path; `dst` wins when both set
  std::string dst;               // completion destination; supports ${MODEL}/${MODEL_NAME}/${ROUTER}
  std::string append = "concat"; // any value except "replace" appends to an existing file
  std::string wrap = "none";
  std::string each;
  std::vector<std::string> disable;
  std::vector<std::string> disable_tools;
  std::vector<std::string> tools;
  std::vector<std::string> include_globs; // source files attached to the prompt
  std::vector<std::string> exclude_globs;
  std::string glob_extension;
  std::string api_key;
  std::string model;                 // empty = router-specific default
  std::string router = "openrouter"; // selects the base URL / default model
  std::string mode = "tools";
  int log_level = 4;
  std::string profile;
  std::string base_url; // empty = derived from `router`
  std::string config_path;
  std::string dump;
  std::string preferences;
  std::string logs;
  bool stream = false;
  bool alt = false;
  std::string env = "default";
  std::string filters;
  std::string query;
  bool dry_run = false; // report the job without calling the LLM
  std::string format;
  /** liboai HTTP timeout (ms). 0 = library default (~30s). IPC may set for long prompts. */
  int llm_timeout_ms = 0;
  /**
   * Optional chat completion `response_format` JSON (OpenAI structured outputs).
   * Example: {"type":"json_object"} or {"type":"json_schema","json_schema":{...}}.
   * Empty = omit (default text completion).
   */
  std::string response_format_json;
  // Internal
  std::string job_id;
  std::shared_ptr<std::atomic<bool>> cancel_token;
};
/** Options for `kbot run` (launch-configuration mode). */
struct KBotRunOptions {
    // Named launch configuration to target.
    std::string config = "default";
    // Log intent only; do not launch.
    bool dry = false;
    // List available configurations instead of launching.
    bool list = false;
    std::string project_path;
    std::string log_file_path;
    // Internal
    std::string job_id;
    std::shared_ptr<std::atomic<bool>> cancel_token;
};
/** Host callbacks; onEvent receives (event type, JSON payload string) — e.g. "job_result". May be empty. */
struct KBotCallbacks {
    std::function<void(const std::string& type, const std::string& json)> onEvent;
};
POLYMECH_API int run_kbot_ai_pipeline(const KBotOptions& opts, const KBotCallbacks& cb);
POLYMECH_API int run_kbot_run_pipeline(const KBotRunOptions& opts, const KBotCallbacks& cb);
} // namespace kbot
} // namespace polymech

View File

@ -0,0 +1,165 @@
#include "llm_client.h"
#include "logger/logger.h"
#include <liboai.h>
#include <nlohmann/json.hpp>
#include <iostream>
#include <optional>
namespace polymech {
namespace kbot {
LLMClient::LLMClient(const KBotOptions& opts)
    : api_key_(opts.api_key),
      model_(opts.model),
      router_(opts.router),
      llm_timeout_ms_(opts.llm_timeout_ms),
      response_format_json_(opts.response_format_json) {
    struct RouterDefault { const char* router; const char* value; };
    // Router -> default API base URL (mirrors client.ts). "openai" maps to "" so
    // liboai uses its built-in default URL automatically.
    static const RouterDefault kBaseUrls[] = {
        {"openrouter",  "https://openrouter.ai/api/v1"},
        {"openai",      ""},
        {"deepseek",    "https://api.deepseek.com/v1"},
        {"huggingface", "https://api-inference.huggingface.co/v1"},
        {"ollama",      "http://localhost:11434/v1"},
        {"fireworks",   "https://api.fireworks.ai/v1"},
        {"gemini",      "https://generativelanguage.googleapis.com/v1beta"}, // or gemini openai compat endpt
        {"xai",         "https://api.x.ai/v1"},
    };
    // Router -> default model (from client.ts).
    static const RouterDefault kModels[] = {
        {"openrouter",  "anthropic/claude-sonnet-4"},
        {"openai",      "gpt-4o"},
        {"deepseek",    "deepseek-chat"},
        {"huggingface", "meta-llama/2"},
        {"ollama",      "llama3.2"},
        {"fireworks",   "llama-v2-70b-chat"},
        {"gemini",      "gemini-1.5-pro"},
        {"xai",         "grok-1"},
    };
    if (!opts.base_url.empty()) {
        base_url_ = opts.base_url;
    } else {
        base_url_ = "https://api.openai.com/v1"; // Fallback to openai API for unknown routers
        for (const auto& e : kBaseUrls) {
            if (router_ == e.router) { base_url_ = e.value; break; }
        }
    }
    if (model_.empty()) {
        model_ = "gpt-4o"; // fallback for unknown routers
        for (const auto& e : kModels) {
            if (router_ == e.router) { model_ = e.value; break; }
        }
    }
}
LLMClient::~LLMClient() = default;
/**
 * Run one blocking chat-completion round trip through liboai.
 * Returns res.success=true with the first choice's message content in res.text,
 * or res.success=false with a human-readable message in res.error.
 * Provider metadata (usage, model, id, ...) is captured in res.provider_meta_json.
 */
LLMResponse LLMClient::execute_chat(const std::string& prompt) {
    LLMResponse res;
    logger::debug("LLMClient::execute_chat: Starting. api_key length: " + std::to_string(api_key_.length()));
    // Fail fast: no point constructing a client without credentials.
    if (api_key_.empty()) {
        res.success = false;
        res.error = "API Key is empty.";
        return res;
    }
    logger::debug("LLMClient::execute_chat: base_url_: " + base_url_);
    // base_url_ == "" means "liboai default endpoint" (set in ctor for router "openai");
    // pass the explicit OpenAI URL here so the liboai constructor argument is never empty.
    liboai::OpenAI oai_impl(base_url_.empty() ? "https://api.openai.com/v1" : base_url_);
    logger::debug("LLMClient::execute_chat: Setting API Key");
    bool success = oai_impl.auth.SetKey(api_key_);
    if (!success) {
        res.success = false;
        res.error = "Failed to set API Key in liboai.";
        return res;
    }
    // 0 keeps liboai's default timeout (~30s); IPC callers may raise it for long prompts.
    if (llm_timeout_ms_ > 0) {
        oai_impl.auth.SetMaxTimeout(llm_timeout_ms_);
        logger::info("LLMClient: HTTP timeout set to " + std::to_string(llm_timeout_ms_) + " ms");
    }
    // model_ is normally defaulted in the ctor; this is a belt-and-braces fallback.
    std::string target_model = model_.empty() ? "gpt-4o" : model_;
    logger::debug("LLMClient::execute_chat: Target model: " + target_model);
    logger::info("LLMClient: calling ChatCompletion (prompt chars=" + std::to_string(prompt.size()) + ")");
    logger::debug("LLMClient::execute_chat: Init Conversation");
    // Single-turn conversation: the whole prompt goes in as one user message.
    liboai::Conversation convo;
    convo.AddUserData(prompt);
    // Optional structured-output request body; invalid JSON is ignored with a warning
    // rather than failing the whole call.
    std::optional<nlohmann::json> response_format;
    if (!response_format_json_.empty()) {
        try {
            response_format = nlohmann::json::parse(response_format_json_);
        } catch (const std::exception& e) {
            logger::warn("LLMClient: invalid --response-format / response_format_json, ignoring: " +
                std::string(e.what()));
        }
    }
    logger::debug("LLMClient::execute_chat: Calling create()");
    try {
        // All intermediate optional parameters (functions, temperature, top_p, n, stream,
        // stop, max_tokens, presence/frequency penalties, logit_bias, user) are left unset.
        liboai::Response response = oai_impl.ChatCompletion->create(
            target_model,
            convo,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            std::nullopt,
            response_format);
        logger::info("LLMClient: ChatCompletion returned (HTTP " + std::to_string(response.status_code) + ")");
        logger::debug("LLMClient::execute_chat: Got response with status: " + std::to_string(response.status_code));
        // liboai may not populate raw_json for custom base URLs — parse content directly.
        nlohmann::json j;
        bool json_ok = false;
        if (!response.raw_json.empty() && response.raw_json.contains("choices")) {
            j = response.raw_json;
            json_ok = true;
        } else if (!response.content.empty()) {
            try {
                j = nlohmann::json::parse(response.content);
                json_ok = j.contains("choices");
            } catch (...) {}
        }
        // No usable choices: surface the provider "error" object when present,
        // otherwise echo the raw body to aid debugging.
        if (!json_ok || j["choices"].empty()) {
            res.success = false;
            if (json_ok && j.contains("error")) {
                res.error = "API Error: " + j["error"].dump();
            } else {
                res.error = "Invalid response format: no choices found. Raw: " + response.content;
            }
            return res;
        }
        res.success = true;
        res.text = j["choices"][0]["message"]["content"].get<std::string>();
        /* Usage, model, cost (OpenRouter), etc. — everything except message bodies in choices. */
        try {
            nlohmann::json meta = nlohmann::json::object();
            for (auto it = j.begin(); it != j.end(); ++it) {
                if (it.key() == "choices") continue;
                meta[it.key()] = it.value();
            }
            if (!meta.empty()) res.provider_meta_json = meta.dump();
        } catch (...) {
            /* keep text; omit provider_meta_json */
        }
    } catch (std::exception& e) {
        // Network errors, liboai exceptions, or a missing/null "content" field land here.
        logger::error("LLMClient::execute_chat: Exception caught: " + std::string(e.what()));
        res.success = false;
        res.error = e.what();
    } catch (...) {
        logger::error("LLMClient::execute_chat: Unknown exception caught");
        res.success = false;
        res.error = "Unknown error occurred inside LLMClient execute_chat.";
    }
    return res;
}
} // namespace kbot
} // namespace polymech

View File

@ -0,0 +1,37 @@
#pragma once
#include <string>
#include "kbot.h"
namespace polymech {
namespace kbot {
/** Result of one chat completion; check `success` before reading `text`. */
struct LLMResponse {
    // First choice's message content (only meaningful when success == true).
    std::string text;
    bool success = false;
    // Human-readable failure description (only meaningful when success == false).
    std::string error;
    /** Top-level chat completion JSON minus `choices` (usage, model, id, OpenRouter extras). Empty if not captured. */
    std::string provider_meta_json;
};
/** Thin blocking wrapper over liboai chat completions, configured from KBotOptions. */
class POLYMECH_API LLMClient {
public:
    // Initialize the client with the options (api_key, model, router).
    // Derives default base_url and model from `router` when not provided.
    explicit LLMClient(const KBotOptions& opts);
    ~LLMClient();
    // Execute a basic chat completion using the provided prompt.
    // Blocking; reports failures via LLMResponse rather than exceptions.
    LLMResponse execute_chat(const std::string& prompt);
private:
    std::string api_key_;
    std::string model_;
    std::string router_;
    // Empty means "let liboai use its default OpenAI endpoint".
    std::string base_url_;
    // 0 = liboai default timeout.
    int llm_timeout_ms_ = 0;
    /** Parsed in execute_chat; raw JSON from KBotOptions::response_format_json */
    std::string response_format_json_;
};
} // namespace kbot
} // namespace polymech

View File

@ -0,0 +1,26 @@
#pragma once
/**
 * DLL / shared-object exports for the Polymech kbot library (pipelines, LLM client).
 *
 * CMake:
 *  - Building libkbot:      POLYMECH_BUILDING_LIBRARY (PRIVATE)
 *  - Linking static kbot:   POLYMECH_STATIC_BUILD=1 (INTERFACE)
 *  - Linking shared kbot:   default import on Windows
 */
#if defined(POLYMECH_STATIC_BUILD)
  /* Static archive: no decoration needed on any platform. */
# define POLYMECH_API
#elif defined(_WIN32)
  /* Windows shared build: export while building the DLL, import for consumers. */
# if defined(POLYMECH_BUILDING_LIBRARY)
#   define POLYMECH_API __declspec(dllexport)
# else
#   define POLYMECH_API __declspec(dllimport)
# endif
#else
  /* ELF/Mach-O: mark symbols visible when building; consumers need no decoration. */
# if defined(POLYMECH_BUILDING_LIBRARY)
#   define POLYMECH_API __attribute__((visibility("default")))
# else
#   define POLYMECH_API
# endif
#endif

View File

@ -0,0 +1,221 @@
#include "source_files.h"
#include "logger/logger.h"
#include <glob/glob.h>
#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <unordered_set>
namespace fs = std::filesystem;
namespace polymech {
namespace kbot {
namespace {
constexpr std::size_t kMaxBytesPerFile = 4 * 1024 * 1024;
// Lower-case an ASCII/byte string in place (takes by value, returns the copy).
std::string to_lower(std::string s) {
    for (char& ch : s) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return s;
}
// Extension of `p` (including the dot), lower-cased; empty when there is none.
std::string ext_of(const fs::path& p) {
    return to_lower(p.extension().string());
}
/** Extensions handled as binary / non-text in this slice (expand later for vision). */
// True for common raster-image extensions (expects a lower-cased ".ext" string).
bool is_image_ext(const std::string& ext) {
    static const char* const kImageExts[] = {".jpg", ".jpeg", ".png", ".gif", ".webp",
                                             ".bmp", ".tiff", ".tif", ".ico", ".heic", ".avif"};
    for (const char* const img : kImageExts) {
        if (ext == img) return true;
    }
    return false;
}
bool is_pdf_ext(const std::string& ext) { return ext == ".pdf"; }
/** Filename / relative path glob with * and ? only (no **). */
/** Filename / relative path glob with * and ? only (no **). */
// Greedy two-pointer wildcard match with backtracking to the last '*';
// O(1) extra space, equivalent to the textbook DP formulation.
bool glob_match_segment(const std::string& text, const std::string& pat) {
    const std::string::size_type n = text.size(), m = pat.size();
    std::string::size_type ti = 0, pi = 0;
    std::string::size_type star_pi = std::string::npos;  // index of last '*' in pat
    std::string::size_type star_ti = 0;                  // text position that '*' is currently absorbing up to
    while (ti < n) {
        if (pi < m && (pat[pi] == '?' || pat[pi] == text[ti])) {
            ++ti;
            ++pi;
        } else if (pi < m && pat[pi] == '*') {
            star_pi = pi++;
            star_ti = ti;
        } else if (star_pi != std::string::npos) {
            // Mismatch after a '*': let the '*' absorb one more character and retry.
            pi = star_pi + 1;
            ti = ++star_ti;
        } else {
            return false;
        }
    }
    // Trailing '*'s in the pattern match the empty suffix.
    while (pi < m && pat[pi] == '*') ++pi;
    return pi == m;
}
// Absolute project root: the given path, or the current directory when empty.
fs::path absolute_root(const std::string& path_opt) {
    return fs::absolute(path_opt.empty() ? fs::path(".") : fs::path(path_opt));
}
// True when any non-empty exclude pattern matches either the full relative
// path or just its filename component.
bool excluded(const std::string& rel_fwd, const std::vector<std::string>& exclude_globs) {
    const std::string name = fs::path(rel_fwd).filename().string();  // loop-invariant
    for (const auto& pat : exclude_globs) {
        if (pat.empty()) continue;
        if (glob_match_segment(rel_fwd, pat) || glob_match_segment(name, pat)) {
            return true;
        }
    }
    return false;
}
// Append `file` to `out` unless its generic path string was seen before.
void push_unique(std::vector<fs::path>& out, std::unordered_set<std::string>& seen, const fs::path& file) {
    const bool first_time = seen.insert(file.generic_string()).second;
    if (first_time) {
        out.push_back(file);
    }
}
// Expand one include pattern (relative patterns are anchored at `root`) and
// append each matched regular file, canonicalized and deduplicated, to `out`.
void expand_one_pattern(const fs::path& root, const std::string& pattern_str,
                        std::vector<fs::path>& out, std::unordered_set<std::string>& seen) {
    fs::path resolved = pattern_str.empty() ? fs::path() : fs::path(pattern_str);
    if (!resolved.is_absolute()) resolved = root / resolved;
    resolved = resolved.lexically_normal();
    const std::string pattern = resolved.string();
    // "**" selects the recursive glob variant.
    const bool recursive = pattern_str.find("**") != std::string::npos;
    std::vector<fs::path> matched;
    try {
        matched = recursive ? glob::rglob(pattern) : glob::glob(pattern);
    } catch (const std::exception& e) {
        logger::warn(std::string("source_files: glob failed: ") + e.what());
        return;
    }
    for (const auto& m : matched) {
        std::error_code ec;
        if (!fs::is_regular_file(m, ec) || ec) continue;
        const fs::path canon = fs::weakly_canonical(m, ec);
        if (!ec) push_unique(out, seen, canon);
    }
}
/**
 * Read at most `max_bytes` of `p` in binary mode; empty string on open failure.
 * Logs a truncation warning when the file is larger than the cap.
 *
 * Fix: the previous version slurped the ENTIRE file into memory and then
 * resized down, so a multi-gigabyte file would be fully buffered just to keep
 * 4 MiB. Now only `max_bytes` are ever read.
 */
std::string read_file_limited(const fs::path& p, std::size_t max_bytes) {
    std::ifstream in(p, std::ios::binary);
    if (!in) return {};
    std::string buf(max_bytes, '\0');
    in.read(buf.empty() ? nullptr : &buf[0], static_cast<std::streamsize>(max_bytes));
    buf.resize(static_cast<std::size_t>(in.gcount()));
    // If another byte remains, the file exceeded the cap — warn like before.
    if (in.peek() != std::ifstream::traits_type::eof()) {
        logger::warn("source_files: truncating large file " + p.generic_string());
    }
    return buf;
}
} // namespace
// Classify a path as a readable text source. Images and PDFs are rejected;
// extension-less files are accepted; otherwise the extension must be on the
// known text/code list.
bool is_text_source_file(const std::string& path_generic) {
    const std::string ext = ext_of(fs::path(path_generic));
    if (is_image_ext(ext) || is_pdf_ext(ext)) return false;
    if (ext.empty()) return true;
    /* Code / text-like extensions (aligned with TS text/* + common sources) */
    static const char* const kTextExts[] = {
        ".txt", ".md", ".json", ".js", ".mjs", ".cjs", ".ts", ".tsx", ".jsx", ".css",
        ".html", ".htm", ".xml", ".csv", ".yaml", ".yml", ".toml", ".sh", ".py",
        ".rs", ".go", ".java", ".cpp", ".cc", ".cxx", ".h", ".hpp", ".c",
        ".cs", ".rb", ".php", ".swift", ".kt", ".vue", ".svelte", ".scss", ".less",
        ".ini", ".cfg", ".properties", ".gradle", ".cmake", ".mdx", ".log", ".sql",
    };
    for (const char* const t : kTextExts) {
        if (ext == t) return true;
    }
    return false;
}
// Relative paths of the files the prompt builder would include.
// NOTE: delegates to build_prompt_with_sources, so file contents are read too.
std::vector<std::string> collect_source_rel_paths(const KBotOptions& opts) {
    std::vector<std::string> paths;
    build_prompt_with_sources(opts, &paths);
    return paths;
}
// Assemble the user prompt: one "--- file: rel ---" block per included text
// file (read order, deduped), followed by opts.prompt. Without include globs
// the prompt is returned untouched.
std::string build_prompt_with_sources(const KBotOptions& opts, std::vector<std::string>* out_rel_paths) {
    if (opts.include_globs.empty()) {
        return opts.prompt;
    }
    const fs::path root = absolute_root(opts.path);
    // Expand every non-empty include pattern, deduplicating across patterns.
    std::vector<fs::path> files;
    std::unordered_set<std::string> seen;
    for (const auto& pattern : opts.include_globs) {
        if (!pattern.empty()) expand_one_pattern(root, pattern, files, seen);
    }
    std::ostringstream body;
    for (const auto& abs : files) {
        std::error_code ec;
        if (!fs::is_regular_file(abs, ec) || ec) continue;
        const std::string abs_gen = abs.generic_string();
        if (!is_text_source_file(abs_gen)) {
            logger::info("source_files: skip non-text (e.g. image): " + abs_gen);
            continue;
        }
        // Report paths relative to the root; fall back to the bare filename.
        fs::path rel = fs::relative(abs, root, ec);
        if (ec) rel = abs.filename();
        const std::string rel_fwd = rel.generic_string();
        if (excluded(rel_fwd, opts.exclude_globs)) {
            logger::debug("source_files: excluded: " + rel_fwd);
            continue;
        }
        const std::string content = read_file_limited(abs, kMaxBytesPerFile);
        if (out_rel_paths) out_rel_paths->push_back(rel_fwd);
        body << "--- file: " << rel_fwd << " ---\n" << content;
        // Ensure each file block ends with a newline, then a blank separator line.
        if (!content.empty() && content.back() != '\n') body << '\n';
        body << '\n';
    }
    if (!opts.prompt.empty()) {
        body << opts.prompt;
    }
    return body.str();
}
// Build the job_result payload for a dry-run AI job: echoes the resolved
// sources and a capped preview of the augmented prompt instead of LLM output.
nlohmann::json make_dry_run_ai_result(const KBotOptions& opts, const std::string& augmented_prompt,
    const std::vector<std::string>& rel_paths) {
    nlohmann::json result;
    result["status"] = "success";
    result["mode"] = "ai";
    result["text"] = "[dry-run] no LLM call";
    result["dry_run"] = true;
    result["path"] = opts.path.empty() ? std::string(".") : opts.path;
    result["sources"] = rel_paths;
    result["prompt_char_count"] = augmented_prompt.size();
    // Cap the preview so huge augmented prompts do not bloat the event payload.
    constexpr std::size_t kPreviewCap = 2000;
    if (augmented_prompt.size() > kPreviewCap) {
        result["prompt_preview"] = augmented_prompt.substr(0, kPreviewCap);
        result["prompt_preview_truncated"] = true;
    } else {
        result["prompt_preview"] = augmented_prompt;
    }
    return result;
}
} // namespace kbot
} // namespace polymech

View File

@ -0,0 +1,32 @@
#pragma once
// Source-file collection for the kbot AI pipeline: glob expansion, text-file
// filtering, and prompt assembly from included file contents.
#include "kbot.h"
#include <nlohmann/json.hpp>
#include <string>
#include <vector>
namespace polymech {
namespace kbot {
/** True if we treat this path as a text source (UTF-8). Images/PDF reserved for future. */
bool is_text_source_file(const std::string& path_generic);
/**
 * Resolve --include / IPC `include` patterns against `opts.path` (project root).
 * Skips non-text files (e.g. images) with an info log. Applies `exclude_globs` to relative paths.
 * Note: reads file contents as a side effect (delegates to build_prompt_with_sources).
 */
std::vector<std::string> collect_source_rel_paths(const KBotOptions& opts);
/**
 * Build user prompt: optional file blocks (`--- file: rel ---` + contents) then `opts.prompt`.
 * If `out_rel_paths` is set, filled with forward-slash relative paths in read order (deduped).
 */
std::string build_prompt_with_sources(const KBotOptions& opts,
    std::vector<std::string>* out_rel_paths = nullptr);
/** JSON body for dry-run job_result when includes are used (sources + preview). */
nlohmann::json make_dry_run_ai_result(const KBotOptions& opts, const std::string& augmented_prompt,
    const std::vector<std::string>& rel_paths);
} // namespace kbot
} // namespace polymech

View File

@ -0,0 +1,49 @@
# GitHub issue form: bug report (lives under .github/ISSUE_TEMPLATE/).
# Field `id`s are stable keys used by automation; do not rename casually.
name: Bug report
description: Create a report to help us improve
labels: ["bug"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
  - type: textarea
    id: what-happened
    attributes:
      label: Describe the bug
      description: A clear and concise description of what the bug is, and any additional context.
      placeholder: Tell us what you see!
    validations:
      required: true
  - type: textarea
    id: repro-steps
    attributes:
      label: To Reproduce
      description: Steps to reproduce the behavior.
      placeholder: |
        1. Fetch a '...'
        2. Update the '....'
        3. See error
    validations:
      required: true
  - type: textarea
    id: code-snippets
    attributes:
      label: Code snippets
      description: If applicable, add code snippets to help explain your problem.
      # Renders the textarea content with C++ syntax highlighting.
      render: C++
    validations:
      required: false
  - type: input
    id: os
    attributes:
      label: OS
      placeholder: macOS
    validations:
      required: true
  - type: input
    id: lib-version
    attributes:
      label: Library version
      placeholder: liboai v1.0.0
    validations:
      required: true

View File

@ -0,0 +1,20 @@
# GitHub issue form: feature request (lives under .github/ISSUE_TEMPLATE/).
name: Feature request
description: Suggest an idea for this library
labels: ["feature-request"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this feature request!
  - type: textarea
    id: feature
    attributes:
      label: Describe the feature or improvement you're requesting
      description: A clear and concise description of what you want to happen.
    validations:
      required: true
  - type: textarea
    id: context
    attributes:
      label: Additional context
      description: Add any other context about the feature request here.
View File

@ -0,0 +1,6 @@
# Visual Studio metadata
.vs
# Build output directories (Build*/build* variants)
[Bb]uild*
out
# Local test binary — presumably a scratch app; confirm before removing
TestApp
# Tooling cache (e.g. clangd)
.cache
# JetBrains IDE settings
/.idea

View File

@ -0,0 +1,24 @@
# AGENTS.md
This repo is a maintained fork of liboai. Our goal is to make it more reliable and feature-complete without breaking existing APIs.
## Core Principles
- Preserve backward compatibility; add features without breaking existing APIs.
- Favor small, composable changes over rewrites.
- Keep the codebase clean and maintainable; document anything user-facing.
- Prioritize stability, correctness, and clear error handling.
## Current Priorities
- Add OpenAI Responses API support for GPT-5.2 and gpt-5.2-pro.
- Keep Chat Completions and other existing components intact.
- Add documentation and examples for new features.
## Workflow
- Update docs whenever you add or change public APIs.
- Use existing patterns and naming conventions in liboai.
- Avoid introducing new dependencies unless justified.
## Notes
- The initial Responses API implementation should accept raw JSON payloads.
- A ResponseInput helper is planned, but not part of the initial implementation.
- Azure Responses support is out of scope for now.

View File

@ -0,0 +1,22 @@
cmake_minimum_required(VERSION 3.21)
project(liboai)

# On Windows, dependencies are resolved through vcpkg; VCPKG_ROOT must be set.
if(WIN32)
  set(VCPKG_CMAKE_PATH $ENV{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake CACHE FILEPATH "Location of vcpkg.cmake")
  include(${VCPKG_CMAKE_PATH})
  find_package(ZLIB REQUIRED)
  find_package(nlohmann_json CONFIG REQUIRED)
  find_package(CURL REQUIRED)
endif()

option(BUILD_EXAMPLES "Build example applications" OFF)

# Group targets into folders in IDEs that support it (Visual Studio).
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

add_subdirectory(liboai)
if(BUILD_EXAMPLES)
  add_subdirectory(documentation)
endif()

# Make the library the default startup project in Visual Studio.
set_property(DIRECTORY PROPERTY VS_STARTUP_PROJECT oai)

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Dread
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,100 @@
<p align="center">
<img src="/images/_logo.png">
</p>
<hr>
<h1>Introduction</h1>
<p><code>liboai</code> is a simple, <b>unofficial</b> C++17 library for the OpenAI API. It allows developers to access OpenAI endpoints through a simple collection of methods and classes. The library can most effectively be thought of as a <b>spiritual port</b> of OpenAI's Python library, simply called <code>openai</code>, due to its similar structure - with few exceptions.
<h3>Features</h3>
- [x] [ChatGPT](https://github.com/D7EAD/liboai/tree/main/documentation/chat)
- [x] [Responses API](https://platform.openai.com/docs/api-reference/responses/create)
- [X] [Audio](https://github.com/D7EAD/liboai/tree/main/documentation/audio)
- [X] [Azure](https://github.com/D7EAD/liboai/tree/main/documentation/azure)
- [X] [Functions](https://platform.openai.com/docs/api-reference/chat/create)
- [x] [Image DALL·E](https://github.com/D7EAD/liboai/tree/main/documentation/images)
- [x] [Models](https://github.com/D7EAD/liboai/tree/main/documentation/models)
- [x] [Completions](https://github.com/D7EAD/liboai/tree/main/documentation/completions)
- [x] [Edit](https://github.com/D7EAD/liboai/tree/main/documentation/edits)
- [x] [Embeddings](https://github.com/D7EAD/liboai/tree/main/documentation/embeddings)
- [x] [Files](https://github.com/D7EAD/liboai/tree/main/documentation/files)
- [x] [Fine-tunes](https://github.com/D7EAD/liboai/tree/main/documentation/fine-tunes)
- [x] [Moderation](https://github.com/D7EAD/liboai/tree/main/documentation/moderations)
- [X] Asynchronous Support
<h1>Usage</h1>
See below for just how similar in style <code>liboai</code> and its Python alternative are when generating an image using DALL-E.</p>
<details open>
<summary>DALL-E Generation in Python.</summary>
<br>
```py
import openai
import os
openai.api_key = os.getenv("OPENAI_API_KEY")
response = openai.Image.create(
prompt="A snake in the grass!",
n=1,
size="256x256"
)
print(response["data"][0]["url"])
```
</details>
<details open>
<summary>DALL-E Generation in C++.</summary>
<br>
```cpp
#include "liboai.h"
using namespace liboai;
int main() {
OpenAI oai;
oai.auth.SetKeyEnv("OPENAI_API_KEY");
Response res = oai.Image->create(
"A snake in the grass!",
1,
"256x256"
);
std::cout << res["data"][0]["url"] << std::endl;
}
```
</details>
<p>Running the above will print out the URL to the resulting generated image, which may or may not look similar to the one found below.</p>
<table>
<tr>
<th>Example Image</th>
</tr>
<tr>
<td>
<img src="/images/snake.png">
</td>
</tr>
</table>
<p><i>Keep in mind the above C++ example is a minimal example and is not an exception-safe snippet. Please see <a href="/documentation">the documentation</a> for more detailed and exception-safe code snippets.</i></p>
<h1>Dependencies</h1>
<p>For the library to work the way it does, it relies on two major dependencies. These dependencies can be found listed below.</p>
- <a href="https://github.com/nlohmann/json">nlohmann-json</a>
- <a href="https://curl.se/">cURL</a>
*If building the library using the provided solution, it is recommended to install these dependencies using <b>vcpkg</b>.*
<h1>Documentation</h1>
<p>For detailed documentation and additional code examples, see the library's documentation <a href="/documentation">here</a>.
<h1>Contributing</h1>
<p>Artificial intelligence is an exciting and quickly-changing field.
If you'd like to partake in further placing the power of AI in the hands of everyday people, please consider contributing by submitting new code and features via a **Pull Request**. If you have any issues using the library, or just want to suggest new features, feel free to contact me directly using the info on my <a href="https://github.com/D7EAD">profile</a> or open an **Issue**.

View File

@ -0,0 +1,25 @@
# liboai Roadmap
This is a living backlog of improvements and ideas as we deepen our use of the library. It is intentionally lightweight and updated as we discover new needs.
## Now
- Responses API support (GPT-5.2, gpt-5.2-pro)
- Keep all existing APIs stable and intact
## Next
- Responses streaming helpers and SSE parsing
- ResponseInput helper to build Responses `input` items
- `output_text` convenience helper for Responses outputs
- Structured outputs helpers for `text.format`
- Tool definition builders for Responses (`tools`, `tool_choice`)
## Later
- More robust testing coverage (unit + integration samples)
- Improved error messaging with request context (safe, no secrets)
- Expanded docs and cookbook-style examples
- Performance pass on JSON construction and streaming
## Observations
- The Conversation class is useful for Chat Completions; Responses lacks an equivalent.
- The library is stable but needs modernization for new OpenAI primitives.
- Maintaining compatibility is critical for existing users.

View File

@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.13)
project(documentation)

# add_example(<target> <source>): one example executable linked against liboai.
# Fix: the keyword-less target_link_libraries signature is legacy and must not
# be mixed with keyword signatures; PRIVATE is correct — nothing links against
# an example, so its dependencies need not propagate.
macro(add_example target_name source_name)
  add_executable(${target_name} "${source_name}")
  target_link_libraries(${target_name} PRIVATE oai)
  set_target_properties(${target_name} PROPERTIES FOLDER "examples/${PROJECT_NAME}")
endmacro()

# add_basic_example(<name>): shorthand for an example whose source is <name>.cpp.
macro(add_basic_example source_base_name)
  add_example(${source_base_name} "${source_base_name}.cpp")
endmacro()

add_subdirectory(audio/examples)
add_subdirectory(authorization/examples)
add_subdirectory(azure/examples)
add_subdirectory(chat/examples)
add_subdirectory(chat/conversation/examples)
add_subdirectory(completions/examples)
add_subdirectory(edits/examples)
add_subdirectory(embeddings/examples)
add_subdirectory(files/examples)
add_subdirectory(fine-tunes/examples)
add_subdirectory(images/examples)
add_subdirectory(models/examples)
add_subdirectory(moderations/examples)
add_subdirectory(responses/examples)

View File

@ -0,0 +1,217 @@
<h1>Documentation</h1>
<p>Both above and below, you can find resources and documentation for each component of the library.</p>
<h3>Basic Usage</h3>
<p>In order to understand how to use each component of the library, it would be ideal to first understand the basic structure of the library as a whole. When using <code>liboai</code> in a project, you <b>should</b> only include one header file, <code>liboai.h</code>. This header provides an interface to all other components of the library such as <code>Images</code>, <code>Completions</code>, etc.
See below for both a correct and incorrect example.</p>
<table>
<tr>
<th>Correct</th>
<th>Incorrect</th>
</tr>
<tr>
<td>
```cpp
#include "liboai.h"
int main() {
...
}
```
</td>
<td>
```cpp
#include "fine_tunes.h"
#include "models.h"
// etc...
int main() {
...
}
```
</td>
</tr>
</table>
<br>
<p>Once we have properly included the necessary header file to use the library--and assuming symbols are linked properly--we can make use of the class in <code>liboai.h</code> to get started. At some point in our source code, we will have to choose when to define a <code>liboai::OpenAI</code> object to access component interfaces. Each component interface stored in this object offers methods associated with it, so, for instance, interface <code>Image</code> will have a method <code>create(...)</code> to generate an image from text. Each non-async method returns a <code>liboai::Response</code> containing response information whereas async methods return a <code>liboai::FutureResponse</code>. However, before we start using these methods, we must first set our authorization information--otherwise it will not work!
<code>liboai::OpenAI</code> also houses another important member, the authorization member, which is used to set authorization information (such as the API key and organization IDs) before we call the API methods. For more information on additional members found in <code>liboai::Authorization</code>, refer to the <a href="./authorization">authorization</a> folder above.
See below for both a correct and incorrect control flow when generating an image.</p>
<table>
<tr>
<th>Correct</th>
<th>Incorrect</th>
</tr>
<tr>
<td>
```cpp
#include "liboai.h"
using namespace liboai;
int main() {
OpenAI oai;
// Set our API key using an environment variable.
// This is recommended as hard-coding API keys is
// insecure.
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
Response response = oai.Image->create(
"a siamese cat!"
);
}
...
}
```
</td>
<td>
```cpp
#include "liboai.h"
using namespace liboai;
int main() {
OpenAI oai;
// Failure to set authorization info!
// Will fail, exception will be thrown!
Response response = oai.Image->create(
"a siamese cat!"
);
...
}
```
</td>
</tr>
</table>
<br>
<p>As you can see above, authentication-set related functions return booleans to indicate success and failure, whereas component methods will throw an exception, <code>OpenAIException</code> or <code>OpenAIRateLimited</code>, to indicate their success or failure; these should be checked for accordingly. Below you can find an exception-safe version of the above correct snippet.</p>
<table>
<tr>
<th>Correct, exception-safe</th>
</tr>
<tr>
<td>
```cpp
#include "liboai.h"
using namespace liboai;
int main() {
OpenAI oai;
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
try {
Response response = oai.Image->create(
"a siamese cat!"
);
}
catch (std::exception& e) {
std::cout << e.what() << std::endl;
}
...
}
}
```
</td>
</tr>
</table>
<br>
<p>Now, once we have made a call using a component interface, we most certainly want to get the information out of it. To do this, using our knowledge of the format of the API responses, we can extract the information, such as the resulting image's URL, using JSON indexing on the <code>liboai::Response</code> object. See below for an example where we print the generated image's URL.</p>
<table>
<tr>
<th>Accessing JSON Response Data</th>
</tr>
<tr>
<td>
```cpp
#include "liboai.h"
using namespace liboai;
int main() {
OpenAI oai;
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
try {
Response response = oai.Image->create(
"a siamese cat!"
);
std::cout << response["data"][0]["url"].get<std::string>() << std::endl;
}
catch (std::exception& e) {
std::cout << e.what() << std::endl;
}
}
}
```
</td>
</tr>
</table>
<br>
<p>What if we want to do more than just print the URL of the image? Why not download it right when it's done? Thankfully, <code>liboai</code> has a convenient function for that, <code>Network::Download(...)</code> (and <code>Network::DownloadAsync(...)</code>). See below for an example of downloading a freshly generated image.
<table>
<tr>
<th>Downloading a Generated Image</th>
</tr>
<tr>
<td>
```cpp
#include "liboai.h"
using namespace liboai;
int main() {
OpenAI oai;
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
try {
Response response = oai.Image->create(
"a siamese cat!"
);
Network::Download(
"C:/some/folder/file.png", // to
response["data"][0]["url"].get<std::string>(), // from
oai.auth.GetAuthorizationHeaders()
);
}
catch (std::exception& e) {
std::cout << e.what() << std::endl;
}
}
}
```
</td>
</tr>
</table>
<br>
<p>After a successful run of the above snippet, the file found at the URL returned from the component call will be downloaded to the path <code>C:/some/folder/file.png</code>.</p>
<br>
<h1>Synopsis</h1>
<p>Each component interface found within <code>liboai::OpenAI</code> follows the same pattern found above. Whether you want to generate images, completions, or fine-tune models, the control flow should follow--or remain similar to--the above examples.
For detailed examples regarding individual component interfaces, refer to the appropriate folder listed above.</p>
<h3>Project Maintenance</h3>
<p>Maintainers can find PR workflow notes in <a href="./maintenance">documentation/maintenance</a>.</p>

View File

@ -0,0 +1,96 @@
<h1>Audio</h1>
<p>The <code>Audio</code> class is defined in <code>audio.h</code> at <code>liboai::Audio</code>, and its interface can ideally be accessed through a <code>liboai::OpenAI</code> object.
This class and its associated <code>liboai::OpenAI</code> interface allow access to the <a href="https://beta.openai.com/docs/api-reference/audio">Audio</a> endpoint of the OpenAI API; this endpoint's functionality can be found below.</p>
- Turn audio to text.
- Turn text to audio.
<br>
<h2>Methods</h2>
<p>This document covers the method(s) located in <code>audio.h</code>. You can find their function signature(s) below.</p>
<h3>Create a Transcription</h3>
<p>Transcribes audio into the input language. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response transcribe(
const std::filesystem::path& file,
const std::string& model,
std::optional<std::string> prompt = std::nullopt,
std::optional<std::string> response_format = std::nullopt,
std::optional<float> temperature = std::nullopt,
std::optional<std::string> language = std::nullopt
) const & noexcept(false);
```
<h3>Create a Transcription (async)</h3>
<p>Asynchronously transcribes audio into the input language. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse transcribe_async(
const std::filesystem::path& file,
const std::string& model,
std::optional<std::string> prompt = std::nullopt,
std::optional<std::string> response_format = std::nullopt,
std::optional<float> temperature = std::nullopt,
std::optional<std::string> language = std::nullopt
) const& noexcept(false);
```
<h3>Create a Translation</h3>
<p>Translates audio into English. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response translate(
const std::filesystem::path& file,
const std::string& model,
std::optional<std::string> prompt = std::nullopt,
std::optional<std::string> response_format = std::nullopt,
std::optional<float> temperature = std::nullopt
) const & noexcept(false);
```
<h3>Create a Translation (async)</h3>
<p>Asynchronously translates audio into English. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse translate_async(
const std::filesystem::path& file,
const std::string& model,
std::optional<std::string> prompt = std::nullopt,
std::optional<std::string> response_format = std::nullopt,
std::optional<float> temperature = std::nullopt
) const& noexcept(false);
```
<h3>Text to Speech</h3>
<p>Turn text into lifelike spoken audio. Returns a <code>liboai::Response</code> containing response data. The audio data is in the <code>content</code> field of the <code>liboai::Response</code></p>
```cpp
liboai::Response speech(
const std::string& model,
const std::string& voice,
const std::string& input,
std::optional<std::string> response_format = std::nullopt,
std::optional<float> speed = std::nullopt
) const& noexcept(false);
```
<h3>Text to Speech (async)</h3>
<p>Asynchronously turn text into lifelike spoken audio. Returns a <code>liboai::FutureResponse</code> containing response data. The audio data is in the <code>content</code> field of the <code>liboai::Response</code></p>
```cpp
liboai::FutureResponse speech_async(
const std::string& model,
const std::string& voice,
const std::string& input,
std::optional<std::string> response_format = std::nullopt,
std::optional<float> speed = std::nullopt
) const& noexcept(false);
```
<p>All function parameters marked <code>optional</code> are not required and are resolved on OpenAI's end if not supplied.</p>
<br>
<h2>Example Usage</h2>
<p>For example usage of the above function(s), please refer to the <a href="./examples">examples</a> folder.</p>

View File

@ -0,0 +1,10 @@
# Build file for the Audio endpoint examples.
# add_basic_example(<name>) is presumably a helper defined by the parent
# examples project that builds <name>.cpp into an example executable —
# confirm against the top-level examples CMakeLists.
cmake_minimum_required(VERSION 3.13)
project(audio)
add_basic_example(create_speech)
add_basic_example(create_speech_async)
add_basic_example(create_transcription)
add_basic_example(create_transcription_async)
add_basic_example(create_translation)
add_basic_example(create_translation_async)

View File

@ -0,0 +1,24 @@
#include "liboai.h"
using namespace liboai;

// Text-to-speech example: synthesizes speech with the "tts-1" model and
// "alloy" voice, writes the audio bytes to demo.mp3, and prints the size.
int main() {
    OpenAI oai;
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        try {
            Response response = oai.Audio->speech(
                "tts-1",
                "alloy",
                "Today is a wonderful day to build something people love!"
            );
            {
                // Scoped stream: closed at end of block, as the original's
                // explicit close() did.
                std::ofstream out("demo.mp3", std::ios::binary);
                out << response.content;
            }
            std::cout << response.content.size() << std::endl;
        }
        catch (const std::exception& e) {
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,31 @@
// Asynchronous text-to-speech example: synthesizes speech off-thread,
// then writes the audio bytes to demo.mp3 and prints the payload size.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        try {
            // call async method; returns a future
            auto fut = oai.Audio->speech_async(
                "tts-1",
                "alloy",
                "Today is a wonderful day to build something people love!"
            );
            // do other work...
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the contained response
            auto res = fut.get();
            std::ofstream ocout("demo.mp3", std::ios::binary);
            ocout << res.content;
            ocout.close();
            // print the number of audio bytes received
            std::cout << res.content.size() << std::endl;
        }
        catch (const std::exception& e) {
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,20 @@
#include "liboai.h"
using namespace liboai;

// Transcription example: sends a local audio file to the "whisper-1"
// model and prints the recognized text.
int main() {
    OpenAI oai;
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        try {
            Response response = oai.Audio->transcribe(
                "C:/some/folder/audio.mp3",
                "whisper-1"
            );
            // "text" holds the transcript string.
            std::cout << response["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) {
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,30 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous transcription example: sends a local audio file to the
// "whisper-1" model off-thread and prints the recognized text.
int main() {
    OpenAI oai;
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        try {
            // call async method; returns a future
            auto fut = oai.Audio->transcribe_async(
                "C:/some/folder/file.mp3",
                "whisper-1"
            );
            // do other work...
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the contained response
            auto response = fut.get();
            // print the transcript string
            std::cout << response["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,20 @@
#include "liboai.h"
using namespace liboai;

// Translation example: translates a local audio file to English text
// with the "whisper-1" model and prints the result.
int main() {
    OpenAI oai;
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        try {
            Response res = oai.Audio->translate(
                "C:/some/folder/file.mp3",
                "whisper-1"
            );
            // Extract the raw string so the output is not JSON-quoted,
            // consistent with the transcription example.
            std::cout << res["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) {
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,30 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous translation example: translates a local audio file to
// English text off-thread and prints the result.
int main() {
    OpenAI oai;
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        try {
            // call async method; returns a future
            auto fut = oai.Audio->translate_async(
                "C:/some/folder/file.mp3",
                "whisper-1"
            );
            // do other work...
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the contained response
            auto response = fut.get();
            // print the translated text
            std::cout << response["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,177 @@
<h1>Authorization</h1>
<p>The <code>Authorization</code> class is defined in <code>authorization.h</code> at <code>liboai::Authorization</code>. This class is responsible for sharing all set authorization information with all component classes in <code>liboai</code>.
All authorization information should be set prior to the calling of any component methods such as <code>Images</code>, <code>Embeddings</code>, and so on. Failure to do so will result in a <code>liboai::OpenAIException</code> due to authorization failure on OpenAI's end.</p>
<br>
<h2>Methods</h2>
<p>This document covers the method(s) located in <code>authorization.h</code>. You can find their function signature(s) below.</p>
<h3>Get Authorizer</h3>
<p>Returns a reference to the <code>liboai::Authorization</code> singleton shared among all components.</p>
```cpp
static Authorization& Authorizer() noexcept;
```
<h3>Set API Key</h3>
<p>Sets the API key to use in subsequent component calls.</p>
```cpp
bool SetKey(std::string_view key) noexcept;
```
<h3>Set Azure API Key</h3>
<p>Sets the Azure API key to use in subsequent component calls.</p>
```cpp
bool SetAzureKey(std::string_view key) noexcept;
```
<h3>Set Active Directory Azure API Key</h3>
<p>Sets the Active Directory Azure API key to use in subsequent component calls.</p>
```cpp
bool SetAzureKeyAD(std::string_view key) noexcept;
```
<h3>Set API Key (File)</h3>
<p>Sets the API key to use in subsequent component calls from data found in file at path.</p>
```cpp
bool SetKeyFile(const std::filesystem::path& path) noexcept;
```
<h3>Set Azure API Key (File)</h3>
<p>Sets the Azure API key to use in subsequent component calls from data found in file at path.</p>
```cpp
bool SetAzureKeyFile(const std::filesystem::path& path) noexcept;
```
<h3>Set Active Directory Azure API Key (File)</h3>
<p>Sets the Active Directory Azure API key to use in subsequent component calls from data found in file at path.</p>
```cpp
bool SetAzureKeyFileAD(const std::filesystem::path& path) noexcept;
```
<h3>Set API Key (Environment Variable)</h3>
<p>Sets the API key to use in subsequent component calls from an environment variable.</p>
```cpp
bool SetKeyEnv(std::string_view var) noexcept;
```
<h3>Set Azure API Key (Environment Variable)</h3>
<p>Sets the Azure API key to use in subsequent component calls from an environment variable.</p>
```cpp
bool SetAzureKeyEnv(std::string_view var) noexcept;
```
<h3>Set Active Directory Azure API Key (Environment Variable)</h3>
<p>Sets the Active Directory Azure API key to use in subsequent component calls from an environment variable.</p>
```cpp
bool SetAzureKeyEnvAD(std::string_view var) noexcept;
```
<h3>Set Organization ID</h3>
<p>Sets the organization ID to send in subsequent component calls.</p>
```cpp
bool SetOrganization(std::string_view org) noexcept;
```
<h3>Set Organization ID (File)</h3>
<p>Sets the organization ID to send in subsequent component calls from data found in file at path.</p>
```cpp
bool SetOrganizationFile(const std::filesystem::path& path) noexcept;
```
<h3>Set Organization ID (Environment Variable)</h3>
<p>Sets the organization ID to send in subsequent component calls from an environment variable.</p>
```cpp
bool SetOrganizationEnv(std::string_view var) noexcept;
```
<h3>Set Proxies</h3>
<p>Sets the proxy, or proxies, to use in subsequent component calls.</p>
```cpp
void SetProxies(const std::initializer_list<std::pair<const std::string, std::string>>& hosts) noexcept;
void SetProxies(std::initializer_list<std::pair<const std::string, std::string>>&& hosts) noexcept;
void SetProxies(const std::map<std::string, std::string>& hosts) noexcept;
void SetProxies(std::map<std::string, std::string>&& hosts) noexcept;
```
<h3>Set Proxy Authentication</h3>
<p>Sets the username and password to use when using a certain proxy protocol.</p>
```cpp
void SetProxyAuth(const std::map<std::string, netimpl::components::EncodedAuthentication>& proto_up) noexcept;
```
<h3>Set Timeout</h3>
<p>Sets the timeout in milliseconds for the library to use in component calls.</p>
```cpp
void SetMaxTimeout(int32_t ms) noexcept;
```
<h3>Get Key</h3>
<p>Returns the currently set API key.</p>
```cpp
constexpr const std::string& GetKey() const noexcept;
```
<h3>Get Organization ID</h3>
<p>Returns the currently set organization ID.</p>
```cpp
constexpr const std::string& GetOrganization() const noexcept;
```
<h3>Get Proxies</h3>
<p>Returns the currently set proxies.</p>
```cpp
netimpl::components::Proxies GetProxies() const noexcept;
```
<h3>Get Proxy Authentication</h3>
<p>Returns the currently set proxy authentication information.</p>
```cpp
netimpl::components::ProxyAuthentication GetProxyAuth() const noexcept;
```
<h3>Get Timeout</h3>
<p>Returns the currently set timeout.</p>
```cpp
netimpl::components::Timeout GetMaxTimeout() const noexcept;
```
<h3>Get Authorization Headers</h3>
<p>Returns the currently set authorization headers based on set information.</p>
```cpp
constexpr const netimpl::components::Header& GetAuthorizationHeaders() const noexcept;
```
<h3>Get Azure Authorization Headers</h3>
<p>Returns the currently set Azure authorization headers based on set information.</p>
```cpp
constexpr const netimpl::components::Header& GetAzureAuthorizationHeaders() const noexcept;
```
<br>
<h2>Example Usage</h2>
<p>For example usage of the above function(s), please refer to the <a href="./examples">examples</a> folder.</p>

View File

@ -0,0 +1,15 @@
# Build file for the Authorization examples.
# add_basic_example(<name>) is presumably a helper defined by the parent
# examples project that builds <name>.cpp into an example executable —
# confirm against the top-level examples CMakeLists.
cmake_minimum_required(VERSION 3.13)
project(authorization)
add_basic_example(set_azure_key)
add_basic_example(set_azure_key_env)
add_basic_example(set_azure_key_file)
add_basic_example(set_key)
add_basic_example(set_key_env_var)
add_basic_example(set_key_file)
add_basic_example(set_organization)
add_basic_example(set_organization_env_var)
add_basic_example(set_organization_file)
add_basic_example(set_proxies)
add_basic_example(set_proxy_auth)

View File

@ -0,0 +1,10 @@
// Example: set the Azure API key from a hard-coded string.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Returns true on success. Hard-coding keys is discouraged; prefer the
    // environment-variable or file variants shown in the sibling examples.
    if (oai.auth.SetAzureKey("hard-coded-key")) { // NOT recommended
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the Azure API key from the AZURE_API_KEY environment variable.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Returns true if the variable was read successfully.
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the Azure API key from data stored in a file.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Returns true if the file was read successfully.
    if (oai.auth.SetAzureKeyFile("C:/some/folder/key.dat")) {
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the OpenAI API key from a hard-coded string.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Returns true on success. Hard-coding keys is discouraged; prefer the
    // environment-variable or file variants shown in the sibling examples.
    if (oai.auth.SetKey("hard-coded-key")) { // NOT recommended
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the OpenAI API key from the OPENAI_API_KEY environment variable.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Returns true if the variable was read successfully.
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the OpenAI API key from data stored in a file.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Returns true if the file was read successfully.
    if (oai.auth.SetKeyFile("C:/some/folder/key.dat")) {
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the API key and a hard-coded organization ID.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Both setters return true on success; proceed only if both succeed.
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY") && oai.auth.SetOrganization("org-123")) {
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the API key and the organization ID, both from environment variables.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Both setters return true on success; proceed only if both succeed.
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY") && oai.auth.SetOrganizationEnv("OPENAI_ORG_ID")) {
        // ...
    }
}

View File

@ -0,0 +1,10 @@
// Example: set the API key from the environment and the organization ID from a file.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    // Both setters return true on success; proceed only if both succeed.
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY") && oai.auth.SetOrganizationFile("C:/some/folder/org.dat")) {
        // ...
    }
}

View File

@ -0,0 +1,21 @@
// Example: configure per-protocol proxies before making component calls.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    /*
        Set some proxies:
            when we go to an http site, use fakeproxy1
            when we go to an https site, use fakeproxy2
    */
    // Keys are the protocol names, values are the proxy URLs.
    oai.auth.SetProxies({
        { "http", "http://www.fakeproxy1.com" },
        { "https", "https://www.fakeproxy2.com" }
    });
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        // ...
    }
}

View File

@ -0,0 +1,31 @@
// Example: configure per-protocol proxies plus proxy credentials.
#include "liboai.h"
using namespace liboai;
int main() {
    OpenAI oai;
    /*
        Set some proxies:
            when we go to an http site, use fakeproxy1
            when we go to an https site, use fakeproxy2
    */
    // Keys are the protocol names, values are the proxy URLs.
    oai.auth.SetProxies({
        { "http", "http://www.fakeproxy1.com" },
        { "https", "https://www.fakeproxy2.com" }
    });
    /*
        Set the per-protocol proxy auth info:
            when we go to an http site, use fakeuser1 and fakepass1
            when we go to an https site, use fakeuser2 and fakepass2
    */
    // Each value is a {username, password} pair for that protocol's proxy.
    oai.auth.SetProxyAuth({
        {"http", {"fakeuser1", "fakepass1"}},
        {"https", {"fakeuser2", "fakepass2"}},
    });
    if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
        // ...
    }
}

View File

@ -0,0 +1,204 @@
<h1>Azure</h1>
<p>The <code>Azure</code> class is defined in <code>azure.h</code> at <code>liboai::Azure</code>, and its interface can ideally be accessed through a <code>liboai::OpenAI</code> object.
This class and its associated <code>liboai::OpenAI</code> interface allow access to the <a href="https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference">Azure</a> OpenAI API components.</p>
<br>
<h2>Methods</h2>
<p>This document covers the method(s) located in <code>azure.h</code>. You can find their function signature(s) below.</p>
<h3>Create a Completion</h3>
<p>Given a prompt, the model will return one or more predicted completions. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response create_completion(
const std::string& resource_name,
const std::string& deployment_id,
const std::string& api_version,
std::optional<std::string> prompt = std::nullopt,
std::optional<std::string> suffix = std::nullopt,
std::optional<uint16_t> max_tokens = std::nullopt,
std::optional<float> temperature = std::nullopt,
std::optional<float> top_p = std::nullopt,
std::optional<uint16_t> n = std::nullopt,
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
std::optional<uint8_t> logprobs = std::nullopt,
std::optional<bool> echo = std::nullopt,
std::optional<std::vector<std::string>> stop = std::nullopt,
std::optional<float> presence_penalty = std::nullopt,
std::optional<float> frequency_penalty = std::nullopt,
std::optional<uint16_t> best_of = std::nullopt,
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
std::optional<std::string> user = std::nullopt
) const & noexcept(false);
```
<h3>Create a Completion (async)</h3>
<p>Given a prompt, the model will asynchronously return one or more predicted completions. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse create_completion_async(
const std::string& resource_name,
const std::string& deployment_id,
const std::string& api_version,
std::optional<std::string> prompt = std::nullopt,
std::optional<std::string> suffix = std::nullopt,
std::optional<uint16_t> max_tokens = std::nullopt,
std::optional<float> temperature = std::nullopt,
std::optional<float> top_p = std::nullopt,
std::optional<uint16_t> n = std::nullopt,
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
std::optional<uint8_t> logprobs = std::nullopt,
std::optional<bool> echo = std::nullopt,
std::optional<std::vector<std::string>> stop = std::nullopt,
std::optional<float> presence_penalty = std::nullopt,
std::optional<float> frequency_penalty = std::nullopt,
std::optional<uint16_t> best_of = std::nullopt,
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
std::optional<std::string> user = std::nullopt
) const & noexcept(false);
```
<h3>Create an Embedding</h3>
<p>Creates an embedding vector representing the input text. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response create_embedding(
const std::string& resource_name,
const std::string& deployment_id,
const std::string& api_version,
const std::string& input,
std::optional<std::string> user = std::nullopt
) const & noexcept(false);
```
<h3>Create an Embedding (async)</h3>
<p>Asynchronously creates an embedding vector representing the input text. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse create_embedding_async(
const std::string& resource_name,
const std::string& deployment_id,
const std::string& api_version,
const std::string& input,
std::optional<std::string> user = std::nullopt
) const & noexcept(false);
```
<h3>Create a Chat Completion</h3>
<p>Creates a completion for the chat message. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response create_chat_completion(
const std::string& resource_name,
const std::string& deployment_id,
const std::string& api_version,
const Conversation& conversation,
std::optional<float> temperature = std::nullopt,
std::optional<uint16_t> n = std::nullopt,
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
std::optional<std::vector<std::string>> stop = std::nullopt,
std::optional<uint16_t> max_tokens = std::nullopt,
std::optional<float> presence_penalty = std::nullopt,
std::optional<float> frequency_penalty = std::nullopt,
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
std::optional<std::string> user = std::nullopt
) const & noexcept(false);
```
<h3>Create a Chat Completion (async)</h3>
<p>Asynchronously creates a completion for the chat message. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse create_chat_completion_async(
const std::string& resource_name,
const std::string& deployment_id,
const std::string& api_version,
const Conversation& conversation,
std::optional<float> temperature = std::nullopt,
std::optional<uint16_t> n = std::nullopt,
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
std::optional<std::vector<std::string>> stop = std::nullopt,
std::optional<uint16_t> max_tokens = std::nullopt,
std::optional<float> presence_penalty = std::nullopt,
std::optional<float> frequency_penalty = std::nullopt,
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
std::optional<std::string> user = std::nullopt
) const & noexcept(false);
```
<h3>Request an Image Generation</h3>
<p>Generate a batch of images from a text caption. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response request_image_generation(
const std::string& resource_name,
const std::string& api_version,
const std::string& prompt,
std::optional<uint8_t> n = std::nullopt,
std::optional<std::string> size = std::nullopt
) const & noexcept(false);
```
<h3>Request an Image Generation (async)</h3>
<p>Asynchronously generate a batch of images from a text caption. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse request_image_generation_async(
const std::string& resource_name,
const std::string& api_version,
const std::string& prompt,
std::optional<uint8_t> n = std::nullopt,
std::optional<std::string> size = std::nullopt
) const & noexcept(false);
```
<h3>Get a Previously Generated Image</h3>
<p>Retrieve the results (URL) of a previously called image generation operation. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response get_generated_image(
const std::string& resource_name,
const std::string& api_version,
const std::string& operation_id
) const & noexcept(false);
```
<h3>Get a Previously Generated Image (async)</h3>
<p>Asynchronously retrieve the results (URL) of a previously called image generation operation. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse get_generated_image_async(
const std::string& resource_name,
const std::string& api_version,
const std::string& operation_id
) const & noexcept(false);
```
<h3>Delete a Previously Generated Image</h3>
<p>Deletes the corresponding image from the Azure server. Returns a <code>liboai::Response</code> containing response data.</p>
```cpp
liboai::Response delete_generated_image(
const std::string& resource_name,
const std::string& api_version,
const std::string& operation_id
) const & noexcept(false);
```
<h3>Delete a Previously Generated Image (async)</h3>
<p>Asynchronously deletes the corresponding image from the Azure server. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
```cpp
liboai::FutureResponse delete_generated_image_async(
const std::string& resource_name,
const std::string& api_version,
const std::string& operation_id
) const & noexcept(false);
```
<p>All function parameters marked <code>optional</code> are not required and are resolved on OpenAI's end if not supplied.</p>
<br>
<h2>Example Usage</h2>
<p>For example usage of the above function(s), please refer to the <a href="./examples">examples</a> folder.</p>

View File

@ -0,0 +1,16 @@
# Build file for the Azure endpoint examples.
# add_basic_example(<name>) and add_example(<target> <source>) are presumably
# helpers defined by the parent examples project — confirm there. The
# *_azure target names avoid collisions with same-named examples in
# sibling directories that reuse these source file names.
cmake_minimum_required(VERSION 3.13)
project(azure)
add_example(create_chat_completion_azure "create_chat_completion.cpp")
add_example(create_chat_completion_async_azure "create_chat_completion_async.cpp")
add_basic_example(create_completion)
add_basic_example(create_completion_async)
add_example(create_embedding_azure "create_embedding.cpp")
add_example(create_embedding_async_azure "create_embedding_async.cpp")
add_basic_example(delete_generated_image)
add_basic_example(delete_generated_image_async)
add_basic_example(get_generated_image)
add_basic_example(get_generated_image_async)
add_basic_example(request_image_generation)
add_basic_example(request_image_generation_async)

View File

@ -0,0 +1,28 @@
#include "liboai.h"
using namespace liboai;

// Azure chat-completion example: sends one user message to an Azure
// OpenAI deployment, folds the reply back into the conversation, prints it.
int main() {
    OpenAI oai;
    Conversation convo;
    convo.AddUserData("Hi, how are you?");
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->create_chat_completion(
                "resource", "deploymentID", "api_version",
                convo
            );
            // update the conversation with the response
            convo.Update(res);
            // print the response from the API
            std::cout << convo.GetLastResponse() << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,37 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous Azure chat-completion example: sends one user message
// off-thread, folds the reply back into the conversation, prints it.
int main() {
    OpenAI oai;
    Conversation convo;
    convo.AddUserData("Hi, how are you?");
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            // call async method; returns a future
            auto fut = oai.Azure->create_chat_completion_async(
                "resource", "deploymentID", "api_version",
                convo
            );
            // do other work...
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the contained response
            auto res = fut.get();
            // update the conversation with the response
            convo.Update(res);
            // print the response from the API
            std::cout << convo.GetLastResponse() << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,21 @@
#include "liboai.h"
using namespace liboai;

// Azure completion example: requests a text completion from an Azure
// OpenAI deployment and prints the first choice.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->create_completion(
                "resource", "deploymentID", "api_version",
                "Write a short poem about a snowman."
            );
            // choices[0].text holds the first completion.
            std::cout << res["choices"][0]["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,29 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous Azure completion example: requests a text completion
// off-thread and prints the first choice.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->create_completion_async(
                "resource", "deploymentID", "api_version",
                "Write a short poem about a snowman."
            );
            // do other stuff
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the result
            auto res = fut.get();
            // choices[0].text holds the first completion.
            std::cout << res["choices"][0]["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,21 @@
#include "liboai.h"
using namespace liboai;

// Azure embedding example: requests an embedding for a string from an
// Azure OpenAI deployment and prints the whole response.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->create_embedding(
                "resource", "deploymentID", "api_version",
                "String to get embedding for"
            );
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,27 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous Azure embedding example: requests an embedding off-thread
// and prints the whole response.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->create_embedding_async(
                "resource", "deploymentID", "api_version",
                "String to get embedding for"
            );
            // do other work
            // get() blocks until the future is ready, then yields the response
            auto res = fut.get();
            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,22 @@
#include "liboai.h"
using namespace liboai;

// Azure example: deletes a previously generated image, identified by its
// operation ID, and prints the response.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->delete_generated_image(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"
            );
            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,30 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous Azure example: deletes a previously generated image
// off-thread and prints the response.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->delete_generated_image_async(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"
            );
            // do other work
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the result
            auto res = fut.get();
            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,22 @@
#include "liboai.h"
using namespace liboai;

// Azure example: retrieves the result of a previous image-generation
// operation, identified by its operation ID, and prints the response.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->get_generated_image(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"
            );
            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

View File

@ -0,0 +1,30 @@
#include "liboai.h"
using namespace liboai;

// Asynchronous Azure example: retrieves the result of a previous
// image-generation operation off-thread and prints the response.
int main() {
    OpenAI oai;
    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->get_generated_image_async(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"
            );
            // do other work
            // block until the future is ready (wait() blocks; it does not poll)
            fut.wait();
            // get the result
            auto res = fut.get();
            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) { // const ref, consistent with the other examples
            std::cout << e.what() << std::endl;
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More