media cpp 1/3
This commit is contained in:
parent
426b66828f
commit
e4b4ff4bea
@ -14,6 +14,9 @@
|
||||
},
|
||||
{
|
||||
"path": "../xblox"
|
||||
},
|
||||
{
|
||||
"path": "../media"
|
||||
}
|
||||
],
|
||||
"settings": {}
|
||||
|
||||
37
packages/media/cpp/.gitignore
vendored
Normal file
37
packages/media/cpp/.gitignore
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
# Build output
|
||||
/build/
|
||||
|
||||
# Compiled objects
|
||||
*.o
|
||||
*.obj
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
# CMake generated
|
||||
CMakeCache.txt
|
||||
CMakeFiles/
|
||||
cmake_install.cmake
|
||||
Makefile
|
||||
|
||||
# IDE / Editor
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.env*
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
cache/
|
||||
config/postgres.toml
|
||||
dist
|
||||
|
||||
# Orchestrator reports (cwd/tests/*)
|
||||
tests/*.json
|
||||
tests/*.md
|
||||
src/cmd_grid*.cpp
|
||||
177
packages/media/cpp/CMakeLists.txt
Normal file
177
packages/media/cpp/CMakeLists.txt
Normal file
@ -0,0 +1,177 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(kbot-cli
|
||||
VERSION 0.1.0
|
||||
DESCRIPTION "KBot C++ CLI"
|
||||
LANGUAGES CXX C
|
||||
)
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist")
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/dist")
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/dist")
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/dist")
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_SOURCE_DIR}/dist")
|
||||
|
||||
# ── C++ standard ─────────────────────────────────────────────────────────────
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
# ── Dependencies ─────────────────────────────────────────────────────────────
|
||||
include(FetchContent)
|
||||
|
||||
FetchContent_Declare(
|
||||
cli11
|
||||
GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git
|
||||
GIT_TAG v2.4.2
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
tomlplusplus
|
||||
GIT_REPOSITORY https://github.com/marzer/tomlplusplus.git
|
||||
GIT_TAG v3.4.0
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
Catch2
|
||||
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
|
||||
GIT_TAG v3.7.1
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
asio
|
||||
GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git
|
||||
GIT_TAG asio-1-28-0
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
concurrentqueue
|
||||
GIT_REPOSITORY https://github.com/cameron314/concurrentqueue.git
|
||||
GIT_TAG v1.0.4
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
taskflow
|
||||
GIT_REPOSITORY https://github.com/taskflow/taskflow.git
|
||||
GIT_TAG v3.6.0
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
nlohmann_json
|
||||
GIT_REPOSITORY https://github.com/nlohmann/json.git
|
||||
GIT_TAG v3.11.3
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
liboai
|
||||
GIT_REPOSITORY https://github.com/jasonduncan/liboai.git
|
||||
GIT_TAG main
|
||||
GIT_SHALLOW TRUE
|
||||
SOURCE_SUBDIR liboai
|
||||
)
|
||||
|
||||
# p-ranav/glob — Unix-style glob / rglob (C++17); avoid upstream CMake (CPM + gtest).
|
||||
FetchContent_Declare(
|
||||
pranav_glob
|
||||
GIT_REPOSITORY https://github.com/p-ranav/glob.git
|
||||
GIT_TAG master
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
FetchContent_GetProperties(pranav_glob)
|
||||
if(NOT pranav_glob_POPULATED)
|
||||
FetchContent_Populate(pranav_glob)
|
||||
endif()
|
||||
add_library(pranav_glob STATIC ${pranav_glob_SOURCE_DIR}/source/glob.cpp)
|
||||
target_include_directories(pranav_glob PUBLIC ${pranav_glob_SOURCE_DIR}/include)
|
||||
target_compile_features(pranav_glob PUBLIC cxx_std_17)
|
||||
if(MSVC)
|
||||
target_compile_options(pranav_glob PRIVATE /permissive-)
|
||||
endif()
|
||||
|
||||
# laserpants/dotenv-cpp — load .env into the process environment (header-only).
|
||||
FetchContent_Declare(
|
||||
laserpants_dotenv
|
||||
GIT_REPOSITORY https://github.com/laserpants/dotenv-cpp.git
|
||||
GIT_TAG master
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
FetchContent_GetProperties(laserpants_dotenv)
|
||||
if(NOT laserpants_dotenv_POPULATED)
|
||||
FetchContent_Populate(laserpants_dotenv)
|
||||
endif()
|
||||
add_library(laserpants_dotenv INTERFACE)
|
||||
target_include_directories(laserpants_dotenv INTERFACE ${laserpants_dotenv_SOURCE_DIR}/include)
|
||||
add_library(laserpants::dotenv ALIAS laserpants_dotenv)
|
||||
|
||||
set(TF_BUILD_TESTS OFF CACHE BOOL "" FORCE)
|
||||
set(TF_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
|
||||
set(JSON_BuildTests OFF CACHE BOOL "" FORCE)
|
||||
|
||||
FetchContent_MakeAvailable(cli11 tomlplusplus Catch2 asio concurrentqueue taskflow nlohmann_json)
|
||||
# ── Packages ─────────────────────────────────────────────────────────────────
|
||||
add_subdirectory(packages/logger)
|
||||
add_subdirectory(packages/html)
|
||||
add_subdirectory(packages/postgres)
|
||||
add_subdirectory(packages/http)
|
||||
add_subdirectory(packages/json)
|
||||
add_subdirectory(packages/polymech)
|
||||
add_subdirectory(packages/ipc)
|
||||
add_subdirectory(packages/liboai/liboai)
|
||||
|
||||
add_subdirectory(packages/kbot)
|
||||
|
||||
# ── Sources ──────────────────────────────────────────────────────────────────
|
||||
add_executable(${PROJECT_NAME}
|
||||
src/main.cpp
|
||||
src/cmd_kbot.cpp
|
||||
src/cmd_kbot_uds.cpp
|
||||
src/sys_metrics.cpp
|
||||
)
|
||||
|
||||
# Output file name is kbot.exe / kbot (not kbot-cli)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "kbot")
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE CLI11::CLI11 tomlplusplus::tomlplusplus logger html postgres http json polymech ipc kbot laserpants::dotenv)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
${asio_SOURCE_DIR}/asio/include
|
||||
${taskflow_SOURCE_DIR}
|
||||
${concurrentqueue_SOURCE_DIR}
|
||||
)
|
||||
|
||||
# Define standalone ASIO (since it's not boost)
|
||||
if(WIN32)
|
||||
# Enable math constants like M_PI
|
||||
add_compile_definitions(_USE_MATH_DEFINES)
|
||||
add_compile_definitions(NOMINMAX)
|
||||
endif()
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE ASIO_STANDALONE=1 ASIO_NO_DEPRECATED=1)
|
||||
|
||||
|
||||
# ── Compiler warnings ───────────────────────────────────────────────────────
|
||||
if(MSVC)
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE /W4 /permissive-)
|
||||
else()
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wextra -Wpedantic)
|
||||
endif()
|
||||
|
||||
# ── Install ──────────────────────────────────────────────────────────────────
|
||||
# Library + headers: see packages/kbot/CMakeLists.txt and packages/ipc/CMakeLists.txt
|
||||
# Optional DLL/so: configure with -DIPC_BUILD_SHARED=ON -DPOLYMECH_KBOT_SHARED=ON
|
||||
install(TARGETS ${PROJECT_NAME}
|
||||
RUNTIME DESTINATION bin
|
||||
)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/cmd_kbot.h
|
||||
DESTINATION include/polymech
|
||||
)
|
||||
|
||||
# ── Tests ────────────────────────────────────────────────────────────────────
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
50
packages/media/cpp/CMakePresets.json
Normal file
50
packages/media/cpp/CMakePresets.json
Normal file
@ -0,0 +1,50 @@
|
||||
{
|
||||
"version": 6,
|
||||
"cmakeMinimumRequired": {
|
||||
"major": 3,
|
||||
"minor": 20,
|
||||
"patch": 0
|
||||
},
|
||||
"configurePresets": [
|
||||
{
|
||||
"name": "dev",
|
||||
"displayName": "Dev (Debug)",
|
||||
"binaryDir": "${sourceDir}/build/dev",
|
||||
"cacheVariables": {
|
||||
"CMAKE_BUILD_TYPE": "Debug"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "release",
|
||||
"displayName": "Release",
|
||||
"binaryDir": "${sourceDir}/build/release",
|
||||
"cacheVariables": {
|
||||
"CMAKE_BUILD_TYPE": "Release"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "dev-dll",
|
||||
"displayName": "Dev (Debug, ipc + kbot as DLL)",
|
||||
"binaryDir": "${sourceDir}/build/dev-dll",
|
||||
"cacheVariables": {
|
||||
"CMAKE_BUILD_TYPE": "Debug",
|
||||
"IPC_BUILD_SHARED": "ON",
|
||||
"POLYMECH_KBOT_SHARED": "ON"
|
||||
}
|
||||
}
|
||||
],
|
||||
"buildPresets": [
|
||||
{
|
||||
"name": "dev",
|
||||
"configurePreset": "dev"
|
||||
},
|
||||
{
|
||||
"name": "release",
|
||||
"configurePreset": "release"
|
||||
},
|
||||
{
|
||||
"name": "dev-dll",
|
||||
"configurePreset": "dev-dll"
|
||||
}
|
||||
]
|
||||
}
|
||||
9
packages/media/cpp/LICENSE
Normal file
9
packages/media/cpp/LICENSE
Normal file
@ -0,0 +1,9 @@
|
||||
Copyright (c) <year> <owner> All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
266
packages/media/cpp/README.md
Normal file
266
packages/media/cpp/README.md
Normal file
@ -0,0 +1,266 @@
|
||||
# kbot (C++)
|
||||
|
||||
CMake-based C++ toolchain for **kbot**: HTML/HTTP/JSON utilities, **length-prefixed JSON IPC**, optional **UDS/TCP worker** for Node orchestrators, and **LLM chat** via liboai (OpenRouter, OpenAI, Ollama-compatible servers, etc.). The main binary is **`kbot`** (`kbot.exe` on Windows).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|--------|
|
||||
| CMake | ≥ 3.20 |
|
||||
| C++ compiler | C++17 (MSVC, GCC, Clang) |
|
||||
| Git | For `FetchContent` dependencies |
|
||||
| Node.js | Optional; for `orchestrator/` IPC integration tests (`npm run test:ipc`) |
|
||||
|
||||
On Windows, use a **Developer Command Prompt** or **PowerShell** with MSVC in `PATH`. **Git Bash** helps if you use shell scripts under `scripts/`.
|
||||
|
||||
## Quick start (build)
|
||||
|
||||
From this directory (`packages/media/cpp`):
|
||||
|
||||
```bash
|
||||
npm install # optional; only needed if you use npm scripts
|
||||
npm run build
|
||||
```
|
||||
|
||||
Artifacts go to **`dist/`** (e.g. `dist/kbot.exe`, test tools).
|
||||
|
||||
Equivalent CMake:
|
||||
|
||||
```bash
|
||||
cmake --preset dev
|
||||
cmake --build --preset dev
|
||||
```
|
||||
|
||||
### Presets
|
||||
|
||||
| Preset | Role |
|
||||
|--------|------|
|
||||
| `dev` | Debug, static `ipc` + `kbot` libraries (default) |
|
||||
| `release` | Release build |
|
||||
| `dev-dll` | Debug with **`ipc.dll`** and **`kbot.dll`** (`IPC_BUILD_SHARED=ON`, `POLYMECH_KBOT_SHARED=ON`) |
|
||||
|
||||
```bash
|
||||
cmake --preset dev-dll
|
||||
cmake --build --preset dev-dll --config Debug
|
||||
```
|
||||
|
||||
Place **`ipc.dll`** and **`kbot.dll`** next to **`kbot.exe`** (or on `PATH`) when using the DLL configuration.
|
||||
|
||||
### npm scripts (reference)
|
||||
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `npm run build` | Configure `dev` + build |
|
||||
| `npm run build:release` | Release preset |
|
||||
| `npm run test` | `ctest` in `build/dev` |
|
||||
| `npm run clean` | Remove `build/` and `dist/` |
|
||||
| `npm run test:ipc` | Node UDS IPC integration test |
|
||||
| `npm run worker` | Run worker (stdio IPC) |
|
||||
|
||||
## Installation
|
||||
|
||||
Install the CLI and headers into a prefix (e.g. local tree or system root):
|
||||
|
||||
```bash
|
||||
cmake --install build/dev --prefix "C:/path/to/install"
|
||||
```
|
||||
|
||||
This installs:
|
||||
|
||||
- **`bin/kbot`** (runtime)
|
||||
- **`include/polymech/`** — `kbot.h`, `llm_client.h`, `polymech_export.h`, `cmd_kbot.h`
|
||||
- **`include/ipc/`** — `ipc.h`, `ipc_export.h`
|
||||
- **`lib/`** — import libraries / archives (depending on static vs shared)
|
||||
|
||||
Library layout is defined in `packages/kbot/CMakeLists.txt` and `packages/ipc/CMakeLists.txt`.
|
||||
|
||||
### CMake options (libraries)
|
||||
|
||||
| Cache variable | Effect |
|
||||
|----------------|--------|
|
||||
| `IPC_BUILD_SHARED` | Build **`ipc`** as a shared library (`OFF` default) |
|
||||
| `POLYMECH_KBOT_SHARED` | Build **`kbot`** as a shared library (`OFF` default) |
|
||||
|
||||
Static builds define `IPC_STATIC_BUILD` / `POLYMECH_STATIC_BUILD` for consumers via `INTERFACE` compile definitions. Shared builds export **`IPC_API`** / **`POLYMECH_API`** (see `ipc_export.h`, `polymech_export.h`).
|
||||
|
||||
## CLI overview
|
||||
|
||||
Top-level:
|
||||
|
||||
```bash
|
||||
kbot --help
|
||||
kbot -v,--version
|
||||
kbot --log-level debug|info|warn|error
|
||||
```
|
||||
|
||||
### Subcommands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `parse <html>` | Parse HTML and list elements |
|
||||
| `select <html> <selector>` | CSS-select elements |
|
||||
| `config <file>` | Load and print a TOML file |
|
||||
| `fetch <url>` | HTTP GET |
|
||||
| `json <input>` | Prettify JSON |
|
||||
| `db [-c config] [table] [-l limit]` | Supabase / DB helper (uses `config/postgres.toml` by default) |
|
||||
| `worker [--uds <arg>]` | IPC worker (see below) |
|
||||
| `kbot ai ...` / `kbot run ...` | AI and run pipelines (registered via `setup_cmd_kbot`; invoked as the nested command — see `kbot kbot ai --help`) |
|
||||
|
||||
### Worker mode (`kbot worker`)
|
||||
|
||||
Used by orchestrators and tests.
|
||||
|
||||
- **Stdio IPC** (length-prefixed JSON frames on stdin/stdout):
|
||||
|
||||
```bash
|
||||
kbot worker
|
||||
```
|
||||
|
||||
- **UDS / TCP** (Windows: TCP port string, e.g. `4001`; Unix: socket path):
|
||||
|
||||
```bash
|
||||
kbot worker --uds 4001
|
||||
```
|
||||
|
||||
Framing: `[uint32 LE length][UTF-8 JSON object with id, type, payload]`. Message types include `ping`, `job`, `kbot-ai`, `kbot-run`, `shutdown`, etc. See `src/main.cpp` and `orchestrator/test-ipc.mjs`.
|
||||
|
||||
### `kbot kbot` (nested)
|
||||
|
||||
CLI for AI tasks and run configurations:
|
||||
|
||||
```bash
|
||||
kbot kbot ai --help
|
||||
kbot kbot run --help
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
kbot kbot ai --prompt "Hello" --config config/postgres.toml
|
||||
```
|
||||
|
||||
API keys are typically resolved from **`config/postgres.toml`** (`[services]`).
|
||||
|
||||
## Using in other CMake projects
|
||||
|
||||
There is no single `find_package(kbot)` config yet. Practical options:
|
||||
|
||||
### 1. Same repository / superbuild (recommended)
|
||||
|
||||
Add this repo’s `cpp` tree as a subdirectory from a parent `CMakeLists.txt` so `FetchContent` and internal targets (`logger`, `json`, `ipc`, `oai`, `kbot`, …) resolve once. Then:
|
||||
|
||||
```cmake
|
||||
target_link_libraries(your_app PRIVATE ipc kbot)
|
||||
```
|
||||
|
||||
`kbot` pulls in `logger`, `json`, `liboai` (`oai`) per `packages/kbot/CMakeLists.txt`.
|
||||
|
||||
### 2. Install prefix + explicit `IMPORTED` libraries
|
||||
|
||||
After `cmake --install`, link import libraries under `lib/` and add `include/` for **`ipc`** and **`polymech`**. You must still satisfy **transitive** dependencies (`oai`, `logger`, `json`, …) from the **same** build/install of this project, or duplicate their build—usually easier to use option 1.
|
||||
|
||||
### 3. Minimal example: IPC framing only
|
||||
|
||||
If you only need **`ipc::encode` / `ipc::decode`** (and can build `logger` + `json` the same way this project does), mirror `packages/ipc/CMakeLists.txt`:
|
||||
|
||||
```cmake
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(myapp CXX)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
add_subdirectory(path/to/polymech-mono/packages/kbot/cpp/packages/logger)
|
||||
add_subdirectory(path/to/polymech-mono/packages/kbot/cpp/packages/json)
|
||||
add_subdirectory(path/to/polymech-mono/packages/kbot/cpp/packages/ipc)
|
||||
|
||||
add_executable(myapp main.cpp)
|
||||
target_link_libraries(myapp PRIVATE ipc)
|
||||
```
|
||||
|
||||
**`main.cpp`** (stdio-style framing helpers):
|
||||
|
||||
```cpp
|
||||
#include <iostream>
|
||||
#include <ipc/ipc.h>
|
||||
|
||||
int main() {
|
||||
ipc::Message msg{"1", "ping", "{}"};
|
||||
auto frame = ipc::encode(msg);
|
||||
// frame: 4-byte LE length + JSON object bytes
|
||||
|
||||
ipc::Message roundtrip;
|
||||
if (frame.size() > 4 &&
|
||||
ipc::decode(frame.data() + 4, frame.size() - 4, roundtrip)) {
|
||||
std::cout << roundtrip.type << "\n"; // ping
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Example: LLM pipeline API (`kbot` library)
|
||||
|
||||
Headers: `kbot.h`, `llm_client.h`, `polymech_export.h`. You need a valid API key and options (see `KBotOptions` in `kbot.h`).
|
||||
|
||||
```cpp
|
||||
#include <iostream>
|
||||
#include "kbot.h"
|
||||
#include "llm_client.h"
|
||||
|
||||
int main() {
|
||||
polymech::kbot::KBotOptions opts;
|
||||
opts.prompt = "Say hello in one sentence.";
|
||||
opts.api_key = "YOUR_KEY";
|
||||
opts.router = "openrouter";
|
||||
opts.model = "openai/gpt-4o-mini";
|
||||
|
||||
polymech::kbot::LLMClient client(opts);
|
||||
polymech::kbot::LLMResponse r = client.execute_chat(opts.prompt);
|
||||
if (r.success) {
|
||||
std::cout << r.text << "\n";
|
||||
} else {
|
||||
std::cerr << r.error << "\n";
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Or use the callback-based pipeline:
|
||||
|
||||
```cpp
|
||||
polymech::kbot::KBotCallbacks cb;
|
||||
cb.onEvent = [](const std::string& type, const std::string& json) {
|
||||
std::cout << type << ": " << json << "\n";
|
||||
};
|
||||
return polymech::kbot::run_kbot_ai_pipeline(opts, cb);
|
||||
```
|
||||
|
||||
Link **`kbot`** (and its public dependencies). **`cmd_kbot.h`** entry points (`run_kbot_ai_ipc`, `run_cmd_kbot_uds`, …) are implemented in **`src/cmd_kbot*.cpp`** in this project; to reuse them, compile those sources into your binary or vendor the logic.
|
||||
|
||||
## Node / IPC tests
|
||||
|
||||
Integration tests live under **`orchestrator/`** (see comments in `orchestrator/test-ipc.mjs`). Typical run from `cpp/`:
|
||||
|
||||
```bash
|
||||
npm run test:ipc
|
||||
```
|
||||
|
||||
Classifier batch (semantic distances vs JobViewer labels):
|
||||
|
||||
```bash
|
||||
npm run test:ipc:classifier
|
||||
npm run test:ipc:classifier:openrouter
|
||||
```
|
||||
|
||||
Stress: repeat the **same** batched `kbot-ai` call **N** times on **one** worker; prints per-run wall time, token usage (when present), then **min / max / avg / p50 / p95** and Σ tokens. Default **N = 5** for the OpenRouter stress script:
|
||||
|
||||
```bash
|
||||
npm run test:ipc:classifier:openrouter:stress
|
||||
npm run test:ipc:classifier -- -r openrouter -m openai/gpt-4o-mini --backend remote -n 3
|
||||
KBOT_CLASSIFIER_STRESS_RUNS=10 npm run test:ipc:classifier:openrouter:stress
|
||||
```
|
||||
|
||||
Requires a built **`dist/kbot.exe`** (or `kbot` on Unix). Set API keys via `config/postgres.toml` for OpenRouter.
|
||||
|
||||
## License
|
||||
|
||||
See [LICENSE](LICENSE) in this directory.
|
||||
112
packages/media/cpp/a.json
Normal file
112
packages/media/cpp/a.json
Normal file
@ -0,0 +1,112 @@
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"label": "3D printing service",
|
||||
"distance": 6.0
|
||||
},
|
||||
{
|
||||
"label": "Drafting service",
|
||||
"distance": 7.0
|
||||
},
|
||||
{
|
||||
"label": "Engraver",
|
||||
"distance": 6.5
|
||||
},
|
||||
{
|
||||
"label": "Furniture maker",
|
||||
"distance": 7.5
|
||||
},
|
||||
{
|
||||
"label": "Industrial engineer",
|
||||
"distance": 7.0
|
||||
},
|
||||
{
|
||||
"label": "Industrial equipment supplier",
|
||||
"distance": 5.5
|
||||
},
|
||||
{
|
||||
"label": "Laser cutting service",
|
||||
"distance": 4.5
|
||||
},
|
||||
{
|
||||
"label": "Machine construction",
|
||||
"distance": 3.0
|
||||
},
|
||||
{
|
||||
"label": "Machine repair service",
|
||||
"distance": 2.5
|
||||
},
|
||||
{
|
||||
"label": "Machine shop",
|
||||
"distance": 0.2
|
||||
},
|
||||
{
|
||||
"label": "Machine workshop",
|
||||
"distance": 0.0
|
||||
},
|
||||
{
|
||||
"label": "Machinery parts manufacturer",
|
||||
"distance": 2.0
|
||||
},
|
||||
{
|
||||
"label": "Machining manufacturer",
|
||||
"distance": 1.5
|
||||
},
|
||||
{
|
||||
"label": "Manufacturer",
|
||||
"distance": 6.0
|
||||
},
|
||||
{
|
||||
"label": "Mechanic",
|
||||
"distance": 5.0
|
||||
},
|
||||
{
|
||||
"label": "Mechanical engineer",
|
||||
"distance": 6.5
|
||||
},
|
||||
{
|
||||
"label": "Mechanical plant",
|
||||
"distance": 3.5
|
||||
},
|
||||
{
|
||||
"label": "Metal fabricator",
|
||||
"distance": 2.0
|
||||
},
|
||||
{
|
||||
"label": "Metal heat treating service",
|
||||
"distance": 3.5
|
||||
},
|
||||
{
|
||||
"label": "Metal machinery supplier",
|
||||
"distance": 5.0
|
||||
},
|
||||
{
|
||||
"label": "Metal working shop",
|
||||
"distance": 1.0
|
||||
},
|
||||
{
|
||||
"label": "Metal workshop",
|
||||
"distance": 1.2
|
||||
},
|
||||
{
|
||||
"label": "Novelty store",
|
||||
"distance": 10.0
|
||||
},
|
||||
{
|
||||
"label": "Plywood supplier",
|
||||
"distance": 9.5
|
||||
},
|
||||
{
|
||||
"label": "Sign shop",
|
||||
"distance": 7.5
|
||||
},
|
||||
{
|
||||
"label": "Tool manufacturer",
|
||||
"distance": 3.0
|
||||
},
|
||||
{
|
||||
"label": "Trophy shop",
|
||||
"distance": 8.0
|
||||
}
|
||||
]
|
||||
}
|
||||
6
packages/media/cpp/build-linux.sh
Normal file
6
packages/media/cpp/build-linux.sh
Normal file
@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
#rm -rf /tmp/polymech-build
|
||||
mkdir -p /tmp/polymech-build
|
||||
export PATH="/snap/bin:$PATH"
|
||||
cmake -S ./ -B /tmp/polymech-build -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build /tmp/polymech-build
|
||||
12
packages/media/cpp/config.toml
Normal file
12
packages/media/cpp/config.toml
Normal file
@ -0,0 +1,12 @@
|
||||
[project]
|
||||
name = "polymech"
|
||||
version = "0.1.0"
|
||||
description = "Polymech C++ CLI"
|
||||
|
||||
[database]
|
||||
host = "localhost"
|
||||
port = 5432
|
||||
name = "polymech"
|
||||
|
||||
[logging]
|
||||
level = "debug"
|
||||
43
packages/media/cpp/config/gridsearch-bcn-universities.json
Normal file
43
packages/media/cpp/config/gridsearch-bcn-universities.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"guided": {
|
||||
"areas": [
|
||||
{
|
||||
"gid": "ESP.6.1.10.14_1",
|
||||
"name": "Sabadell",
|
||||
"level": 4,
|
||||
"raw": {
|
||||
"level": 3,
|
||||
"gadmName": "Sabadell",
|
||||
"gid": "ESP.6.1.10.14_1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"gridMode": "centers",
|
||||
"pathOrder": "snake",
|
||||
"groupByRegion": false,
|
||||
"cellSize": 5,
|
||||
"cellOverlap": 0,
|
||||
"centroidOverlap": 0,
|
||||
"ghsFilterMode": "OR",
|
||||
"maxCellsLimit": 50000,
|
||||
"maxElevation": 1000,
|
||||
"minDensity": 0,
|
||||
"minGhsPop": 0,
|
||||
"minGhsBuilt": 0,
|
||||
"allowMissingGhs": false,
|
||||
"bypassFilters": false
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"types": [
|
||||
"university"
|
||||
],
|
||||
"filterCountry": "",
|
||||
"googleDomain": "google.com",
|
||||
"limitPerArea": 20,
|
||||
"zoom": 15,
|
||||
"language": "en"
|
||||
},
|
||||
"filterTypes": []
|
||||
}
|
||||
49
packages/media/cpp/config/gridsearch-lamu.json
Normal file
49
packages/media/cpp/config/gridsearch-lamu.json
Normal file
@ -0,0 +1,49 @@
|
||||
{
|
||||
"guided": {
|
||||
"areas": [
|
||||
{
|
||||
"gid": "KEN.21_1",
|
||||
"name": "Lamu",
|
||||
"level": 1,
|
||||
"raw": {
|
||||
"gid": "KEN.21_1",
|
||||
"gadmName": "Lamu",
|
||||
"level": 1
|
||||
}
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"gridMode": "centers",
|
||||
"pathOrder": "snake",
|
||||
"groupByRegion": true,
|
||||
"cellSize": 5,
|
||||
"cellOverlap": 0,
|
||||
"centroidOverlap": 50,
|
||||
"ghsFilterMode": "OR",
|
||||
"maxCellsLimit": 50000,
|
||||
"maxElevation": 1000,
|
||||
"minDensity": 10,
|
||||
"minGhsPop": 26,
|
||||
"minGhsBuilt": 154,
|
||||
"enableElevation": false,
|
||||
"enableDensity": false,
|
||||
"enableGhsPop": false,
|
||||
"enableGhsBuilt": false,
|
||||
"allowMissingGhs": false,
|
||||
"bypassFilters": true
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"types": [
|
||||
"plastic"
|
||||
],
|
||||
"filterCountry": "",
|
||||
"googleDomain": "google.com",
|
||||
"limitPerArea": 20,
|
||||
"zoom": 15,
|
||||
"language": "en"
|
||||
},
|
||||
"filterTypes": [
|
||||
"Recycling center"
|
||||
]
|
||||
}
|
||||
40
packages/media/cpp/config/gridsearch-sample.json
Normal file
40
packages/media/cpp/config/gridsearch-sample.json
Normal file
@ -0,0 +1,40 @@
|
||||
{
|
||||
"guided": {
|
||||
"areas": [
|
||||
{
|
||||
"gid": "ABW",
|
||||
"name": "Aruba",
|
||||
"level": 0
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"gridMode": "centers",
|
||||
"pathOrder": "snake",
|
||||
"groupByRegion": false,
|
||||
"cellSize": 5,
|
||||
"cellOverlap": 0,
|
||||
"centroidOverlap": 0,
|
||||
"ghsFilterMode": "OR",
|
||||
"maxCellsLimit": 50000,
|
||||
"maxElevation": 1000,
|
||||
"minDensity": 0,
|
||||
"minGhsPop": 0,
|
||||
"minGhsBuilt": 0,
|
||||
"allowMissingGhs": false,
|
||||
"bypassFilters": false
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"types": [
|
||||
"recycling"
|
||||
],
|
||||
"filterCountry": "",
|
||||
"googleDomain": "google.com",
|
||||
"limitPerArea": 20,
|
||||
"zoom": 15,
|
||||
"language": "en"
|
||||
},
|
||||
"filterTypes": [
|
||||
"Recycling center"
|
||||
]
|
||||
}
|
||||
45
packages/media/cpp/config/gridsearch-test-bcn-large.json
Normal file
45
packages/media/cpp/config/gridsearch-test-bcn-large.json
Normal file
@ -0,0 +1,45 @@
|
||||
{
|
||||
"guided": {
|
||||
"areas": [
|
||||
{
|
||||
"gid": "ESP.6.1_1",
|
||||
"name": "Barcelona",
|
||||
"level": 3,
|
||||
"raw": {
|
||||
"level": 2,
|
||||
"gadmName": "Barcelona",
|
||||
"gid": "ESP.6.1_1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"gridMode": "centers",
|
||||
"pathOrder": "snake",
|
||||
"groupByRegion": true,
|
||||
"cellSize": 5,
|
||||
"cellOverlap": 0,
|
||||
"centroidOverlap": 0,
|
||||
"ghsFilterMode": "OR",
|
||||
"maxCellsLimit": 50000,
|
||||
"maxElevation": 1000,
|
||||
"minDensity": 10,
|
||||
"minGhsPop": 26,
|
||||
"minGhsBuilt": 154,
|
||||
"enableElevation": false,
|
||||
"enableDensity": false,
|
||||
"enableGhsPop": false,
|
||||
"enableGhsBuilt": false,
|
||||
"allowMissingGhs": false,
|
||||
"bypassFilters": true
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"types": [
|
||||
"marketing"
|
||||
],
|
||||
"filterCountry": "Spain",
|
||||
"googleDomain": "google.es",
|
||||
"limitPerArea": 10,
|
||||
"useCache": true
|
||||
}
|
||||
}
|
||||
85
packages/media/cpp/config/gridsearch-test-bcn.json
Normal file
85
packages/media/cpp/config/gridsearch-test-bcn.json
Normal file
@ -0,0 +1,85 @@
|
||||
{
|
||||
"guided": {
|
||||
"areas": [
|
||||
{
|
||||
"gid": "ESP.6.1.10.2_1",
|
||||
"name": "Barberà del Vallès",
|
||||
"level": 4,
|
||||
"raw": {
|
||||
"level": 4,
|
||||
"gadmName": "Barberà del Vallès",
|
||||
"gid": "ESP.6.1.10.2_1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"gid": "ESP.6.1.10.14_1",
|
||||
"name": "Sabadell",
|
||||
"level": 4,
|
||||
"raw": {
|
||||
"level": 4,
|
||||
"gadmName": "Sabadell",
|
||||
"gid": "ESP.6.1.10.14_1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"gid": "ESP.6.1.10.11_1",
|
||||
"name": "Polinyà",
|
||||
"level": 4,
|
||||
"raw": {
|
||||
"level": 4,
|
||||
"gadmName": "Polinyà",
|
||||
"gid": "ESP.6.1.10.11_1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"gid": "ESP.6.1.10.4_1",
|
||||
"name": "Castellar del Vallès",
|
||||
"level": 4,
|
||||
"raw": {
|
||||
"level": 4,
|
||||
"gadmName": "Castellar del Vallès",
|
||||
"gid": "ESP.6.1.10.4_1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"gid": "ESP.6.1.10.19_1",
|
||||
"name": "Sentmenat",
|
||||
"level": 4,
|
||||
"raw": {
|
||||
"level": 4,
|
||||
"gadmName": "Sentmenat",
|
||||
"gid": "ESP.6.1.10.19_1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"gridMode": "centers",
|
||||
"pathOrder": "snake",
|
||||
"groupByRegion": true,
|
||||
"cellSize": 10,
|
||||
"cellOverlap": 0,
|
||||
"centroidOverlap": 0,
|
||||
"ghsFilterMode": "OR",
|
||||
"maxCellsLimit": 50000,
|
||||
"maxElevation": 1000,
|
||||
"minDensity": 10,
|
||||
"minGhsPop": 26,
|
||||
"minGhsBuilt": 154,
|
||||
"enableElevation": false,
|
||||
"enableDensity": false,
|
||||
"enableGhsPop": false,
|
||||
"enableGhsBuilt": false,
|
||||
"allowMissingGhs": false,
|
||||
"bypassFilters": true
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"types": [
|
||||
"mecanizado cnc"
|
||||
],
|
||||
"filterCountry": "Spain",
|
||||
"googleDomain": "google.es",
|
||||
"limitPerArea": 10,
|
||||
"useCache": true
|
||||
}
|
||||
}
|
||||
37
packages/media/cpp/config/gridsearch-test.json
Normal file
37
packages/media/cpp/config/gridsearch-test.json
Normal file
@ -0,0 +1,37 @@
|
||||
{
|
||||
"guided": {
|
||||
"areas": [
|
||||
{
|
||||
"gid": "ABW",
|
||||
"name": "Aruba",
|
||||
"level": 0
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"gridMode": "centers",
|
||||
"pathOrder": "snake",
|
||||
"groupByRegion": false,
|
||||
"cellSize": 5,
|
||||
"cellOverlap": 0,
|
||||
"centroidOverlap": 0,
|
||||
"ghsFilterMode": "OR",
|
||||
"maxCellsLimit": 50000,
|
||||
"maxElevation": 1000,
|
||||
"minDensity": 0,
|
||||
"minGhsPop": 0,
|
||||
"minGhsBuilt": 0,
|
||||
"allowMissingGhs": false,
|
||||
"bypassFilters": false
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"types": [
|
||||
"recycling"
|
||||
],
|
||||
"filterCountry": "",
|
||||
"googleDomain": "google.com",
|
||||
"limitPerArea": 1,
|
||||
"zoom": 15,
|
||||
"language": "en"
|
||||
}
|
||||
}
|
||||
60
packages/media/cpp/install-lnx.sh
Normal file
60
packages/media/cpp/install-lnx.sh
Normal file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# install-lnx.sh – Install build dependencies for polymech-cli on Linux
#
# Tested on: Ubuntu 20.04+ / Debian 11+
# Usage: sudo bash install-lnx.sh
# ─────────────────────────────────────────────────────────────────────────────
# Fail fast: exit on error, on unset variables, and on pipeline failures.
set -euo pipefail

echo "── polymech-cli Linux dependency installer ──"

# ── 1. System packages (apt) ─────────────────────────────────────────────────
# Requires root (apt-get / snap) — see Usage above.
echo ""
echo "[1/3] Installing system packages via apt …"
apt-get update -qq
apt-get install -y --no-install-recommends \
  build-essential \
  gcc \
  g++ \
  git \
  libssl-dev \
  pkg-config \
  snapd

# ── 2. CMake ≥ 3.20 via snap ────────────────────────────────────────────────
# The project requires cmake_minimum_required(VERSION 3.20).
# Ubuntu 20.04 ships cmake 3.16, so we use the snap package instead.
echo ""
echo "[2/3] Installing CMake via snap (≥ 3.20 required) …"
# Check the snap-installed binary path explicitly (it may not be on PATH yet).
if command -v /snap/bin/cmake &>/dev/null; then
  echo " cmake snap already installed: $(/snap/bin/cmake --version | head -1)"
else
  snap install cmake --classic
  echo " Installed: $(/snap/bin/cmake --version | head -1)"
fi

# ── 3. Node.js (for npm run build:linux) ──────────────────────────────────────
# Node is only checked, not installed — print instructions when missing.
echo ""
echo "[3/3] Checking for Node.js / npm …"
if command -v node &>/dev/null; then
  echo " node $(node --version) already installed"
else
  echo " Node.js not found. Install via nvm or nodesource, e.g.:"
  echo " curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -"
  echo " sudo apt-get install -y nodejs"
fi

# ── Summary ──────────────────────────────────────────────────────────────────
echo ""
echo "── Done! ──"
echo ""
echo "All C++ dependencies (CLI11, tomlplusplus, Catch2, asio, concurrentqueue,"
echo "taskflow, curl, lexbor, rapidjson) are fetched automatically by CMake"
echo "FetchContent at build time — no manual installation needed."
echo ""
echo "To build:"
echo " cd $(dirname "$0")"
echo " npm run build:linux"
echo ""
echo "The binary will be placed in: dist/polymech-cli"
||||
@ -0,0 +1,8 @@
|
||||
/**
 * OpenRouter classifier + stress defaults: remote router, N batch iterations (see KBOT_CLASSIFIER_STRESS_RUNS).
 */
// Force the classifier test onto the remote router path (disable local llama).
process.env.KBOT_IPC_CLASSIFIER_LLAMA = '0';
// Default to 5 stress iterations when the caller set no (or an empty) count.
if (process.env.KBOT_CLASSIFIER_STRESS_RUNS === undefined || process.env.KBOT_CLASSIFIER_STRESS_RUNS === '') {
  process.env.KBOT_CLASSIFIER_STRESS_RUNS = '5';
}
// Env must be in place before the dynamic import: the test reads it at load time.
await import('./test-ipc-classifier.mjs');
|
||||
@ -0,0 +1,6 @@
|
||||
/**
 * Sets KBOT_IPC_CLASSIFIER_LLAMA=0 then runs the classifier IPC test against
 * KBOT_ROUTER / KBOT_IPC_MODEL (default router: openrouter — see presets.js).
 */
// Env must be set before the dynamic import so the test picks up the mode.
process.env.KBOT_IPC_CLASSIFIER_LLAMA = '0';
await import('./test-ipc-classifier.mjs');
|
||||
186
packages/media/cpp/orchestrator/presets.js
Normal file
186
packages/media/cpp/orchestrator/presets.js
Normal file
@ -0,0 +1,186 @@
|
||||
/**
|
||||
* orchestrator/presets.js — defaults for IPC integration tests (extend here as suites grow).
|
||||
*
|
||||
* Llama local runner (llama-basics.test.ts): OpenAI-compatible API at http://localhost:8888/v1,
|
||||
* router `ollama` + `base_url` override, model `default` (server picks loaded GGUF).
|
||||
*/
|
||||
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { existsSync } from 'node:fs';
|
||||
|
||||
import { probeTcpPort } from './test-commons.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
// OS switch used throughout: Windows uses TCP + `.exe`; POSIX uses Unix sockets.
export const platform = {
  isWin: process.platform === 'win32',
};
|
||||
|
||||
/** kbot/cpp root (parent of orchestrator/). */
export const paths = {
  // Directory containing this module (orchestrator/).
  orchestratorDir: __dirname,
  // Repo-relative C++ package root.
  cppRoot: resolve(__dirname, '..'),
  /** Same as packages/kbot/cpp/scripts/run-7b.sh — llama-server on :8888 */
  run7bScript: resolve(__dirname, '../scripts/run-7b.sh'),
};
|
||||
|
||||
/** Dist binary name for the current OS (Windows appends `.exe`). */
export function exeName() {
  if (platform.isWin) {
    return 'kbot.exe';
  }
  return 'kbot';
}
|
||||
|
||||
/** Absolute path to kbot binary given orchestrator/ directory (where test-ipc.mjs lives). */
export function distExePath(orchestratorDir) {
  const distDir = resolve(orchestratorDir, '..', 'dist');
  return resolve(distDir, exeName());
}
|
||||
|
||||
/** UDS / TCP listen argument passed to `kbot worker --uds <arg>`. */
export const uds = {
  // TCP fallback port used on Windows (no Unix domain sockets there).
  tcpPort: 4001,
  // Unix domain socket path used on POSIX systems.
  unixPath: '/tmp/kbot-test-ipc.sock',
  /** Value for `--uds` on this OS (Windows: port string; Unix: socket path). */
  workerArg() {
    return platform.isWin ? String(this.tcpPort) : this.unixPath;
  },
  /** Options for `net.connect` to reach the worker. */
  connectOpts(cppUdsArg) {
    return platform.isWin
      ? { port: this.tcpPort, host: '127.0.0.1' }
      : cppUdsArg;
  },
};
|
||||
|
||||
/** Millisecond timeouts — tune per step in new tests. */
export const timeouts = {
  // Default per-request IPC timeout.
  ipcDefault: 5000,
  // Live LLM round-trip budget (remote router).
  kbotAi: 180_000,
  /** Llama local arithmetic (same order of magnitude as kbot-ai). */
  llamaKbotAi: 180_000,
  /** Max wait for :8888 after spawning run-7b.sh (model load can be slow). */
  llamaServerStart: Number(process.env.KBOT_LLAMA_START_TIMEOUT_MS || 600_000),
  // Worker-socket connect retry policy.
  connectAttempts: 15,
  connectRetryMs: 400,
  // Grace period after a graceful shutdown before asserting exit.
  postShutdownMs: 200,
};
|
||||
|
||||
// Router selection: KBOT_ROUTER wins when set and non-empty, else 'openrouter'.
export const router = {
  default: 'openrouter',
  fromEnv() {
    const fromVar = process.env.KBOT_ROUTER;
    return fromVar ? fromVar : this.default;
  },
};
|
||||
|
||||
/**
 * Local llama.cpp HTTP server — mirrors tests/unit/llama-basics.test.ts (LLAMA_OPTS).
 * Uses router `ollama` so api_key resolves to dummy `ollama`; `base_url` points at :8888/v1.
 */
export const llama = {
  /** Server port (KBOT_LLAMA_PORT, default 8888). */
  get port() {
    const raw = process.env.KBOT_LLAMA_PORT;
    return Number(raw || 8888);
  },
  /** Probe host (KBOT_LLAMA_HOST, default loopback). */
  get host() {
    const raw = process.env.KBOT_LLAMA_HOST;
    return raw || '127.0.0.1';
  },
  /** OpenAI-compatible base URL (KBOT_LLAMA_BASE_URL overrides the port-derived default). */
  get baseURL() {
    const raw = process.env.KBOT_LLAMA_BASE_URL;
    return raw || `http://localhost:${this.port}/v1`;
  },
  router: 'ollama',
  /** Model id (KBOT_LLAMA_MODEL; `default` lets the server pick the loaded GGUF). */
  get model() {
    const raw = process.env.KBOT_LLAMA_MODEL;
    return raw || 'default';
  },
  prompts: {
    /** Same idea as llama-basics completion tests. */
    add5_3: 'What is 5 + 3? Reply with just the number, nothing else.',
  },
};
|
||||
|
||||
/**
 * IPC payload for kbot-ai → local llama-server (OpenAI-compatible).
 * Pass `base_url` so LLMClient uses port 8888 instead of default ollama :11434.
 */
export function kbotAiPayloadLlamaLocal(overrides = {}) {
  const payload = {
    prompt: llama.prompts.add5_3,
    router: llama.router,
    model: llama.model,
    base_url: llama.baseURL,
    ...overrides,
  };
  // Accept a legacy camelCase `baseURL` override, but always ship snake_case.
  if (payload.base_url == null) {
    payload.base_url = payload.baseURL == null ? llama.baseURL : payload.baseURL;
  }
  delete payload.baseURL;
  return payload;
}
|
||||
|
||||
/** Stock prompts and assertions helpers for LLM smoke tests. */
export const prompts = {
  // Default kbot-ai prompt; used for the optional Berlin assertion downstream.
  germanyCapital: 'What is the capital of Germany? Reply in one short sentence.',
};
|
||||
|
||||
/** Build `kbot-ai` IPC payload from env + presets (OpenRouter-friendly defaults). */
export function kbotAiPayloadFromEnv() {
  const model = process.env.KBOT_IPC_MODEL;
  return {
    prompt: process.env.KBOT_IPC_PROMPT || prompts.germanyCapital,
    router: router.fromEnv(),
    // `model` is only attached when the env var is set and non-empty.
    ...(model ? { model } : {}),
  };
}
|
||||
|
||||
/** True when using the default Germany prompt (for optional Berlin assertion). */
export function usingDefaultGermanyPrompt() {
  const custom = process.env.KBOT_IPC_PROMPT;
  return custom ? false : true;
}
|
||||
|
||||
/**
 * If nothing listens on llama.port, optionally spawn `scripts/run-7b.sh` (requires `sh` on PATH, e.g. Git Bash on Windows).
 *
 * @param {{ autostart?: boolean, startTimeoutMs?: number }} [opts]
 * @returns {Promise<{ ok: boolean, alreadyRunning: boolean, started?: boolean, pid?: number }>}
 */
export async function ensureLlamaLocalServer(opts = {}) {
  const autostart = opts.autostart ?? true;
  const startTimeoutMs = opts.startTimeoutMs ?? timeouts.llamaServerStart;
  const host = llama.host;
  const port = llama.port;
  const scriptPath = paths.run7bScript;

  // Fast path: a server already accepts connections.
  if (await probeTcpPort(host, port, 1500)) {
    return { ok: true, alreadyRunning: true };
  }

  if (!autostart) {
    throw new Error(
      `[llama] Nothing listening on ${host}:${port}. Start the server (e.g. sh scripts/run-7b.sh), or remove KBOT_IPC_LLAMA_AUTOSTART=0 to allow autostart`
    );
  }

  if (!existsSync(scriptPath)) {
    throw new Error(`[llama] Script missing: ${scriptPath}`);
  }

  console.log(`[llama] Port ${port} closed — starting ${scriptPath} (timeout ${startTimeoutMs}ms) …`);

  // Detach + unref so the server outlives this test process; output is discarded.
  const server = spawn('sh', [scriptPath], {
    detached: true,
    stdio: 'ignore',
    cwd: dirname(scriptPath),
    env: { ...process.env },
  });
  server.unref();

  // Poll the port until it opens or the deadline passes (model load can be slow).
  const deadline = Date.now() + startTimeoutMs;
  while (Date.now() < deadline) {
    const opened = await probeTcpPort(host, port, 1500);
    if (opened) {
      return { ok: true, alreadyRunning: false, started: true, pid: server.pid };
    }
    await new Promise((wake) => setTimeout(wake, 1500));
  }

  throw new Error(
    `[llama] Server did not open ${host}:${port} within ${startTimeoutMs}ms — check llama-server / GPU / model path`
  );
}
|
||||
397
packages/media/cpp/orchestrator/reports.js
Normal file
397
packages/media/cpp/orchestrator/reports.js
Normal file
@ -0,0 +1,397 @@
|
||||
/**
|
||||
* orchestrator/reports.js — JSON + Markdown reports under cwd/tests/
|
||||
*
|
||||
* File pattern (logical): test-name::hh:mm
|
||||
* On-disk: test-name__HH-mm.json / .md (Windows: no `:` in filenames)
|
||||
*/
|
||||
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { join, dirname } from 'node:path';
|
||||
import os from 'node:os';
|
||||
import { performance } from 'node:perf_hooks';
|
||||
import { resourceUsage } from 'node:process';
|
||||
|
||||
// Characters not allowed in Windows filenames (plus control characters).
const WIN_BAD = /[<>:"/\\|?*\x00-\x1f]/g;

/** Strip characters invalid in Windows / POSIX filenames. */
export function sanitizeTestName(name) {
  const cleaned = String(name)
    .trim()
    .replace(WIN_BAD, '_')
    .replace(/\s+/g, '_');
  return cleaned.length > 0 ? cleaned : 'test';
}
|
||||
|
||||
/**
 * Zero-padded local-time hour/minute pieces plus a `hh:mm` display label.
 *
 * @param {Date} [now]
 * @returns {{ hh: string, mm: string, label: string }}
 */
export function timeParts(now = new Date()) {
  const pad2 = (n) => String(n).padStart(2, '0');
  const hh = pad2(now.getHours());
  const mm = pad2(now.getMinutes());
  return { hh, mm, label: `${hh}:${mm}` };
}
|
||||
|
||||
/**
 * Build the on-disk report path `cwd/tests/<name>__HH-mm<ext>`.
 *
 * @param {string} testName
 * @param {string} ext — including dot, e.g. '.json'
 * @param {{ cwd?: string, now?: Date }} [options]
 */
export function reportFilePathWithExt(testName, ext, options = {}) {
  const cwd = options.cwd ?? process.cwd();
  const now = options.now ?? new Date();
  const { hh, mm } = timeParts(now);
  // `-` instead of `:` in the timestamp: Windows filenames cannot contain `:`.
  const fileName = `${sanitizeTestName(testName)}__${hh}-${mm}${ext}`;
  return join(cwd, 'tests', fileName);
}
|
||||
|
||||
// JSON report path (see reportFilePathWithExt for naming).
export function reportFilePath(testName, options = {}) {
  const jsonExt = '.json';
  return reportFilePathWithExt(testName, jsonExt, options);
}
|
||||
|
||||
// Markdown report path (see reportFilePathWithExt for naming).
export function reportMarkdownPath(testName, options = {}) {
  const mdExt = '.md';
  return reportFilePathWithExt(testName, mdExt, options);
}
|
||||
|
||||
// Human-readable byte count, e.g. "1.5 KB"; non-numeric input is stringified as-is.
function formatBytes(n) {
  if (typeof n !== 'number' || Number.isNaN(n)) return String(n);
  const units = ['B', 'KB', 'MB', 'GB'];
  let idx = 0;
  let value = n;
  while (value >= 1024 && idx < units.length - 1) {
    value /= 1024;
    idx += 1;
  }
  // One decimal for small scaled values; whole numbers (and raw bytes) rounded.
  const shown = idx > 0 && value < 10 ? value.toFixed(1) : String(Math.round(value));
  return `${shown} ${units[idx]}`;
}
|
||||
|
||||
/** Snapshot of host / OS (cheap; call anytime). */
export function hostSnapshot() {
  const cpuInfo = os.cpus();
  const totalMem = os.totalmem();
  const freeMem = os.freemem();
  const firstCpu = cpuInfo[0];
  return {
    hostname: os.hostname(),
    platform: os.platform(),
    arch: os.arch(),
    release: os.release(),
    cpuCount: cpuInfo.length,
    // Empty string when CPU info is unavailable (e.g. restricted containers).
    cpuModel: firstCpu && firstCpu.model ? firstCpu.model.trim() : '',
    totalMemBytes: totalMem,
    freeMemBytes: freeMem,
    usedMemBytes: totalMem - freeMem,
    loadAvg: os.loadavg(),
    osUptimeSec: os.uptime(),
  };
}
|
||||
|
||||
/**
 * Call at test start; then call `.finalize()` at end for wall + CPU delta + memory.
 */
export function createMetricsCollector() {
  // Baselines captured at collector creation; deltas are relative to these.
  const baselineCpu = process.cpuUsage();
  const baselinePerf = performance.now();
  const baselineWall = Date.now();

  return {
    hostSnapshot,

    finalize() {
      const cpuDelta = process.cpuUsage(baselineCpu);
      const elapsedPerf = performance.now() - baselinePerf;
      let usage = null;
      try {
        usage = resourceUsage();
      } catch {
        /* older runtimes */
      }
      return {
        // High-resolution wall clock, rounded to µs precision in ms.
        durationWallMs: Math.round(elapsedPerf * 1000) / 1000,
        durationClockMs: Date.now() - baselineWall,
        cpuUserUs: cpuDelta.user,
        cpuSystemUs: cpuDelta.system,
        cpuUserMs: cpuDelta.user / 1000,
        cpuSystemMs: cpuDelta.system / 1000,
        memory: process.memoryUsage(),
        resourceUsage: usage,
        pid: process.pid,
        node: process.version,
        processUptimeSec: process.uptime(),
      };
    },
  };
}
|
||||
|
||||
/**
 * Render the JSON report payload as a human-readable Markdown document.
 *
 * Sections are emitted only when their data is present in `payload`;
 * all tables are GitHub-flavoured Markdown.
 *
 * @param {Record<string, unknown>} payload
 * @returns {string}
 */
export function renderMarkdownReport(payload) {
  // Defensive defaults: every sub-object may be absent on partial payloads.
  const meta = payload.meta ?? {};
  const m = payload.metrics ?? {};
  const host = m.host ?? {};
  const timing = m.timing ?? {};
  const proc = m.process ?? {};
  // Timing may live either in metrics.timing or at the payload top level.
  const tStart = timing.startedAt ?? payload.startedAt;
  const tEnd = timing.finishedAt ?? payload.finishedAt;

  const lines = [];

  // ── Summary table ──────────────────────────────────────────────────────
  lines.push(`# Test report: ${meta.displayName ?? meta.testName ?? 'run'}`);
  lines.push('');
  lines.push('## Summary');
  lines.push('');
  lines.push(`| Key | Value |`);
  lines.push(`| --- | --- |`);
  lines.push(`| Result | ${payload.ok === true ? 'PASS' : payload.ok === false ? 'FAIL' : '—'} |`);
  if (payload.passed != null) lines.push(`| Assertions passed | ${payload.passed} |`);
  if (payload.failed != null) lines.push(`| Assertions failed | ${payload.failed} |`);
  if (payload.ipcLlm != null) lines.push(`| IPC LLM step | ${payload.ipcLlm ? 'enabled' : 'skipped'} |`);
  if (payload.ipcLlama != null) {
    lines.push(`| IPC llama :8888 step | ${payload.ipcLlama ? 'enabled' : 'skipped'} |`);
  }
  if (payload.ipcClassifierLlama != null) {
    lines.push(
      `| IPC classifier | ${payload.ipcClassifierLlama ? 'local llama :8888' : 'remote (KBOT_ROUTER / KBOT_IPC_MODEL)'} |`
    );
  }
  // Backticks in the cwd would break the inline-code cell — replace them.
  lines.push(`| CWD | \`${String(meta.cwd ?? '').replace(/`/g, "'")}\` |`);
  lines.push('');

  // ── Timing ─────────────────────────────────────────────────────────────
  lines.push('## Timing');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`| --- | --- |`);
  if (tStart) lines.push(`| Started (ISO) | ${tStart} |`);
  if (tEnd) lines.push(`| Finished (ISO) | ${tEnd} |`);
  if (proc.durationWallMs != null) lines.push(`| Wall time (perf) | ${proc.durationWallMs} ms |`);
  if (proc.durationClockMs != null) lines.push(`| Wall time (clock) | ${proc.durationClockMs} ms |`);
  lines.push('');

  // ── Node process metrics ───────────────────────────────────────────────
  lines.push('## Process (Node)');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`| --- | --- |`);
  if (proc.pid != null) lines.push(`| PID | ${proc.pid} |`);
  if (proc.node) lines.push(`| Node | ${proc.node} |`);
  if (proc.processUptimeSec != null) lines.push(`| process.uptime() | ${proc.processUptimeSec.toFixed(3)} s |`);
  if (proc.cpuUserMs != null && proc.cpuSystemMs != null) {
    lines.push(`| CPU user (process.cpuUsage Δ) | ${proc.cpuUserMs.toFixed(3)} ms (${proc.cpuUserUs ?? '—'} µs) |`);
    lines.push(`| CPU system (process.cpuUsage Δ) | ${proc.cpuSystemMs.toFixed(3)} ms (${proc.cpuSystemUs ?? '—'} µs) |`);
  }
  const ru = proc.resourceUsage;
  if (ru && typeof ru === 'object') {
    if (ru.userCPUTime != null) {
      lines.push(`| CPU user (resourceUsage) | ${(ru.userCPUTime / 1000).toFixed(3)} ms |`);
    }
    if (ru.systemCPUTime != null) {
      lines.push(`| CPU system (resourceUsage) | ${(ru.systemCPUTime / 1000).toFixed(3)} ms |`);
    }
    if (ru.maxRSS != null) {
      // maxRSS is reported in kilobytes — convert to bytes for formatBytes.
      lines.push(`| Max RSS (resourceUsage) | ${formatBytes(ru.maxRSS * 1024)} |`);
    }
  }
  const mem = proc.memory;
  if (mem && typeof mem === 'object') {
    lines.push(`| RSS | ${formatBytes(mem.rss)} (${mem.rss} B) |`);
    lines.push(`| Heap used | ${formatBytes(mem.heapUsed)} |`);
    lines.push(`| Heap total | ${formatBytes(mem.heapTotal)} |`);
    lines.push(`| External | ${formatBytes(mem.external)} |`);
    if (mem.arrayBuffers != null) lines.push(`| Array buffers | ${formatBytes(mem.arrayBuffers)} |`);
  }
  lines.push('');

  // ── Host metrics ───────────────────────────────────────────────────────
  lines.push('## Host');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`| --- | --- |`);
  if (host.hostname) lines.push(`| Hostname | ${host.hostname} |`);
  if (host.platform) lines.push(`| OS | ${host.platform} ${host.release ?? ''} |`);
  if (host.arch) lines.push(`| Arch | ${host.arch} |`);
  if (host.cpuCount != null) lines.push(`| CPUs | ${host.cpuCount} |`);
  if (host.cpuModel) lines.push(`| CPU model | ${host.cpuModel} |`);
  if (host.totalMemBytes != null) {
    lines.push(`| RAM total | ${formatBytes(host.totalMemBytes)} |`);
    lines.push(`| RAM free | ${formatBytes(host.freeMemBytes)} |`);
    lines.push(`| RAM used | ${formatBytes(host.usedMemBytes)} |`);
  }
  if (host.loadAvg && host.loadAvg.length) {
    lines.push(`| Load avg (1/5/15) | ${host.loadAvg.map((x) => x.toFixed(2)).join(' / ')} |`);
  }
  if (host.osUptimeSec != null) lines.push(`| OS uptime | ${(host.osUptimeSec / 3600).toFixed(2)} h |`);
  lines.push('');

  // ── Provider LLM responses (kbot-ai steps + classifier) ────────────────
  const kbotAi = payload.kbotAi;
  const hasKbotAiMeta =
    kbotAi &&
    typeof kbotAi === 'object' &&
    (kbotAi.routerStep != null || kbotAi.llamaStep != null);
  const hasClassifierLlm = payload.llm != null && typeof payload.llm === 'object';
  if (hasKbotAiMeta || hasClassifierLlm) {
    lines.push('## LLM API (provider JSON)');
    lines.push('');
    lines.push(
      'Fields from the chat completion response except assistant message bodies (`usage`, `model`, `id`, provider-specific).'
    );
    lines.push('');
    if (hasKbotAiMeta) {
      if (kbotAi.routerStep != null) {
        lines.push('### IPC step 6 — router / main kbot-ai');
        lines.push('');
        lines.push('```json');
        lines.push(JSON.stringify(kbotAi.routerStep, null, 2));
        lines.push('```');
        lines.push('');
      }
      if (kbotAi.llamaStep != null) {
        lines.push('### IPC step 7 — local llama :8888');
        lines.push('');
        lines.push('```json');
        lines.push(JSON.stringify(kbotAi.llamaStep, null, 2));
        lines.push('```');
        lines.push('');
      }
    }
    if (hasClassifierLlm) {
      lines.push('### Classifier — batched kbot-ai');
      lines.push('');
      lines.push('```json');
      lines.push(JSON.stringify(payload.llm, null, 2));
      lines.push('```');
      lines.push('');
    }
  }

  // ── Classifier batch results (anchor + nearest labels) ─────────────────
  if (payload.anchor != null || (Array.isArray(payload.distances) && payload.distances.length > 0)) {
    lines.push('## Classifier batch');
    lines.push('');
    lines.push(`| Key | Value |`);
    lines.push(`| --- | --- |`);
    if (payload.anchor != null) lines.push(`| Anchor | ${payload.anchor} |`);
    if (payload.labelCount != null) lines.push(`| Label count | ${payload.labelCount} |`);
    if (payload.backend != null) lines.push(`| Backend | ${payload.backend} |`);
    const pe = payload.parseError;
    if (pe != null && String(pe).length) {
      // Escape `|` so the error text cannot break the table; cap at 500 chars.
      lines.push(`| Parse | Failed: ${String(pe).replace(/\|/g, '\\|').slice(0, 500)}${String(pe).length > 500 ? '…' : ''} |`);
    } else {
      lines.push(`| Parse | OK |`);
    }
    lines.push('');
    const sorted = Array.isArray(payload.byDistance) ? payload.byDistance : [];
    // Show only the nearest 12 labels that actually carry a distance.
    const preview = sorted.filter((r) => r && r.distance != null).slice(0, 12);
    if (preview.length > 0) {
      lines.push('### Nearest labels (by distance)');
      lines.push('');
      lines.push(`| Label | Distance |`);
      lines.push(`| --- | ---: |`);
      for (const row of preview) {
        const lab = String(row.label ?? '').replace(/\|/g, '\\|');
        lines.push(`| ${lab} | ${row.distance} |`);
      }
      lines.push('');
    }
  }

  // ── Stress-run summary (batch repeats) ─────────────────────────────────
  if (payload.stress?.summary && typeof payload.stress.summary === 'object') {
    const s = payload.stress.summary;
    const w = s.wallMs;
    lines.push('## Classifier stress (batch repeats)');
    lines.push('');
    lines.push(`| Metric | Value |`);
    lines.push(`| --- | --- |`);
    lines.push(`| Requested runs | ${s.requestedRuns ?? '—'} |`);
    if (w && typeof w === 'object') {
      lines.push(
        `| Wall time (ms) | min ${w.min} · max ${w.max} · avg ${w.avg} · p50 ${w.p50} · p95 ${w.p95} |`
      );
    }
    lines.push(`| Batch OK / fail | ${s.successCount ?? '—'} / ${s.failCount ?? '—'} |`);
    if (s.totalTokens > 0 || s.totalPromptTokens > 0 || s.totalCompletionTokens > 0) {
      lines.push(
        `| Σ tokens (prompt / completion / total) | ${s.totalPromptTokens} / ${s.totalCompletionTokens} / ${s.totalTokens} |`
      );
    }
    lines.push('');
  }

  // ── Selected environment variables ─────────────────────────────────────
  if (payload.env && typeof payload.env === 'object') {
    lines.push('## Environment (selected)');
    lines.push('');
    lines.push(`| Variable | Value |`);
    lines.push(`| --- | --- |`);
    for (const [k, v] of Object.entries(payload.env)) {
      lines.push(`| \`${k}\` | ${v === null || v === undefined ? '—' : String(v)} |`);
    }
    lines.push('');
  }

  // ── Error block ────────────────────────────────────────────────────────
  if (payload.error) {
    lines.push('## Error');
    lines.push('');
    lines.push('```');
    lines.push(String(payload.error));
    lines.push('```');
    lines.push('');
  }

  // ── Footer ─────────────────────────────────────────────────────────────
  lines.push('---');
  lines.push(`*Written ${meta.writtenAt ?? new Date().toISOString()}*`);
  lines.push('');

  return lines.join('\n');
}
|
||||
|
||||
/**
 * Build metrics block for JSON + MD (host snapshot + process finalize).
 */
export function buildMetricsBundle(collector, startedAtIso, finishedAtIso) {
  return {
    timing: {
      startedAt: startedAtIso,
      finishedAt: finishedAtIso,
    },
    host: collector.hostSnapshot(),
    process: collector.finalize(),
  };
}
|
||||
|
||||
/**
 * Write the JSON + Markdown report pair for one test run.
 *
 * @param {string} testName
 * @param {Record<string, unknown>} data — merged into payload (meta + metrics added)
 * @param {{ cwd?: string, now?: Date, metrics?: object }} [options]
 * @returns {Promise<{ jsonPath: string, mdPath: string }>}
 */
export async function writeTestReports(testName, data, options = {}) {
  const cwd = options.cwd ?? process.cwd();
  const now = options.now ?? new Date();
  const pathOpts = { cwd, now };
  const jsonPath = reportFilePath(testName, pathOpts);
  const mdPath = reportMarkdownPath(testName, pathOpts);
  const { label } = timeParts(now);
  const base = sanitizeTestName(testName);

  // `data` is spread last so callers may override nothing in meta but add any payload keys.
  const payload = {
    meta: {
      testName: base,
      displayName: `${base}::${label}`,
      cwd,
      writtenAt: now.toISOString(),
      jsonFile: jsonPath,
      mdFile: mdPath,
    },
    ...data,
  };

  await mkdir(dirname(jsonPath), { recursive: true });
  await writeFile(jsonPath, JSON.stringify(payload, null, 2), 'utf8');
  await writeFile(mdPath, renderMarkdownReport(payload), 'utf8');

  return { jsonPath, mdPath };
}
|
||||
|
||||
/** @deprecated Prefer writeTestReports */
export async function writeJsonReport(testName, data, options = {}) {
  const written = await writeTestReports(testName, data, options);
  return written.jsonPath;
}
|
||||
159
packages/media/cpp/orchestrator/spawn.mjs
Normal file
159
packages/media/cpp/orchestrator/spawn.mjs
Normal file
@ -0,0 +1,159 @@
|
||||
/**
|
||||
* orchestrator/spawn.mjs
|
||||
*
|
||||
* Spawn a C++ worker as a child process, send/receive length-prefixed
|
||||
* JSON messages over stdin/stdout.
|
||||
*
|
||||
* Usage:
|
||||
* import { spawnWorker } from './spawn.mjs';
|
||||
* const w = await spawnWorker('./dist/polymech-cli.exe');
|
||||
* console.log(res); // { id: '...', type: 'pong', payload: {} }
|
||||
* await w.shutdown();
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
// ── frame helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
/** Write a 4-byte LE length + JSON body to a writable stream (single write). */
function writeFrame(stream, msg) {
  const payload = Buffer.from(JSON.stringify(msg), 'utf8');
  const frame = Buffer.alloc(4 + payload.length);
  frame.writeUInt32LE(payload.length, 0);
  payload.copy(frame, 4);
  stream.write(frame);
}
|
||||
|
||||
/**
 * Creates a streaming frame parser for 4-byte-LE-length-prefixed JSON frames.
 * Calls `onMessage(parsed)` for each complete frame; partial frames are
 * buffered until the remaining bytes arrive.
 */
function createFrameReader(onMessage) {
  let stash = Buffer.alloc(0);

  return (chunk) => {
    stash = Buffer.concat([stash, chunk]);

    for (;;) {
      // Need the full 4-byte header before the body length is known.
      if (stash.length < 4) return;
      const bodyLen = stash.readUInt32LE(0);
      if (stash.length < 4 + bodyLen) return; // need more data

      const body = stash.subarray(4, 4 + bodyLen);
      stash = stash.subarray(4 + bodyLen);

      try {
        onMessage(JSON.parse(body.toString('utf8')));
      } catch (e) {
        // A malformed body skips only this frame; the stream stays in sync.
        console.error('[orchestrator] failed to parse frame:', e.message);
      }
    }
  };
}
|
||||
|
||||
// ── spawnWorker ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Spawn the C++ binary in `worker` mode.
 * Returns: { send, request, shutdown, kill, process, ready }
 *
 * `ready` is a Promise that resolves when the worker sends `{ type: 'ready' }`.
 *
 * Fix: all in-flight `request()` promises are rejected immediately when the
 * child process exits or fails to spawn; previously each caller hung until
 * its individual timeout fired even though the worker was already gone.
 */
export function spawnWorker(exePath, args = ['worker']) {
  const proc = spawn(exePath, args, {
    stdio: ['pipe', 'pipe', 'pipe'],
  });

  // Pending request map: id → { resolve, reject, timer }
  const pending = new Map();

  // Event handler for unmatched messages (progress events, etc.)
  let eventHandler = null;

  let readyResolve;
  const ready = new Promise((resolve) => { readyResolve = resolve; });

  /** Reject every in-flight request — the worker died or never started. */
  function rejectAllPending(reason) {
    for (const [id, entry] of pending) {
      clearTimeout(entry.timer);
      entry.reject(new Error(`IPC worker unavailable before response (id=${id}): ${reason}`));
    }
    pending.clear();
  }

  // Fail fast instead of letting each request run into its own timeout.
  proc.on('error', (err) => rejectAllPending(`spawn failed: ${err.message}`));
  proc.on('exit', (code, signal) => rejectAllPending(`worker exited (code=${code}, signal=${signal})`));

  // stderr → console (worker logs via spdlog go to stderr)
  proc.stderr.on('data', (chunk) => {
    const text = chunk.toString().trim();
    if (text) console.error(`[worker:stderr] ${text}`);
  });

  // stdout → frame parser → route by id / type
  const feedData = createFrameReader((msg) => {
    // Handle the initial "ready" signal
    if (msg.type === 'ready') {
      readyResolve(msg);
      return;
    }

    // Route response to pending request
    if (msg.id && pending.has(msg.id)) {
      const { resolve, timer } = pending.get(msg.id);
      clearTimeout(timer);
      pending.delete(msg.id);
      resolve(msg);
      return;
    }

    // Unmatched message (progress event, broadcast, etc.)
    if (eventHandler) {
      eventHandler(msg);
    } else {
      console.log('[orchestrator] unmatched message:', msg);
    }
  });

  proc.stdout.on('data', feedData);

  // ── public API ──────────────────────────────────────────────────────────

  /** Fire-and-forget send. */
  function send(msg) {
    if (!msg.id) msg.id = randomUUID();
    writeFrame(proc.stdin, msg);
  }

  /** Send a message and wait for the response with matching `id`. */
  function request(msg, timeoutMs = 5000) {
    return new Promise((resolve, reject) => {
      const id = msg.id || randomUUID();
      msg.id = id;

      const timer = setTimeout(() => {
        pending.delete(id);
        reject(new Error(`IPC request timed out after ${timeoutMs}ms (id=${id}, type=${msg.type})`));
      }, timeoutMs);

      pending.set(id, { resolve, reject, timer });
      writeFrame(proc.stdin, msg);
    });
  }

  /** Graceful shutdown: send shutdown message & wait for process exit. */
  async function shutdown(timeoutMs = 3000) {
    const res = await request({ type: 'shutdown' }, timeoutMs);
    // Wait for process to exit; force-kill if it overstays the timeout.
    await new Promise((resolve) => {
      const timer = setTimeout(() => {
        proc.kill();
        resolve();
      }, timeoutMs);
      proc.on('exit', () => { clearTimeout(timer); resolve(); });
    });
    return res;
  }

  return {
    send,
    request,
    shutdown,
    kill: () => proc.kill(),
    process: proc,
    ready,
    onEvent: (handler) => { eventHandler = handler; },
  };
}
|
||||
237
packages/media/cpp/orchestrator/test-commons.js
Normal file
237
packages/media/cpp/orchestrator/test-commons.js
Normal file
@ -0,0 +1,237 @@
|
||||
/**
|
||||
* orchestrator/test-commons.js — shared helpers for IPC orchestrator tests.
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import net from 'node:net';
|
||||
|
||||
/** kbot-ai live call runs unless KBOT_IPC_LLM is explicitly disabled. */
export function ipcLlmEnabled() {
  const raw = process.env.KBOT_IPC_LLM;
  if (raw === undefined || raw === '') return true;
  switch (String(raw).trim().toLowerCase()) {
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return true;
  }
}
|
||||
|
||||
/** Llama local (:8888) IPC block — on by default; set KBOT_IPC_LLAMA=0 to skip (CI / no server). */
export function ipcLlamaEnabled() {
  const raw = process.env.KBOT_IPC_LLAMA;
  if (raw === undefined || raw === '') return true;
  switch (String(raw).trim().toLowerCase()) {
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return true;
  }
}
|
||||
|
||||
/**
 * Classifier batch test (`test-ipc-classifier.mjs`): local llama :8888 by default.
 * Set KBOT_IPC_CLASSIFIER_LLAMA=0 to use KBOT_ROUTER / KBOT_IPC_MODEL (e.g. OpenRouter) instead.
 */
export function ipcClassifierLlamaEnabled() {
  const raw = process.env.KBOT_IPC_CLASSIFIER_LLAMA;
  // Unset / empty means "use the local llama default".
  if (raw === undefined || raw === '') return true;
  const disabled = ['0', 'false', 'no', 'off'];
  return !disabled.includes(String(raw).trim().toLowerCase());
}
|
||||
|
||||
/** Auto-start scripts/run-7b.sh when :8888 is closed (default on). */
export function llamaAutostartEnabled() {
  const raw = process.env.KBOT_IPC_LLAMA_AUTOSTART;
  if (raw === undefined || raw === '') return true;
  const normalized = String(raw).trim().toLowerCase();
  const offValues = new Set(['0', 'false', 'no', 'off']);
  return !offValues.has(normalized);
}
|
||||
|
||||
/** TCP connect probe — resolves true iff something accepts the connection. */
export function probeTcpPort(host, port, timeoutMs = 2000) {
  return new Promise((resolve) => {
    const conn = net.connect({ port, host });
    // Single exit path: detach listeners, tear down the socket, report.
    const finish = (reachable) => {
      conn.removeAllListeners();
      try {
        conn.destroy();
      } catch {
        /* ignore */
      }
      resolve(reachable);
    };
    const deadline = setTimeout(() => finish(false), timeoutMs);
    conn.once('connect', () => {
      clearTimeout(deadline);
      finish(true);
    });
    conn.once('error', () => {
      clearTimeout(deadline);
      finish(false);
    });
  });
}
|
||||
|
||||
/** Counters for a test run (create one per process / suite). */
export function createAssert() {
  const counts = { ok: 0, bad: 0 };

  // Labelled check: logs the verdict and bumps the matching counter.
  const assert = (condition, label) => {
    if (condition) {
      console.log(`  ✅ ${label}`);
      counts.ok += 1;
    } else {
      console.error(`  ❌ ${label}`);
      counts.bad += 1;
    }
  };

  return {
    assert,
    get passed() {
      return counts.ok;
    },
    get failed() {
      return counts.bad;
    },
  };
}
|
||||
|
||||
/** Normalize an IPC payload: accepts either an object or a JSON string. */
export function payloadObj(msg) {
  const payload = msg?.payload;
  if (payload == null) return null;
  // Non-string payloads are already structured — pass through untouched.
  if (typeof payload !== 'string') return payload;
  try {
    return JSON.parse(payload);
  } catch {
    // Unparseable strings are wrapped rather than dropped.
    return { raw: payload };
  }
}
|
||||
|
||||
/**
 * Print LLM job_result so it is easy to spot (stdout, not mixed with worker stderr).
 *
 * Renders a box-drawing "card" with type/status/router/model, the error (if
 * any), the provider `llm` JSON (capped at 4000 chars) and the response text.
 * KBOT_IPC_LLM_LOG_MAX (chars) truncates error and text; unset/empty means
 * unlimited. A non-numeric value parses to NaN, which also disables truncation
 * because every cap check goes through Number.isFinite(max).
 */
export function logKbotAiResponse(stepLabel, msg) {
  const p = payloadObj(msg);
  const text = p?.text != null ? String(p.text) : '';
  const err = p?.error != null ? String(p.error) : '';
  const maxRaw = process.env.KBOT_IPC_LLM_LOG_MAX;
  // Infinity when unset/empty; otherwise parsed char budget (may be NaN).
  const max =
    maxRaw === undefined || maxRaw === ''
      ? Infinity
      : Number.parseInt(maxRaw, 10);

  console.log('');
  console.log(`  ┌── ${stepLabel} ──────────────────────────────────────────`);
  console.log(`  │ type:   ${msg?.type ?? '?'}`);
  // Metadata lines only make sense for structured payloads.
  if (p && typeof p === 'object') {
    console.log(`  │ status: ${p.status ?? '?'}`);
    if (p.mode != null) console.log(`  │ mode:   ${p.mode}`);
    if (p.router != null) console.log(`  │ router: ${p.router}`);
    if (p.model != null) console.log(`  │ model:  ${p.model}`);
  }
  if (err) {
    // Truncate only when a finite cap is configured and exceeded.
    const showErr =
      Number.isFinite(max) && err.length > max
        ? `${err.slice(0, max)}… [truncated, ${err.length} chars]`
        : err;
    // Re-indent embedded newlines so multi-line errors stay inside the box.
    console.log(`  │ error: ${showErr.replace(/\n/g, '\n  │        ')}`);
  }
  if (p?.llm != null && typeof p.llm === 'object') {
    const raw = JSON.stringify(p.llm);
    // Provider/usage JSON gets its own hard cap independent of max.
    const cap = 4000;
    const shown = raw.length > cap ? `${raw.slice(0, cap)}… [+${raw.length - cap} chars]` : raw;
    console.log(`  │ llm (usage / provider JSON): ${shown}`);
  }
  if (text) {
    let body = text;
    let note = '';
    if (Number.isFinite(max) && text.length > max) {
      body = text.slice(0, max);
      note = `\n  │ … [truncated: ${text.length} chars total; set KBOT_IPC_LLM_LOG_MAX= to adjust]`;
    }
    console.log('  │ text:');
    // One box-prefixed line per text line.
    for (const line of body.split('\n')) {
      console.log(`  │   ${line}`);
    }
    if (note) console.log(note);
  } else if (!err) {
    console.log('  │ (no text in payload)');
  }
  console.log('  └────────────────────────────────────────────────────────────');
  console.log('');
}
|
||||
|
||||
/**
 * Length-prefixed JSON framing used by the C++ UDS worker.
 *
 * Wire format: a 4-byte little-endian length header followed by a UTF-8 JSON
 * body. Call `attach()` once to wire `socket.on('data', ...)`.
 *
 * Returned API:
 *  - pending      — Map of in-flight request id → { resolve, reject, timer }
 *  - readyPromise — resolves with the worker's initial `ready` message
 *  - request(msg, timeoutMs) — send a frame, resolve with the matching reply
 *  - attach()     — start consuming the socket's data events
 */
export function createIpcClient(socket) {
  const pending = new Map();
  let readyResolve;
  const readyPromise = new Promise((res) => {
    readyResolve = res;
  });

  // Reassembly buffer: frames may arrive split across chunks or coalesced.
  let buffer = Buffer.alloc(0);

  function onData(chunk) {
    buffer = Buffer.concat([buffer, chunk]);
    // Drain every complete frame currently buffered.
    while (buffer.length >= 4) {
      const len = buffer.readUInt32LE(0);
      if (buffer.length < 4 + len) break; // wait for the rest of the frame
      const payload = buffer.toString('utf8', 4, 4 + len);
      buffer = buffer.subarray(4 + len);
      try {
        const msg = JSON.parse(payload);
        if (msg.type === 'ready') {
          readyResolve(msg);
        } else if (msg.id && pending.has(msg.id)) {
          const p = pending.get(msg.id);
          clearTimeout(p.timer);
          pending.delete(msg.id);
          p.resolve(msg);
        }
      } catch (e) {
        // A malformed frame is logged but does not kill the stream.
        console.error('[orchestrator] frame parse error', e);
      }
    }
  }

  /**
   * Send `msg` (an `id` is generated when absent) and resolve with the reply
   * whose `id` matches. Rejects after `timeoutMs` with a descriptive error.
   */
  function request(msg, timeoutMs = 5000) {
    return new Promise((resolve, reject) => {
      const id = msg.id || randomUUID();
      msg.id = id;
      const timer = setTimeout(() => {
        pending.delete(id);
        // Fix: include timeout/id/type context, consistent with the stdio
        // client's timeout message — a bare "timed out" is untriageable.
        reject(new Error(`IPC request timed out after ${timeoutMs}ms (id=${id}, type=${msg.type})`));
      }, timeoutMs);
      pending.set(id, { resolve, reject, timer });

      // Write the 4-byte LE length header, then the JSON body.
      const str = JSON.stringify(msg);
      const lenBuf = Buffer.alloc(4);
      lenBuf.writeUInt32LE(Buffer.byteLength(str));
      socket.write(lenBuf);
      socket.write(str);
    });
  }

  return {
    pending,
    readyPromise,
    request,
    attach() {
      socket.on('data', onData);
    },
  };
}
|
||||
|
||||
/** Forward worker stderr lines to console (prefixed). */
export function pipeWorkerStderr(workerProc, label = '[worker:stderr]') {
  workerProc.stderr.on('data', (chunk) => {
    const line = chunk.toString().trim();
    // Whitespace-only chunks are dropped to keep the console quiet.
    if (line) console.error(`${label} ${line}`);
  });
}
|
||||
204
packages/media/cpp/orchestrator/test-files.mjs
Normal file
204
packages/media/cpp/orchestrator/test-files.mjs
Normal file
@ -0,0 +1,204 @@
|
||||
/**
|
||||
* orchestrator/test-files.mjs
|
||||
*
|
||||
* IPC + CLI parity for text file sources (port of kbot/src/source.ts — text slice only; images later).
|
||||
* Fixtures: packages/kbot/tests/test-data/files (path below is resolved from orchestrator/).
|
||||
*
|
||||
* Run: npm run test:files
|
||||
*
|
||||
* Env (optional live LLM step):
|
||||
* KBOT_IPC_LLM — set 0/false/off to skip live kbot-ai (default: run when key available)
|
||||
* KBOT_ROUTER, KBOT_IPC_MODEL — same as test-ipc
|
||||
*
|
||||
* CLI (npm run test:files -- --help):
|
||||
* --fixtures <dir> Override fixture root (default: ../../tests/test-data/files)
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import net from 'node:net';
|
||||
import { existsSync, unlinkSync } from 'node:fs';
|
||||
import yargs from 'yargs';
|
||||
import { hideBin } from 'yargs/helpers';
|
||||
|
||||
import {
|
||||
distExePath,
|
||||
platform,
|
||||
uds,
|
||||
timeouts,
|
||||
kbotAiPayloadFromEnv,
|
||||
} from './presets.js';
|
||||
import {
|
||||
createAssert,
|
||||
payloadObj,
|
||||
ipcLlmEnabled,
|
||||
createIpcClient,
|
||||
pipeWorkerStderr,
|
||||
} from './test-commons.js';
|
||||
|
||||
// Resolve paths relative to this script file, not the process CWD.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Built CLI binary location (platform-specific name comes from presets).
const EXE = distExePath(__dirname);
// Suite-wide pass/fail counters shared by every check below.
const stats = createAssert();
const { assert } = stats;

// Default fixture root — packages/kbot/tests/test-data/files, per file header.
const defaultFixtures = resolve(__dirname, '../../tests/test-data/files');
|
||||
|
||||
/** Parse CLI flags for the file-source test runner (yargs, strict mode). */
function parseArgv() {
  const fixturesOpt = {
    type: 'string',
    default: defaultFixtures,
    describe: 'Directory used as kbot-ai `path` (project root for includes)',
  };
  return yargs(hideBin(process.argv))
    .scriptName('test-files')
    .usage('$0 [options]\n\nText file source IPC tests (fixtures under packages/kbot/tests/test-data/files).')
    .option('fixtures', fixturesOpt)
    .strict()
    .help()
    .alias('h', 'help')
    .parseSync();
}
|
||||
|
||||
/**
 * Drive the file-source test suite over one connected IPC socket: await the
 * worker's ready frame, run dry-run source-attachment checks (single file,
 * glob, JSON fixture), optionally run one live LLM round-trip, then request a
 * graceful worker shutdown.
 *
 * @param {import('node:net').Socket} socket
 * @param {string} fixturesDir
 */
async function runFileSuite(socket, fixturesDir) {
  const ipc = createIpcClient(socket);
  ipc.attach();

  // The worker announces itself with a `ready` frame before accepting jobs.
  const readyMsg = await ipc.readyPromise;
  assert(readyMsg.type === 'ready', 'worker ready');

  console.log('\n── Dry-run source attachment (no LLM) ──\n');

  // Helper: send one kbot-ai dry-run request and assert the common invariants
  // (job_result type, dry_run flag, success status, sources array).
  /** @param {Record<string, unknown>} payload */
  async function dry(payload) {
    const msg = await ipc.request({ type: 'kbot-ai', payload }, timeouts.ipcDefault);
    assert(msg.type === 'job_result', `job_result (got ${msg.type})`);
    const p = payloadObj(msg);
    assert(p?.dry_run === true, 'dry_run flag');
    assert(p?.status === 'success', 'status success');
    assert(Array.isArray(p?.sources), 'sources array');
    return p;
  }

  // Case 1: single literal include resolves and its content reaches the prompt.
  let p = await dry({
    dry_run: true,
    path: fixturesDir,
    include: ['bubblesort.js'],
    prompt: 'What function is defined? Reply one word.',
  });
  assert(
    p.sources.some((s) => String(s).includes('bubblesort')),
    'sources lists bubblesort.js',
  );
  assert(
    /bubbleSort/i.test(String(p.prompt_preview || '')),
    'prompt_preview contains bubbleSort',
  );

  // Case 2: glob include picks up multiple fixture files.
  p = await dry({
    dry_run: true,
    path: fixturesDir,
    include: ['*.js'],
    prompt: 'List algorithms.',
  });
  assert(p.sources.length >= 2, 'glob *.js yields at least 2 files');
  const names = p.sources.map((s) => String(s).toLowerCase());
  assert(names.some((n) => n.includes('bubblesort')), 'glob includes bubblesort.js');
  assert(names.some((n) => n.includes('factorial')), 'glob includes factorial.js');

  // Case 3: nested JSON fixture content is inlined into the prompt preview.
  p = await dry({
    dry_run: true,
    path: fixturesDir,
    include: ['glob/data.json'],
    prompt: 'What is the title?',
  });
  assert(
    String(p.prompt_preview || '').includes('Injection Barrel'),
    'JSON fixture content in preview',
  );

  // Optional live step: real LLM call with the file attached as context.
  if (ipcLlmEnabled()) {
    console.log('\n── Live LLM — single file prompt ──\n');
    const base = kbotAiPayloadFromEnv();
    const payload = {
      ...base,
      path: fixturesDir,
      include: ['bubblesort.js'],
      prompt:
        process.env.KBOT_FILES_LIVE_PROMPT ||
        'What is the name of the sorting algorithm in the code? Reply with two words: bubble sort',
    };
    const msg = await ipc.request({ type: 'kbot-ai', payload }, timeouts.kbotAi);
    assert(msg.type === 'job_result', 'live job_result');
    const lp = payloadObj(msg);
    assert(lp?.status === 'success', 'live status success');
    const text = String(lp?.text || '');
    // Loose match: any mention of "bubble" shows the file context was used.
    assert(/bubble/i.test(text), 'assistant mentions bubble (file context worked)');
  } else {
    console.log('\n── Live LLM — skipped (KBOT_IPC_LLM off) ──\n');
  }

  // Graceful teardown: the worker must acknowledge the shutdown request.
  const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
  assert(shutdownRes.type === 'shutdown_ack', 'shutdown ack');
}
|
||||
|
||||
/**
 * Entry point: validate binary and fixtures, spawn the worker in UDS mode,
 * connect with retries, run the suite, and always tear down socket + process.
 * Exits non-zero when any assertion failed.
 */
async function run() {
  const argv = parseArgv();
  const fixturesDir = resolve(argv.fixtures);

  // Fail fast when prerequisites are missing.
  if (!existsSync(EXE)) {
    console.error(`Binary not found: ${EXE}`);
    process.exit(1);
  }
  if (!existsSync(fixturesDir)) {
    console.error(`Fixtures directory not found: ${fixturesDir}`);
    process.exit(1);
  }

  console.log(`\n📁 test:files — fixtures: ${fixturesDir}\n`);

  // Remove a stale socket file from a previous run so the worker can bind.
  const CPP_UDS_ARG = uds.workerArg();
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }

  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);

  // Connect with retries — the worker needs a moment to create the endpoint.
  let socket;
  for (let i = 0; i < timeouts.connectAttempts; i++) {
    try {
      await new Promise((res, rej) => {
        socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
        socket.once('connect', res);
        socket.once('error', rej);
      });
      break;
    } catch {
      if (i === timeouts.connectAttempts - 1) throw new Error('connect failed');
      await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
    }
  }

  // Always destroy the socket and kill the worker, even on suite failure.
  try {
    await runFileSuite(socket, fixturesDir);
  } finally {
    try {
      socket?.destroy();
    } catch {
      /* ignore */
    }
    workerProc.kill();
  }

  console.log(`\nDone. Passed: ${stats.passed}  Failed: ${stats.failed}\n`);
  process.exit(stats.failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Entry point — surface any unhandled error and exit non-zero.
run().catch((e) => {
  console.error(e);
  process.exit(1);
});
|
||||
204
packages/media/cpp/orchestrator/test-gridsearch-ipc-daemon.mjs
Normal file
204
packages/media/cpp/orchestrator/test-gridsearch-ipc-daemon.mjs
Normal file
@ -0,0 +1,204 @@
|
||||
/**
|
||||
* orchestrator/test-gridsearch-ipc.mjs
|
||||
*
|
||||
* E2E test: spawn the C++ worker, send a gridsearch request
|
||||
* matching `npm run gridsearch:enrich` defaults, collect IPC events,
|
||||
* and verify the full event sequence.
|
||||
*
|
||||
* Run: node orchestrator/test-gridsearch-ipc.mjs
|
||||
* Needs: npm run build-debug (or npm run build)
|
||||
*/
|
||||
|
||||
import { spawnWorker } from './spawn.mjs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import fs from 'node:fs';
|
||||
|
||||
// Resolve paths relative to this script file, not the process CWD.
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
// Platform-specific binary name (".exe" suffix on Windows).
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';

// The CLI must already be built into ../dist; bail out early otherwise.
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
if (!fs.existsSync(EXE)) {
  console.error(`❌ No ${EXE_NAME} found in dist. Run npm run build first.`);
  process.exit(1);
}
console.log(`Binary: ${EXE}\n`);

// Load the sample settings (same as gridsearch:enrich)
const sampleConfig = JSON.parse(
  readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-sample.json'), 'utf8')
);

// Suite-level pass/fail counters, shared by assert() below.
let passed = 0;
let failed = 0;
|
||||
|
||||
/** Labelled assertion: logs a ✅/❌ line and bumps the module-level counters. */
function assert(condition, label) {
  if (!condition) {
    console.error(`  ❌ ${label}`);
    failed++;
    return;
  }
  console.log(`  ✅ ${label}`);
  passed++;
}
|
||||
|
||||
// ── Event collector ─────────────────────────────────────────────────────────

// Event types the gridsearch pipeline is expected to emit.
// NOTE(review): this constant is not referenced by run() below — verification
// happens via explicit asserts there; presumably kept as documentation.
const EXPECTED_EVENTS = [
  'grid-ready',
  'waypoint-start',
  'area',
  'location',
  'enrich-start',
  'node',
  'nodePage',
  // 'node-error' — may or may not occur, depends on network
];
|
||||
|
||||
/**
 * Accumulate IPC events into per-type arrays and print a one-line live
 * progress indicator for the noisy event types. Unknown event types get a
 * fresh bucket on first sight.
 */
function createCollector() {
  const KNOWN_TYPES = [
    'grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage',
  ];
  const events = Object.fromEntries(KNOWN_TYPES.map((type) => [type, []]));

  function handler(msg) {
    const type = msg.type;
    if (!events[type]) events[type] = [];
    events[type].push(msg);

    // Live progress indicator
    const d = msg.payload ?? {};
    switch (type) {
      case 'waypoint-start':
        process.stdout.write(`\r  🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
        break;
      case 'node':
        process.stdout.write(`\r  📧 Enriched: ${d.title?.substring(0, 40) ?? ''}          `);
        break;
      case 'node-error':
        process.stdout.write(`\r  ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''}          `);
        break;
      default:
        break;
    }
  }

  return { events, handler };
}
|
||||
|
||||
// ── Main test ───────────────────────────────────────────────────────────────

/**
 * E2E flow: spawn the worker daemon, run a full gridsearch+enrich job,
 * then verify the job_result summary shape, the event stream counts and
 * payload fields, and a clean worker shutdown. Exits non-zero on failure.
 */
async function run() {
  console.log('🧪 Gridsearch IPC E2E Test\n');

  // ── 1. Spawn worker ───────────────────────────────────────────────────
  console.log('1. Spawn worker in daemon mode');
  // NOTE(review): hard-coded test user UID — presumably a seeded fixture user; confirm.
  const worker = spawnWorker(EXE, ['worker', '--daemon', '--user-uid', '3bb4cfbf-318b-44d3-a9d3-35680e738421']);
  const readyMsg = await worker.ready;
  assert(readyMsg.type === 'ready', 'Worker sends ready signal');

  // ── 2. Register event collector ───────────────────────────────────────
  const collector = createCollector();
  worker.onEvent(collector.handler);

  // ── 3. Send gridsearch request (matching gridsearch:enrich) ────────────
  console.log('2. Send gridsearch request (Aruba / recycling / --enrich)');
  const t0 = Date.now();

  // Very long timeout — enrichment can take minutes
  const result = await worker.request(
    {
      type: 'gridsearch',
      payload: {
        ...sampleConfig,
        enrich: true,
      },
    },
    5 * 60 * 1000 // 5 min timeout
  );

  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
  console.log(`\n\n  ⏱️ Completed in ${elapsed}s\n`);

  // ── 4. Verify final result ────────────────────────────────────────────
  console.log('3. Verify job_result');
  assert(result.type === 'job_result', `Response type is "job_result" (got "${result.type}")`);

  const summary = result.payload ?? null;
  assert(summary !== null, 'job_result payload is present');

  // Shape checks on the summary counters/timings the worker reports.
  if (summary) {
    assert(typeof summary.totalMs === 'number', `totalMs is number (${summary.totalMs})`);
    assert(typeof summary.searchMs === 'number', `searchMs is number (${summary.searchMs})`);
    assert(typeof summary.enrichMs === 'number', `enrichMs is number (${summary.enrichMs})`);
    assert(typeof summary.freshApiCalls === 'number', `freshApiCalls is number (${summary.freshApiCalls})`);
    assert(typeof summary.waypointCount === 'number', `waypointCount is number (${summary.waypointCount})`);
    assert(summary.gridStats && typeof summary.gridStats.validCells === 'number', 'gridStats.validCells present');
    assert(summary.searchStats && typeof summary.searchStats.totalResults === 'number', 'searchStats.totalResults present');
    assert(typeof summary.enrichedOk === 'number', `enrichedOk is number (${summary.enrichedOk})`);
    assert(typeof summary.enrichedTotal === 'number', `enrichedTotal is number (${summary.enrichedTotal})`);
  }

  // ── 5. Verify event sequence ──────────────────────────────────────────
  console.log('4. Verify event stream');
  const e = collector.events;

  assert(e['grid-ready'].length === 1, `Exactly 1 grid-ready event (got ${e['grid-ready'].length})`);
  assert(e['waypoint-start'].length > 0, `At least 1 waypoint-start event (got ${e['waypoint-start'].length})`);
  assert(e['area'].length > 0, `At least 1 area event (got ${e['area'].length})`);
  // Each waypoint search should produce exactly one area summary.
  assert(e['waypoint-start'].length === e['area'].length, `waypoint-start count (${e['waypoint-start'].length}) === area count (${e['area'].length})`);
  assert(e['enrich-start'].length === 1, `Exactly 1 enrich-start event (got ${e['enrich-start'].length})`);

  // node-error is network-dependent, so count ok + error together.
  const totalNodes = e['node'].length + e['node-error'].length;
  assert(totalNodes > 0, `At least 1 node event (got ${totalNodes}: ${e['node'].length} ok, ${e['node-error'].length} errors)`);

  // Validate grid-ready payload
  if (e['grid-ready'].length > 0) {
    const gr = e['grid-ready'][0].payload ?? {};
    assert(Array.isArray(gr.areas), 'grid-ready.areas is array');
    assert(typeof gr.total === 'number' && gr.total > 0, `grid-ready.total > 0 (${gr.total})`);
  }

  // Validate location events have required fields
  if (e['location'].length > 0) {
    const loc = e['location'][0].payload ?? {};
    assert(loc.location && typeof loc.location.title === 'string', 'location event has location.title');
    assert(loc.location && typeof loc.location.place_id === 'string', 'location event has location.place_id');
    assert(typeof loc.areaName === 'string', 'location event has areaName');
  }
  assert(e['location'].length > 0, `At least 1 location event (got ${e['location'].length})`);

  // Validate node payloads
  if (e['node'].length > 0) {
    const nd = e['node'][0].payload ?? {};
    assert(typeof nd.placeId === 'string', 'node event has placeId');
    assert(typeof nd.title === 'string', 'node event has title');
    assert(Array.isArray(nd.emails), 'node event has emails array');
    assert(typeof nd.status === 'string', 'node event has status');
  }

  // ── 6. Print event summary ────────────────────────────────────────────
  console.log('\n5. Event summary');
  for (const [type, arr] of Object.entries(e)) {
    if (arr.length > 0) console.log(`  ${type}: ${arr.length}`);
  }

  // ── 7. Shutdown ───────────────────────────────────────────────────────
  console.log('\n6. Graceful shutdown');
  const shutdownRes = await worker.shutdown();
  assert(shutdownRes.type === 'shutdown_ack', 'Shutdown acknowledged');

  // Short grace period so exitCode is populated before we check it.
  await new Promise(r => setTimeout(r, 500));
  assert(worker.process.exitCode === 0, `Worker exited with code 0 (got ${worker.process.exitCode})`);

  // ── Summary ───────────────────────────────────────────────────────────
  console.log(`\n────────────────────────────────`);
  console.log(`  Passed: ${passed}  Failed: ${failed}`);
  console.log(`────────────────────────────────\n`);

  process.exit(failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Entry point — any unhandled error fails the test run with exit code 1.
run().catch((err) => {
  console.error('Test runner error:', err);
  process.exit(1);
});
|
||||
218
packages/media/cpp/orchestrator/test-gridsearch-ipc-uds-meta.mjs
Normal file
218
packages/media/cpp/orchestrator/test-gridsearch-ipc-uds-meta.mjs
Normal file
@ -0,0 +1,218 @@
|
||||
/**
|
||||
* orchestrator/test-gridsearch-ipc-uds-meta.mjs
|
||||
*
|
||||
* E2E test for Unix Domain Sockets / Windows Named Pipes (Meta Enrichment)!
|
||||
* Spawns the worker in `--uds` mode and tests direct high-throughput
|
||||
* lock-free JSON binary framing over a net.Socket.
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { resolve, dirname, join } from 'node:path';
|
||||
import { readFileSync, existsSync, unlinkSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import net from 'node:net';
|
||||
import { tmpdir } from 'node:os';
|
||||
|
||||
// Resolve paths relative to this script file, not the process CWD.
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
// NOTE(review): declared but never used in this file — presumably a leftover toggle.
const TEST_CANCEL = false;

if (!existsSync(EXE)) {
  console.error(`❌ Binary not found at ${EXE}`);
  process.exit(1);
}

// Transport endpoint: a TCP port string on Windows, a UDS socket path elsewhere.
const PIPE_NAME = 'polymech-test-uds-meta';
const CPP_UDS_ARG = IS_WIN ? '4001' : join(tmpdir(), `${PIPE_NAME}.sock`);

// Remove a stale socket file from a previous run so the worker can bind.
if (!IS_WIN && existsSync(CPP_UDS_ARG)) {
  unlinkSync(CPP_UDS_ARG);
}

console.log(`Binary: ${EXE}`);
console.log(`C++ Arg: ${CPP_UDS_ARG}\n`);
|
||||
|
||||
// ── Event collector ─────────────────────────────────────────────────────────
/**
 * Collect IPC events by type, print live progress for noisy types, and fire
 * `onComplete` when the final job_result arrives.
 *
 * The handler reads `this.onComplete`, so it MUST be invoked as a method call
 * (`collector.handler(msg)`) — which the socket data path below does; do not
 * detach it as a bare callback.
 *
 * NOTE(review): this variant reads `msg.data` while the stdio daemon test
 * reads `msg.payload` — presumably the UDS wire format differs; confirm.
 */
function createCollector() {
  const events = {};
  for (const t of ['grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage', 'job_result']) {
    events[t] = [];
  }
  return {
    events,
    // Set by the caller; invoked once when job_result is seen.
    onComplete: null,
    handler(msg) {
      const t = msg.type;
      // Known types append to their bucket; unknown types get a new one.
      if (events[t]) events[t].push(msg);
      else events[t] = [msg];

      const d = msg.data ?? {};
      if (t === 'waypoint-start') {
        process.stdout.write(`\r  🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
      } else if (t === 'node') {
        process.stdout.write(`\r  📧 Enriched: ${d.title?.substring(0, 40) ?? ''}          `);
      } else if (t === 'node-error') {
        process.stdout.write(`\r  ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''}          `);
      } else if (t === 'job_result') {
        console.log(`\n  🏁 Pipeline complete!`);
        if (this.onComplete) this.onComplete(msg);
      }
    },
  };
}
|
||||
|
||||
let passed = 0;
let failed = 0;
/** Labelled assertion: logs a ✅/❌ line and bumps the file-level counters. */
function assert(condition, label) {
  if (condition) {
    console.log(`  ✅ ${label}`);
    passed++;
  } else {
    console.error(`  ❌ ${label}`);
    failed++;
  }
}
|
||||
|
||||
/**
 * E2E flow over UDS/TCP: spawn the worker daemon, connect a net.Socket with
 * retries, stream length-prefixed JSON frames, send a gridsearch job, wait for
 * job_result (or a 5-minute safety timeout), then verify the event stream and
 * enrichment artifacts before cleaning up. Exits non-zero on failure.
 */
async function run() {
  console.log('🧪 Gridsearch UDS Meta E2E Test\n');

  // 1. Spawn worker in UDS mode
  console.log('1. Spawning remote C++ Taskflow Daemon');
  const worker = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG, '--daemon'], { stdio: 'inherit' });

  // Give the daemon a moment to boot
  console.log('2. Connecting net.Socket with retries...');

  // Up to 15 attempts, 500 ms apart; rethrow the last connect error.
  let socket;
  for (let i = 0; i < 15; i++) {
    try {
      await new Promise((resolve, reject) => {
        if (IS_WIN) {
          socket = net.connect({ port: 4001, host: '127.0.0.1' });
        } else {
          socket = net.connect(CPP_UDS_ARG);
        }
        socket.once('connect', resolve);
        socket.once('error', reject);
      });
      console.log('  ✅ Socket Connected to UDS!');
      break;
    } catch (e) {
      if (i === 14) throw e;
      await new Promise(r => setTimeout(r, 500));
    }
  }

  const collector = createCollector();
  let buffer = Buffer.alloc(0);

  // Buffer framing logic (length-prefixed streaming)
  socket.on('data', (chunk) => {
    buffer = Buffer.concat([buffer, chunk]);
    while (buffer.length >= 4) {
      const len = buffer.readUInt32LE(0);
      if (buffer.length >= 4 + len) {
        const payload = buffer.toString('utf8', 4, 4 + len);
        buffer = buffer.subarray(4 + len);
        try {
          const msg = JSON.parse(payload);
          // Method call preserves `this` for collector.onComplete.
          collector.handler(msg);
        } catch (e) {
          console.error("JSON PARSE ERROR:", e, payload);
        }
      } else {
        break; // Wait for more chunks
      }
    }
  });

  // 3. Send Gridsearch payload
  // USE gridsearch-sample.json instead of gridsearch-bcn-universities.json
  const sampleConfig = JSON.parse(
    readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-sample.json'), 'utf8')
  );

  sampleConfig.configPath = resolve(__dirname, '..', 'config', 'postgres.toml');
  sampleConfig.jobId = 'uds-meta-test-abc';
  sampleConfig.noCache = true; // force re-enrichment even if cached

  console.log('3. Writing serialized IPC Payload over pipe...');
  // Frame = 4-byte LE length header + UTF-8 JSON body.
  const jsonStr = JSON.stringify(sampleConfig);
  const lenBuf = Buffer.alloc(4);
  lenBuf.writeUInt32LE(Buffer.byteLength(jsonStr));
  socket.write(lenBuf);
  socket.write(jsonStr);

  // 4. Wait for pipeline completion (job_result event) or timeout
  console.log('\n4. Awaiting multi-threaded Execution Pipeline (can take minutes)...\n');

  await new Promise((resolve) => {
    collector.onComplete = () => {
      // Send stop command to gracefully shut down the daemon
      console.log('  📤 Sending stop command to daemon...');
      const stopPayload = JSON.stringify({ action: 'stop' });
      const stopLen = Buffer.alloc(4);
      stopLen.writeUInt32LE(Buffer.byteLength(stopPayload));
      socket.write(stopLen);
      socket.write(stopPayload);
      setTimeout(resolve, 1000); // Give daemon a moment to ack
    };

    // Safety timeout
    setTimeout(() => {
      console.log('\n  ⏰ Timeout reached (300s) — forcing shutdown.');
      resolve();
    }, 300000); // Wait up to 5 minutes
  });

  console.log('\n\n5. Event summary');
  for (const [k, v] of Object.entries(collector.events)) {
    console.log(`  ${k}: ${v.length}`);
  }

  // Assertions
  const ev = collector.events;
  assert(ev['grid-ready'].length === 1, 'grid-ready emitted once');
  assert(ev['waypoint-start'].length > 0, 'waypoint-start events received');
  assert(ev['location'].length > 0, 'location events received');
  assert(ev['enrich-start'].length === 1, 'enrich-start emitted once');
  assert(ev['job_result'].length === 1, 'job_result emitted once');

  // Verify social profiles and md body
  const nodes = ev['node'];
  let foundSocial = false;
  let foundSiteMd = false;

  for (const n of nodes) {
    const d = n.data;
    if (!d) continue;

    if (d.socials && d.socials.length > 0) {
      foundSocial = true;
    }

    if (d.sites && Array.isArray(d.sites) && d.sites.length > 0) {
      foundSiteMd = true;
    }
  }

  // Socials are data-dependent: assert only when present, warn otherwise.
  if (foundSocial) {
    assert(foundSocial, 'At least one enriched node has social media profiles discovered');
  } else {
    console.log('  ⚠️ No social media profiles discovered in this run (data-dependent), but pipeline completed.');
  }

  assert(foundSiteMd, 'At least one enriched node has markdown sites mapped');

  console.log('6. Cleanup');
  socket.destroy();
  worker.kill('SIGTERM');

  console.log(`\n────────────────────────────────`);
  console.log(`  Passed: ${passed}  Failed: ${failed}`);
  console.log(`────────────────────────────────`);
  process.exit(failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Entry point — surface any unhandled error and exit non-zero.
run().catch(e => {
  console.error(e);
  process.exit(1);
});
|
||||
255
packages/media/cpp/orchestrator/test-gridsearch-ipc-uds.mjs
Normal file
255
packages/media/cpp/orchestrator/test-gridsearch-ipc-uds.mjs
Normal file
@ -0,0 +1,255 @@
|
||||
/**
|
||||
* orchestrator/test-gridsearch-ipc-uds.mjs
|
||||
*
|
||||
* E2E test for Unix Domain Sockets / Windows Named Pipes!
|
||||
* Spawns the worker in `--uds` mode and tests direct high-throughput
|
||||
* lock-free JSON binary framing over a net.Socket.
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { resolve, dirname, join } from 'node:path';
|
||||
import { readFileSync, existsSync, unlinkSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import net from 'node:net';
|
||||
import { tmpdir } from 'node:os';
|
||||
|
||||
// Resolve this script's directory (ESM has no __dirname builtin).
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';
// Worker binary produced by the CMake build (CMAKE_RUNTIME_OUTPUT_DIRECTORY = dist/).
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
// Flip to true to exercise the mid-run cancel path (see the TEST_CANCEL block in run()).
const TEST_CANCEL = false;

// Fail fast if the binary has not been built yet.
if (!existsSync(EXE)) {
  console.error(`❌ Binary not found at ${EXE}`);
  process.exit(1);
}

const PIPE_NAME = 'polymech-test-uds';
// On Windows the worker listens on TCP port 4000 instead of a named pipe path;
// on POSIX it binds a Unix domain socket file in the OS temp dir.
const CPP_UDS_ARG = IS_WIN ? '4000' : join(tmpdir(), `${PIPE_NAME}.sock`);

// Remove a stale socket file from a previous (crashed) run so bind() succeeds.
if (!IS_WIN && existsSync(CPP_UDS_ARG)) {
  unlinkSync(CPP_UDS_ARG);
}

console.log(`Binary: ${EXE}`);
console.log(`C++ Arg: ${CPP_UDS_ARG}\n`);
|
||||
|
||||
// ── Event collector ─────────────────────────────────────────────────────────
|
||||
/**
 * Build an in-memory collector for IPC events streamed by the worker.
 *
 * Returns `{ events, onComplete, handler }`:
 * - `events` maps event type → array of received messages (known types are
 *   pre-seeded with empty arrays; unknown types are added lazily);
 * - `onComplete`, when set, is invoked with the `job_result` message;
 * - `handler(msg)` records the message and prints a live progress line for
 *   selected types (payload read from `msg.data`).
 */
function createCollector() {
  const knownTypes = ['grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage', 'job_result'];
  const events = Object.fromEntries(knownTypes.map((type) => [type, []]));

  return {
    events,
    onComplete: null,
    handler(msg) {
      const t = msg.type;
      // Lazily create buckets for event types not in the known list.
      (events[t] ??= []).push(msg);

      const d = msg.data ?? {};
      switch (t) {
        case 'waypoint-start':
          process.stdout.write(`\r 🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
          break;
        case 'node':
          process.stdout.write(`\r 📧 Enriched: ${d.title?.substring(0, 40) ?? ''} `);
          break;
        case 'node-error':
          process.stdout.write(`\r ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''} `);
          break;
        case 'job_result':
          console.log(`\n 🏁 Pipeline complete!`);
          // `this` is the collector object when called as collector.handler(msg).
          if (this.onComplete) this.onComplete(msg);
          break;
        default:
          break;
      }
    },
  };
}
|
||||
|
||||
// Running pass/fail tallies; printed in the final summary and used for the exit code.
let passed = 0;
let failed = 0;
/**
 * Minimal test assertion: logs ✅/❌ with a label and bumps the matching
 * counter. Deliberately never throws, so the run continues past failures.
 * @param {unknown} condition truthy = pass
 * @param {string} label human-readable description of the check
 */
function assert(condition, label) {
  if (condition) { console.log(` ✅ ${label}`); passed++; }
  else { console.error(` ❌ ${label}`); failed++; }
}
|
||||
|
||||
/**
 * E2E driver: spawn the worker daemon in UDS/named-pipe mode, connect a
 * net.Socket, stream one gridsearch job using length-prefixed JSON framing,
 * assert on the collected events, then shut everything down.
 * Exits the process: 0 when all assertions passed, 1 otherwise.
 */
async function run() {
  console.log('🧪 Gridsearch UDS / Named Pipe E2E Test\n');

  // 1. Spawn worker in UDS mode
  console.log('1. Spawning remote C++ Taskflow Daemon');
  // stdio: 'inherit' — daemon output interleaves with this test's own logging.
  const worker = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG, '--daemon'], { stdio: 'inherit' });

  // Give the daemon a moment to boot
  console.log('2. Connecting net.Socket with retries...');

  let socket;
  // Up to 15 attempts, 500 ms apart (~7.5 s budget) for the daemon to start listening.
  for (let i = 0; i < 15; i++) {
    try {
      await new Promise((resolve, reject) => {
        if (IS_WIN) {
          // On Windows CPP_UDS_ARG is the TCP port '4000', not a pipe path.
          socket = net.connect({ port: 4000, host: '127.0.0.1' });
        } else {
          socket = net.connect(CPP_UDS_ARG);
        }
        socket.once('connect', resolve);
        socket.once('error', reject);
      });
      console.log(' ✅ Socket Connected to UDS!');
      break;
    } catch (e) {
      if (i === 14) throw e; // last attempt — propagate the connect failure
      await new Promise(r => setTimeout(r, 500));
    }
  }

  const collector = createCollector();
  let buffer = Buffer.alloc(0);

  // Buffer framing logic (length-prefixed streaming)
  // Wire format: 4-byte little-endian length, then `length` bytes of UTF-8 JSON.
  socket.on('data', (chunk) => {
    buffer = Buffer.concat([buffer, chunk]);
    while (buffer.length >= 4) {
      const len = buffer.readUInt32LE(0);
      if (buffer.length >= 4 + len) {
        const payload = buffer.toString('utf8', 4, 4 + len);
        buffer = buffer.subarray(4 + len);
        try {
          const msg = JSON.parse(payload);
          collector.handler(msg);
        } catch (e) {
          console.error("JSON PARSE ERROR:", e, payload);
        }
      } else {
        break; // Wait for more chunks
      }
    }
  });

  // 3. Send Gridsearch payload
  const sampleConfig = JSON.parse(
    readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-bcn-universities.json'), 'utf8')
  );

  sampleConfig.configPath = resolve(__dirname, '..', 'config', 'postgres.toml');
  // Fixed jobId so the (optional) cancel payload below can target this run.
  sampleConfig.jobId = 'uds-test-cancel-abc';

  console.log('3. Writing serialized IPC Payload over pipe...');
  const jsonStr = JSON.stringify(sampleConfig);
  const lenBuf = Buffer.alloc(4);
  lenBuf.writeUInt32LE(Buffer.byteLength(jsonStr));
  socket.write(lenBuf);
  socket.write(jsonStr);

  // Send cancellation after 5 seconds
  if (TEST_CANCEL) {
    setTimeout(() => {
      console.log('\n\n--> Testing Dynamic Cancellation (Sending cancel event for uds-test-cancel-abc)...');
      const cancelPayload = JSON.stringify({ action: "cancel", jobId: "uds-test-cancel-abc" });
      const cancelLenBuf = Buffer.alloc(4);
      cancelLenBuf.writeUInt32LE(Buffer.byteLength(cancelPayload));
      socket.write(cancelLenBuf);
      socket.write(cancelPayload);
    }, 5000);
  }

  // 4. Wait for pipeline completion (job_result event) or timeout
  console.log('\n4. Awaiting multi-threaded Execution Pipeline (can take minutes)...\n');

  await new Promise((resolve) => {
    // collector fires onComplete when the job_result event arrives.
    collector.onComplete = () => {
      // Send stop command to gracefully shut down the daemon
      console.log(' 📤 Sending stop command to daemon...');
      const stopPayload = JSON.stringify({ action: 'stop' });
      const stopLen = Buffer.alloc(4);
      stopLen.writeUInt32LE(Buffer.byteLength(stopPayload));
      socket.write(stopLen);
      socket.write(stopPayload);
      setTimeout(resolve, 1000); // Give daemon a moment to ack
    };

    // Safety timeout — on expiry we fall through to the assertions below,
    // which will then fail on the missing events.
    setTimeout(() => {
      console.log('\n ⏰ Timeout reached (120s) — forcing shutdown.');
      resolve();
    }, 120000);
  });

  console.log('\n\n5. Event summary');
  for (const [k, v] of Object.entries(collector.events)) {
    console.log(` ${k}: ${v.length}`);
  }

  // Assertions
  const ev = collector.events;
  assert(ev['grid-ready'].length === 1, 'grid-ready emitted once');
  assert(ev['waypoint-start'].length > 0, 'waypoint-start events received');
  assert(ev['location'].length > 0, 'location events received');
  assert(ev['enrich-start'].length === 1, 'enrich-start emitted once');
  assert(ev['job_result'].length === 1, 'job_result emitted once');

  // Check enrichment skip log (if present in log events)
  // NOTE(review): 'log' is not pre-registered in createCollector — the handler
  // creates the bucket lazily, hence the ?? [] guard here.
  const logEvents = ev['log'] ?? [];
  const skipLog = logEvents.find(l =>
    typeof l.data === 'string' && l.data.includes('already enriched')
  );
  const nodeCount = ev['node'].length + ev['node-error'].length;
  if (skipLog) {
    console.log(` ℹ️ Pre-enrich skip detected: ${skipLog.data}`);
    assert(nodeCount === 0, 'no enrichment needed (all skipped)');
  } else {
    console.log(' ℹ️ No pre-enrich skips (all locations are new or unenriched)');
    assert(nodeCount > 0, 'enrichment node events received');
  }

  // Check filterTypes assertions: all locations must have website + matching type
  const FILTER_TYPE = 'Recycling center';
  const locations = ev['location'];
  const badWebsite = locations.filter(l => {
    const loc = l.data?.location;
    return !loc?.website;
  });

  assert(badWebsite.length === 0, `all locations have website (${badWebsite.length} missing)`);

  // A location matches when FILTER_TYPE appears in its types[] or equals its type.
  const badType = locations.filter(l => {
    const loc = l.data?.location;
    const types = loc?.types ?? [];
    const type = loc?.type ?? '';
    return !types.includes(FILTER_TYPE) && type !== FILTER_TYPE;
  });
  if (badType.length > 0) {
    console.log(` ❌ Mismatched locations:`);
    badType.slice(0, 3).forEach(l => console.log(JSON.stringify(l.data?.location, null, 2)));
  }
  assert(badType.length === 0, `all locations match type "${FILTER_TYPE}" (${badType.length} mismatched)`);

  const filterLog = logEvents.find(l =>
    typeof l.data === 'string' && l.data.includes('locations removed')
  );
  if (filterLog) {
    console.log(` ℹ️ Filter applied: ${filterLog.data}`);
  }

  const filterTypesLog = logEvents.filter(l =>
    typeof l.data === 'string' && (l.data.includes('filterTypes:') || l.data.includes(' - '))
  );
  if (filterTypesLog.length > 0) {
    console.log(` ℹ️ Parsed filterTypes in C++:`);
    filterTypesLog.forEach(l => console.log(` ${l.data}`));
  }

  console.log(` ℹ️ Locations after filter: ${locations.length}`);

  console.log('6. Cleanup');
  socket.destroy();
  worker.kill('SIGTERM');

  console.log(`\n────────────────────────────────`);
  console.log(` Passed: ${passed} Failed: ${failed}`);
  console.log(`────────────────────────────────`);
  process.exit(failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Entry point: launch the async test; unhandled rejections exit non-zero.
run().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
204
packages/media/cpp/orchestrator/test-gridsearch-ipc.mjs
Normal file
204
packages/media/cpp/orchestrator/test-gridsearch-ipc.mjs
Normal file
@ -0,0 +1,204 @@
|
||||
/**
|
||||
* orchestrator/test-gridsearch-ipc.mjs
|
||||
*
|
||||
* E2E test: spawn the C++ worker, send a gridsearch request
|
||||
* matching `npm run gridsearch:enrich` defaults, collect IPC events,
|
||||
* and verify the full event sequence.
|
||||
*
|
||||
* Run: node orchestrator/test-gridsearch-ipc.mjs
|
||||
* Needs: npm run build-debug (or npm run build)
|
||||
*/
|
||||
|
||||
import { spawnWorker } from './spawn.mjs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import fs from 'node:fs';
|
||||
|
||||
// Resolve this script's directory (ESM has no __dirname builtin).
const __dirname = dirname(fileURLToPath(import.meta.url));
const IS_WIN = process.platform === 'win32';
const EXE_NAME = IS_WIN ? 'polymech-cli.exe' : 'polymech-cli';

// Worker binary produced by the build (dist/).
const EXE = resolve(__dirname, '..', 'dist', EXE_NAME);
// Fail fast with an actionable message when the binary is missing.
if (!fs.existsSync(EXE)) {
  console.error(`❌ No ${EXE_NAME} found in dist. Run npm run build first.`);
  process.exit(1);
}
console.log(`Binary: ${EXE}\n`);

// Load the sample settings (same as gridsearch:enrich)
const sampleConfig = JSON.parse(
  readFileSync(resolve(__dirname, '..', 'config', 'gridsearch-sample.json'), 'utf8')
);
|
||||
|
||||
// Running pass/fail tallies; printed in the final summary and used for the exit code.
let passed = 0;
let failed = 0;

/**
 * Minimal test assertion: logs ✅/❌ with a label and bumps the matching
 * counter. Deliberately never throws, so the run continues past failures.
 * @param {unknown} condition truthy = pass
 * @param {string} label human-readable description of the check
 */
function assert(condition, label) {
  if (condition) {
    console.log(` ✅ ${label}`);
    passed++;
  } else {
    console.error(` ❌ ${label}`);
    failed++;
  }
}
|
||||
|
||||
// ── Event collector ─────────────────────────────────────────────────────────
|
||||
|
||||
// Event types the pipeline is expected to emit at least once.
// NOTE(review): this list appears unreferenced in the visible code — the
// assertions in run() check each type individually; confirm before removing.
const EXPECTED_EVENTS = [
  'grid-ready',
  'waypoint-start',
  'area',
  'location',
  'enrich-start',
  'node',
  'nodePage',
  // 'node-error' — may or may not occur, depends on network
];
|
||||
|
||||
/**
 * Build an in-memory collector for worker IPC events.
 *
 * Returns `{ events, handler }`:
 * - `events` maps event type → array of received messages (known types are
 *   pre-seeded with empty arrays; unknown types are added on first sight);
 * - `handler(msg)` records the message and prints a live progress line for
 *   selected types (payload read from `msg.payload`).
 */
function createCollector() {
  const knownTypes = ['grid-ready', 'waypoint-start', 'area', 'location',
    'enrich-start', 'node', 'node-error', 'nodePage'];
  const events = Object.fromEntries(knownTypes.map((type) => [type, []]));

  return {
    events,
    handler(msg) {
      const t = msg.type;
      // Lazily create buckets for event types not in the known list.
      (events[t] ??= []).push(msg);

      // Live progress indicator
      const d = msg.payload ?? {};
      switch (t) {
        case 'waypoint-start':
          process.stdout.write(`\r 🔍 Searching waypoint ${(d.index ?? 0) + 1}/${d.total ?? '?'}...`);
          break;
        case 'node':
          process.stdout.write(`\r 📧 Enriched: ${d.title?.substring(0, 40) ?? ''} `);
          break;
        case 'node-error':
          process.stdout.write(`\r ⚠️ Error: ${d.node?.title?.substring(0, 40) ?? ''} `);
          break;
        default:
          break;
      }
    },
  };
}
|
||||
|
||||
// ── Main test ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * E2E driver: spawn the worker via spawnWorker(), issue a single gridsearch
 * request with enrichment enabled, then assert on the final job_result
 * payload, the collected event stream, and a graceful shutdown.
 * Exits the process: 0 when all assertions passed, 1 otherwise.
 */
async function run() {
  console.log('🧪 Gridsearch IPC E2E Test\n');

  // ── 1. Spawn worker ───────────────────────────────────────────────────
  console.log('1. Spawn worker');
  const worker = spawnWorker(EXE);
  // worker.ready resolves with the first message from the child.
  const readyMsg = await worker.ready;
  assert(readyMsg.type === 'ready', 'Worker sends ready signal');

  // ── 2. Register event collector ───────────────────────────────────────
  const collector = createCollector();
  // handler does not use `this`, so passing the detached method is safe here.
  worker.onEvent(collector.handler);

  // ── 3. Send gridsearch request (matching gridsearch:enrich) ────────────
  console.log('2. Send gridsearch request (Aruba / recycling / --enrich)');
  const t0 = Date.now();

  // Very long timeout — enrichment can take minutes
  const result = await worker.request(
    {
      type: 'gridsearch',
      payload: {
        ...sampleConfig,
        enrich: true,
      },
    },
    5 * 60 * 1000 // 5 min timeout
  );

  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
  console.log(`\n\n ⏱️ Completed in ${elapsed}s\n`);

  // ── 4. Verify final result ────────────────────────────────────────────
  console.log('3. Verify job_result');
  assert(result.type === 'job_result', `Response type is "job_result" (got "${result.type}")`);

  const summary = result.payload ?? null;
  assert(summary !== null, 'job_result payload is present');

  // Shape checks on the summary emitted by the C++ side.
  if (summary) {
    assert(typeof summary.totalMs === 'number', `totalMs is number (${summary.totalMs})`);
    assert(typeof summary.searchMs === 'number', `searchMs is number (${summary.searchMs})`);
    assert(typeof summary.enrichMs === 'number', `enrichMs is number (${summary.enrichMs})`);
    assert(typeof summary.freshApiCalls === 'number', `freshApiCalls is number (${summary.freshApiCalls})`);
    assert(typeof summary.waypointCount === 'number', `waypointCount is number (${summary.waypointCount})`);
    assert(summary.gridStats && typeof summary.gridStats.validCells === 'number', 'gridStats.validCells present');
    assert(summary.searchStats && typeof summary.searchStats.totalResults === 'number', 'searchStats.totalResults present');
    assert(typeof summary.enrichedOk === 'number', `enrichedOk is number (${summary.enrichedOk})`);
    assert(typeof summary.enrichedTotal === 'number', `enrichedTotal is number (${summary.enrichedTotal})`);
  }

  // ── 5. Verify event sequence ──────────────────────────────────────────
  console.log('4. Verify event stream');
  const e = collector.events;

  assert(e['grid-ready'].length === 1, `Exactly 1 grid-ready event (got ${e['grid-ready'].length})`);
  assert(e['waypoint-start'].length > 0, `At least 1 waypoint-start event (got ${e['waypoint-start'].length})`);
  assert(e['area'].length > 0, `At least 1 area event (got ${e['area'].length})`);
  // Each waypoint search should produce exactly one area event.
  assert(e['waypoint-start'].length === e['area'].length, `waypoint-start count (${e['waypoint-start'].length}) === area count (${e['area'].length})`);
  assert(e['enrich-start'].length === 1, `Exactly 1 enrich-start event (got ${e['enrich-start'].length})`);

  const totalNodes = e['node'].length + e['node-error'].length;
  assert(totalNodes > 0, `At least 1 node event (got ${totalNodes}: ${e['node'].length} ok, ${e['node-error'].length} errors)`);

  // Validate grid-ready payload
  if (e['grid-ready'].length > 0) {
    const gr = e['grid-ready'][0].payload ?? {};
    assert(Array.isArray(gr.areas), 'grid-ready.areas is array');
    assert(typeof gr.total === 'number' && gr.total > 0, `grid-ready.total > 0 (${gr.total})`);
  }

  // Validate location events have required fields
  if (e['location'].length > 0) {
    const loc = e['location'][0].payload ?? {};
    assert(loc.location && typeof loc.location.title === 'string', 'location event has location.title');
    assert(loc.location && typeof loc.location.place_id === 'string', 'location event has location.place_id');
    assert(typeof loc.areaName === 'string', 'location event has areaName');
  }
  assert(e['location'].length > 0, `At least 1 location event (got ${e['location'].length})`);

  // Validate node payloads
  if (e['node'].length > 0) {
    const nd = e['node'][0].payload ?? {};
    assert(typeof nd.placeId === 'string', 'node event has placeId');
    assert(typeof nd.title === 'string', 'node event has title');
    assert(Array.isArray(nd.emails), 'node event has emails array');
    assert(typeof nd.status === 'string', 'node event has status');
  }

  // ── 6. Print event summary ────────────────────────────────────────────
  console.log('\n5. Event summary');
  for (const [type, arr] of Object.entries(e)) {
    if (arr.length > 0) console.log(` ${type}: ${arr.length}`);
  }

  // ── 7. Shutdown ───────────────────────────────────────────────────────
  console.log('\n6. Graceful shutdown');
  const shutdownRes = await worker.shutdown();
  assert(shutdownRes.type === 'shutdown_ack', 'Shutdown acknowledged');

  // Brief grace period so the child's exit code is observable.
  await new Promise(r => setTimeout(r, 500));
  assert(worker.process.exitCode === 0, `Worker exited with code 0 (got ${worker.process.exitCode})`);

  // ── Summary ───────────────────────────────────────────────────────────
  console.log(`\n────────────────────────────────`);
  console.log(` Passed: ${passed} Failed: ${failed}`);
  console.log(`────────────────────────────────\n`);

  process.exit(failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Entry point: launch the async test; unhandled rejections exit non-zero.
run().catch((err) => {
  console.error('Test runner error:', err);
  process.exit(1);
});
|
||||
802
packages/media/cpp/orchestrator/test-ipc-classifier.mjs
Normal file
802
packages/media/cpp/orchestrator/test-ipc-classifier.mjs
Normal file
@ -0,0 +1,802 @@
|
||||
/**
|
||||
* orchestrator/test-ipc-classifier.mjs
|
||||
*
|
||||
* IPC + local llama: one kbot-ai call — semantic distance from anchor "machine workshop"
|
||||
* to every business label (JobViewer.tsx ~205). Output is a single JSON array (+ meta).
|
||||
*
|
||||
* Run: npm run test:ipc:classifier
|
||||
* CLI (overrides env): yargs — see parseClassifierArgv()
|
||||
* npm run test:ipc:classifier -- --help
|
||||
* npm run test:ipc:classifier -- --provider openrouter --model openai/gpt-4o-mini --backend remote -n 3
|
||||
* npm run test:ipc:classifier -- -r openrouter -m openai/gpt-4o-mini --backend remote -n 3 -F structured
|
||||
* npm run test:ipc:classifier -- -r openrouter -m x -F stress,no-heartbeat
|
||||
* npm run test:ipc:classifier -- -r openrouter -m x --backend remote -n 3 -F stress,structured
|
||||
* npm run test:ipc:classifier -- -r openrouter -m x --backend remote -F structured --dst ./out.json
|
||||
*
|
||||
* Env:
|
||||
* KBOT_IPC_CLASSIFIER_LLAMA — set 0 to use OpenRouter (KBOT_ROUTER, KBOT_IPC_MODEL) instead of local llama :8888
|
||||
* KBOT_IPC_LLAMA_AUTOSTART — 0 to skip spawning run-7b.sh (llama mode only)
|
||||
* KBOT_ROUTER / KBOT_IPC_MODEL — when classifier llama is off (same as test-ipc step 6)
|
||||
* KBOT_CLASSIFIER_LIMIT — max labels in the batch (default: all)
|
||||
* KBOT_CLASSIFIER_TIMEOUT_MS — single batched kbot-ai call (default: 300000)
|
||||
*
|
||||
* OpenRouter: npm run test:ipc:classifier:openrouter (sets KBOT_IPC_CLASSIFIER_LLAMA=0)
|
||||
* Stress (batch repeats, one worker): KBOT_CLASSIFIER_STRESS_RUNS=N (default 1)
|
||||
* npm run test:ipc:classifier:openrouter:stress → OpenRouter + 5 runs (override N via env)
|
||||
*
|
||||
* Reports (reports.js): cwd/tests/test-ipc-classifier__HH-mm.{json,md}; distances in
|
||||
* test-ipc-classifier-distances__HH-mm.json (same timestamp as the main JSON).
|
||||
* With -F structured, the prompt asks for {"items":[...]} to match json_object APIs.
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import net from 'node:net';
|
||||
import { existsSync, unlinkSync } from 'node:fs';
|
||||
import yargs from 'yargs';
|
||||
import { hideBin } from 'yargs/helpers';
|
||||
|
||||
import {
|
||||
distExePath,
|
||||
platform,
|
||||
uds,
|
||||
timeouts,
|
||||
kbotAiPayloadLlamaLocal,
|
||||
kbotAiPayloadFromEnv,
|
||||
ensureLlamaLocalServer,
|
||||
llama,
|
||||
router,
|
||||
} from './presets.js';
|
||||
import {
|
||||
createAssert,
|
||||
payloadObj,
|
||||
llamaAutostartEnabled,
|
||||
ipcClassifierLlamaEnabled,
|
||||
createIpcClient,
|
||||
pipeWorkerStderr,
|
||||
} from './test-commons.js';
|
||||
import {
|
||||
reportFilePathWithExt,
|
||||
timeParts,
|
||||
createMetricsCollector,
|
||||
buildMetricsBundle,
|
||||
writeTestReports,
|
||||
} from './reports.js';
|
||||
|
||||
// Resolve this script's directory (ESM has no __dirname builtin).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/** Set at run start; used by catch for error reports */
let classifierMetricsCollector = null;
// Timestamp captured when the run begins — presumably for report metadata; the
// assignment site is outside this view, so confirm against run().
let classifierRunStartedAt = null;
/** Feature flags from `-F` / `--feature` (stress, structured, no-heartbeat, no-report, quiet) */
let classifierFeatures = /** @type {Set<string>} */ (new Set());
/** Parsed argv (after yargs); set in parseClassifierArgv */
let classifierArgv = /** @type {Record<string, unknown> | null} */ (null);
|
||||
|
||||
/**
 * Normalize the raw yargs `--feature` option into a Set of flag names.
 * Accepts repeated flags and comma-separated lists; non-string entries and
 * empty fragments are dropped; values are trimmed and deduplicated.
 * @param {unknown} featureOpt raw yargs array option (may be anything)
 * @returns {Set<string>}
 */
function parseFeatureList(featureOpt) {
  if (!Array.isArray(featureOpt)) return new Set();
  const flags = featureOpt
    .filter((entry) => typeof entry === 'string')
    .flatMap((entry) => entry.split(','))
    .map((piece) => piece.trim())
    .filter(Boolean);
  return new Set(flags);
}
|
||||
|
||||
/**
 * Parse CLI and apply to `process.env` (CLI wins over prior env).
 *
 * Defines the yargs options, then mirrors each provided flag into the
 * corresponding KBOT_* environment variable so downstream helpers (which
 * read only env) pick them up. Also records the parsed feature flags in the
 * module-level `classifierFeatures` / `classifierArgv`.
 * @returns {Record<string, unknown> & { featuresSet: Set<string> }}
 */
export function parseClassifierArgv() {
  const y = yargs(hideBin(process.argv))
    .scriptName('test-ipc-classifier')
    .usage('$0 [options]\n\nIPC classifier batch test. Flags override env vars for this process.')
    .option('provider', {
      alias: 'r',
      type: 'string',
      describe: 'Router / provider → KBOT_ROUTER (e.g. openrouter, ollama, openai)',
    })
    .option('model', {
      alias: 'm',
      type: 'string',
      describe: 'Model id → KBOT_IPC_MODEL',
    })
    .option('runs', {
      alias: 'n',
      type: 'number',
      describe: 'Batch repeats (stress) → KBOT_CLASSIFIER_STRESS_RUNS',
    })
    .option('limit', {
      alias: 'l',
      type: 'number',
      describe: 'Max labels → KBOT_CLASSIFIER_LIMIT',
    })
    .option('timeout', {
      alias: 't',
      type: 'number',
      describe: 'LLM HTTP timeout ms → KBOT_CLASSIFIER_TIMEOUT_MS',
    })
    .option('backend', {
      type: 'string',
      choices: ['local', 'remote'],
      describe: 'local = llama :8888; remote = router (sets KBOT_IPC_CLASSIFIER_LLAMA=0)',
    })
    .option('no-autostart', {
      type: 'boolean',
      default: false,
      describe: 'Do not spawn run-7b.sh → KBOT_IPC_LLAMA_AUTOSTART=0',
    })
    .option('feature', {
      alias: 'F',
      type: 'array',
      default: [],
      describe:
        'Feature flags (repeat or comma-separated): stress, structured, no-heartbeat, no-report, quiet',
    })
    .option('dst', {
      type: 'string',
      describe:
        'Forwarded to kbot-ai IPC `dst` (worker writes completion text here; path resolved from cwd). Same as C++ --dst.',
    })
    .option('output', {
      type: 'string',
      describe:
        'Forwarded to IPC if --dst omitted (C++ `output` field). Prefer --dst when both are set.',
    })
    .strict()
    .help()
    .alias('h', 'help');

  const argv = y.parseSync();
  const featuresSet = parseFeatureList(argv.feature);

  // CLI → env propagation. Each guard skips flags the user did not supply so
  // pre-existing env values are preserved.
  if (argv.provider != null && String(argv.provider).trim() !== '') {
    process.env.KBOT_ROUTER = String(argv.provider).trim();
  }
  if (argv.model != null && String(argv.model).trim() !== '') {
    process.env.KBOT_IPC_MODEL = String(argv.model).trim();
  }
  // Runs are clamped to 500 here, matching the cap in stressRunCount().
  if (argv.runs != null && Number.isFinite(argv.runs) && argv.runs >= 1) {
    process.env.KBOT_CLASSIFIER_STRESS_RUNS = String(Math.min(500, Math.floor(Number(argv.runs))));
  }
  if (argv.limit != null && Number.isFinite(argv.limit) && argv.limit >= 1) {
    process.env.KBOT_CLASSIFIER_LIMIT = String(Math.floor(Number(argv.limit)));
  }
  if (argv.timeout != null && Number.isFinite(argv.timeout) && argv.timeout > 0) {
    process.env.KBOT_CLASSIFIER_TIMEOUT_MS = String(Math.floor(Number(argv.timeout)));
  }
  if (argv['no-autostart'] === true) {
    process.env.KBOT_IPC_LLAMA_AUTOSTART = '0';
  }
  // backend=remote disables the local-llama path; backend=local clears any
  // prior override so the default (local) behavior applies.
  if (argv.backend === 'remote') {
    process.env.KBOT_IPC_CLASSIFIER_LLAMA = '0';
  } else if (argv.backend === 'local') {
    delete process.env.KBOT_IPC_CLASSIFIER_LLAMA;
  }

  // `-F stress` without an explicit -n defaults to 5 runs (env still wins).
  if (featuresSet.has('stress') && (argv.runs == null || !Number.isFinite(argv.runs))) {
    if (!process.env.KBOT_CLASSIFIER_STRESS_RUNS) {
      process.env.KBOT_CLASSIFIER_STRESS_RUNS = '5';
    }
  }

  // Publish to module-level state read by the rest of this script.
  classifierFeatures = featuresSet;
  const out = { ...argv, featuresSet };
  classifierArgv = out;
  return out;
}
|
||||
// Worker binary path resolved by the shared preset helper.
const EXE = distExePath(__dirname);
// Shared assertion helper from test-commons; `assert` is destructured for brevity.
const stats = createAssert();
const { assert } = stats;
|
||||
|
||||
/** @see packages/kbot/.../JobViewer.tsx — business type options */
|
||||
// Candidate business-type labels scored against ANCHOR in the batch prompt.
// Mirrors the JobViewer.tsx options — keep the strings character-for-character
// identical to that list, since the prompt demands exact label echo-back.
export const JOB_VIEWER_MACHINE_LABELS = [
  '3D printing service',
  'Drafting service',
  'Engraver',
  'Furniture maker',
  'Industrial engineer',
  'Industrial equipment supplier',
  'Laser cutting service',
  'Machine construction',
  'Machine repair service',
  'Machine shop',
  'Machine workshop',
  'Machinery parts manufacturer',
  'Machining manufacturer',
  'Manufacturer',
  'Mechanic',
  'Mechanical engineer',
  'Mechanical plant',
  'Metal fabricator',
  'Metal heat treating service',
  'Metal machinery supplier',
  'Metal working shop',
  'Metal workshop',
  'Novelty store',
  'Plywood supplier',
  'Sign shop',
  'Tool manufacturer',
  'Trophy shop',
];
|
||||
|
||||
// Anchor label every candidate is scored against (distance 0 = same/synonym).
const ANCHOR = 'machine workshop';

/** Keys we accept for the batch array when API forces a JSON object (e.g. response_format json_object). */
const BATCH_ARRAY_OBJECT_KEYS = ['items', 'distances', 'results', 'data', 'labels', 'rows'];
|
||||
|
||||
/**
 * Build one prompt: plain mode = JSON array root; structured (-F structured) =
 * JSON object with "items" (json_object API).
 * @param {string[]} labels candidate labels, scored in the given order
 * @returns {string} the full prompt text
 */
function classifierBatchPrompt(labels) {
  // "1. \"Label\"" numbering — JSON.stringify quotes each label so the model
  // can copy it back character-for-character.
  const numbered = labels.map((l, i) => `${i + 1}. ${JSON.stringify(l)}`).join('\n');
  const structured = classifierFeatures.has('structured');

  const rules = `Rules for each element:
- Use shape: {"label": <exact string from the list below>, "distance": <number>}
- "distance" is semantic distance from 0 (same as anchor or direct synonym) to 10 (unrelated). One decimal allowed.
- Include EXACTLY one object per line item below, in the SAME ORDER, with "label" copied character-for-character from the list.

Anchor business type: ${ANCHOR}

Candidate labels (in order):
${numbered}`;

  // Structured variant: some APIs (response_format json_object) reject a
  // top-level array, so the array is wrapped under an "items" key.
  if (structured) {
    return `You classify business types against one anchor. Output ONLY valid JSON: one object, no markdown fences, no commentary.
The API requires a JSON object (not a top-level array). Use exactly one top-level key "items" whose value is the array.

${rules}

Example: {"items":[{"label":"Example","distance":2.5},...]}`;
  }

  return `You classify business types against one anchor. Output ONLY a JSON array, no markdown fences, no commentary.

${rules}

Output: one JSON array, e.g. [{"label":"...","distance":2.5},...]`;
}
|
||||
|
||||
/**
 * Parse model text into the batch array: root [...] or {"items":[...]}
 * (json_object). Tolerates markdown code fences and leading/trailing prose.
 *
 * Resolution order: (1) parse the fence-stripped text as a whole; (2) parse
 * the outermost [...] slice; (3) parse the outermost {...} slice and unwrap
 * the array from a known wrapper key, or from any value that looks like a
 * batch (non-empty array of objects carrying a "label" field).
 * @param {unknown} text raw completion text from the model
 * @returns {unknown[] | null}
 */
function extractJsonArray(text) {
  if (!text || typeof text !== 'string') return null;

  const stripped = text
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/\s*```$/u, '')
    .trim();

  // Wrapper keys accepted when the API forces a JSON object root
  // (kept in sync with BATCH_ARRAY_OBJECT_KEYS).
  const wrapperKeys = ['items', 'distances', 'results', 'data', 'labels', 'rows'];

  /** @param {unknown} parsed */
  const toArray = (parsed) => {
    if (Array.isArray(parsed)) return parsed;
    if (!parsed || typeof parsed !== 'object') return null;
    const obj = /** @type {Record<string, unknown>} */ (parsed);
    for (const key of wrapperKeys) {
      const candidate = obj[key];
      if (Array.isArray(candidate)) return candidate;
    }
    // Fallback: any value shaped like a batch of labeled rows.
    for (const value of Object.values(obj)) {
      const looksLikeBatch =
        Array.isArray(value) &&
        value.length > 0 &&
        value[0] !== null &&
        value[0] !== undefined &&
        typeof value[0] === 'object' &&
        'label' in value[0];
      if (looksLikeBatch) return value;
    }
    return null;
  };

  /** @param {string} candidate */
  const tryParse = (candidate) => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false, value: null };
    }
  };

  const whole = tryParse(stripped);
  if (whole.ok) return toArray(whole.value);

  const arrStart = stripped.indexOf('[');
  const arrEnd = stripped.lastIndexOf(']');
  if (arrStart >= 0 && arrEnd > arrStart) {
    const slice = tryParse(stripped.slice(arrStart, arrEnd + 1));
    if (slice.ok && Array.isArray(slice.value)) return slice.value;
  }

  const objStart = stripped.indexOf('{');
  const objEnd = stripped.lastIndexOf('}');
  if (objStart >= 0 && objEnd > objStart) {
    const slice = tryParse(stripped.slice(objStart, objEnd + 1));
    if (slice.ok) return toArray(slice.value);
  }

  return null;
}
|
||||
|
||||
/**
 * Align the model's batch output with the expected label list.
 *
 * Keeps only well-formed entries ({label: string, distance: finite number,
 * numeric strings coerced via parseFloat}) whose label is in
 * `expectedLabels`; when a label repeats, the last occurrence wins. The
 * result preserves the expected order, with `null` distances for labels the
 * model omitted.
 * @param {unknown[]} arr raw parsed batch array
 * @param {string[]} expectedLabels — ordered
 * @returns {{ distances: {label: string, distance: number | null}[], missing: string[] }}
 */
function normalizeBatchArray(arr, expectedLabels) {
  const wanted = new Set(expectedLabels);
  const found = new Map();

  for (const entry of arr) {
    if (!entry || typeof entry !== 'object') continue;
    const { label } = entry;
    const raw = entry.distance;
    const dist = typeof raw === 'string' ? parseFloat(raw) : raw;
    const usable =
      typeof label === 'string' &&
      typeof dist === 'number' &&
      Number.isFinite(dist) &&
      wanted.has(label);
    if (usable) found.set(label, dist); // last duplicate wins, as before
  }

  const distances = expectedLabels.map((label) => ({
    label,
    distance: found.has(label) ? found.get(label) : null,
  }));

  const missing = distances.filter((row) => row.distance == null).map((row) => row.label);
  return { distances, missing };
}
|
||||
|
||||
/**
 * LLM HTTP timeout for the single batched kbot-ai call, in milliseconds.
 * Reads KBOT_CLASSIFIER_TIMEOUT_MS; unset/empty/non-numeric/non-positive
 * values fall back to 30 000 ms.
 * NOTE(review): the file-header comment claims a 300000 default — the code
 * default is 30_000; confirm which is intended.
 * @returns {number}
 */
function batchTimeoutMs() {
  const FALLBACK_MS = 30_000;
  const raw = process.env.KBOT_CLASSIFIER_TIMEOUT_MS;
  if (raw === undefined || raw === '') return FALLBACK_MS;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isFinite(parsed) && parsed > 0) return parsed;
  return FALLBACK_MS;
}
|
||||
|
||||
/**
 * Sequential batch iterations on one worker (stress). Default 1 = single run.
 * Reads KBOT_CLASSIFIER_STRESS_RUNS; invalid or < 1 values collapse to 1,
 * and the count is capped at 500.
 * @returns {number}
 */
function stressRunCount() {
  const MAX_RUNS = 500;
  const raw = process.env.KBOT_CLASSIFIER_STRESS_RUNS;
  if (raw === undefined || raw === '') return 1;
  const parsed = Number.parseInt(String(raw).trim(), 10);
  return Number.isFinite(parsed) && parsed >= 1 ? Math.min(parsed, MAX_RUNS) : 1;
}
|
||||
|
||||
/**
 * Extract token-usage counts from a kbot-ai job_result `llm` object.
 * Accepts both snake_case (OpenAI-style) and camelCase field names; any
 * missing field is reported as null.
 * @param {unknown} llm — job_result.llm from kbot-ai
 * @returns {{prompt: unknown, completion: unknown, total: unknown} | null}
 *   null when `llm` or `llm.usage` is absent or not an object
 */
function usageTokens(llm) {
  if (!llm || typeof llm !== 'object') return null;
  const usage = /** @type {Record<string, unknown>} */ (llm).usage;
  if (!usage || typeof usage !== 'object') return null;
  const fields = /** @type {Record<string, unknown>} */ (usage);
  const pick = (snake, camel) => fields[snake] ?? fields[camel] ?? null;
  return {
    prompt: pick('prompt_tokens', 'promptTokens'),
    completion: pick('completion_tokens', 'completionTokens'),
    total: pick('total_tokens', 'totalTokens'),
  };
}
|
||||
|
||||
/** @param {number[]} values */
|
||||
function summarizeMs(values) {
|
||||
if (values.length === 0) return null;
|
||||
const sorted = [...values].sort((a, b) => a - b);
|
||||
const sum = values.reduce((a, b) => a + b, 0);
|
||||
const mid = (a, b) => (a + b) / 2;
|
||||
const p = (q) => sorted[Math.min(sorted.length - 1, Math.max(0, Math.floor(q * (sorted.length - 1))))];
|
||||
return {
|
||||
min: sorted[0],
|
||||
max: sorted[sorted.length - 1],
|
||||
avg: Math.round((sum / values.length) * 100) / 100,
|
||||
p50: sorted.length % 2 ? sorted[Math.floor(sorted.length / 2)] : mid(sorted[sorted.length / 2 - 1], sorted[sorted.length / 2]),
|
||||
p95: p(0.95),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Wrap a pending promise with a periodic progress log so long LLM calls
 * never look like a silent hang. Logs every 10 seconds until the promise
 * settles; the interval is always cleared (via `finally`), on success or
 * failure, and the wrapped promise's settlement is passed through untouched.
 * @template T
 * @param {Promise<T>} promise — the in-flight request to observe
 * @param {number} ipcTimeoutMs — IPC deadline, echoed in the log line
 * @param {string} backendLabel — human-readable backend name for the log
 * @returns {Promise<T>}
 */
function withHeartbeat(promise, ipcTimeoutMs, backendLabel) {
  const intervalMs = 10_000;
  let ticks = 0;
  const timer = setInterval(() => {
    ticks += 1;
    const sec = (ticks * intervalMs) / 1000;
    console.log(
      ` … still waiting on ${backendLabel} (batch is large; ${sec}s elapsed, IPC deadline ${Math.round(ipcTimeoutMs / 1000)}s)…`
    );
  }, intervalMs);
  return promise.finally(() => clearInterval(timer));
}
|
||||
|
||||
/**
 * Build the `kbot-ai` job payload for one classifier batch.
 *
 * Local llama backend: llama preset payload + HTTP timeout only.
 * Remote router: env-derived payload + prompt + timeout, optionally with
 * OpenAI-style structured output (`-F structured`) and a resolved `dst`
 * path taken from --dst/--output.
 * @param {string[]} labels — labels to classify in this batch
 * @param {number} tmo — llm_timeout_ms forwarded to the backend
 * @returns {Record<string, unknown>} payload for the IPC `kbot-ai` message
 */
function buildKbotAiPayload(labels, tmo) {
  const prompt = classifierBatchPrompt(labels);

  if (ipcClassifierLlamaEnabled()) {
    // Local llama path ignores structured output and dst.
    return { ...kbotAiPayloadLlamaLocal({ prompt }), llm_timeout_ms: tmo };
  }

  const payload = {
    ...kbotAiPayloadFromEnv(),
    prompt,
    llm_timeout_ms: tmo,
  };

  /** OpenAI-style structured outputs; forwarded by kbot LLMClient → liboai ChatCompletion. */
  if (classifierFeatures.has('structured')) {
    payload.response_format = { type: 'json_object' };
  }

  // Optional artifact destination from CLI (--dst or --output), resolved to an absolute path.
  const dstRaw = classifierArgv?.dst || classifierArgv?.output;
  if (dstRaw != null && String(dstRaw).trim() !== '') {
    payload.dst = path.resolve(process.cwd(), String(dstRaw).trim());
  }

  return payload;
}
|
||||
|
||||
/**
 * Parse a kbot-ai job_result into per-label distances; updates assertion stats.
 * Early-returns on transport failure, then on parse failure, then validates
 * completeness of the batch array against the requested labels.
 * @param {Record<string, unknown>|null|undefined} p — job_result payload
 * @param {string[]} labels — labels this batch asked about
 * @returns {{ distances: {label:string,distance:number|null}[], missing: string[], parseError: string|null, rawText: string|null, batchOk: boolean }}
 */
function processBatchResponse(p, labels) {
  const result = { distances: [], missing: [], parseError: null, rawText: null, batchOk: false };

  // Transport / backend failure: no text to parse.
  if (p?.status !== 'success' || typeof p?.text !== 'string') {
    assert(false, 'kbot-ai success');
    result.parseError = p?.error ?? 'not success';
    return result;
  }

  result.rawText = p.text;
  const arr = extractJsonArray(p.text);
  if (!arr) {
    assert(false, 'batch response parses as JSON array or {"items":[...]}');
    result.parseError = 'could not parse batch array from model text';
    return result;
  }

  const norm = normalizeBatchArray(arr, labels);
  result.distances = norm.distances;
  result.missing = norm.missing;
  if (norm.missing.length === 0) {
    assert(true, 'batch JSON array: all labels have distance');
    result.batchOk = true;
  } else {
    assert(false, `batch array complete (${norm.missing.length} missing labels)`);
    result.parseError = `missing: ${norm.missing.join('; ')}`;
  }
  return result;
}
|
||||
|
||||
/**
 * Execute one classifier batch over IPC and parse the reply.
 * Measures wall time around the full IPC round-trip; heartbeat logging is
 * skipped when the `no-heartbeat` feature flag is set.
 * @param {ReturnType<typeof createIpcClient>} ipc — attached IPC client
 * @param {string[]} labels — labels for this batch
 * @param {number} tmo — llm_timeout_ms for the backend HTTP call
 * @param {number} ipcDeadlineMs — overall IPC wait deadline
 * @param {string} waitLabel — backend name used in heartbeat logs
 * @returns {Promise<{elapsedMs:number, p:Record<string,unknown>|null} & ReturnType<typeof processBatchResponse>>}
 */
async function runSingleBatch(ipc, labels, tmo, ipcDeadlineMs, waitLabel) {
  const payload = buildKbotAiPayload(labels, tmo);
  const startedAt = performance.now();
  const request = ipc.request({ type: 'kbot-ai', payload }, ipcDeadlineMs);
  const heartbeatOn = !classifierFeatures.has('no-heartbeat');
  const msg = heartbeatOn
    ? await withHeartbeat(request, ipcDeadlineMs, waitLabel)
    : await request;
  const elapsedMs = Math.round(performance.now() - startedAt);
  const p = payloadObj(msg);
  return { elapsedMs, p, ...processBatchResponse(p, labels) };
}
|
||||
|
||||
/**
 * Classifier entry point: spawn the C++ worker over UDS, send `nRuns`
 * kbot-ai classification batches (labels vs "machine workshop" distances),
 * then shut the worker down and write JSON/MD reports.
 *
 * Fix: the pre-run log previously claimed "heartbeat 15s" while
 * withHeartbeat() actually ticks every 10_000 ms — the message now says 10s.
 *
 * Exits the process: 0 when all assertions passed, 1 otherwise.
 */
async function run() {
  const quiet = classifierFeatures.has('quiet');
  classifierMetricsCollector = createMetricsCollector();
  classifierRunStartedAt = new Date().toISOString();
  const startedAt = classifierRunStartedAt;
  const useLlama = ipcClassifierLlamaEnabled();
  const backendLabel = useLlama ? `llama @ :${llama.port}` : `router=${router.fromEnv()}`;
  if (!quiet) {
    console.log(`\n📐 IPC classifier (${backendLabel}) — one batch, distance vs "machine workshop"\n`);
    if (classifierFeatures.has('structured')) {
      if (useLlama) {
        console.log(
          ` ⚠️ -F structured: ignored for local llama (use --backend remote for response_format json_object)\n`
        );
      } else {
        console.log(
          ` Structured: response_format json_object + prompt asks for {"items":[...]} (not a top-level array)\n`
        );
      }
    }
  }

  if (!existsSync(EXE)) {
    console.error(`❌ Binary not found at ${EXE}`);
    process.exit(1);
  }

  // Local llama backend must be reachable (optionally autostarted) before we spawn the worker.
  if (useLlama) {
    await ensureLlamaLocalServer({
      autostart: llamaAutostartEnabled(),
      startTimeoutMs: timeouts.llamaServerStart,
    });
  }

  // Optionally trim the label set via KBOT_CLASSIFIER_LIMIT for quick runs.
  const limitRaw = process.env.KBOT_CLASSIFIER_LIMIT;
  let labels = [...JOB_VIEWER_MACHINE_LABELS];
  if (limitRaw !== undefined && limitRaw !== '') {
    const lim = Number.parseInt(limitRaw, 10);
    if (Number.isFinite(lim) && lim > 0) labels = labels.slice(0, lim);
  }

  // Remove a stale socket file left over from a crashed previous run (POSIX only).
  const CPP_UDS_ARG = uds.workerArg();
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }

  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);

  // Retry-connect loop: the worker needs a moment to create the socket.
  let socket;
  for (let i = 0; i < timeouts.connectAttempts; i++) {
    try {
      await new Promise((res, rej) => {
        socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
        socket.once('connect', res);
        socket.once('error', rej);
      });
      break;
    } catch (e) {
      if (i === timeouts.connectAttempts - 1) throw e;
      await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
    }
  }

  const ipc = createIpcClient(socket);
  ipc.attach();
  await ipc.readyPromise;

  const tmo = batchTimeoutMs();
  // IPC waits longer than the backend HTTP timeout so the worker can report its own timeout.
  const ipcDeadlineMs = tmo + 60_000;
  const waitLabel = useLlama ? 'llama' : router.fromEnv();
  const nRuns = stressRunCount();

  if (!quiet) {
    console.log(` kbot-ai batch: ${labels.length} labels × ${nRuns} run(s)`);
    console.log(` liboai HTTP timeout: ${tmo} ms (llm_timeout_ms) — rebuild kbot if this was stuck at ~30s before`);
    console.log(` IPC wait deadline: ${ipcDeadlineMs} ms (HTTP + margin)`);
    // Must match withHeartbeat()'s 10_000 ms interval (was incorrectly logged as 15s).
    const hb = classifierFeatures.has('no-heartbeat') ? 'off' : '10s';
    console.log(` (Large batches can take many minutes; heartbeat ${hb}…)\n`);
  }

  /** @type {Array<{ index: number, wallMs: number, batchOk: boolean, parseError: string|null, usage: ReturnType<typeof usageTokens>}>} */
  const stressIterations = [];

  // Report only the LAST run's payload/distances; all runs feed stressIterations.
  let lastP = /** @type {Record<string, unknown>|null} */ (null);
  let lastDistances = [];
  let lastRawText = null;
  let lastParseError = null;
  let lastByDistance = [];

  for (let r = 0; r < nRuns; r++) {
    if (nRuns > 1 && !quiet) {
      console.log(` ── Stress run ${r + 1}/${nRuns} ──`);
    }
    const batch = await runSingleBatch(ipc, labels, tmo, ipcDeadlineMs, waitLabel);
    lastP = batch.p;
    lastDistances = batch.distances;
    lastRawText = batch.rawText;
    lastParseError = batch.parseError;
    // Sort ascending by distance; null distances (missing labels) sink to the end.
    lastByDistance = [...batch.distances].sort((a, b) => {
      if (a.distance == null && b.distance == null) return 0;
      if (a.distance == null) return 1;
      if (b.distance == null) return -1;
      return a.distance - b.distance;
    });

    const u = usageTokens(batch.p?.llm);
    stressIterations.push({
      index: r + 1,
      wallMs: batch.elapsedMs,
      batchOk: batch.batchOk,
      parseError: batch.parseError,
      usage: u,
    });

    if (nRuns > 1 && !quiet) {
      const tok = u
        ? `tokens p/c/t ${u.prompt ?? '—'}/${u.completion ?? '—'}/${u.total ?? '—'}`
        : 'tokens —';
      console.log(` wall: ${batch.elapsedMs} ms ${batch.batchOk ? 'OK' : 'FAIL'} ${tok}`);
    }
  }

  // Aggregate wall-time / token stats when stress-running more than once.
  const wallMsList = stressIterations.map((x) => x.wallMs);
  /** @type {null | { requestedRuns: number, wallMs: NonNullable<ReturnType<typeof summarizeMs>>, successCount: number, failCount: number, totalPromptTokens: number, totalCompletionTokens: number, totalTokens: number }} */
  let stressSummary = null;
  if (nRuns > 1) {
    const w = summarizeMs(wallMsList);
    stressSummary = {
      requestedRuns: nRuns,
      wallMs: /** @type {NonNullable<typeof w>} */ (w),
      successCount: stressIterations.filter((x) => x.batchOk).length,
      failCount: stressIterations.filter((x) => !x.batchOk).length,
      totalPromptTokens: stressIterations.reduce((s, x) => s + (Number(x.usage?.prompt) || 0), 0),
      totalCompletionTokens: stressIterations.reduce((s, x) => s + (Number(x.usage?.completion) || 0), 0),
      totalTokens: stressIterations.reduce((s, x) => s + (Number(x.usage?.total) || 0), 0),
    };
    if (quiet) {
      console.log(
        `stress ${nRuns} runs: min=${stressSummary.wallMs.min}ms max=${stressSummary.wallMs.max}ms avg=${stressSummary.wallMs.avg}ms ok=${stressSummary.successCount}/${nRuns} tokensΣ=${stressSummary.totalTokens}`
      );
    } else {
      console.log(`\n ═══════════════ Stress summary (${nRuns} batch runs) ═══════════════`);
      console.log(
        ` Wall time (ms): min ${stressSummary.wallMs.min} max ${stressSummary.wallMs.max} avg ${stressSummary.wallMs.avg} p50 ${stressSummary.wallMs.p50} p95 ${stressSummary.wallMs.p95}`
      );
      console.log(
        ` Batches OK: ${stressSummary.successCount} fail: ${stressSummary.failCount} (assertions: passed ${stats.passed} failed ${stats.failed})`
      );
      if (
        stressSummary.totalPromptTokens > 0 ||
        stressSummary.totalCompletionTokens > 0 ||
        stressSummary.totalTokens > 0
      ) {
        console.log(
          ` Token totals (sum over runs): prompt ${stressSummary.totalPromptTokens} completion ${stressSummary.totalCompletionTokens} total ${stressSummary.totalTokens}`
        );
      }
      console.log(` ═══════════════════════════════════════════════════════════════════\n`);
    }
  }

  const p = lastP;
  const distances = lastDistances;
  const rawText = lastRawText;
  const parseError = lastParseError;
  const byDistance = lastByDistance;

  // Graceful worker shutdown; expect shutdown_ack and a clean exit code.
  const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
  assert(shutdownRes.type === 'shutdown_ack', 'shutdown ack');
  await new Promise((r) => setTimeout(r, timeouts.postShutdownMs));
  socket.destroy();
  assert(workerProc.exitCode === 0, 'worker exit 0');

  const finishedAt = new Date().toISOString();

  const reportNow = new Date();
  const cwd = process.cwd();

  const reportData = {
    startedAt,
    finishedAt,
    passed: stats.passed,
    failed: stats.failed,
    ok: stats.failed === 0,
    ipcClassifierLlama: useLlama,
    cli: {
      features: [...classifierFeatures],
      provider: process.env.KBOT_ROUTER ?? null,
      model: process.env.KBOT_IPC_MODEL ?? null,
      backend: useLlama ? 'local' : 'remote',
      stressRuns: nRuns,
      structuredOutput: !useLlama && classifierFeatures.has('structured'),
      dst:
        classifierArgv?.dst || classifierArgv?.output
          ? path.resolve(
              process.cwd(),
              String(classifierArgv.dst || classifierArgv.output).trim()
            )
          : null,
    },
    env: {
      KBOT_IPC_CLASSIFIER_LLAMA: process.env.KBOT_IPC_CLASSIFIER_LLAMA ?? null,
      KBOT_IPC_LLAMA_AUTOSTART: process.env.KBOT_IPC_LLAMA_AUTOSTART ?? null,
      KBOT_ROUTER: process.env.KBOT_ROUTER ?? null,
      KBOT_IPC_MODEL: process.env.KBOT_IPC_MODEL ?? null,
      KBOT_CLASSIFIER_LIMIT: process.env.KBOT_CLASSIFIER_LIMIT ?? null,
      KBOT_CLASSIFIER_TIMEOUT_MS: process.env.KBOT_CLASSIFIER_TIMEOUT_MS ?? null,
      KBOT_CLASSIFIER_STRESS_RUNS: process.env.KBOT_CLASSIFIER_STRESS_RUNS ?? null,
      KBOT_LLAMA_PORT: process.env.KBOT_LLAMA_PORT ?? null,
      KBOT_LLAMA_BASE_URL: process.env.KBOT_LLAMA_BASE_URL ?? null,
    },
    metrics: buildMetricsBundle(classifierMetricsCollector, startedAt, finishedAt),
    anchor: ANCHOR,
    source: 'JobViewer.tsx:205',
    batch: true,
    backend: useLlama ? 'llama_local' : 'remote_router',
    ...(useLlama
      ? {
          llama: {
            baseURL: llama.baseURL,
            port: llama.port,
            router: llama.router,
            model: llama.model,
          },
        }
      : {
          router: router.fromEnv(),
          model: process.env.KBOT_IPC_MODEL ?? null,
        }),
    labelCount: labels.length,
    /** Provider metadata from API (usage, model, id, OpenRouter fields) — see LLMClient + kbot `llm` key */
    llm: p?.llm ?? null,
    distances,
    byDistance,
    rawText,
    parseError: parseError ?? null,
    ...(nRuns > 1 && stressSummary
      ? {
          stress: {
            requestedRuns: nRuns,
            iterations: stressIterations,
            summary: stressSummary,
          },
        }
      : {}),
  };

  let jsonPath = '';
  let mdPath = '';
  let arrayPath = '';
  if (!classifierFeatures.has('no-report')) {
    try {
      const written = await writeTestReports('test-ipc-classifier', reportData, { cwd, now: reportNow });
      jsonPath = written.jsonPath;
      mdPath = written.mdPath;
    } catch (e) {
      console.error(' ⚠️ Failed to write report:', e?.message ?? e);
    }

    /** Array-only artifact (same timestamp as main report). */
    arrayPath = reportFilePathWithExt('test-ipc-classifier-distances', '.json', { cwd, now: reportNow });
    await mkdir(path.dirname(arrayPath), { recursive: true });
    await writeFile(arrayPath, `${JSON.stringify(distances, null, 2)}\n`, 'utf8');
  }

  const { label: timeLabel } = timeParts(reportNow);
  if (!classifierFeatures.has('quiet')) {
    console.log(`\n────────────────────────────────`);
    console.log(` Passed: ${stats.passed} Failed: ${stats.failed}`);
    if (jsonPath) console.log(` Report JSON: ${jsonPath}`);
    if (mdPath) console.log(` Report MD: ${mdPath}`);
    if (arrayPath) console.log(` Distances JSON: ${arrayPath}`);
    console.log(` Run id: test-ipc-classifier::${timeLabel}`);
    console.log(` distances.length: ${distances.length}`);
    console.log(`────────────────────────────────\n`);
  } else {
    console.log(
      `done: passed=${stats.passed} failed=${stats.failed} ok=${stats.failed === 0}${jsonPath ? ` json=${jsonPath}` : ''}`
    );
  }

  process.exit(stats.failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Parse CLI flags first so feature flags are live, then launch the classifier.
parseClassifierArgv();
run().catch(async (err) => {
  console.error('Classifier error:', err);
  // Best-effort failure report (skipped under -F no-report); never masks the error.
  if (!classifierFeatures.has('no-report')) {
    try {
      const finishedAt = new Date().toISOString();
      const collector = classifierMetricsCollector ?? createMetricsCollector();
      const started = classifierRunStartedAt ?? finishedAt;
      const failureReport = {
        startedAt: started,
        finishedAt,
        error: String(err?.stack ?? err),
        passed: stats.passed,
        failed: stats.failed,
        ok: false,
        ipcClassifierLlama: ipcClassifierLlamaEnabled(),
        metrics: buildMetricsBundle(collector, started, finishedAt),
      };
      await writeTestReports('test-ipc-classifier', failureReport, { cwd: process.cwd() });
    } catch (_) {
      /* ignore */
    }
  }
  process.exit(1);
});
|
||||
283
packages/media/cpp/orchestrator/test-ipc.mjs
Normal file
283
packages/media/cpp/orchestrator/test-ipc.mjs
Normal file
@ -0,0 +1,283 @@
|
||||
/**
|
||||
* orchestrator/test-ipc.mjs
|
||||
*
|
||||
* Integration test: spawn the C++ worker in UDS mode, exchange messages, verify responses.
|
||||
*
|
||||
* Run: npm run test:ipc
|
||||
*
|
||||
* Env:
|
||||
* KBOT_IPC_LLM — real LLM step is on by default; set to 0 / false / no / off to skip (CI / offline).
|
||||
* KBOT_ROUTER — router (default: openrouter; same defaults as C++ LLMClient / CLI)
|
||||
* KBOT_IPC_MODEL — optional model id (e.g. openrouter slug); else C++ default for that router
|
||||
* KBOT_IPC_PROMPT — custom prompt (default: capital of Germany; asserts "berlin" in reply)
|
||||
* KBOT_IPC_LLM_LOG_MAX — max chars to print for LLM text (default: unlimited)
|
||||
* KBOT_IPC_LLAMA — llama :8888 step on by default; set 0/false/no/off to skip
|
||||
* KBOT_IPC_LLAMA_AUTOSTART — if 0, do not spawn scripts/run-7b.sh when :8888 is closed
|
||||
* KBOT_LLAMA_* — KBOT_LLAMA_PORT, KBOT_LLAMA_BASE_URL, KBOT_LLAMA_MODEL, KBOT_LLAMA_START_TIMEOUT_MS
|
||||
*
|
||||
* Shared: presets.js, test-commons.js, reports.js
|
||||
* Report: cwd/tests/test-ipc__HH-mm.{json,md} (see reports.js)
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import net from 'node:net';
|
||||
import { existsSync, unlinkSync } from 'node:fs';
|
||||
|
||||
import {
|
||||
distExePath,
|
||||
platform,
|
||||
uds,
|
||||
timeouts,
|
||||
kbotAiPayloadFromEnv,
|
||||
kbotAiPayloadLlamaLocal,
|
||||
usingDefaultGermanyPrompt,
|
||||
ensureLlamaLocalServer,
|
||||
} from './presets.js';
|
||||
import {
|
||||
createAssert,
|
||||
payloadObj,
|
||||
logKbotAiResponse,
|
||||
ipcLlmEnabled,
|
||||
ipcLlamaEnabled,
|
||||
llamaAutostartEnabled,
|
||||
createIpcClient,
|
||||
pipeWorkerStderr,
|
||||
} from './test-commons.js';
|
||||
import {
|
||||
createMetricsCollector,
|
||||
buildMetricsBundle,
|
||||
writeTestReports,
|
||||
} from './reports.js';
|
||||
|
||||
// ESM has no built-in __dirname; reconstruct it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Absolute path to the built C++ worker binary (dist/), via presets.distExePath.
const EXE = distExePath(__dirname);
// Shared pass/fail counters and the assert() helper used by every step below.
const stats = createAssert();
const { assert } = stats;

/** Set at run start for error reports */
let ipcRunStartedAt = null;
// Metrics collector created per run; reused by the failure-path report writer.
let ipcMetricsCollector = null;
/** `llm` object from kbot-ai job_result (usage, model, OpenRouter extras) — filled in steps 6–7 */
let ipcKbotAiLlmRouter = null;
let ipcKbotAiLlmLlama = null;
|
||||
/**
 * IPC integration test: spawn the C++ worker over a Unix domain socket and
 * drive it through eight sequential steps (ready handshake, ping/pong, job
 * echo, error path, concurrency, optional live LLM, optional local llama,
 * graceful shutdown), then write JSON/MD reports and exit with the
 * assertion result. Steps are strictly order-dependent — the worker holds
 * state across them, so do not reorder.
 */
async function run() {
  ipcMetricsCollector = createMetricsCollector();
  ipcRunStartedAt = new Date().toISOString();
  // Reset per-run LLM metadata so reruns in-process don't leak old values.
  ipcKbotAiLlmRouter = null;
  ipcKbotAiLlmLlama = null;
  console.log('\n🔧 IPC [UDS] Integration Tests\n');

  // Hard requirement: the C++ binary must already be built.
  if (!existsSync(EXE)) {
    console.error(`❌ Binary not found at ${EXE}`);
    process.exit(1);
  }

  // Remove a stale socket file left by a crashed previous run (POSIX only —
  // on Windows uds.workerArg() is a named pipe, not a filesystem path).
  const CPP_UDS_ARG = uds.workerArg();
  if (!platform.isWin && existsSync(CPP_UDS_ARG)) {
    unlinkSync(CPP_UDS_ARG);
  }

  // ── 1. Spawn & ready ────────────────────────────────────────────────────
  console.log('1. Spawn worker (UDS mode) and wait for ready signal');
  const workerProc = spawn(EXE, ['worker', '--uds', CPP_UDS_ARG], { stdio: 'pipe' });
  pipeWorkerStderr(workerProc);

  // Retry-connect loop: the worker needs a moment to create its socket.
  let socket;
  for (let i = 0; i < timeouts.connectAttempts; i++) {
    try {
      await new Promise((res, rej) => {
        socket = net.connect(uds.connectOpts(CPP_UDS_ARG));
        socket.once('connect', res);
        socket.once('error', rej);
      });
      break;
    } catch (e) {
      // Out of attempts: surface the last connect error to the catch handler.
      if (i === timeouts.connectAttempts - 1) throw e;
      await new Promise((r) => setTimeout(r, timeouts.connectRetryMs));
    }
  }
  assert(true, 'Socket connected successfully');

  const ipc = createIpcClient(socket);
  ipc.attach();

  // Worker announces readiness as its first message.
  const readyMsg = await ipc.readyPromise;
  assert(readyMsg.type === 'ready', 'Worker sends ready message on startup');

  // ── 2. Ping / Pong ─────────────────────────────────────────────────────
  console.log('2. Ping → Pong');
  const pong = await ipc.request({ type: 'ping' }, timeouts.ipcDefault);
  assert(pong.type === 'pong', `Response type is "pong" (got "${pong.type}")`);

  // ── 3. Job echo ─────────────────────────────────────────────────────────
  console.log('3. Job → Job Result (echo payload)');
  const payload = { action: 'resize', width: 1024, format: 'webp' };
  const jobResult = await ipc.request({ type: 'job', payload }, timeouts.ipcDefault);
  assert(jobResult.type === 'job_result', `Response type is "job_result" (got "${jobResult.type}")`);
  assert(
    jobResult.payload?.action === 'resize' && jobResult.payload?.width === 1024,
    'Payload echoed back correctly'
  );

  // ── 4. Unknown type → error ─────────────────────────────────────────────
  console.log('4. Unknown type → error response');
  const errResp = await ipc.request({ type: 'nonsense' }, timeouts.ipcDefault);
  assert(errResp.type === 'error', `Response type is "error" (got "${errResp.type}")`);

  // ── 5. Multiple rapid requests ──────────────────────────────────────────
  // Exercises request-id correlation in the IPC client under concurrency.
  console.log('5. Multiple concurrent requests');
  const promises = [];
  for (let i = 0; i < 10; i++) {
    promises.push(ipc.request({ type: 'ping', payload: { seq: i } }, timeouts.ipcDefault));
  }
  const results = await Promise.all(promises);
  assert(results.length === 10, `All 10 responses received`);
  assert(results.every((r) => r.type === 'pong'), 'All responses are pong');

  // ── 6. kbot-ai — real LLM (optional via ipcLlmEnabled) ─────────────────
  if (ipcLlmEnabled()) {
    const aiPayload = kbotAiPayloadFromEnv();
    const r = aiPayload.router;
    console.log(`6. kbot-ai → real LLM (router=${r}, timeout 3m)`);
    const live = await ipc.request(
      {
        type: 'kbot-ai',
        payload: aiPayload,
      },
      timeouts.kbotAi
    );
    assert(live.type === 'job_result', `LLM response type job_result (got "${live.type}")`);
    const lp = payloadObj(live);
    assert(lp?.status === 'success', `payload status success (got "${lp?.status}")`);
    assert(
      typeof lp?.text === 'string' && lp.text.trim().length >= 3,
      `assistant text present (length ${(lp?.text || '').length})`
    );
    // Content assertion only holds for the default prompt, not a custom KBOT_IPC_PROMPT.
    if (usingDefaultGermanyPrompt()) {
      assert(
        /berlin/i.test(String(lp?.text || '')),
        'assistant text mentions Berlin (capital of Germany)'
      );
    }
    // Keep provider metadata for the report written at the end.
    ipcKbotAiLlmRouter = lp?.llm ?? null;
    logKbotAiResponse('kbot-ai response', live);
  } else {
    console.log('6. kbot-ai — skipped (KBOT_IPC_LLM=0/false/no/off; default is to run live LLM)');
  }

  // ── 7. kbot-ai — llama local :8888 (optional; llama-basics parity) ───────
  if (ipcLlamaEnabled()) {
    console.log('7. kbot-ai → llama local runner (OpenAI :8888, presets.llama)');
    let llamaReady = false;
    try {
      // May autostart scripts/run-7b.sh when the port is closed (see presets).
      await ensureLlamaLocalServer({
        autostart: llamaAutostartEnabled(),
        startTimeoutMs: timeouts.llamaServerStart,
      });
      llamaReady = true;
    } catch (e) {
      console.error(` ❌ ${e?.message ?? e}`);
    }
    assert(llamaReady, 'llama-server listening on :8888 (or autostart run-7b.sh succeeded)');

    if (llamaReady) {
      const llamaPayload = kbotAiPayloadLlamaLocal();
      const llamaRes = await ipc.request(
        { type: 'kbot-ai', payload: llamaPayload },
        timeouts.llamaKbotAi
      );
      assert(llamaRes.type === 'job_result', `llama IPC response type job_result (got "${llamaRes.type}")`);
      const llp = payloadObj(llamaRes);
      assert(llp?.status === 'success', `llama payload status success (got "${llp?.status}")`);
      assert(
        typeof llp?.text === 'string' && llp.text.trim().length >= 1,
        `llama assistant text present (length ${(llp?.text || '').length})`
      );
      // Default llama prompt is "5+3" arithmetic; reply must contain a standalone 8.
      assert(/\b8\b/.test(String(llp?.text || '')), 'llama arithmetic: reply mentions 8 (5+3)');
      ipcKbotAiLlmLlama = llp?.llm ?? null;
      logKbotAiResponse('kbot-ai llama local', llamaRes);
    }
  } else {
    console.log('7. kbot-ai llama local — skipped (KBOT_IPC_LLAMA=0; default is to run)');
  }

  // ── 8. Graceful shutdown ────────────────────────────────────────────────
  console.log('8. Graceful shutdown');
  const shutdownRes = await ipc.request({ type: 'shutdown' }, timeouts.ipcDefault);
  assert(shutdownRes.type === 'shutdown_ack', `Shutdown acknowledged (got "${shutdownRes.type}")`);

  // Give the worker time to exit cleanly before checking its exit code.
  await new Promise((r) => setTimeout(r, timeouts.postShutdownMs));
  socket.destroy();
  assert(workerProc.exitCode === 0, `Worker exited with code 0 (got ${workerProc.exitCode})`);

  // ── Summary ─────────────────────────────────────────────────────────────
  console.log(`\n────────────────────────────────`);
  console.log(` Passed: ${stats.passed} Failed: ${stats.failed}`);
  console.log(`────────────────────────────────\n`);

  // Report writing is best-effort: failures are logged, not fatal.
  try {
    const finishedAt = new Date().toISOString();
    const { jsonPath, mdPath } = await writeTestReports(
      'test-ipc',
      {
        startedAt: ipcRunStartedAt,
        finishedAt,
        passed: stats.passed,
        failed: stats.failed,
        ok: stats.failed === 0,
        ipcLlm: ipcLlmEnabled(),
        ipcLlama: ipcLlamaEnabled(),
        env: {
          KBOT_IPC_LLM: process.env.KBOT_IPC_LLM ?? null,
          KBOT_IPC_LLAMA: process.env.KBOT_IPC_LLAMA ?? null,
          KBOT_IPC_LLAMA_AUTOSTART: process.env.KBOT_IPC_LLAMA_AUTOSTART ?? null,
          KBOT_ROUTER: process.env.KBOT_ROUTER ?? null,
          KBOT_IPC_MODEL: process.env.KBOT_IPC_MODEL ?? null,
          KBOT_IPC_PROMPT: process.env.KBOT_IPC_PROMPT ?? null,
          KBOT_LLAMA_PORT: process.env.KBOT_LLAMA_PORT ?? null,
          KBOT_LLAMA_BASE_URL: process.env.KBOT_LLAMA_BASE_URL ?? null,
        },
        metrics: buildMetricsBundle(ipcMetricsCollector, ipcRunStartedAt, finishedAt),
        kbotAi: {
          routerStep: ipcKbotAiLlmRouter,
          llamaStep: ipcKbotAiLlmLlama,
        },
      },
      { cwd: process.cwd() }
    );
    console.log(` 📄 Report JSON: ${jsonPath}`);
    console.log(` 📄 Report MD: ${mdPath}\n`);
  } catch (e) {
    console.error(' ⚠️ Failed to write report:', e?.message ?? e);
  }

  process.exit(stats.failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Top-level failure handler: log the error, then try to persist a failure
// report (best effort — a report-writing error must never mask the original
// failure), and always exit non-zero.
run().catch(async (err) => {
  console.error('Test runner error:', err);
  try {
    const finishedAt = new Date().toISOString();
    // run() may have crashed before initializing these; fall back to fresh
    // collector / current timestamp so the report is still well-formed.
    const c = ipcMetricsCollector ?? createMetricsCollector();
    const started = ipcRunStartedAt ?? finishedAt;
    await writeTestReports(
      'test-ipc',
      {
        startedAt: started,
        finishedAt,
        error: String(err?.stack ?? err),
        passed: stats.passed,
        failed: stats.failed,
        ok: false,
        metrics: buildMetricsBundle(c, started, finishedAt),
      },
      { cwd: process.cwd() }
    );
  } catch (_) {
    /* ignore */
  }
  process.exit(1);
});
|
||||
193
packages/media/cpp/package-lock.json
generated
Normal file
193
packages/media/cpp/package-lock.json
generated
Normal file
@ -0,0 +1,193 @@
|
||||
{
|
||||
"name": "kbot-cpp",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "kbot-cpp",
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"yargs": "^17.7.2"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-regex": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-styles": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-convert": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cliui": {
|
||||
"version": "8.0.1",
|
||||
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
|
||||
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"string-width": "^4.2.0",
|
||||
"strip-ansi": "^6.0.1",
|
||||
"wrap-ansi": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/color-convert": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
|
||||
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-name": "~1.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/color-name": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
||||
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/get-caller-file": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
|
||||
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": "6.* || 8.* || >= 10.*"
|
||||
}
|
||||
},
|
||||
"node_modules/is-fullwidth-code-point": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/require-directory": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width": {
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
"strip-ansi": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-ansi": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/wrap-ansi": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/y18n": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs": {
|
||||
"version": "17.7.2",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
||||
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cliui": "^8.0.1",
|
||||
"escalade": "^3.1.1",
|
||||
"get-caller-file": "^2.0.5",
|
||||
"require-directory": "^2.1.1",
|
||||
"string-width": "^4.2.3",
|
||||
"y18n": "^5.0.5",
|
||||
"yargs-parser": "^21.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs-parser": {
|
||||
"version": "21.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
|
||||
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
41
packages/media/cpp/package.json
Normal file
41
packages/media/cpp/package.json
Normal file
@ -0,0 +1,41 @@
|
||||
{
|
||||
"name": "kbot-cpp",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"description": "KBot C++ CLI built with CMake.",
|
||||
"directories": {
|
||||
"test": "tests"
|
||||
},
|
||||
"dependencies": {
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"scripts": {
|
||||
"config": "cmake --preset dev",
|
||||
"config:release": "cmake --preset release",
|
||||
"build": "cmake --preset dev && cmake --build --preset dev",
|
||||
"build:release": "cmake --preset release && cmake --build --preset release",
|
||||
"build:linux": "bash build-linux.sh",
|
||||
"test": "ctest --test-dir build/dev -C Debug --output-on-failure",
|
||||
"test:release": "ctest --test-dir build/release -C Release --output-on-failure",
|
||||
"clean": "cmake -E rm -rf build dist",
|
||||
"rebuild": "npm run clean && npm run build",
|
||||
"run": ".\\dist\\kbot.exe --help",
|
||||
"worker": ".\\dist\\kbot.exe worker",
|
||||
"worker:uds": ".\\dist\\kbot.exe worker --uds \\\\.\\pipe\\kbot-worker",
|
||||
"kbot:ai": ".\\dist\\kbot.exe kbot ai --prompt \"hi\"",
|
||||
"kbot:run": ".\\dist\\kbot.exe kbot run --list",
|
||||
"test:ipc": "node orchestrator/test-ipc.mjs",
|
||||
"test:ipc:classifier": "node orchestrator/test-ipc-classifier.mjs",
|
||||
"test:files": "node orchestrator/test-files.mjs",
|
||||
"test:ipc:classifier:openrouter": "node orchestrator/classifier-openrouter.mjs",
|
||||
"test:ipc:classifier:openrouter:stress": "node orchestrator/classifier-openrouter-stress.mjs",
|
||||
"test:html": "cmake --preset release && cmake --build --preset release --target test_html && .\\dist\\test_html.exe"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://git.polymech.info/polymech/mono-cpp.git"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC"
|
||||
}
|
||||
33
packages/media/cpp/packages/html/CMakeLists.txt
Normal file
33
packages/media/cpp/packages/html/CMakeLists.txt
Normal file
@ -0,0 +1,33 @@
|
||||
# html: static HTML parsing / Markdown-conversion library built on lexbor.
include(FetchContent)

# Fetch lexbor (HTML parser) at a pinned tag; shallow clone keeps it fast.
FetchContent_Declare(
  lexbor
  GIT_REPOSITORY https://github.com/lexbor/lexbor.git
  GIT_TAG v2.4.0
  GIT_SHALLOW TRUE
)

# Build lexbor as static
set(LEXBOR_BUILD_SHARED OFF CACHE BOOL "" FORCE)
set(LEXBOR_BUILD_STATIC ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(lexbor)

add_library(html STATIC
  src/html.cpp
  src/html2md.cpp
  src/table.cpp
)

# MSVC: treat source and execution charset as UTF-8
# (fixes \u200b zero-width-space mismatch in html2md tests)
if(MSVC)
  target_compile_options(html PRIVATE /utf-8)
endif()

# Public headers live under include/; consumers inherit this include path.
target_include_directories(html
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)

# PUBLIC so anything linking `html` also links the static lexbor library.
target_link_libraries(html
  PUBLIC lexbor_static
)
|
||||
55
packages/media/cpp/packages/html/include/html/html.h
Normal file
55
packages/media/cpp/packages/html/include/html/html.h
Normal file
@ -0,0 +1,55 @@
|
||||
#pragma once

#include <string>
#include <vector>

// Public interface of the html static library (implemented on top of lexbor
// in src/html.cpp, src/html2md.cpp). All functions take raw HTML as a
// std::string; on no match they presumably return an empty string / empty
// vector — TODO confirm against the implementation.
namespace html {

/// Parsed element — tag name + text content.
struct Element {
  std::string tag;   // tag name
  std::string text;  // text content of the element
};

/// Link with href and optional attributes.
struct Link {
  std::string href;  // raw href value as found in the HTML
  std::string rel;   // e.g. "canonical", "stylesheet"
  std::string text;  // anchor text (for <a> tags)
};

/// Parse an HTML string and return all elements with their text content.
std::vector<Element> parse(const std::string &html_str);

/// Extract the text content of all elements matching a CSS selector.
std::vector<std::string> select(const std::string &html_str,
                                const std::string &selector);

// ── Enricher extraction helpers ─────────────────────────────────────────────

/// Extract the <title> text.
std::string get_title(const std::string &html_str);

/// Extract a <meta name="X"> or <meta property="X"> content attribute.
std::string get_meta(const std::string &html_str, const std::string &name);

/// Extract <link rel="canonical"> href.
std::string get_canonical(const std::string &html_str);

/// Extract all <a href="..."> values (resolved links as-is from the HTML,
/// i.e. not normalized against any base URL).
std::vector<Link> get_links(const std::string &html_str);

/// Extract visible body text, stripping script/style/noscript/svg/iframe.
std::string get_body_text(const std::string &html_str);

/// Extract raw JSON strings from <script type="application/ld+json">.
std::vector<std::string> get_json_ld(const std::string &html_str);

/// Extract an attribute value from the first element matching a CSS selector.
std::string get_attr(const std::string &html_str, const std::string &selector,
                     const std::string &attr_name);

/// Convert HTML content to Markdown.
std::string to_markdown(const std::string &html_str);

} // namespace html
|
||||
690
packages/media/cpp/packages/html/include/html/html2md.h
Normal file
690
packages/media/cpp/packages/html/include/html/html2md.h
Normal file
@ -0,0 +1,690 @@
|
||||
// Copyright (c) Tim Gromeyer
|
||||
// Licensed under the MIT License - https://opensource.org/licenses/MIT
|
||||
|
||||
#ifndef HTML2MD_H
|
||||
#define HTML2MD_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
|
||||
/*!
|
||||
* \brief html2md namespace
|
||||
*
|
||||
* The html2md namespace provides:
|
||||
* 1. The Converter class
|
||||
* 2. Static wrapper around Converter class
|
||||
*
|
||||
* \note Do NOT try to convert HTML that contains a list in an ordered list or a
|
||||
* `blockquote` in a list!\n This will be a **total** mess!
|
||||
*/
|
||||
namespace html2md {
|
||||
|
||||
/*!
 * \brief Options for the conversion from HTML to Markdown
 * \warning Make sure to pass valid options; otherwise, the output will be
 * invalid!
 *
 * Example from `tests/main.cpp`:
 *
 * ```cpp
 * auto *options = new html2md::Options();
 * options->splitLines = false;
 *
 * html2md::Converter c(html, options);
 * auto md = c.convert();
 * ```
 */
struct Options {
  /*!
   * \brief Add new line when a certain number of characters is reached
   *
   * \see softBreak
   * \see hardBreak
   */
  bool splitLines = true;

  /*!
   * \brief softBreak Wrap after ... characters when the next space is reached
   * and as long as it's not in a list, table, image or anchor (link).
   */
  int softBreak = 80;

  /*!
   * \brief hardBreak Force a break after ... characters in a line
   */
  int hardBreak = 100;

  /*!
   * \brief The char used for unordered lists
   *
   * Valid: `-`, `+`, `*`
   *
   * Example:
   *
   * ```markdown
   * - List
   * + Also a list
   * * And this to
   * ```
   */
  char unorderedList = '-';

  /*!
   * \brief The char used after the number of the item
   *
   * Valid: `.`, `)`
   *
   * Example:
   *
   * ```markdown
   * 1. Hello
   * 2) World!
   * ```
   */
  char orderedList = '.';

  /*!
   * \brief Whether title is added as h1 heading at the very beginning of the
   * markdown
   *
   * Default is true.
   */
  bool includeTitle = true;

  /*!
   * \brief Whether to format Markdown tables
   *
   * Default is true.
   */
  bool formatTable = true;

  /*!
   * \brief Whether to force left trim of lines in the final Markdown output
   *
   * Default is false.
   */
  bool forceLeftTrim = false;

  /*!
   * \brief Whether to compress whitespace (tabs, multiple spaces) into a
   * single space
   *
   * Default is false.
   */
  bool compressWhitespace = false;

  /*!
   * \brief Whether to escape numbered lists (e.g. "4." -> "4\.") to prevent
   * them from being interpreted as lists in Markdown.
   *
   * Default is true.
   */
  bool escapeNumberedList = true;

  /*!
   * \brief Whether to keep HTML entities (e.g. ` `) in the output
   *
   * If true, the converter will not replace HTML entities configured in the
   * internal conversion map. Default is false (current behaviour).
   */
  bool keepHtmlEntities = false;

  /*!
   * \brief Member-wise equality.
   *
   * Takes the argument by const reference: the previous signature passed the
   * whole struct by value, copying it on every comparison for no benefit.
   * Backward compatible — all existing call sites (`a == b`) still compile.
   */
  inline bool operator==(const Options &o) const {
    return splitLines == o.splitLines && unorderedList == o.unorderedList &&
           orderedList == o.orderedList && includeTitle == o.includeTitle &&
           softBreak == o.softBreak && hardBreak == o.hardBreak &&
           formatTable == o.formatTable && forceLeftTrim == o.forceLeftTrim &&
           compressWhitespace == o.compressWhitespace &&
           escapeNumberedList == o.escapeNumberedList &&
           keepHtmlEntities == o.keepHtmlEntities;
  }
};
|
||||
|
||||
/*!
|
||||
* \brief Class for converting HTML to Markdown
|
||||
*
|
||||
* This class converts HTML to Markdown.
|
||||
* There is also a static wrapper for this class (see html2md::Convert).
|
||||
*
|
||||
* ## Usage example
|
||||
*
|
||||
* Option 1: Use the class:
|
||||
*
|
||||
* ```cpp
|
||||
* std::string html = "<h1>example</h1>";
|
||||
* html2md::Converter c(html);
|
||||
* auto md = c.convert();
|
||||
*
|
||||
* if (!c.ok()) std::cout << "There was something wrong in the HTML\n";
|
||||
* std::cout << md; // # example
|
||||
* ```
|
||||
*
|
||||
* Option 2: Use the static wrapper:
|
||||
*
|
||||
* ```cpp
|
||||
* std::string html = "<h1>example</h1>";
|
||||
*
|
||||
* auto md = html2md::Convert(html);
|
||||
* std::cout << md;
|
||||
* ```
|
||||
*
|
||||
* Advanced: use Options:
|
||||
*
|
||||
* ```cpp
|
||||
* std::string html = "<h1>example</h1>";
|
||||
*
|
||||
* auto *options = new html2md::Options();
|
||||
* options->splitLines = false;
|
||||
* options->unorderedList = '*';
|
||||
*
|
||||
* html2md::Converter c(html, options);
|
||||
* auto md = c.convert();
|
||||
* if (!c.ok()) std::cout << "There was something wrong in the HTML\n";
|
||||
* std::cout << md; // # example
|
||||
* ```
|
||||
*/
|
||||
class Converter {
|
||||
public:
|
||||
/*!
|
||||
* \brief Standard initializer, takes HTML as parameter. Also prepares
|
||||
* everything. \param html The HTML as std::string. \param options Options for
|
||||
* the Conversation. See html2md::Options() for more.
|
||||
*
|
||||
* \note Don't pass anything else than HTML, otherwise the output will be a
|
||||
* **mess**!
|
||||
*
|
||||
* This is the default initializer.<br>
|
||||
* You can use appendToMd() to append something to the beginning of the
|
||||
* generated output.
|
||||
*/
|
||||
explicit inline Converter(const std::string &html,
|
||||
struct Options *options = nullptr) {
|
||||
*this = Converter(&html, options);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Convert HTML into Markdown.
|
||||
* \return Returns the converted Markdown.
|
||||
*
|
||||
* This function actually converts the HTML into Markdown.
|
||||
* It also cleans up the Markdown so you don't have to do anything.
|
||||
*/
|
||||
[[nodiscard]] std::string convert();
|
||||
|
||||
/*!
|
||||
* \brief Append a char to the Markdown.
|
||||
* \param ch The char to append.
|
||||
* \return Returns a copy of the instance with the char appended.
|
||||
*/
|
||||
Converter *appendToMd(char ch);
|
||||
|
||||
/*!
|
||||
* \brief Append a char* to the Markdown.
|
||||
* \param str The char* to append.
|
||||
* \return Returns a copy of the instance with the char* appended.
|
||||
*/
|
||||
Converter *appendToMd(const char *str);
|
||||
|
||||
/*!
|
||||
* \brief Append a string to the Markdown.
|
||||
* \param s The string to append.
|
||||
* \return Returns a copy of the instance with the string appended.
|
||||
*/
|
||||
inline Converter *appendToMd(const std::string &s) {
|
||||
return appendToMd(s.c_str());
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Appends a ' ' in certain cases.
|
||||
* \return Copy of the instance with(maybe) the appended space.
|
||||
*
|
||||
* This function appends ' ' if:
|
||||
* - md does not end with `*`
|
||||
* - md does not end with `\n` aka newline
|
||||
*/
|
||||
Converter *appendBlank();
|
||||
|
||||
/*!
|
||||
* \brief Add an HTML symbol conversion
|
||||
* \param htmlSymbol The HTML symbol to convert
|
||||
* \param replacement The replacement string
|
||||
* \note This is useful for converting HTML entities to their Markdown
|
||||
* equivalents. For example, you can add a conversion for " " to
|
||||
* " " (space) or "<" to "<" (less than).
|
||||
* \note This is not a standard feature of the Converter class, but it can
|
||||
* be added to the class to allow for more flexibility in the conversion
|
||||
* process. You can use this feature to add custom conversions for any HTML
|
||||
* symbol that you want to convert to a specific Markdown representation.
|
||||
*/
|
||||
void addHtmlSymbolConversion(const std::string &htmlSymbol,
|
||||
const std::string &replacement) {
|
||||
htmlSymbolConversions_[htmlSymbol] = replacement;
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Remove an HTML symbol conversion
|
||||
* \param htmlSymbol The HTML symbol to remove
|
||||
* \note This is useful for removing custom conversions that you have added
|
||||
* previously.
|
||||
*/
|
||||
void removeHtmlSymbolConversion(const std::string &htmlSymbol) {
|
||||
htmlSymbolConversions_.erase(htmlSymbol);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Clear all HTML symbol conversions
|
||||
* \note This is useful for clearing the conversion map (it's empty afterwards).
|
||||
*/
|
||||
void clearHtmlSymbolConversions() { htmlSymbolConversions_.clear(); }
|
||||
|
||||
/*!
|
||||
* \brief Checks if everything was closed properly(in the HTML).
|
||||
* \return Returns false if there is a unclosed tag.
|
||||
* \note As long as you have not called convert(), it always returns true.
|
||||
*/
|
||||
[[nodiscard]] bool ok() const;
|
||||
|
||||
/*!
|
||||
* \brief Reset the generated Markdown
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/*!
|
||||
* \brief Checks if the HTML matches and the options are the same.
|
||||
* \param The Converter object to compare with
|
||||
* \return true if the HTML and options matches otherwise false
|
||||
*/
|
||||
inline bool operator==(const Converter *c) const { return *this == *c; }
|
||||
|
||||
inline bool operator==(const Converter &c) const {
|
||||
return html_ == c.html_ && option == c.option;
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns ok().
|
||||
*/
|
||||
inline explicit operator bool() const { return ok(); };
|
||||
|
||||
private:
|
||||
// Attributes
|
||||
static constexpr const char *kAttributeHref = "href";
|
||||
static constexpr const char *kAttributeAlt = "alt";
|
||||
static constexpr const char *kAttributeTitle = "title";
|
||||
static constexpr const char *kAttributeClass = "class";
|
||||
static constexpr const char *kAttributeSrc = "src";
|
||||
static constexpr const char *kAttrinuteAlign = "align";
|
||||
|
||||
static constexpr const char *kTagAnchor = "a";
|
||||
static constexpr const char *kTagBreak = "br";
|
||||
static constexpr const char *kTagCode = "code";
|
||||
static constexpr const char *kTagDiv = "div";
|
||||
static constexpr const char *kTagHead = "head";
|
||||
static constexpr const char *kTagLink = "link";
|
||||
static constexpr const char *kTagListItem = "li";
|
||||
static constexpr const char *kTagMeta = "meta";
|
||||
static constexpr const char *kTagNav = "nav";
|
||||
static constexpr const char *kTagNoScript = "noscript";
|
||||
static constexpr const char *kTagOption = "option";
|
||||
static constexpr const char *kTagOrderedList = "ol";
|
||||
static constexpr const char *kTagParagraph = "p";
|
||||
static constexpr const char *kTagPre = "pre";
|
||||
static constexpr const char *kTagScript = "script";
|
||||
static constexpr const char *kTagSpan = "span";
|
||||
static constexpr const char *kTagStyle = "style";
|
||||
static constexpr const char *kTagTemplate = "template";
|
||||
static constexpr const char *kTagTitle = "title";
|
||||
static constexpr const char *kTagUnorderedList = "ul";
|
||||
static constexpr const char *kTagImg = "img";
|
||||
static constexpr const char *kTagSeperator = "hr";
|
||||
|
||||
// Text format
|
||||
static constexpr const char *kTagBold = "b";
|
||||
static constexpr const char *kTagStrong = "strong";
|
||||
static constexpr const char *kTagItalic = "em";
|
||||
static constexpr const char *kTagItalic2 = "i";
|
||||
static constexpr const char *kTagCitation = "cite";
|
||||
static constexpr const char *kTagDefinition = "dfn";
|
||||
static constexpr const char *kTagUnderline = "u";
|
||||
static constexpr const char *kTagStrighthrought = "del";
|
||||
static constexpr const char *kTagStrighthrought2 = "s";
|
||||
|
||||
static constexpr const char *kTagBlockquote = "blockquote";
|
||||
|
||||
// Header
|
||||
static constexpr const char *kTagHeader1 = "h1";
|
||||
static constexpr const char *kTagHeader2 = "h2";
|
||||
static constexpr const char *kTagHeader3 = "h3";
|
||||
static constexpr const char *kTagHeader4 = "h4";
|
||||
static constexpr const char *kTagHeader5 = "h5";
|
||||
static constexpr const char *kTagHeader6 = "h6";
|
||||
|
||||
// Table
|
||||
static constexpr const char *kTagTable = "table";
|
||||
static constexpr const char *kTagTableRow = "tr";
|
||||
static constexpr const char *kTagTableHeader = "th";
|
||||
static constexpr const char *kTagTableData = "td";
|
||||
|
||||
size_t index_ch_in_html_ = 0;
|
||||
|
||||
bool is_closing_tag_ = false;
|
||||
bool is_in_attribute_value_ = false;
|
||||
bool is_in_code_ = false;
|
||||
bool is_in_list_ = false;
|
||||
bool is_in_p_ = false;
|
||||
bool is_in_pre_ = false;
|
||||
bool is_in_table_ = false;
|
||||
bool is_in_table_row_ = false;
|
||||
bool is_in_tag_ = false;
|
||||
bool is_self_closing_tag_ = false;
|
||||
bool skipping_leading_whitespace_ = true;
|
||||
|
||||
// relevant for <li> only, false = is in unordered list
|
||||
bool is_in_ordered_list_ = false;
|
||||
uint8_t index_ol = 0;
|
||||
|
||||
// store the table start
|
||||
size_t table_start = 0;
|
||||
|
||||
// number of lists
|
||||
uint8_t index_li = 0;
|
||||
|
||||
uint8_t index_blockquote = 0;
|
||||
|
||||
char prev_ch_in_md_ = 0, prev_prev_ch_in_md_ = 0;
|
||||
char prev_ch_in_html_ = 'x';
|
||||
|
||||
std::string html_;
|
||||
|
||||
uint16_t offset_lt_ = 0;
|
||||
std::string current_tag_;
|
||||
std::string prev_tag_;
|
||||
|
||||
// Line which separates header from data
|
||||
std::string tableLine;
|
||||
|
||||
size_t chars_in_curr_line_ = 0;
|
||||
|
||||
std::string md_;
|
||||
|
||||
Options option;
|
||||
|
||||
std::unordered_map<std::string, std::string> htmlSymbolConversions_ = {
|
||||
{""", "\""}, {"<", "<"}, {">", ">"},
|
||||
{"&", "&"}, {" ", " "}, {"→", "→"}};
|
||||
|
||||
// Tag: base class for tag types
|
||||
struct Tag {
|
||||
virtual void OnHasLeftOpeningTag(Converter *c) = 0;
|
||||
virtual void OnHasLeftClosingTag(Converter *c) = 0;
|
||||
};
|
||||
|
||||
// Tag types
|
||||
|
||||
// tags that are not printed (nav, script, noscript, ...)
|
||||
struct TagIgnored : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override {};
|
||||
void OnHasLeftClosingTag(Converter *c) override {};
|
||||
};
|
||||
|
||||
struct TagAnchor : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
|
||||
std::string current_href_;
|
||||
std::string current_title_;
|
||||
};
|
||||
|
||||
struct TagBold : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagItalic : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagUnderline : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagStrikethrought : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagBreak : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagDiv : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagHeader1 : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagHeader2 : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagHeader3 : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagHeader4 : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagHeader5 : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagHeader6 : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagListItem : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagOption : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagOrderedList : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagParagraph : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagPre : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagCode : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagSpan : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagTitle : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagUnorderedList : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagImage : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagSeperator : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagTable : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagTableRow : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagTableHeader : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagTableData : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
struct TagBlockquote : Tag {
|
||||
void OnHasLeftOpeningTag(Converter *c) override;
|
||||
void OnHasLeftClosingTag(Converter *c) override;
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Tag>> tags_;
|
||||
|
||||
explicit Converter(const std::string *html, struct Options *options);
|
||||
|
||||
void CleanUpMarkdown();
|
||||
|
||||
// Trim from start (in place)
|
||||
static void LTrim(std::string *s);
|
||||
|
||||
// Trim from end (in place)
|
||||
Converter *RTrim(std::string *s, bool trim_only_blank = false);
|
||||
|
||||
// Trim from both ends (in place)
|
||||
Converter *Trim(std::string *s);
|
||||
|
||||
// 1. trim all lines
|
||||
// 2. reduce consecutive newlines to maximum 3
|
||||
void TidyAllLines(std::string *str);
|
||||
|
||||
std::string ExtractAttributeFromTagLeftOf(const std::string &attr);
|
||||
|
||||
void TurnLineIntoHeader1();
|
||||
|
||||
void TurnLineIntoHeader2();
|
||||
|
||||
// Current char: '<'
|
||||
void OnHasEnteredTag();
|
||||
|
||||
Converter *UpdatePrevChFromMd();
|
||||
|
||||
/**
|
||||
* Handle next char within <...> tag
|
||||
*
|
||||
* @param ch current character
|
||||
* @return continue surrounding iteration?
|
||||
*/
|
||||
bool ParseCharInTag(char ch);
|
||||
|
||||
// Current char: '>'
|
||||
bool OnHasLeftTag();
|
||||
|
||||
inline static bool TagContainsAttributesToHide(std::string *tag) {
|
||||
using std::string;
|
||||
|
||||
return (*tag).find(" aria=\"hidden\"") != string::npos ||
|
||||
(*tag).find("display:none") != string::npos ||
|
||||
(*tag).find("visibility:hidden") != string::npos ||
|
||||
(*tag).find("opacity:0") != string::npos ||
|
||||
(*tag).find("Details-content--hidden-not-important") != string::npos;
|
||||
}
|
||||
|
||||
Converter *ShortenMarkdown(size_t chars = 1);
|
||||
inline bool shortIfPrevCh(char prev) {
|
||||
if (prev_ch_in_md_ == prev) {
|
||||
ShortenMarkdown();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
/**
|
||||
* @param ch
|
||||
* @return continue iteration surrounding this method's invocation?
|
||||
*/
|
||||
bool ParseCharInTagContent(char ch);
|
||||
|
||||
// Replace previous space (if any) in current markdown line by newline
|
||||
bool ReplacePreviousSpaceInLineByNewline();
|
||||
|
||||
static inline bool IsIgnoredTag(const std::string &tag) {
|
||||
return (tag[0] == '-' || kTagTemplate == tag || kTagStyle == tag ||
|
||||
kTagScript == tag || kTagNoScript == tag || kTagNav == tag);
|
||||
|
||||
// meta: not ignored to tolerate if closing is omitted
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsInIgnoredTag() const;
|
||||
}; // Converter
|
||||
|
||||
/*!
|
||||
* \brief Static wrapper around the Converter class
|
||||
* \param html The HTML passed to Converter
|
||||
* \param ok Optional: Pass a reference to a local bool to store the output of
|
||||
* Converter::ok() \return Returns the by Converter generated Markdown
|
||||
*/
|
||||
// Static convenience wrapper: builds a Converter for `html`, runs the
// conversion, and (if `ok` is non-null) reports Converter::ok() through it.
inline std::string Convert(const std::string &html, bool *ok = nullptr) {
  Converter c(html);
  auto md = c.convert();
  if (ok != nullptr)
    *ok = c.ok();
  return md;
}
|
||||
|
||||
#ifndef PYTHON_BINDINGS
// Rvalue-string overload that simply delegates to the lvalue version.
// NOTE(review): the parameter is `const std::string &&` — a const rvalue
// reference; presumably intended as plain `&&`, and the overload is hidden
// for the Python-bindings build to keep its overload set simple — confirm.
inline std::string Convert(const std::string &&html, bool *ok = nullptr) {
  return Convert(html, ok);
}
#endif
|
||||
|
||||
} // namespace html2md
|
||||
|
||||
#endif // HTML2MD_H
|
||||
11
packages/media/cpp/packages/html/include/html/table.h
Normal file
11
packages/media/cpp/packages/html/include/html/table.h
Normal file
@ -0,0 +1,11 @@
|
||||
// Copyright (c) Tim Gromeyer
|
||||
// Licensed under the MIT License - https://opensource.org/licenses/MIT
|
||||
|
||||
#ifndef TABLE_H
|
||||
#define TABLE_H
|
||||
|
||||
#include <string>
|
||||
|
||||
[[nodiscard]] std::string formatMarkdownTable(const std::string &inputTable);
|
||||
|
||||
#endif // TABLE_H
|
||||
101
packages/media/cpp/packages/html/readme.md
Normal file
101
packages/media/cpp/packages/html/readme.md
Normal file
@ -0,0 +1,101 @@
|
||||
# Scraper Request
|
||||
|
||||
## OpenAPI Specification
|
||||
|
||||
```yaml
|
||||
openapi: 3.0.1
|
||||
info:
|
||||
title: ''
|
||||
description: ''
|
||||
version: 1.0.0
|
||||
paths:
|
||||
/api/v1/scraper/request:
|
||||
post:
|
||||
summary: Scraper Request
|
||||
deprecated: false
|
||||
description: ''
|
||||
tags:
|
||||
- Scraping API
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
actor:
|
||||
type: string
|
||||
input:
|
||||
type: object
|
||||
properties:
|
||||
url:
|
||||
type: string
|
||||
required:
|
||||
- url
|
||||
x-apidog-orders:
|
||||
- url
|
||||
proxy:
|
||||
type: object
|
||||
properties:
|
||||
country:
|
||||
type: string
|
||||
required:
|
||||
- country
|
||||
x-apidog-orders:
|
||||
- country
|
||||
async:
|
||||
type: boolean
|
||||
description: |-
|
||||
If true, the task will be executed asynchronously.
|
||||
If false, the task will be executed synchronously.
|
||||
required:
|
||||
- actor
|
||||
- input
|
||||
- proxy
|
||||
x-apidog-orders:
|
||||
- actor
|
||||
- input
|
||||
- proxy
|
||||
- async
|
||||
example:
|
||||
actor: scraper.xxx
|
||||
input:
|
||||
url: >-
|
||||
https://www.***.com/shop/us/products/stmicroelectronics/tda7265a-3074457345625542393/
|
||||
proxy:
|
||||
country: US
|
||||
async: false
|
||||
responses:
|
||||
'200':
|
||||
description: ''
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties: {}
|
||||
x-apidog-orders: []
|
||||
headers: {}
|
||||
x-apidog-name: Success
|
||||
security:
|
||||
- apikey-header-x-api-token: []
|
||||
x-apidog-folder: Scraping API
|
||||
x-apidog-status: released
|
||||
x-run-in-apidog: https://app.apidog.com/web/project/745098/apis/api-11949852-run
|
||||
components:
|
||||
schemas: {}
|
||||
securitySchemes:
|
||||
bearer:
|
||||
type: http
|
||||
scheme: bearer
|
||||
description: Bearer token authentication using your Scrapeless API key
|
||||
apikey-header-x-api-token:
|
||||
type: apiKey
|
||||
in: header
|
||||
name: x-api-token
|
||||
servers:
|
||||
- url: https://api.scrapeless.com
|
||||
description: Prod Env
|
||||
security:
|
||||
- apikey-header-x-api-token: []
|
||||
|
||||
```
|
||||
403
packages/media/cpp/packages/html/src/html.cpp
Normal file
403
packages/media/cpp/packages/html/src/html.cpp
Normal file
@ -0,0 +1,403 @@
|
||||
#include "html/html.h"
|
||||
|
||||
#include <lexbor/css/css.h>
|
||||
#include <lexbor/html/html.h>
|
||||
#include <lexbor/selectors/selectors.h>
|
||||
#include <html/html2md.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
namespace html {
|
||||
|
||||
// ── helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// Return the text content of `node`'s subtree as a std::string.
// lexbor allocates the text buffer for the caller; we copy it out and
// release it immediately so ownership never leaves this helper.
static std::string node_text(lxb_dom_node_t *node) {
  size_t text_len = 0;
  lxb_char_t *raw = lxb_dom_node_text_content(node, &text_len);
  if (raw == nullptr) {
    return {};
  }
  std::string copy{reinterpret_cast<const char *>(raw), text_len};
  lxb_dom_document_destroy_text(node->owner_document, raw);
  return copy;
}
|
||||
|
||||
// Qualified tag name of `el` (e.g. "div"), or "" if lexbor returns none.
static std::string tag_name(lxb_dom_element_t *el) {
  size_t name_len = 0;
  const lxb_char_t *qname = lxb_dom_element_qualified_name(el, &name_len);
  return qname != nullptr
             ? std::string{reinterpret_cast<const char *>(qname), name_len}
             : std::string{};
}
|
||||
|
||||
// Value of attribute `attr` on `el`, or "" when the attribute is absent.
// (An absent attribute and an empty one are indistinguishable to callers.)
static std::string get_element_attr(lxb_dom_element_t *el, const char *attr) {
  size_t val_len = 0;
  const lxb_char_t *val = lxb_dom_element_get_attribute(
      el, reinterpret_cast<const lxb_char_t *>(attr), strlen(attr), &val_len);
  return val != nullptr
             ? std::string{reinterpret_cast<const char *>(val), val_len}
             : std::string{};
}
|
||||
|
||||
// Parse `html_str` into a new lexbor document. Returns nullptr on any
// failure; on success the caller owns the document and must destroy it
// with lxb_html_document_destroy().
static lxb_html_document_t *parse_doc(const std::string &html_str) {
  lxb_html_document_t *doc = lxb_html_document_create();
  if (doc == nullptr) {
    return nullptr;
  }
  const auto status = lxb_html_document_parse(
      doc, reinterpret_cast<const lxb_char_t *>(html_str.c_str()),
      html_str.size());
  if (status == LXB_STATUS_OK) {
    return doc;
  }
  lxb_html_document_destroy(doc);
  return nullptr;
}
|
||||
|
||||
// ── Helper: check if a tag name matches a noise element ─────────────────────
|
||||
|
||||
// True for tags whose subtrees carry no human-visible text and should be
// skipped during text extraction (scripts, styles, vector graphics, frames).
static bool is_noise_tag(const std::string &name) {
  static const char *const kNoise[] = {"script", "style", "noscript", "svg",
                                       "iframe"};
  for (const char *candidate : kNoise) {
    if (name == candidate) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
// ── walk tree recursively ───────────────────────────────────────────────────
|
||||
|
||||
// Depth-first walk recording one Element per DOM element with non-empty
// text. node_text() appears to use DOM textContent (whole-subtree)
// semantics — TODO confirm against lexbor docs — and recursion still
// descends into children, so an ancestor's entry repeats its
// descendants' text.
static void walk(lxb_dom_node_t *node, std::vector<Element> &out) {
  if (!node)
    return;
  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *el = lxb_dom_interface_element(node);
    auto txt = node_text(node);
    if (!txt.empty()) {
      out.push_back({tag_name(el), txt});
    }
  }
  // Recurse for every node type (the document node itself is not an
  // element, but its children must still be visited).
  auto *child = node->first_child;
  while (child) {
    walk(child, out);
    child = child->next;
  }
}
|
||||
|
||||
// ── Walk for visible text only (skip noise tags) ────────────────────────────
|
||||
|
||||
// Append the visible text of `node`'s subtree to `out`, skipping noise
// subtrees (script/style/noscript/svg/iframe) entirely and collapsing
// whitespace: each text chunk is trimmed and separated from the previous
// output by a single space.
static void walk_text(lxb_dom_node_t *node, std::string &out) {
  if (!node) return;

  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *el = lxb_dom_interface_element(node);
    auto name = tag_name(el);
    if (is_noise_tag(name)) return; // Skip noise subtrees entirely
  }

  if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
    size_t len = 0;
    lxb_char_t *data = lxb_dom_node_text_content(node, &len);
    if (data) {
      if (len > 0) {
        std::string chunk(reinterpret_cast<const char *>(data), len);
        // Need a separator unless output is empty or already ends in one.
        bool needSpace = !out.empty() && out.back() != ' ' && out.back() != '\n';
        // Trim leading/trailing whitespace from chunk
        size_t start = chunk.find_first_not_of(" \t\n\r");
        size_t end = chunk.find_last_not_of(" \t\n\r");
        if (start != std::string::npos) {
          if (needSpace) out += ' ';
          out += chunk.substr(start, end - start + 1);
        }
      }
      // Fix: lxb_dom_node_text_content() hands ownership of the buffer to
      // the caller (node_text() above frees it the same way); it was
      // previously leaked here — once per text node in the document.
      lxb_dom_document_destroy_text(node->owner_document, data);
    }
  }

  auto *child = node->first_child;
  while (child) {
    walk_text(child, out);
    child = child->next;
  }
}
|
||||
|
||||
// ── Walk <head> for meta/title/link ─────────────────────────────────────────
|
||||
|
||||
// Metadata aggregated from a document's <head>. walk_head() is also
// reused over the whole document by get_json_ld(), since JSON-LD scripts
// may live in <body> as well.
struct HeadData {
  std::string title;      // text of the last <title> encountered (later wins)
  std::string canonical;  // href of <link rel="canonical">, if any
  std::vector<std::pair<std::string, std::string>> metas; // name/property → content
  std::vector<std::string> json_ld; // raw <script type="application/ld+json"> bodies
};

// Depth-first scan filling `data` from title/meta/link/script elements.
static void walk_head(lxb_dom_node_t *node, HeadData &data) {
  if (!node) return;

  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *el = lxb_dom_interface_element(node);
    auto name = tag_name(el);

    if (name == "title") {
      data.title = node_text(node);
    } else if (name == "meta") {
      // A meta with both name= and property= (and content=) is recorded
      // twice, once under each key.
      auto nameAttr = get_element_attr(el, "name");
      auto propAttr = get_element_attr(el, "property");
      auto content = get_element_attr(el, "content");
      if (!content.empty()) {
        if (!nameAttr.empty()) data.metas.emplace_back(nameAttr, content);
        if (!propAttr.empty()) data.metas.emplace_back(propAttr, content);
      }
    } else if (name == "link") {
      auto rel = get_element_attr(el, "rel");
      if (rel == "canonical") {
        data.canonical = get_element_attr(el, "href");
      }
    } else if (name == "script") {
      // Only JSON-LD scripts are collected; other scripts are ignored.
      auto type = get_element_attr(el, "type");
      if (type == "application/ld+json") {
        auto text = node_text(node);
        if (!text.empty()) data.json_ld.push_back(text);
      }
    }
  }

  auto *child = node->first_child;
  while (child) {
    walk_head(child, data);
    child = child->next;
  }
}
|
||||
|
||||
// ── Walk <body> for <a> links ───────────────────────────────────────────────
|
||||
|
||||
// Depth-first collection of every <a> that has a non-empty href.
// Anchors without href (named anchors, JS-only) are skipped.
static void walk_links(lxb_dom_node_t *node, std::vector<Link> &out) {
  if (node == nullptr) {
    return;
  }

  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *el = lxb_dom_interface_element(node);
    if (tag_name(el) == "a") {
      auto href = get_element_attr(el, "href");
      if (!href.empty()) {
        Link link;
        link.href = std::move(href);
        link.rel = get_element_attr(el, "rel");
        link.text = node_text(node);
        out.push_back(std::move(link));
      }
    }
  }

  for (auto *child = node->first_child; child != nullptr;
       child = child->next) {
    walk_links(child, out);
  }
}
|
||||
|
||||
// ── public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
std::vector<Element> parse(const std::string &html_str) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
std::vector<Element> result;
|
||||
auto *body = lxb_dom_interface_node(lxb_html_document_body_element(doc));
|
||||
walk(body, result);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ── CSS selector callback ───────────────────────────────────────────────────
|
||||
|
||||
// Context handed to select_cb via the lexbor selector engine.
struct SelectCtx {
  std::vector<std::string> *out; // non-owning; collects matched node text
};

// Callback invoked once per node matching the CSS selector; records the
// node's text when non-empty and always continues the search.
static lxb_status_t select_cb(lxb_dom_node_t *node,
                              lxb_css_selector_specificity_t spec, void *ctx) {
  (void)spec;
  auto *sctx = static_cast<SelectCtx *>(ctx);
  auto txt = node_text(node);
  if (!txt.empty()) {
    sctx->out->push_back(txt);
  }
  return LXB_STATUS_OK;
}
|
||||
|
||||
// Run CSS selector `selector` against the <body> of `html_str` and return
// the text content of every matching node (document order). Returns an
// empty vector when the HTML fails to parse or the selector is invalid.
std::vector<std::string> select(const std::string &html_str,
                                const std::string &selector) {
  std::vector<std::string> result;

  auto *doc = parse_doc(html_str);
  if (!doc) return result;

  auto *css_parser = lxb_css_parser_create();
  lxb_css_parser_init(css_parser, nullptr);

  auto *selectors = lxb_selectors_create();
  lxb_selectors_init(selectors);

  // nullptr `list` means the selector did not parse — skip the search but
  // still tear everything down below.
  auto *list = lxb_css_selectors_parse(
      css_parser, reinterpret_cast<const lxb_char_t *>(selector.c_str()),
      selector.size());

  if (list) {
    SelectCtx ctx{&result};
    lxb_selectors_find(
        selectors, lxb_dom_interface_node(lxb_html_document_body_element(doc)),
        list, select_cb, &ctx);
    lxb_css_selector_list_destroy_memory(list);
  }

  // Teardown: selector engine and parser first, then the document they
  // referenced.
  lxb_selectors_destroy(selectors, true);
  lxb_css_parser_destroy(css_parser, true);
  lxb_html_document_destroy(doc);

  return result;
}
|
||||
|
||||
// ── Enricher extraction helpers ─────────────────────────────────────────────
|
||||
|
||||
std::string get_title(const std::string &html_str) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
HeadData data;
|
||||
auto *head = lxb_dom_interface_node(lxb_html_document_head_element(doc));
|
||||
walk_head(head, data);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
return data.title;
|
||||
}
|
||||
|
||||
std::string get_meta(const std::string &html_str, const std::string &name) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
HeadData data;
|
||||
auto *head = lxb_dom_interface_node(lxb_html_document_head_element(doc));
|
||||
walk_head(head, data);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
|
||||
for (auto &[key, val] : data.metas) {
|
||||
if (key == name) return val;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string get_canonical(const std::string &html_str) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
HeadData data;
|
||||
auto *head = lxb_dom_interface_node(lxb_html_document_head_element(doc));
|
||||
walk_head(head, data);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
return data.canonical;
|
||||
}
|
||||
|
||||
std::vector<Link> get_links(const std::string &html_str) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
std::vector<Link> links;
|
||||
auto *body = lxb_dom_interface_node(lxb_html_document_body_element(doc));
|
||||
walk_links(body, links);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
return links;
|
||||
}
|
||||
|
||||
std::string get_body_text(const std::string &html_str) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
std::string text;
|
||||
auto *body = lxb_dom_interface_node(lxb_html_document_body_element(doc));
|
||||
walk_text(body, text);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
return text;
|
||||
}
|
||||
|
||||
std::vector<std::string> get_json_ld(const std::string &html_str) {
|
||||
auto *doc = parse_doc(html_str);
|
||||
if (!doc) return {};
|
||||
|
||||
HeadData data;
|
||||
// JSON-LD can be in head or body — walk entire document
|
||||
auto *root = lxb_dom_interface_node(
|
||||
lxb_dom_document_element(&doc->dom_document));
|
||||
walk_head(root, data);
|
||||
|
||||
lxb_html_document_destroy(doc);
|
||||
return data.json_ld;
|
||||
}
|
||||
|
||||
// ── get_attr via CSS selector ───────────────────────────────────────────────
|
||||
|
||||
// Context for attr_cb: which attribute to read and the first value found.
struct AttrCtx {
  std::string attr_name;
  std::string result;
  bool found; // set once a non-empty value is captured; later matches ignored
};

// Selector-match callback: capture the first non-empty value of
// `attr_name`. NOTE(review): it always returns LXB_STATUS_OK, so the
// engine keeps visiting matches after the first hit — harmless but
// wasted work; whether lexbor supports early termination from this
// callback should be confirmed before changing it.
static lxb_status_t attr_cb(lxb_dom_node_t *node,
                            lxb_css_selector_specificity_t spec, void *ctx) {
  (void)spec;
  auto *actx = static_cast<AttrCtx *>(ctx);
  if (actx->found) return LXB_STATUS_OK;

  if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
    auto *el = lxb_dom_interface_element(node);
    auto val = get_element_attr(el, actx->attr_name.c_str());
    if (!val.empty()) {
      actx->result = val;
      actx->found = true;
    }
  }
  return LXB_STATUS_OK;
}
|
||||
|
||||
// Value of attribute `attr_name` on the first selector match (searched
// from the document element, so head and body are both covered).
// Returns "" on parse failure, invalid selector, no match, or when the
// matched element's attribute is empty/absent.
std::string get_attr(const std::string &html_str, const std::string &selector,
                     const std::string &attr_name) {
  auto *doc = parse_doc(html_str);
  if (!doc) return {};

  auto *css_parser = lxb_css_parser_create();
  lxb_css_parser_init(css_parser, nullptr);

  auto *selectors = lxb_selectors_create();
  lxb_selectors_init(selectors);

  // nullptr `list` means the selector failed to parse; skip the search
  // but still run the teardown below.
  auto *list = lxb_css_selectors_parse(
      css_parser, reinterpret_cast<const lxb_char_t *>(selector.c_str()),
      selector.size());

  std::string result;
  if (list) {
    AttrCtx ctx{attr_name, {}, false};
    auto *root = lxb_dom_interface_node(
        lxb_dom_document_element(&doc->dom_document));
    lxb_selectors_find(selectors, root, list, attr_cb, &ctx);
    result = ctx.result;
    lxb_css_selector_list_destroy_memory(list);
  }

  // Teardown mirrors select(): engine/parser first, document last.
  lxb_selectors_destroy(selectors, true);
  lxb_css_parser_destroy(css_parser, true);
  lxb_html_document_destroy(doc);

  return result;
}
|
||||
|
||||
std::string to_markdown(const std::string &html_str) {
|
||||
// Defense-in-depth: hard cap at 2 MB even if the caller forgets.
|
||||
// The enricher pipeline already caps at 512 KB, but future callers
|
||||
// may not — prevent OOM / multi-second hangs from html2md.
|
||||
static constexpr size_t MAX_HTML2MD_INPUT = 2 * 1024 * 1024;
|
||||
if (html_str.size() > MAX_HTML2MD_INPUT) {
|
||||
return "*[Content truncated: HTML too large for markdown conversion ("
|
||||
+ std::to_string(html_str.size() / 1024) + " KB)]*\n";
|
||||
}
|
||||
return html2md::Convert(html_str);
|
||||
}
|
||||
|
||||
} // namespace html
|
||||
1195
packages/media/cpp/packages/html/src/html2md.cpp
Normal file
1195
packages/media/cpp/packages/html/src/html2md.cpp
Normal file
File diff suppressed because it is too large
Load Diff
106
packages/media/cpp/packages/html/src/table.cpp
Normal file
106
packages/media/cpp/packages/html/src/table.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
// Copyright (c) Tim Gromeyer
|
||||
// Licensed under the MIT License - https://opensource.org/licenses/MIT
|
||||
|
||||
#include "html/table.h"
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
// Shortest header-separator a markdown table may carry.
const size_t MIN_LINE_LENGTH = 3; // Minimum length of line

// Strip leading and trailing ' ' characters from `str` in place.
// A string consisting only of spaces becomes empty. Only spaces are
// trimmed (not tabs), matching the original behaviour.
void removeLeadingTrailingSpaces(std::string &str) {
  const size_t first = str.find_first_not_of(' ');
  if (first == std::string::npos) {
    str.clear();
    return;
  }
  const size_t last = str.find_last_not_of(' ');
  str = str.substr(first, last - first + 1);
}

// Build a '-' separator of exactly `length` characters, preserving the
// alignment colons found at the ends of the original separator cell
// `str` (":--" → left, "--:" → right, ":-:" → both). Returns "" for an
// empty cell or a target length below MIN_LINE_LENGTH.
std::string enlargeTableHeaderLine(const std::string &str, size_t length) {
  if (str.empty() || length < MIN_LINE_LENGTH)
    return "";

  size_t first = str.find_first_of(':');
  size_t last = str.find_last_of(':');

  // A lone leading colon must not also produce a trailing one.
  if (first == 0 && first == last)
    last = std::string::npos;

  std::string divider(length, '-');
  if (first == 0)
    divider.front() = ':';
  if (last == str.length() - 1)
    divider.back() = ':';

  return divider;
}

// Re-format a markdown table so every column is padded to the width of
// its widest cell; row 1 (the header separator) is rebuilt via
// enlargeTableHeaderLine. Cells are trimmed; empty cells are dropped.
std::string formatMarkdownTable(const std::string &inputTable) {
  // Parse into rows of trimmed, non-empty cells.
  std::vector<std::vector<std::string>> rows;
  std::istringstream tableStream(inputTable);
  for (std::string line; std::getline(tableStream, line);) {
    std::vector<std::string> cells;
    std::istringstream cellStream(line);
    for (std::string cell; std::getline(cellStream, cell, '|');) {
      removeLeadingTrailingSpaces(cell);
      if (!cell.empty())
        cells.push_back(cell);
    }
    if (!cells.empty())
      rows.push_back(std::move(cells));
  }

  if (rows.empty())
    return "";

  // Column widths: widest cell per column across all rows.
  std::vector<size_t> widths(rows.front().size(), 0);
  for (const auto &cells : rows) {
    if (widths.size() < cells.size())
      widths.resize(cells.size(), 0);
    for (size_t col = 0; col < cells.size(); ++col)
      widths[col] = std::max(widths[col], cells[col].size());
  }

  // Emit padded rows.
  std::ostringstream out;
  for (size_t rowIdx = 0; rowIdx < rows.size(); ++rowIdx) {
    const auto &cells = rows[rowIdx];
    out << "|";
    for (size_t col = 0; col < cells.size(); ++col) {
      if (rowIdx == 1) {
        out << enlargeTableHeaderLine(cells[col], widths[col] + 2) << "|";
      } else {
        out << " " << std::setw(widths[col]) << std::left << cells[col]
            << " |";
      }
    }
    out << "\n";
  }

  return out.str();
}
|
||||
48
packages/media/cpp/packages/http/CMakeLists.txt
Normal file
48
packages/media/cpp/packages/http/CMakeLists.txt
Normal file
@ -0,0 +1,48 @@
|
||||
include(FetchContent)

# Work around curl's old cmake_minimum_required for CMake 4.x
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)

# Pin curl 8.12.1 by release tarball; FetchContent builds it in-tree.
FetchContent_Declare(
  CURL
  URL https://github.com/curl/curl/releases/download/curl-8_12_1/curl-8.12.1.tar.xz
  DOWNLOAD_EXTRACT_TIMESTAMP TRUE
)

# Minimal curl build — static, SChannel TLS, no optional deps
# NOTE(review): BUILD_SHARED_LIBS is a global cache switch — forcing it
# OFF affects every target in the build tree, not just curl; confirm this
# is intended.
set(BUILD_CURL_EXE OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(BUILD_TESTING OFF CACHE BOOL "" FORCE)

# TLS backend: platform-appropriate
if(WIN32)
  set(CURL_USE_OPENSSL OFF CACHE BOOL "" FORCE)
  set(CURL_USE_SCHANNEL ON CACHE BOOL "" FORCE)
else()
  set(CURL_USE_SCHANNEL OFF CACHE BOOL "" FORCE)
  set(CURL_USE_OPENSSL ON CACHE BOOL "" FORCE)
endif()

# Disable optional compression/protocol deps
set(CURL_ZLIB OFF CACHE BOOL "" FORCE)
set(CURL_BROTLI OFF CACHE BOOL "" FORCE)
set(CURL_ZSTD OFF CACHE BOOL "" FORCE)
set(USE_NGHTTP2 OFF CACHE BOOL "" FORCE)
set(CURL_USE_LIBSSH2 OFF CACHE BOOL "" FORCE)
set(CURL_USE_LIBPSL OFF CACHE BOOL "" FORCE)
set(CURL_DISABLE_LDAP ON CACHE BOOL "" FORCE)
set(CURL_DISABLE_LDAPS ON CACHE BOOL "" FORCE)

FetchContent_MakeAvailable(CURL)

# Thin static wrapper library around libcurl (see src/http.cpp).
add_library(http STATIC
  src/http.cpp
)

target_include_directories(http
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)

# PUBLIC so consumers of `http` inherit curl's usage requirements.
target_link_libraries(http
  PUBLIC CURL::libcurl
)
|
||||
40
packages/media/cpp/packages/http/include/http/http.h
Normal file
40
packages/media/cpp/packages/http/include/http/http.h
Normal file
@ -0,0 +1,40 @@
|
||||
#pragma once

#include <string>

namespace http {

/// Result of an HTTP request.
/// On transport-level failure (init, DNS, connect, timeout, stall)
/// status_code is -1 and body holds the curl error string; otherwise
/// status_code is the HTTP status and body the response payload.
struct Response {
  // Fix: default member initializer — a plain `Response r;` previously
  // left status_code indeterminate. Aggregate initialization by callers
  // remains valid in C++17.
  long status_code = 0;
  std::string body;
};

/// Options for customisable HTTP GET requests.
struct GetOptions {
  std::string user_agent = "Mozilla/5.0 (compatible; PolymechBot/1.0)";
  int timeout_ms = 10000;        // overall transfer timeout; <= 0 disables
  bool follow_redirects = true;
};

/// Perform an HTTP GET request. Returns the response body and status code.
Response get(const std::string &url);

/// Perform an HTTP GET request with custom options.
Response get(const std::string &url, const GetOptions &opts);

/// Perform an HTTP POST request with a body. Returns the response and status.
Response post(const std::string &url, const std::string &body,
              const std::string &content_type = "application/json");

/// Options for customisable HTTP POST requests.
struct PostOptions {
  std::string content_type = "application/json";
  std::string bearer_token; // Authorization: Bearer <token>
  int timeout_ms = 30000;   // overall transfer timeout; <= 0 disables
};

/// Perform an HTTP POST request with custom options.
Response post(const std::string &url, const std::string &body,
              const PostOptions &opts);

} // namespace http
|
||||
216
packages/media/cpp/packages/http/src/http.cpp
Normal file
216
packages/media/cpp/packages/http/src/http.cpp
Normal file
@ -0,0 +1,216 @@
|
||||
#include "http/http.h"
|
||||
|
||||
#include <curl/curl.h>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
namespace http {
|
||||
|
||||
// One-time, thread-safe process-wide libcurl initialisation. Guarded by
// std::call_once so concurrent first requests race safely.
// curl_global_cleanup() is deliberately never called — the OS reclaims
// everything at process exit.
static std::once_flag curl_init_flag;
static void ensure_curl_init() {
  std::call_once(curl_init_flag, []() {
    curl_global_init(CURL_GLOBAL_ALL);
  });
}
|
||||
|
||||
// RAII wrapper owning one cached CURL easy handle per thread. Reusing a
// handle across requests on the same thread keeps curl's connection and
// DNS caches warm; get() resets all options so no request state leaks
// into the next request.
struct ThreadLocalCurl {
  CURL *handle; // may be nullptr if curl_easy_init failed
  ThreadLocalCurl() {
    ensure_curl_init(); // global init must precede curl_easy_init
    handle = curl_easy_init();
  }
  ~ThreadLocalCurl() {
    if (handle) curl_easy_cleanup(handle);
  }
  // Return the handle with all options reset to defaults (or nullptr).
  CURL *get() {
    if (handle) curl_easy_reset(handle);
    return handle;
  }
};

// One instance per thread; destroyed (and the handle cleaned up) at
// thread exit.
thread_local ThreadLocalCurl tl_curl;
|
||||
|
||||
// Wall-clock deadline passed to progress_cb via CURLOPT_XFERINFODATA.
struct ProgressData {
  std::chrono::steady_clock::time_point start_time;
  int timeout_ms; // <= 0 disables the watchdog
};

// libcurl transfer-info callback acting as a wall-clock watchdog on top
// of CURLOPT_TIMEOUT(_MS). Returning non-zero makes curl abort the
// transfer (reported as CURLE_ABORTED_BY_CALLBACK).
static int progress_cb(void *clientp, curl_off_t dltotal, curl_off_t dlnow,
                       curl_off_t ultotal, curl_off_t ulnow) {
  auto *pd = static_cast<ProgressData *>(clientp);
  if (pd->timeout_ms <= 0) return 0;

  auto now = std::chrono::steady_clock::now();
  auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - pd->start_time).count();
  if (elapsed > pd->timeout_ms) {
    return 1; // Return non-zero to abort the transfer
  }
  return 0; // Continue
}
|
||||
|
||||
// libcurl write callback: append the received chunk to the std::string
// registered via CURLOPT_WRITEDATA. Reporting fewer than size*nmemb
// bytes consumed would make curl abort the transfer, so the full count
// is always returned.
static size_t write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
  const size_t total = size * nmemb;
  static_cast<std::string *>(userp)->append(static_cast<char *>(contents),
                                            total);
  return total;
}
|
||||
|
||||
// Convenience overload: GET with default options (10 s timeout,
// redirects followed, default bot user-agent — see GetOptions).
Response get(const std::string &url) {
  return get(url, GetOptions{});
}
|
||||
|
||||
// Blocking HTTP GET of `url` using the thread-local curl handle.
//
// @param url  absolute URL to fetch
// @param opts user-agent / timeout / redirect behaviour
// @return Response; on transport failure status_code is -1 and body holds
//         the curl error string, otherwise the HTTP status and body.
Response get(const std::string &url, const GetOptions &opts) {
  Response resp{};

  CURL *curl = tl_curl.get();
  if (!curl) {
    resp.status_code = -1;
    resp.body = "curl_easy_init (thread_local) failed";
    return resp;
  }

  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp.body);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, opts.follow_redirects ? 1L : 0L);

  // Watchdog on top of CURLOPT_TIMEOUT_MS: progress_cb aborts 1 s after
  // the configured timeout if curl's own timer fails to fire.
  // prog_data must outlive curl_easy_perform (curl keeps a raw pointer).
  ProgressData prog_data;
  if (opts.timeout_ms > 0) {
    curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, static_cast<long>(opts.timeout_ms));
    prog_data.start_time = std::chrono::steady_clock::now();
    prog_data.timeout_ms = opts.timeout_ms + 1000;
    curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_cb);
    curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &prog_data);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  }

  // Fail fast on dead sites (TCP SYN timeout)
  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS, 5000L);

  // Prevent stalling: abort if transfer speed is less than 1 byte/sec for 10 seconds
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 10L);

  // Prevent signal handlers from breaking in multithreaded environments
  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);

  if (!opts.user_agent.empty()) {
    curl_easy_setopt(curl, CURLOPT_USERAGENT, opts.user_agent.c_str());
  }

  // Accept-Encoding for compressed responses
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");

  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    resp.status_code = -1;
    resp.body = curl_easy_strerror(res);
  } else {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp.status_code);
  }

  return resp;
}
|
||||
|
||||
// Blocking HTTP POST of `body` to `url` (10 s transfer timeout, redirects
// followed) using the thread-local curl handle.
//
// @param url          absolute URL to post to
// @param body         request payload, sent verbatim
// @param content_type value for the Content-Type header
// @return Response; on transport failure status_code is -1 and body holds
//         the curl error string, otherwise the HTTP status and body.
Response post(const std::string &url, const std::string &body,
              const std::string &content_type) {
  Response resp{};

  CURL *curl = tl_curl.get();
  if (!curl) {
    resp.status_code = -1;
    resp.body = "curl_easy_init failed";
    return resp;
  }

  struct curl_slist *headers = nullptr;
  headers =
      curl_slist_append(headers, ("Content-Type: " + content_type).c_str());

  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());
  // Fix: pass the payload size explicitly — with CURLOPT_POSTFIELDS alone
  // curl uses strlen(), silently truncating bodies containing NUL bytes.
  curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE,
                   static_cast<curl_off_t>(body.size()));
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp.body);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10L);

  // Wall-clock watchdog: aborts ~1 s after the 10 s timeout in case
  // curl's own timer fails to fire. Must outlive curl_easy_perform.
  ProgressData prog_data;
  prog_data.start_time = std::chrono::steady_clock::now();
  prog_data.timeout_ms = 11000;
  curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_cb);
  curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &prog_data);
  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);

  // Prevent stalling: abort if transfer speed is less than 1 byte/sec for 10 seconds
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 10L);
  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);

  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    resp.status_code = -1;
    resp.body = curl_easy_strerror(res);
  } else {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp.status_code);
  }

  curl_slist_free_all(headers);
  return resp;
}
|
||||
|
||||
/// POST `body` to `url` with Content-Type, optional bearer auth headers,
/// and an optional per-request timeout taken from `opts`.
/// Returns the response body and HTTP status; on transport failure
/// status_code is -1 and `body` holds the curl error string.
Response post(const std::string &url, const std::string &body,
              const PostOptions &opts) {
  Response resp{};

  CURL *curl = tl_curl.get();
  if (!curl) {
    resp.status_code = -1;
    resp.body = "curl_easy_init failed";
    return resp;
  }

  struct curl_slist *headers = nullptr;
  headers =
      curl_slist_append(headers, ("Content-Type: " + opts.content_type).c_str());
  if (!opts.bearer_token.empty()) {
    // Token is sent both as standard Bearer auth and as x-api-token.
    headers = curl_slist_append(
        headers, ("Authorization: Bearer " + opts.bearer_token).c_str());
    headers = curl_slist_append(
        headers, ("x-api-token: " + opts.bearer_token).c_str());
  }

  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());
  // Binary-safe body: without an explicit size, curl strlen()s the buffer
  // and would truncate payloads containing embedded NUL bytes.
  curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(body.size()));
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp.body);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

  ProgressData prog_data;
  if (opts.timeout_ms > 0) {
    curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, static_cast<long>(opts.timeout_ms));
    // Wall-clock backstop slightly past curl's own timeout.
    prog_data.start_time = std::chrono::steady_clock::now();
    prog_data.timeout_ms = opts.timeout_ms + 1000;
    curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_cb);
    curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &prog_data);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  } else {
    // The handle is thread-local and reused: explicitly disable any progress
    // callback a previous request installed, whose ProgressData lived on that
    // call's (now destroyed) stack frame.
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);
  }

  // Prevent stalling: abort if transfer speed is less than 1 byte/sec for 10 seconds
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
  curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 10L);
  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);

  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    resp.status_code = -1;
    resp.body = curl_easy_strerror(res);
  } else {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp.status_code);
  }

  // Detach the header list from the reused handle BEFORE freeing it so the
  // next request on this handle does not read freed memory.
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, static_cast<struct curl_slist *>(nullptr));
  curl_slist_free_all(headers);
  return resp;
}
|
||||
|
||||
} // namespace http
|
||||
45
packages/media/cpp/packages/ipc/CMakeLists.txt
Normal file
45
packages/media/cpp/packages/ipc/CMakeLists.txt
Normal file
@ -0,0 +1,45 @@
|
||||
cmake_minimum_required(VERSION 3.20)

project(ipc CXX)

# OFF (default) builds a static archive; ON builds a DLL/.so with exported
# symbols (see include/ipc/ipc_export.h for the matching macros).
option(IPC_BUILD_SHARED "Build ipc as a shared library (DLL/so)" OFF)

set(_ipc_sources src/ipc.cpp)

if(IPC_BUILD_SHARED)
  add_library(ipc SHARED ${_ipc_sources})
  # Switches IPC_API to dllexport / visibility("default") while building the lib.
  target_compile_definitions(ipc PRIVATE IPC_BUILDING_LIBRARY)
else()
  add_library(ipc STATIC ${_ipc_sources})
  # Static builds: define IPC_STATIC_BUILD for the lib itself (PRIVATE) and for
  # consumers (INTERFACE) so IPC_API expands to nothing on both sides.
  target_compile_definitions(ipc PRIVATE IPC_STATIC_BUILD=1)
  target_compile_definitions(ipc INTERFACE IPC_STATIC_BUILD=1)
endif()

target_include_directories(ipc
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)

# json: RapidJSON-based helpers; logger: diagnostics used by ipc.cpp.
target_link_libraries(ipc
  PUBLIC json logger
)

if(IPC_BUILD_SHARED)
  # Keep the shared library next to the executables in dist/.
  set_target_properties(ipc PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
    RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/dist"
    RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/dist"
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
  )
endif()

install(TARGETS ipc
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION bin
)
install(FILES
  ${CMAKE_CURRENT_SOURCE_DIR}/include/ipc/ipc.h
  ${CMAKE_CURRENT_SOURCE_DIR}/include/ipc/ipc_export.h
  DESTINATION include/ipc
)
|
||||
35
packages/media/cpp/packages/ipc/include/ipc/ipc.h
Normal file
35
packages/media/cpp/packages/ipc/include/ipc/ipc.h
Normal file
@ -0,0 +1,35 @@
|
||||
#pragma once

// Length-prefixed JSON IPC framing: encode/decode Message structs and stream
// them over stdio (or any FILE*). Wire layout per frame:
//   [4-byte little-endian uint32 payload length][UTF-8 JSON bytes]

#include "ipc/ipc_export.h"
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

namespace ipc {

/// A single IPC message: { id, type, payload (raw JSON string) }.
struct Message {
  std::string id;       // correlation id (required on the wire)
  std::string type;     // message kind (required on the wire)
  std::string payload;  // opaque JSON string (can be "{}" or any object)
};

/// Encode a Message into a length-prefixed binary frame.
/// Layout: [4-byte LE uint32 length][JSON bytes]
IPC_API std::vector<uint8_t> encode(const Message &msg);

/// Decode a binary frame (without the 4-byte length prefix) into a Message.
/// Returns false if the JSON is invalid or missing required fields.
IPC_API bool decode(const uint8_t *data, size_t len, Message &out);
IPC_API bool decode(const std::vector<uint8_t> &frame, Message &out);

/// Blocking: read exactly one length-prefixed message from a FILE*.
/// Returns false on EOF or read error.
IPC_API bool read_message(Message &out, FILE *in = stdin);

/// Write one length-prefixed message to a FILE*. Flushes after write.
/// Returns false on write error.
IPC_API bool write_message(const Message &msg, FILE *out = stdout);

} // namespace ipc
|
||||
25
packages/media/cpp/packages/ipc/include/ipc/ipc_export.h
Normal file
25
packages/media/cpp/packages/ipc/include/ipc/ipc_export.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once

/**
 * DLL / shared-object exports for the length-prefixed JSON IPC framing library.
 *
 * CMake:
 *  - Building libipc:    IPC_BUILDING_LIBRARY (PRIVATE)
 *  - Linking static ipc: IPC_STATIC_BUILD=1   (INTERFACE)
 */

#if defined(IPC_STATIC_BUILD)
// Static archive: no import/export decoration needed on either side.
#  define IPC_API
#elif defined(_WIN32)
#  if defined(IPC_BUILDING_LIBRARY)
#    define IPC_API __declspec(dllexport)
#  else
// Consumers of the DLL import the symbols.
#    define IPC_API __declspec(dllimport)
#  endif
#else
#  if defined(IPC_BUILDING_LIBRARY)
// ELF/Mach-O shared build: mark symbols visible (matters when the library is
// compiled with hidden default visibility).
#    define IPC_API __attribute__((visibility("default")))
#  else
#    define IPC_API
#  endif
#endif
|
||||
158
packages/media/cpp/packages/ipc/src/ipc.cpp
Normal file
158
packages/media/cpp/packages/ipc/src/ipc.cpp
Normal file
@ -0,0 +1,158 @@
|
||||
#include "ipc/ipc.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "json/json.h"
|
||||
#include "logger/logger.h"
|
||||
|
||||
// We use RapidJSON directly for structured serialization
|
||||
#include <rapidjson/document.h>
|
||||
#include <rapidjson/stringbuffer.h>
|
||||
#include <rapidjson/writer.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
namespace ipc {
|
||||
|
||||
// ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// Serialize a 32-bit value into 4 bytes, least-significant byte first.
static void write_u32_le(uint8_t *dst, uint32_t val) {
  for (int i = 0; i < 4; ++i) {
    dst[i] = static_cast<uint8_t>(val >> (8 * i));
  }
}
|
||||
|
||||
// Deserialize 4 little-endian bytes into a 32-bit value.
static uint32_t read_u32_le(const uint8_t *src) {
  uint32_t value = 0;
  for (int i = 3; i >= 0; --i) {
    value = (value << 8) | src[i];
  }
  return value;
}
|
||||
|
||||
// Keep reading until exactly n bytes have arrived.
// Returns false if the stream hits EOF or errors before n bytes are read.
static bool read_exact(FILE *f, uint8_t *buf, size_t n) {
  for (size_t done = 0; done < n;) {
    const size_t chunk = std::fread(buf + done, 1, n - done, f);
    if (chunk == 0) {
      return false;  // EOF or error
    }
    done += chunk;
  }
  return true;
}
|
||||
|
||||
// ── encode ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Serialize one Message into a wire frame:
//   [4-byte LE uint32 length][{"id":...,"type":...,"payload":...}]
// A payload that parses as JSON is embedded verbatim as a JSON value;
// anything else (including the empty string) is embedded as a JSON string.
std::vector<uint8_t> encode(const Message &msg) {
  // Build JSON: { "id": "...", "type": "...", "payload": ... }
  // payload is stored as a raw JSON string, so we parse it first
  rapidjson::StringBuffer sb;
  rapidjson::Writer<rapidjson::StringBuffer> w(sb);

  w.StartObject();
  w.Key("id");
  // Explicit lengths so strings with embedded NUL bytes survive intact.
  w.String(msg.id.c_str(), static_cast<rapidjson::SizeType>(msg.id.size()));
  w.Key("type");
  w.String(msg.type.c_str(),
           static_cast<rapidjson::SizeType>(msg.type.size()));
  w.Key("payload");

  // If payload is valid JSON, embed it as-is; otherwise embed as string
  rapidjson::Document pd;
  if (!msg.payload.empty() &&
      !pd.Parse(msg.payload.c_str()).HasParseError()) {
    pd.Accept(w);  // splice the parsed payload into the writer stream
  } else {
    w.String(msg.payload.c_str(),
             static_cast<rapidjson::SizeType>(msg.payload.size()));
  }

  w.EndObject();

  const char *json_str = sb.GetString();
  uint32_t json_len = static_cast<uint32_t>(sb.GetSize());

  // Frame = 4-byte little-endian length prefix followed by the JSON bytes.
  std::vector<uint8_t> frame(4 + json_len);
  write_u32_le(frame.data(), json_len);
  std::memcpy(frame.data() + 4, json_str, json_len);

  return frame;
}
|
||||
|
||||
// ── decode ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Parse one frame body (length prefix already stripped) into `out`.
// Requires top-level object with string "id" and "type"; "payload" is
// optional and normalized to a raw JSON string ("{}" when missing).
bool decode(const uint8_t *data, size_t len, Message &out) {
  rapidjson::Document doc;
  doc.Parse(reinterpret_cast<const char *>(data), len);

  if (doc.HasParseError() || !doc.IsObject()) return false;

  // "id" and "type" are mandatory string fields.
  if (!doc.HasMember("id") || !doc["id"].IsString()) return false;
  if (!doc.HasMember("type") || !doc["type"].IsString()) return false;

  out.id = doc["id"].GetString();
  out.type = doc["type"].GetString();

  if (doc.HasMember("payload")) {
    if (doc["payload"].IsString()) {
      out.payload = doc["payload"].GetString();
    } else {
      // Re-serialize non-string payload back to JSON string
      rapidjson::StringBuffer sb;
      rapidjson::Writer<rapidjson::StringBuffer> w(sb);
      doc["payload"].Accept(w);
      out.payload = sb.GetString();
    }
  } else {
    // Missing payload is tolerated and normalized to an empty object.
    out.payload = "{}";
  }

  return true;
}
|
||||
|
||||
bool decode(const std::vector<uint8_t> &frame, Message &out) {
|
||||
return decode(frame.data(), frame.size(), out);
|
||||
}
|
||||
|
||||
// ── read_message ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Blocking read of exactly one framed message from `in`.
// Returns false on EOF, short read, absurd length, or undecodable JSON.
bool read_message(Message &out, FILE *in) {
#ifdef _WIN32
  // Ensure binary mode on Windows to prevent \r\n translation
  _setmode(_fileno(in), _O_BINARY);
#endif

  // First the 4-byte little-endian length prefix...
  uint8_t len_buf[4];
  if (!read_exact(in, len_buf, 4)) return false;

  uint32_t msg_len = read_u32_le(len_buf);
  if (msg_len == 0 || msg_len > 10 * 1024 * 1024) { // sanity: max 10 MB
    logger::error("ipc::read_message: invalid length " +
                  std::to_string(msg_len));
    return false;
  }

  // ...then exactly msg_len bytes of JSON frame body.
  std::vector<uint8_t> buf(msg_len);
  if (!read_exact(in, buf.data(), msg_len)) return false;

  return decode(buf, out);
}
|
||||
|
||||
// ── write_message ────────────────────────────────────────────────────────────
|
||||
|
||||
bool write_message(const Message &msg, FILE *out) {
|
||||
#ifdef _WIN32
|
||||
_setmode(_fileno(out), _O_BINARY);
|
||||
#endif
|
||||
|
||||
auto frame = encode(msg);
|
||||
size_t written = std::fwrite(frame.data(), 1, frame.size(), out);
|
||||
if (written != frame.size()) return false;
|
||||
|
||||
std::fflush(out);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ipc
|
||||
28
packages/media/cpp/packages/json/CMakeLists.txt
Normal file
28
packages/media/cpp/packages/json/CMakeLists.txt
Normal file
@ -0,0 +1,28 @@
|
||||
include(FetchContent)

# RapidJSON — use master for CMake 4.x compatibility (v1.1.0 is from 2016)
FetchContent_Declare(
  rapidjson
  GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
  GIT_TAG master
  GIT_SHALLOW TRUE
)

# Header-only usage: disable everything RapidJSON's own build would add.
set(RAPIDJSON_BUILD_DOC OFF CACHE BOOL "" FORCE)
set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
set(RAPIDJSON_BUILD_TESTS OFF CACHE BOOL "" FORCE)

# Fetch sources only; we deliberately skip add_subdirectory because only the
# headers are consumed.
FetchContent_GetProperties(rapidjson)
if(NOT rapidjson_POPULATED)
  FetchContent_Populate(rapidjson)
  # Don't add_subdirectory — just use the headers
endif()

add_library(json STATIC
  src/json.cpp
)

# Consumers get both the wrapper headers and RapidJSON itself.
target_include_directories(json
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
  PUBLIC ${rapidjson_SOURCE_DIR}/include
)
|
||||
23
packages/media/cpp/packages/json/include/json/json.h
Normal file
23
packages/media/cpp/packages/json/include/json/json.h
Normal file
@ -0,0 +1,23 @@
|
||||
#pragma once

// Thin RapidJSON-backed helpers for quick, top-level-only JSON inspection.
// All functions return neutral defaults ("", 0, empty vector) on parse failure.

#include <string>
#include <vector>

namespace json {

/// Parse a JSON string and return a pretty-printed version.
/// Returns an empty string when the input does not parse.
std::string prettify(const std::string &json_str);

/// Extract a string value by key from a JSON object (top-level only).
/// Returns "" when the key is absent or not a string.
std::string get_string(const std::string &json_str, const std::string &key);

/// Extract an int value by key from a JSON object (top-level only).
/// Returns 0 when the key is absent or not an int.
int get_int(const std::string &json_str, const std::string &key);

/// Check if a JSON string is valid.
bool is_valid(const std::string &json_str);

/// Get all top-level keys from a JSON object.
std::vector<std::string> keys(const std::string &json_str);

} // namespace json
|
||||
62
packages/media/cpp/packages/json/src/json.cpp
Normal file
62
packages/media/cpp/packages/json/src/json.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include "json/json.h"
|
||||
|
||||
#include <rapidjson/document.h>
|
||||
#include <rapidjson/prettywriter.h>
|
||||
#include <rapidjson/stringbuffer.h>
|
||||
|
||||
namespace json {
|
||||
|
||||
std::string prettify(const std::string &json_str) {
|
||||
rapidjson::Document doc;
|
||||
doc.Parse(json_str.c_str());
|
||||
if (doc.HasParseError()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
rapidjson::StringBuffer buffer;
|
||||
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
|
||||
doc.Accept(writer);
|
||||
return std::string(buffer.GetString(), buffer.GetSize());
|
||||
}
|
||||
|
||||
std::string get_string(const std::string &json_str, const std::string &key) {
|
||||
rapidjson::Document doc;
|
||||
doc.Parse(json_str.c_str());
|
||||
if (doc.HasParseError() || !doc.IsObject())
|
||||
return {};
|
||||
auto it = doc.FindMember(key.c_str());
|
||||
if (it == doc.MemberEnd() || !it->value.IsString())
|
||||
return {};
|
||||
return std::string(it->value.GetString(), it->value.GetStringLength());
|
||||
}
|
||||
|
||||
// Fetch a top-level int member; 0 when absent, non-object, or not an int.
int get_int(const std::string &json_str, const std::string &key) {
  rapidjson::Document doc;
  if (doc.Parse(json_str.c_str()).HasParseError() || !doc.IsObject()) {
    return 0;
  }
  const auto member = doc.FindMember(key.c_str());
  if (member == doc.MemberEnd() || !member->value.IsInt()) {
    return 0;
  }
  return member->value.GetInt();
}
|
||||
|
||||
// True when the text parses as JSON under RapidJSON's default flags.
bool is_valid(const std::string &json_str) {
  rapidjson::Document doc;
  return !doc.Parse(json_str.c_str()).HasParseError();
}
|
||||
|
||||
// Collect every top-level member name of a JSON object, in document order.
// Non-object or unparseable input yields an empty vector.
std::vector<std::string> keys(const std::string &json_str) {
  std::vector<std::string> names;
  rapidjson::Document doc;
  if (doc.Parse(json_str.c_str()).HasParseError() || !doc.IsObject()) {
    return names;
  }
  for (const auto &member : doc.GetObject()) {
    names.emplace_back(member.name.GetString(), member.name.GetStringLength());
  }
  return names;
}
|
||||
|
||||
} // namespace json
|
||||
50
packages/media/cpp/packages/kbot/CMakeLists.txt
Normal file
50
packages/media/cpp/packages/kbot/CMakeLists.txt
Normal file
@ -0,0 +1,50 @@
|
||||
cmake_minimum_required(VERSION 3.20)

project(kbot CXX)

# OFF (default) builds a static archive; ON builds a DLL/.so using the
# POLYMECH_* export macros in polymech_export.h.
option(POLYMECH_KBOT_SHARED "Build kbot as a shared library (DLL/so)" OFF)

set(_kbot_sources kbot.cpp llm_client.cpp source_files.cpp)

if(POLYMECH_KBOT_SHARED)
  add_library(kbot SHARED ${_kbot_sources})
  target_compile_definitions(kbot PRIVATE POLYMECH_BUILDING_LIBRARY)
else()
  add_library(kbot STATIC ${_kbot_sources})
  # Static builds: the lib (PRIVATE) and its consumers (INTERFACE) both see
  # POLYMECH_STATIC_BUILD so the export macro expands to nothing on both sides.
  target_compile_definitions(kbot PRIVATE POLYMECH_STATIC_BUILD=1)
  target_compile_definitions(kbot INTERFACE POLYMECH_STATIC_BUILD=1)
endif()

# Headers live next to the sources; taskflow_SOURCE_DIR comes from the
# top-level FetchContent setup.
target_include_directories(kbot PUBLIC
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${taskflow_SOURCE_DIR}
)

target_link_libraries(kbot PUBLIC
  logger
  json
  oai
  pranav_glob
)

if(POLYMECH_KBOT_SHARED)
  # Keep the shared library alongside the executables in dist/.
  set_target_properties(kbot PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
    RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/dist"
    RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/dist"
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/dist"
  )
endif()

install(TARGETS kbot
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION bin
)
install(FILES
  ${CMAKE_CURRENT_SOURCE_DIR}/kbot.h
  ${CMAKE_CURRENT_SOURCE_DIR}/llm_client.h
  ${CMAKE_CURRENT_SOURCE_DIR}/polymech_export.h
  DESTINATION include/polymech
)
|
||||
189
packages/media/cpp/packages/kbot/kbot.cpp
Normal file
189
packages/media/cpp/packages/kbot/kbot.cpp
Normal file
@ -0,0 +1,189 @@
|
||||
#include "kbot.h"
|
||||
#include "source_files.h"
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include "logger/logger.h"
|
||||
#include "llm_client.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <rapidjson/stringbuffer.h>
|
||||
#include <rapidjson/writer.h>
|
||||
|
||||
namespace polymech {
|
||||
namespace kbot {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
/// In-place replacement of every occurrence of `from` in `s` with `to`.
/// Scans left-to-right and skips over freshly inserted text, so `to` may
/// contain `from` without re-expansion (e.g. "a" -> "aa" terminates).
/// An empty `from` is a no-op — previously `find("")` matched at every
/// position and the loop inserted `to` forever (infinite loop / OOM).
static void replace_all(std::string &s, const std::string &from, const std::string &to) {
    if (from.empty())
        return;  // guard: find("") succeeds everywhere and never terminates
    std::size_t pos = 0;
    while ((pos = s.find(from, pos)) != std::string::npos) {
        s.replace(pos, from.length(), to);
        pos += to.length();
    }
}
|
||||
|
||||
/// Strip any directory-style prefix from a model id ("vendor/name" -> "name").
/// Empty input maps to the sentinel "unknown_model".
static std::string model_basename(const std::string &model) {
    if (model.empty())
        return "unknown_model";
    const std::size_t sep = model.find_last_of("/\\");
    return sep == std::string::npos ? model : model.substr(sep + 1);
}
|
||||
|
||||
static std::string expand_dst_path(const KBotOptions &opts, std::string raw) {
|
||||
const std::string m = model_basename(opts.model);
|
||||
const std::string r = opts.router.empty() ? std::string("unknown_router") : opts.router;
|
||||
replace_all(raw, "${MODEL}", m);
|
||||
replace_all(raw, "${MODEL_NAME}", m);
|
||||
replace_all(raw, "${ROUTER}", r);
|
||||
return raw;
|
||||
}
|
||||
|
||||
/** Same idea as TS `onCompletion`: write to --dst / --output; `dst` wins over legacy `output` if both set. */
|
||||
static std::string effective_completion_dst(const KBotOptions &opts) {
|
||||
if (!opts.dst.empty())
|
||||
return opts.dst;
|
||||
return opts.output;
|
||||
}
|
||||
|
||||
/** Write the completion text to the resolved --dst/--output path.
 *  Expands ${MODEL}/${ROUTER} placeholders, creates parent directories,
 *  and appends unless opts.append == "replace".
 *  @returns true if wrote to file (caller should skip printing body to stdout). */
static bool try_write_completion_to_dst(const KBotOptions &opts, const std::string &text) {
    const std::string raw = effective_completion_dst(opts);
    if (raw.empty())
        return false;  // no output file configured — caller prints to stdout

    // Expand placeholders, then normalize to an absolute path.
    std::string expanded = expand_dst_path(opts, raw);
    fs::path p;
    try {
        p = fs::absolute(expanded);
    } catch (const std::exception &e) {
        logger::error(std::string("Invalid output path: ") + e.what());
        return false;
    }

    // Best effort: create parent directories so fresh output trees work.
    std::error_code ec;
    fs::create_directories(p.parent_path(), ec);
    if (ec) {
        logger::error("Failed to create output directories: " + ec.message());
        return false;
    }

    // Any append mode other than "replace" appends when the file already exists;
    // otherwise (or for a new file) the file is truncated.
    const bool append_existing = (opts.append != "replace") && fs::exists(p);
    std::ofstream out(p, std::ios::binary | (append_existing ? std::ios::app : std::ios::trunc));
    if (!out) {
        logger::error("Failed to open output file: " + p.string());
        return false;
    }
    out << text;
    // Keep output newline-terminated so appended runs don't run together.
    if (!text.empty() && text.back() != '\n')
        out.put('\n');
    logger::info(std::string(append_existing ? "Appended completion to " : "Wrote completion to ") + p.string());
    return true;
}
|
||||
|
||||
// Build the JSON "job_result" event body for ai mode:
//   success + text:  {"status":"success","mode":"ai","text":...}
//   failure:         {"status":"error","mode":"ai","error":...}
// Optional provider metadata (raw JSON) is attached under "llm".
std::string json_job_result_ai(bool success, const std::string &text_or_error, bool is_text,
                               const std::string &provider_meta_json = {}) {
    nlohmann::json o;
    o["status"] = success ? "success" : "error";
    o["mode"] = "ai";
    if (success && is_text) o["text"] = text_or_error;
    else if (!success) o["error"] = text_or_error;
    if (!provider_meta_json.empty()) {
        try {
            o["llm"] = nlohmann::json::parse(provider_meta_json);
        } catch (...) {
            // Never drop metadata silently: surface the raw string plus a flag.
            o["llm"] = nlohmann::json{{"_parse_error", true}, {"raw", provider_meta_json}};
        }
    }
    return o.dump();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Run the "kbot ai" pipeline: build the prompt (optionally attaching
// glob-matched source files), call the LLM, write/print the completion, and
// report progress/results through cb.onEvent. Returns 0 on success, 1 on
// LLM failure.
int run_kbot_ai_pipeline(const KBotOptions &opts, const KBotCallbacks &cb) {
    logger::debug("Starting kbot ai pipeline");

    // Only collect the attached-file list when include globs were given.
    std::vector<std::string> source_rel_paths;
    const std::string full_prompt = build_prompt_with_sources(
        opts, opts.include_globs.empty() ? nullptr : &source_rel_paths);
    if (!opts.include_globs.empty()) {
        logger::info("kbot ai: attached " + std::to_string(source_rel_paths.size()) + " text source file(s)");
    }

    // Dry run: emit a job_result describing what WOULD be sent; no LLM call.
    if (opts.dry_run) {
        logger::info("Dry run triggered for kbot ai");
        if (cb.onEvent) {
            if (!opts.include_globs.empty()) {
                cb.onEvent("job_result", make_dry_run_ai_result(opts, full_prompt, source_rel_paths).dump());
            } else {
                cb.onEvent("job_result", json_job_result_ai(true, "[dry-run] no LLM call", true));
            }
        }
        return 0;
    }

    LLMClient client(opts);
    std::string target_prompt = full_prompt;
    if (target_prompt.empty()) {
        // Smoke-test prompt when the user supplied nothing at all.
        target_prompt = "Respond with 'Hello from KBot C++ AI Pipeline!'";
    }

    logger::debug("Executing kbot ai completion via LLMClient...");
    LLMResponse res = client.execute_chat(target_prompt);

    if (res.success) {
        // File sink wins; stdout is the fallback.
        if (!try_write_completion_to_dst(opts, res.text))
            std::cout << res.text << "\n";
        if (cb.onEvent) {
            cb.onEvent("ai_progress",
                       "{\"message\":\"Task completion received\",\"has_text\":true}");
        }
    } else {
        logger::error("AI Task Failed: " + res.error);
        if (cb.onEvent) {
            // Hand-build {"error": "..."} so the event body is always valid JSON
            // regardless of what characters the error string contains.
            rapidjson::StringBuffer ebuf;
            rapidjson::Writer<rapidjson::StringBuffer> ew(ebuf);
            ew.StartObject();
            ew.Key("error");
            ew.String(res.error.c_str(),
                      static_cast<rapidjson::SizeType>(res.error.size()));
            ew.EndObject();
            cb.onEvent("ai_error",
                       std::string(ebuf.GetString(), ebuf.GetSize()));
        }
    }

    // Final job_result event mirrors the success/failure branch above.
    if (cb.onEvent) {
        if (res.success)
            cb.onEvent("job_result", json_job_result_ai(true, res.text, true, res.provider_meta_json));
        else
            cb.onEvent("job_result", json_job_result_ai(false, res.error, false));
    }

    return res.success ? 0 : 1;
}
|
||||
|
||||
int run_kbot_run_pipeline(const KBotRunOptions &opts, const KBotCallbacks &cb) {
|
||||
logger::info("Starting kbot run pipeline (stub) for config: " + opts.config);
|
||||
if (opts.dry) {
|
||||
logger::info("Dry run triggered for kbot run");
|
||||
}
|
||||
if (opts.list) {
|
||||
logger::info("List configs mode enabled");
|
||||
}
|
||||
|
||||
if (!opts.dry && !opts.list) {
|
||||
logger::info("Simulating launching: .vscode/launch.json targeting " + opts.config);
|
||||
}
|
||||
|
||||
if (cb.onEvent) {
|
||||
cb.onEvent("job_result", "{\"status\":\"success\",\"mode\":\"run\"}");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace kbot
|
||||
} // namespace polymech
|
||||
79
packages/media/cpp/packages/kbot/kbot.h
Normal file
79
packages/media/cpp/packages/kbot/kbot.h
Normal file
@ -0,0 +1,79 @@
|
||||
#pragma once

// Public entry points and option structs for the kbot pipelines
// ("kbot ai" LLM completion, "kbot run" launch-config stub).

#include "polymech_export.h"
#include <string>
#include <vector>
#include <memory>
#include <atomic>
#include <functional>

namespace polymech {
namespace kbot {

/// Options for the "kbot ai" pipeline (mirrors the CLI flags).
struct KBotOptions {
    std::string path = ".";
    std::string prompt;
    std::string output;              // legacy output file flag
    std::string dst;                 // preferred output file; wins over `output`
    std::string append = "concat";   // "replace" truncates; anything else appends to an existing file
    std::string wrap = "none";
    std::string each;
    std::vector<std::string> disable;
    std::vector<std::string> disable_tools;
    std::vector<std::string> tools;
    std::vector<std::string> include_globs;  // source files attached to the prompt
    std::vector<std::string> exclude_globs;
    std::string glob_extension;
    std::string api_key;
    std::string model;               // empty = router-specific default (see LLMClient)
    std::string router = "openrouter";  // provider selector: openai, deepseek, ollama, ...
    std::string mode = "tools";
    int log_level = 4;
    std::string profile;
    std::string base_url;            // empty = derived from `router` (see LLMClient)
    std::string config_path;
    std::string dump;
    std::string preferences;
    std::string logs;
    bool stream = false;
    bool alt = false;
    std::string env = "default";
    std::string filters;
    std::string query;
    bool dry_run = false;            // true = build the prompt but skip the LLM call
    std::string format;
    /** liboai HTTP timeout (ms). 0 = library default (~30s). IPC may set for long prompts. */
    int llm_timeout_ms = 0;
    /**
     * Optional chat completion `response_format` JSON (OpenAI structured outputs).
     * Example: {"type":"json_object"} or {"type":"json_schema","json_schema":{...}}.
     * Empty = omit (default text completion).
     */
    std::string response_format_json;

    // Internal
    std::string job_id;
    // NOTE(review): presumably polled by long-running stages to abort early —
    // not consulted in the currently visible pipeline code; confirm before relying on it.
    std::shared_ptr<std::atomic<bool>> cancel_token;
};

/// Options for the "kbot run" pipeline (currently a logging stub).
struct KBotRunOptions {
    std::string config = "default";
    bool dry = false;
    bool list = false;
    std::string project_path;
    std::string log_file_path;

    // Internal
    std::string job_id;
    std::shared_ptr<std::atomic<bool>> cancel_token;
};

/// Event sink used by both pipelines: onEvent(type, json_body),
/// e.g. ("job_result", "{\"status\":\"success\",...}").
struct KBotCallbacks {
    std::function<void(const std::string& type, const std::string& json)> onEvent;
};

POLYMECH_API int run_kbot_ai_pipeline(const KBotOptions& opts, const KBotCallbacks& cb);
POLYMECH_API int run_kbot_run_pipeline(const KBotRunOptions& opts, const KBotCallbacks& cb);

} // namespace kbot
} // namespace polymech
|
||||
165
packages/media/cpp/packages/kbot/llm_client.cpp
Normal file
165
packages/media/cpp/packages/kbot/llm_client.cpp
Normal file
@ -0,0 +1,165 @@
|
||||
#include "llm_client.h"
|
||||
#include "logger/logger.h"
|
||||
#include <liboai.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
|
||||
namespace polymech {
|
||||
namespace kbot {
|
||||
|
||||
// Resolve router -> base URL and router -> default model at construction time
// so execute_chat() only deals with fully-populated settings. Explicit
// opts.base_url / opts.model always override the router-derived defaults.
LLMClient::LLMClient(const KBotOptions& opts)
    : api_key_(opts.api_key),
      model_(opts.model),
      router_(opts.router),
      llm_timeout_ms_(opts.llm_timeout_ms),
      response_format_json_(opts.response_format_json) {

  // Set default base_url_ according to client.ts mappings
  if (opts.base_url.empty()) {
    if (router_ == "openrouter") base_url_ = "https://openrouter.ai/api/v1";
    else if (router_ == "openai") base_url_ = ""; // liboai uses the default URL automatically
    else if (router_ == "deepseek") base_url_ = "https://api.deepseek.com/v1";
    else if (router_ == "huggingface")base_url_ = "https://api-inference.huggingface.co/v1";
    else if (router_ == "ollama") base_url_ = "http://localhost:11434/v1";
    else if (router_ == "fireworks") base_url_ = "https://api.fireworks.ai/v1";
    else if (router_ == "gemini") base_url_ = "https://generativelanguage.googleapis.com/v1beta"; // or gemini openai compat endpt
    else if (router_ == "xai") base_url_ = "https://api.x.ai/v1";
    else base_url_ = "https://api.openai.com/v1"; // Fallback to openai API
  } else {
    // Explicit --base-url wins over the router mapping.
    base_url_ = opts.base_url;
  }

  // Default models based on router (from client.ts)
  if (model_.empty()) {
    if (router_ == "openrouter") model_ = "anthropic/claude-sonnet-4";
    else if (router_ == "openai") model_ = "gpt-4o";
    else if (router_ == "deepseek") model_ = "deepseek-chat";
    else if (router_ == "huggingface") model_ = "meta-llama/2";
    else if (router_ == "ollama") model_ = "llama3.2";
    else if (router_ == "fireworks") model_ = "llama-v2-70b-chat";
    else if (router_ == "gemini") model_ = "gemini-1.5-pro";
    else if (router_ == "xai") model_ = "grok-1";
    else model_ = "gpt-4o";
  }
}
|
||||
|
||||
LLMClient::~LLMClient() = default;
|
||||
|
||||
// Run one blocking chat completion against the configured provider.
// On any failure (missing key, transport error, malformed response) returns
// res.success=false with a human-readable res.error. Provider metadata
// (usage/model/cost) is collected into res.provider_meta_json when available.
LLMResponse LLMClient::execute_chat(const std::string& prompt) {
  LLMResponse res;

  logger::debug("LLMClient::execute_chat: Starting. api_key length: " + std::to_string(api_key_.length()));
  if (api_key_.empty()) {
    res.success = false;
    res.error = "API Key is empty.";
    return res;
  }

  logger::debug("LLMClient::execute_chat: base_url_: " + base_url_);
  // Empty base_url_ (the "openai" router case) falls back to the stock endpoint.
  liboai::OpenAI oai_impl(base_url_.empty() ? "https://api.openai.com/v1" : base_url_);

  logger::debug("LLMClient::execute_chat: Setting API Key");
  bool success = oai_impl.auth.SetKey(api_key_);
  if (!success) {
    res.success = false;
    res.error = "Failed to set API Key in liboai.";
    return res;
  }

  // 0 means "leave liboai's default timeout alone".
  if (llm_timeout_ms_ > 0) {
    oai_impl.auth.SetMaxTimeout(llm_timeout_ms_);
    logger::info("LLMClient: HTTP timeout set to " + std::to_string(llm_timeout_ms_) + " ms");
  }

  std::string target_model = model_.empty() ? "gpt-4o" : model_;
  logger::debug("LLMClient::execute_chat: Target model: " + target_model);

  logger::info("LLMClient: calling ChatCompletion (prompt chars=" + std::to_string(prompt.size()) + ")");
  logger::debug("LLMClient::execute_chat: Init Conversation");
  liboai::Conversation convo;
  convo.AddUserData(prompt);

  // Optional OpenAI "structured outputs" response_format; invalid JSON from
  // the CLI is logged and ignored rather than failing the whole request.
  std::optional<nlohmann::json> response_format;
  if (!response_format_json_.empty()) {
    try {
      response_format = nlohmann::json::parse(response_format_json_);
    } catch (const std::exception& e) {
      logger::warn("LLMClient: invalid --response-format / response_format_json, ignoring: " +
                   std::string(e.what()));
    }
  }

  logger::debug("LLMClient::execute_chat: Calling create()");
  try {
    // The nullopt run skips liboai's optional tuning parameters (presumably
    // temperature/top_p/etc. — see liboai's ChatCompletion::create signature);
    // only model, conversation, and response_format are supplied.
    liboai::Response response = oai_impl.ChatCompletion->create(
        target_model,
        convo,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        std::nullopt,
        response_format);
    logger::info("LLMClient: ChatCompletion returned (HTTP " + std::to_string(response.status_code) + ")");
    logger::debug("LLMClient::execute_chat: Got response with status: " + std::to_string(response.status_code));

    // liboai may not populate raw_json for custom base URLs — parse content directly.
    nlohmann::json j;
    bool json_ok = false;
    if (!response.raw_json.empty() && response.raw_json.contains("choices")) {
      j = response.raw_json;
      json_ok = true;
    } else if (!response.content.empty()) {
      try {
        j = nlohmann::json::parse(response.content);
        json_ok = j.contains("choices");
      } catch (...) {}
    }

    if (!json_ok || j["choices"].empty()) {
      res.success = false;
      // Prefer the provider's structured error object when present.
      if (json_ok && j.contains("error")) {
        res.error = "API Error: " + j["error"].dump();
      } else {
        res.error = "Invalid response format: no choices found. Raw: " + response.content;
      }
      return res;
    }

    res.success = true;
    res.text = j["choices"][0]["message"]["content"].get<std::string>();

    /* Usage, model, cost (OpenRouter), etc. — everything except message bodies in choices. */
    try {
      nlohmann::json meta = nlohmann::json::object();
      for (auto it = j.begin(); it != j.end(); ++it) {
        if (it.key() == "choices") continue;
        meta[it.key()] = it.value();
      }
      if (!meta.empty()) res.provider_meta_json = meta.dump();
    } catch (...) {
      /* keep text; omit provider_meta_json */
    }

  } catch (std::exception& e) {
    logger::error("LLMClient::execute_chat: Exception caught: " + std::string(e.what()));
    res.success = false;
    res.error = e.what();
  } catch (...) {
    logger::error("LLMClient::execute_chat: Unknown exception caught");
    res.success = false;
    res.error = "Unknown error occurred inside LLMClient execute_chat.";
  }

  return res;
}
|
||||
|
||||
} // namespace kbot
|
||||
} // namespace polymech
|
||||
37
packages/media/cpp/packages/kbot/llm_client.h
Normal file
37
packages/media/cpp/packages/kbot/llm_client.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "kbot.h"
|
||||
|
||||
namespace polymech {
|
||||
namespace kbot {
|
||||
|
||||
/** Result of one LLMClient::execute_chat call (no exceptions cross this boundary). */
struct LLMResponse {
    std::string text;      // assistant message content (choices[0].message.content); empty on failure
    bool success = false;  // true only when the call succeeded and a message was parsed
    std::string error;     // human-readable failure reason; empty on success
    /** Top-level chat completion JSON minus `choices` (usage, model, id, OpenRouter extras). Empty if not captured. */
    std::string provider_meta_json;
};
|
||||
|
||||
/**
 * Thin client for OpenAI-compatible chat-completion endpoints (backed by liboai).
 * Configured once from KBotOptions; failures are reported via LLMResponse::error
 * rather than exceptions.
 */
class POLYMECH_API LLMClient {
 public:
    // Initialize the client with the options (api_key, model, router).
    explicit LLMClient(const KBotOptions& opts);
    ~LLMClient();

    // Execute a basic chat completion using the provided prompt.
    // Returns success=false with `error` populated instead of throwing.
    LLMResponse execute_chat(const std::string& prompt);

 private:
    std::string api_key_;    // bearer token handed to liboai auth
    std::string model_;      // target model id; implementation falls back to a default when empty
    std::string router_;     // provider/router selector from KBotOptions
    std::string base_url_;   // custom API base URL; empty means provider default
    int llm_timeout_ms_ = 0; // HTTP timeout in ms; <= 0 leaves the library default
    /** Parsed in execute_chat; raw JSON from KBotOptions::response_format_json */
    std::string response_format_json_;
};
|
||||
|
||||
} // namespace kbot
|
||||
} // namespace polymech
|
||||
26
packages/media/cpp/packages/kbot/polymech_export.h
Normal file
26
packages/media/cpp/packages/kbot/polymech_export.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
/**
|
||||
* DLL / shared-object exports for the Polymech kbot library (pipelines, LLM client).
|
||||
*
|
||||
* CMake:
|
||||
* - Building libkbot: POLYMECH_BUILDING_LIBRARY (PRIVATE)
|
||||
* - Linking static kbot: POLYMECH_STATIC_BUILD=1 (INTERFACE)
|
||||
* - Linking shared kbot: default import on Windows
|
||||
*/
|
||||
|
||||
#if defined(POLYMECH_STATIC_BUILD)
   /* Static library: no import/export decoration needed on any platform. */
#  define POLYMECH_API
#elif defined(_WIN32)
#  if defined(POLYMECH_BUILDING_LIBRARY)
     /* Building the DLL itself: export annotated symbols. */
#    define POLYMECH_API __declspec(dllexport)
#  else
     /* Consuming the DLL: import the symbols. */
#    define POLYMECH_API __declspec(dllimport)
#  endif
#else
#  if defined(POLYMECH_BUILDING_LIBRARY)
     /* ELF/Mach-O shared build: force default visibility (pairs with -fvisibility=hidden). */
#    define POLYMECH_API __attribute__((visibility("default")))
#  else
     /* Non-Windows consumers need no decoration. */
#    define POLYMECH_API
#  endif
#endif
|
||||
221
packages/media/cpp/packages/kbot/source_files.cpp
Normal file
221
packages/media/cpp/packages/kbot/source_files.cpp
Normal file
@ -0,0 +1,221 @@
|
||||
#include "source_files.h"
|
||||
#include "logger/logger.h"
|
||||
#include <glob/glob.h>
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace polymech {
|
||||
namespace kbot {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::size_t kMaxBytesPerFile = 4 * 1024 * 1024;
|
||||
|
||||
/** Return a lower-cased copy of `s` (operates on the by-value copy in place). */
std::string to_lower(std::string s) {
  for (char& ch : s) {
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}
|
||||
|
||||
/** Lower-cased extension of `p` (including the leading dot); empty when none. */
std::string ext_of(const fs::path& p) {
  return to_lower(p.extension().string());
}
|
||||
|
||||
/** Extensions handled as binary / non-text in this slice (expand later for vision). */
|
||||
/** Extensions handled as binary / non-text in this slice (expand later for vision). */
bool is_image_ext(const std::string& ext) {
  static const char* const kImg[] = {".jpg", ".jpeg", ".png", ".gif", ".webp",
                                     ".bmp", ".tiff", ".tif", ".ico", ".heic", ".avif"};
  const char* const* first = kImg;
  const char* const* last = kImg + sizeof(kImg) / sizeof(kImg[0]);
  return std::any_of(first, last, [&ext](const char* e) { return ext == e; });
}
|
||||
|
||||
/** True for the PDF extension (caller has already lower-cased it). */
bool is_pdf_ext(const std::string& ext) {
  return ext.compare(".pdf") == 0;
}
|
||||
|
||||
/** Filename / relative path glob with * and ? only (no **). */
|
||||
/**
 * Filename / relative path glob with * and ? only (no **).
 *
 * `*` matches any (possibly empty) run of characters, `?` matches exactly one
 * character, anything else matches literally.  Same DP recurrence as before,
 * but kept as a single rolling row: O(m) memory instead of the previous
 * O(n*m) table of vector<bool> rows.
 */
bool glob_match_segment(const std::string& text, const std::string& pat) {
  const size_t n = text.size(), m = pat.size();
  // dp[j] == true  <=>  pat[0..j) matches the already-processed prefix of text.
  std::vector<bool> dp(m + 1, false);
  dp[0] = true;
  for (size_t j = 1; j <= m; ++j) {
    if (pat[j - 1] == '*') dp[j] = dp[j - 1];  // a run of '*' can match empty text
  }
  for (size_t i = 1; i <= n; ++i) {
    bool prev_diag = dp[0];  // dp[i-1][j-1] for the j loop below
    dp[0] = false;           // non-empty text never matches an empty pattern
    for (size_t j = 1; j <= m; ++j) {
      const bool prev_row = dp[j];  // dp[i-1][j]
      if (pat[j - 1] == '*') {
        dp[j] = dp[j - 1] || prev_row;  // '*' absorbs text[i-1] or matches empty
      } else if (pat[j - 1] == '?' || text[i - 1] == pat[j - 1]) {
        dp[j] = prev_diag;
      } else {
        dp[j] = false;
      }
      prev_diag = prev_row;
    }
  }
  return dp[m];
}
|
||||
|
||||
/** Absolute form of `path_opt`; the current working directory when empty. */
std::filesystem::path absolute_root(const std::string& path_opt) {
  namespace stdfs = std::filesystem;
  const stdfs::path base = path_opt.empty() ? stdfs::path(".") : stdfs::path(path_opt);
  return stdfs::absolute(base);
}
|
||||
|
||||
bool excluded(const std::string& rel_fwd, const std::vector<std::string>& exclude_globs) {
|
||||
for (const auto& pat : exclude_globs) {
|
||||
if (pat.empty()) continue;
|
||||
if (glob_match_segment(rel_fwd, pat)) return true;
|
||||
fs::path p(rel_fwd);
|
||||
if (glob_match_segment(p.filename().string(), pat)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Append `file` to `out` unless its generic-string form is already in `seen`. */
void push_unique(std::vector<std::filesystem::path>& out,
                 std::unordered_set<std::string>& seen,
                 const std::filesystem::path& file) {
  const auto [pos, inserted] = seen.insert(file.generic_string());
  (void)pos;
  if (inserted) {
    out.push_back(file);
  }
}
|
||||
|
||||
/** Resolve one include pattern against `root` and append each matching regular
    file (weakly-canonicalized, deduped via `seen`) to `out`.  Patterns that
    contain "**" use recursive globbing; glob failures are logged and skipped. */
void expand_one_pattern(const fs::path& root, const std::string& pattern_str,
                        std::vector<fs::path>& out, std::unordered_set<std::string>& seen) {
  const fs::path raw = pattern_str.empty() ? fs::path() : fs::path(pattern_str);
  const fs::path resolved = (raw.is_absolute() ? raw : root / raw).lexically_normal();
  const std::string pat = resolved.string();

  std::vector<fs::path> hits;
  try {
    const bool recursive = pattern_str.find("**") != std::string::npos;
    hits = recursive ? glob::rglob(pat) : glob::glob(pat);
  } catch (const std::exception& e) {
    logger::warn(std::string("source_files: glob failed: ") + e.what());
    return;
  }

  for (const auto& hit : hits) {
    std::error_code ec;
    if (!fs::is_regular_file(hit, ec) || ec) continue;
    const fs::path canon = fs::weakly_canonical(hit, ec);
    if (!ec) push_unique(out, seen, canon);
  }
}
|
||||
|
||||
/**
 * Read at most `max_bytes` bytes of `p` in binary mode.
 *
 * Fix: the previous version slurped the ENTIRE file into memory and only then
 * truncated, so a multi-gigabyte file defeated the 4 MiB cap.  Now the buffer
 * is sized from the file size (capped) and only that many bytes are read.
 * Returns an empty string when the file cannot be opened.
 */
std::string read_file_limited(const fs::path& p, std::size_t max_bytes) {
  std::ifstream in(p, std::ios::binary);
  if (!in) return {};

  std::error_code ec;
  const std::uintmax_t sz = fs::file_size(p, ec);
  // If file_size fails, fall back to reading up to the cap.
  const std::size_t want = (!ec && sz < static_cast<std::uintmax_t>(max_bytes))
                               ? static_cast<std::size_t>(sz)
                               : max_bytes;

  std::string buf(want, '\0');
  in.read(buf.data(), static_cast<std::streamsize>(want));
  buf.resize(static_cast<std::size_t>(in.gcount()));

  if (!ec && sz > static_cast<std::uintmax_t>(max_bytes)) {
    logger::warn("source_files: truncating large file " + p.generic_string());
  }
  return buf;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool is_text_source_file(const std::string& path_generic) {
|
||||
fs::path p(path_generic);
|
||||
std::string ext = ext_of(p);
|
||||
if (is_image_ext(ext)) return false;
|
||||
if (is_pdf_ext(ext)) return false;
|
||||
if (ext.empty()) return true;
|
||||
/* Code / text-like extensions (aligned with TS text/* + common sources) */
|
||||
static const char* kText[] = {
|
||||
".txt", ".md", ".json", ".js", ".mjs", ".cjs", ".ts", ".tsx", ".jsx", ".css",
|
||||
".html", ".htm", ".xml", ".csv", ".yaml", ".yml", ".toml", ".sh", ".py",
|
||||
".rs", ".go", ".java", ".cpp", ".cc", ".cxx", ".h", ".hpp", ".c",
|
||||
".cs", ".rb", ".php", ".swift", ".kt", ".vue", ".svelte", ".scss", ".less",
|
||||
".ini", ".cfg", ".properties", ".gradle", ".cmake", ".mdx", ".log", ".sql",
|
||||
};
|
||||
for (auto* x : kText) {
|
||||
if (ext == x) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Relative (forward-slash) paths of every source the include globs resolve to.
    Thin wrapper: delegates to build_prompt_with_sources and discards the prompt. */
std::vector<std::string> collect_source_rel_paths(const KBotOptions& opts) {
  std::vector<std::string> paths;
  (void)build_prompt_with_sources(opts, &paths);
  return paths;
}
|
||||
|
||||
/**
 * Build the user prompt sent to the LLM: one "--- file: <rel> ---" block per
 * resolved include file, then the raw `opts.prompt`.  With no include globs
 * the prompt passes through untouched.  `out_rel_paths`, if non-null, receives
 * the forward-slash relative path of each file actually embedded, in order.
 * NOTE(review): the exact framing (header line, trailing newlines) is relied
 * on downstream — keep it byte-stable.
 */
std::string build_prompt_with_sources(const KBotOptions& opts, std::vector<std::string>* out_rel_paths) {
  // Fast path: no includes requested.
  if (opts.include_globs.empty()) {
    return opts.prompt;
  }

  const fs::path root = absolute_root(opts.path);
  std::vector<fs::path> files;
  std::unordered_set<std::string> seen;  // dedupes files matched by several globs

  for (const auto& inc : opts.include_globs) {
    if (inc.empty()) continue;
    expand_one_pattern(root, inc, files, seen);
  }

  std::ostringstream body;
  for (const auto& abs : files) {
    std::error_code ec;
    if (!fs::is_regular_file(abs, ec) || ec) continue;  // may have vanished since globbing

    std::string abs_gen = abs.generic_string();
    if (!is_text_source_file(abs_gen)) {
      logger::info("source_files: skip non-text (e.g. image): " + abs_gen);
      continue;
    }

    fs::path rel = fs::relative(abs, root, ec);
    if (ec) rel = abs.filename();  // fall back to bare filename when not under root
    std::string rel_fwd = rel.generic_string();

    if (excluded(rel_fwd, opts.exclude_globs)) {
      logger::debug("source_files: excluded: " + rel_fwd);
      continue;
    }

    std::string content = read_file_limited(abs, kMaxBytesPerFile);
    if (out_rel_paths) out_rel_paths->push_back(rel_fwd);

    body << "--- file: " << rel_fwd << " ---\n";
    body << content;
    if (!content.empty() && content.back() != '\n') body << '\n';  // ensure block ends with a newline
    body << '\n';                                                  // blank line between file blocks
  }

  // User prompt goes last so the model reads the sources first.
  if (!opts.prompt.empty()) {
    body << opts.prompt;
  }

  return body.str();
}
|
||||
|
||||
/** JSON body describing what WOULD be sent to the LLM (dry run — no API call).
    Includes the resolved source list and a capped prompt preview. */
nlohmann::json make_dry_run_ai_result(const KBotOptions& opts, const std::string& augmented_prompt,
                                      const std::vector<std::string>& rel_paths) {
  nlohmann::json result{
      {"status", "success"},
      {"mode", "ai"},
      {"text", "[dry-run] no LLM call"},
      {"dry_run", true},
  };
  result["path"] = opts.path.empty() ? std::string(".") : opts.path;
  result["sources"] = rel_paths;
  result["prompt_char_count"] = augmented_prompt.size();

  const std::size_t kPreviewCap = 2000;
  if (augmented_prompt.size() > kPreviewCap) {
    result["prompt_preview"] = augmented_prompt.substr(0, kPreviewCap);
    result["prompt_preview_truncated"] = true;
  } else {
    result["prompt_preview"] = augmented_prompt;
  }
  return result;
}
|
||||
|
||||
} // namespace kbot
|
||||
} // namespace polymech
|
||||
32
packages/media/cpp/packages/kbot/source_files.h
Normal file
32
packages/media/cpp/packages/kbot/source_files.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include "kbot.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace polymech {
|
||||
namespace kbot {
|
||||
|
||||
/** True if we treat this path as a text source (UTF-8). Images/PDF reserved for future. */
bool is_text_source_file(const std::string& path_generic);

/**
 * Resolve --include / IPC `include` patterns against `opts.path` (project root).
 * Skips non-text files (e.g. images) with a debug log. Applies `exclude_globs` to relative paths.
 * Implemented as a thin wrapper over build_prompt_with_sources (discards the prompt text).
 */
std::vector<std::string> collect_source_rel_paths(const KBotOptions& opts);

/**
 * Build user prompt: optional file blocks (`--- file: rel ---` + contents) then `opts.prompt`.
 * If `out_rel_paths` is set, filled with forward-slash relative paths in read order (deduped).
 * With no include patterns the prompt is returned unchanged.
 */
std::string build_prompt_with_sources(const KBotOptions& opts,
                                      std::vector<std::string>* out_rel_paths = nullptr);

/** JSON body for dry-run job_result when includes are used (sources + preview). */
nlohmann::json make_dry_run_ai_result(const KBotOptions& opts, const std::string& augmented_prompt,
                                      const std::vector<std::string>& rel_paths);
|
||||
|
||||
} // namespace kbot
|
||||
} // namespace polymech
|
||||
49
packages/media/cpp/packages/liboai/.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
49
packages/media/cpp/packages/liboai/.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
name: Bug report
|
||||
description: Create a report to help us improve
|
||||
labels: ["bug"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: A clear and concise description of what the bug is, and any additional context.
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: repro-steps
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: Steps to reproduce the behavior.
|
||||
placeholder: |
|
||||
1. Fetch a '...'
|
||||
2. Update the '....'
|
||||
3. See error
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: code-snippets
|
||||
attributes:
|
||||
label: Code snippets
|
||||
description: If applicable, add code snippets to help explain your problem.
|
||||
render: C++
|
||||
validations:
|
||||
required: false
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: OS
|
||||
placeholder: macOS
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: lib-version
|
||||
attributes:
|
||||
label: Library version
|
||||
placeholder: liboai v1.0.0
|
||||
validations:
|
||||
required: true
|
||||
20
packages/media/cpp/packages/liboai/.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
20
packages/media/cpp/packages/liboai/.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
name: Feature request
|
||||
description: Suggest an idea for this library
|
||||
labels: ["feature-request"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this feature request!
|
||||
- type: textarea
|
||||
id: feature
|
||||
attributes:
|
||||
label: Describe the feature or improvement you're requesting
|
||||
description: A clear and concise description of what you want to happen.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context about the feature request here.
|
||||
6
packages/media/cpp/packages/liboai/.gitignore
vendored
Normal file
6
packages/media/cpp/packages/liboai/.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
.vs
|
||||
[Bb]uild*
|
||||
out
|
||||
TestApp
|
||||
.cache
|
||||
/.idea
|
||||
24
packages/media/cpp/packages/liboai/AGENTS.md
Normal file
24
packages/media/cpp/packages/liboai/AGENTS.md
Normal file
@ -0,0 +1,24 @@
|
||||
# AGENTS.md
|
||||
|
||||
This repo is a maintained fork of liboai. Our goal is to make it more reliable and feature-complete without breaking existing APIs.
|
||||
|
||||
## Core Principles
|
||||
- Preserve backward compatibility; add features without breaking existing APIs.
|
||||
- Favor small, composable changes over rewrites.
|
||||
- Keep the codebase clean and maintainable; document anything user-facing.
|
||||
- Prioritize stability, correctness, and clear error handling.
|
||||
|
||||
## Current Priorities
|
||||
- Add OpenAI Responses API support for GPT-5.2 and gpt-5.2-pro.
|
||||
- Keep Chat Completions and other existing components intact.
|
||||
- Add documentation and examples for new features.
|
||||
|
||||
## Workflow
|
||||
- Update docs whenever you add or change public APIs.
|
||||
- Use existing patterns and naming conventions in liboai.
|
||||
- Avoid introducing new dependencies unless justified.
|
||||
|
||||
## Notes
|
||||
- The initial Responses API implementation should accept raw JSON payloads.
|
||||
- A ResponseInput helper is planned, but not part of the initial implementation.
|
||||
- Azure Responses support is out of scope for now.
|
||||
22
packages/media/cpp/packages/liboai/CMakeLists.txt
Normal file
22
packages/media/cpp/packages/liboai/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
||||
cmake_minimum_required(VERSION 3.21)

project(liboai)

# On Windows, dependencies come from vcpkg; VCPKG_ROOT must point at the install.
IF(WIN32)
    set(VCPKG_CMAKE_PATH $ENV{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake CACHE FILEPATH "Location of vcpkg.cmake")
    include(${VCPKG_CMAKE_PATH})
    find_package(ZLIB REQUIRED)
    find_package(nlohmann_json CONFIG REQUIRED)
    find_package(CURL REQUIRED)
ENDIF()

option(BUILD_EXAMPLES "Build example applications" OFF)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

# The library target (oai) lives in liboai/.
add_subdirectory(liboai)

# documentation/ builds the example executables when requested.
if(BUILD_EXAMPLES)
    add_subdirectory(documentation)
endif()

set_property(DIRECTORY PROPERTY VS_STARTUP_PROJECT oai)
|
||||
21
packages/media/cpp/packages/liboai/LICENSE
Normal file
21
packages/media/cpp/packages/liboai/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Dread
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
100
packages/media/cpp/packages/liboai/README.md
Normal file
100
packages/media/cpp/packages/liboai/README.md
Normal file
@ -0,0 +1,100 @@
|
||||
<p align="center">
|
||||
<img src="/images/_logo.png">
|
||||
</p>
|
||||
|
||||
<hr>
|
||||
<h1>Introduction</h1>
|
||||
<p><code>liboai</code> is a simple, <b>unofficial</b> C++17 library for the OpenAI API. It allows developers to access OpenAI endpoints through a simple collection of methods and classes. The library can most effectively be thought of as a <b>spiritual port</b> of OpenAI's Python library, simply called <code>openai</code>, due to its similar structure - with few exceptions.</p>
|
||||
|
||||
<h3>Features</h3>
|
||||
|
||||
- [x] [ChatGPT](https://github.com/D7EAD/liboai/tree/main/documentation/chat)
|
||||
- [x] [Responses API](https://platform.openai.com/docs/api-reference/responses/create)
|
||||
- [X] [Audio](https://github.com/D7EAD/liboai/tree/main/documentation/audio)
|
||||
- [X] [Azure](https://github.com/D7EAD/liboai/tree/main/documentation/azure)
|
||||
- [X] [Functions](https://platform.openai.com/docs/api-reference/chat/create)
|
||||
- [x] [Image DALL·E](https://github.com/D7EAD/liboai/tree/main/documentation/images)
|
||||
- [x] [Models](https://github.com/D7EAD/liboai/tree/main/documentation/models)
|
||||
- [x] [Completions](https://github.com/D7EAD/liboai/tree/main/documentation/completions)
|
||||
- [x] [Edit](https://github.com/D7EAD/liboai/tree/main/documentation/edits)
|
||||
- [x] [Embeddings](https://github.com/D7EAD/liboai/tree/main/documentation/embeddings)
|
||||
- [x] [Files](https://github.com/D7EAD/liboai/tree/main/documentation/files)
|
||||
- [x] [Fine-tunes](https://github.com/D7EAD/liboai/tree/main/documentation/fine-tunes)
|
||||
- [x] [Moderation](https://github.com/D7EAD/liboai/tree/main/documentation/moderations)
|
||||
- [X] Asynchronous Support
|
||||
|
||||
<h1>Usage</h1>
|
||||
See below for just how similar in style <code>liboai</code> and its Python alternative are when generating an image using DALL-E.</p>
|
||||
<details open>
|
||||
<summary>DALL-E Generation in Python.</summary>
|
||||
<br>
|
||||
|
||||
```py
|
||||
import openai
|
||||
import os
|
||||
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
response = openai.Image.create(
|
||||
prompt="A snake in the grass!",
|
||||
n=1,
|
||||
size="256x256"
|
||||
)
|
||||
print(response["data"][0]["url"])
|
||||
```
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>DALL-E Generation in C++.</summary>
|
||||
<br>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
oai.auth.SetKeyEnv("OPENAI_API_KEY");
|
||||
|
||||
Response res = oai.Image->create(
|
||||
"A snake in the grass!",
|
||||
1,
|
||||
"256x256"
|
||||
);
|
||||
|
||||
std::cout << res["data"][0]["url"] << std::endl;
|
||||
}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<p>Running the above will print out the URL to the resulting generated image, which may or may not look similar to the one found below.</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Example Image</th>
|
||||
</tr>
|
||||
<tr>
<td>
|
||||
|
||||
<img src="/images/snake.png">
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<p><i>Keep in mind the above C++ example is a minimal example and is not an exception-safe snippet. Please see <a href="/documentation">the documentation</a> for more detailed and exception-safe code snippets.</i></p>
|
||||
|
||||
<h1>Dependencies</h1>
|
||||
<p>For the library to work the way it does, it relies on two major dependencies. These dependencies can be found listed below.</p>
|
||||
|
||||
- <a href="https://github.com/nlohmann/json">nlohmann-json</a>
|
||||
- <a href="https://curl.se/">cURL</a>
|
||||
|
||||
*If building the library using the provided solution, it is recommended to install these dependencies using <b>vcpkg</b>.*
|
||||
|
||||
<h1>Documentation</h1>
|
||||
<p>For detailed documentation and additional code examples, see the library's documentation <a href="/documentation">here</a>.
|
||||
|
||||
<h1>Contributing</h1>
|
||||
<p>Artificial intelligence is an exciting and quickly-changing field.
|
||||
|
||||
If you'd like to partake in further placing the power of AI in the hands of everyday people, please consider contributing by submitting new code and features via a **Pull Request**. If you have any issues using the library, or just want to suggest new features, feel free to contact me directly using the info on my <a href="https://github.com/D7EAD">profile</a> or open an **Issue**.
|
||||
25
packages/media/cpp/packages/liboai/ROADMAP.md
Normal file
25
packages/media/cpp/packages/liboai/ROADMAP.md
Normal file
@ -0,0 +1,25 @@
|
||||
# liboai Roadmap
|
||||
|
||||
This is a living backlog of improvements and ideas as we deepen our use of the library. It is intentionally lightweight and updated as we discover new needs.
|
||||
|
||||
## Now
|
||||
- Responses API support (GPT-5.2, gpt-5.2-pro)
|
||||
- Keep all existing APIs stable and intact
|
||||
|
||||
## Next
|
||||
- Responses streaming helpers and SSE parsing
|
||||
- ResponseInput helper to build Responses `input` items
|
||||
- `output_text` convenience helper for Responses outputs
|
||||
- Structured outputs helpers for `text.format`
|
||||
- Tool definition builders for Responses (`tools`, `tool_choice`)
|
||||
|
||||
## Later
|
||||
- More robust testing coverage (unit + integration samples)
|
||||
- Improved error messaging with request context (safe, no secrets)
|
||||
- Expanded docs and cookbook-style examples
|
||||
- Performance pass on JSON construction and streaming
|
||||
|
||||
## Observations
|
||||
- The Conversation class is useful for Chat Completions; Responses lacks an equivalent.
|
||||
- The library is stable but needs modernization for new OpenAI primitives.
|
||||
- Maintaining compatibility is critical for existing users.
|
||||
@ -0,0 +1,28 @@
|
||||
cmake_minimum_required(VERSION 3.13)

project(documentation)

# add_example(<target> <source>): one example executable linked against oai,
# grouped under the "examples/documentation" IDE folder.
macro(add_example target_name source_name)
    add_executable(${target_name} "${source_name}")
    target_link_libraries(${target_name} oai)
    set_target_properties(${target_name} PROPERTIES FOLDER "examples/${PROJECT_NAME}")
endmacro()

# add_basic_example(<name>): shorthand for an example whose source is <name>.cpp.
macro(add_basic_example source_base_name)
    add_example(${source_base_name} "${source_base_name}.cpp")
endmacro()

add_subdirectory(audio/examples)
add_subdirectory(authorization/examples)
add_subdirectory(azure/examples)
add_subdirectory(chat/examples)
add_subdirectory(chat/conversation/examples)
add_subdirectory(completions/examples)
add_subdirectory(edits/examples)
add_subdirectory(embeddings/examples)
add_subdirectory(files/examples)
add_subdirectory(fine-tunes/examples)
add_subdirectory(images/examples)
add_subdirectory(models/examples)
add_subdirectory(moderations/examples)
add_subdirectory(responses/examples)
|
||||
217
packages/media/cpp/packages/liboai/documentation/README.md
Normal file
217
packages/media/cpp/packages/liboai/documentation/README.md
Normal file
@ -0,0 +1,217 @@
|
||||
<h1>Documentation</h1>
|
||||
<p>Both above and below, you can find resources and documentation for each component of the library.</p>
|
||||
|
||||
<h3>Basic Usage</h3>
|
||||
<p>In order to understand how to use each component of the library, it would be ideal to first understand the basic structure of the library as a whole. When using <code>liboai</code> in a project, you <b>should</b> only include one header file, <code>liboai.h</code>. This header provides an interface to all other components of the library such as <code>Images</code>, <code>Completions</code>, etc.
|
||||
|
||||
See below for both a correct and incorrect example.</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Correct</th>
|
||||
<th>Incorrect</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
int main() {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
<td>
|
||||
|
||||
```cpp
|
||||
#include "fine_tunes.h"
|
||||
#include "models.h"
|
||||
// etc...
|
||||
|
||||
int main() {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<p>Once we have properly included the necessary header file to use the library--and assuming symbols are linked properly--we can make use of the class in <code>liboai.h</code> to get started. At some point in our source code, we will have to choose when to define a <code>liboai::OpenAI</code> object to access component interfaces. Each component interface stored in this object offers methods associated with it, so, for instance, interface <code>Image</code> will have a method <code>create(...)</code> to generate an image from text. Each non-async method returns a <code>liboai::Response</code> containing response information whereas async methods return a <code>liboai::FutureResponse</code>. However, before we start using these methods, we must first set our authorization information--otherwise it will not work!
|
||||
|
||||
<code>liboai::OpenAI</code> also houses another important member, the authorization member, which is used to set authorization information (such as the API key and organization IDs) before we call the API methods. For more information on additional members found in <code>liboai::Authorization</code>, refer to the <a href="./authorization">authorization</a> folder above.
|
||||
|
||||
See below for both a correct and incorrect control flow when generating an image.</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Correct</th>
|
||||
<th>Incorrect</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
// Set our API key using an environment variable.
|
||||
// This is recommended as hard-coding API keys is
|
||||
// insecure.
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
Response response = oai.Image->create(
|
||||
"a siamese cat!"
|
||||
);
|
||||
}
|
||||
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
<td>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
// Failure to set authorization info!
|
||||
// Will fail, exception will be thrown!
|
||||
Response response = oai.Image->create(
|
||||
"a siamese cat!"
|
||||
);
|
||||
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<p>As you can see above, authentication-set related functions return booleans to indicate success and failure, whereas component methods will throw an exception, <code>OpenAIException</code> or <code>OpenAIRateLimited</code>, to indicate their success or failure; these should be checked for accordingly. Below you can find an exception-safe version of the above correct snippet.</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Correct, exception-safe</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
Response response = oai.Image->create(
|
||||
"a siamese cat!"
|
||||
);
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
|
||||
...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<p>Now, once we have made a call using a component interface, we most certainly want to get the information out of it. To do this, using our knowledge of the format of the API responses, we can extract the information, such as the resulting image's URL, using JSON indexing on the <code>liboai::Response</code> object. See below for an example where we print the generated image's URL.</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Accessing JSON Response Data</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
Response response = oai.Image->create(
|
||||
"a siamese cat!"
|
||||
);
|
||||
std::cout << response["data"][0]["url"].get<std::string>() << std::endl;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<p>What if we want to do more than just print the URL of the image? Why not download it right when it's done? Thankfully, <code>liboai</code> has a convenient function for that, <code>Network::Download(...)</code> (and <code>Network::DownloadAsync(...)</code>). See below for an example of downloading a freshly generated image.</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Downloading a Generated Image</th>
|
||||
</tr>
|
||||
<tr>
<td>
|
||||
|
||||
```cpp
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
Response response = oai.Image->create(
|
||||
"a siamese cat!"
|
||||
);
|
||||
Network::Download(
|
||||
"C:/some/folder/file.png", // to
|
||||
response["data"][0]["url"].get<std::string>(), // from
|
||||
oai.auth.GetAuthorizationHeaders()
|
||||
);
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<p>After a successful run of the above snippet, the file found at the URL returned from the component call will be downloaded to the path <code>C:/some/folder/file.png</code>.</p>
|
||||
<br>
|
||||
|
||||
<h1>Synopsis</h1>
|
||||
<p>Each component interface found within <code>liboai::OpenAI</code> follows the same pattern found above. Whether you want to generate images, completions, or fine-tune models, the control flow should follow--or remain similar to--the above examples.
|
||||
|
||||
For detailed examples regarding individual component interfaces, refer to the appropriate folder listed above.</p>
|
||||
|
||||
<h3>Project Maintenance</h3>
|
||||
<p>Maintainers can find PR workflow notes in <a href="./maintenance">documentation/maintenance</a>.</p>
|
||||
@ -0,0 +1,96 @@
|
||||
<h1>Audio</h1>
|
||||
<p>The <code>Audio</code> class is defined in <code>audio.h</code> at <code>liboai::Audio</code>, and its interface can ideally be accessed through a <code>liboai::OpenAI</code> object.
|
||||
|
||||
This class and its associated <code>liboai::OpenAI</code> interface allow access to the <a href="https://beta.openai.com/docs/api-reference/audio">Audio</a> endpoint of the OpenAI API; this endpoint's functionality can be found below.</p>
|
||||
- Turn audio to text.
|
||||
- Turn text to audio.
|
||||
|
||||
<br>
|
||||
<h2>Methods</h2>
|
||||
<p>This document covers the method(s) located in <code>audio.h</code>. You can find their function signature(s) below.</p>
|
||||
|
||||
<h3>Create a Transcription</h3>
|
||||
<p>Transcribes audio into the input language. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response transcribe(
|
||||
const std::filesystem::path& file,
|
||||
const std::string& model,
|
||||
std::optional<std::string> prompt = std::nullopt,
|
||||
std::optional<std::string> response_format = std::nullopt,
|
||||
std::optional<float> temperature = std::nullopt,
|
||||
std::optional<std::string> language = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create a Transcription (async)</h3>
|
||||
<p>Asynchronously transcribes audio into the input language. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse transcribe_async(
|
||||
const std::filesystem::path& file,
|
||||
const std::string& model,
|
||||
std::optional<std::string> prompt = std::nullopt,
|
||||
std::optional<std::string> response_format = std::nullopt,
|
||||
std::optional<float> temperature = std::nullopt,
|
||||
std::optional<std::string> language = std::nullopt
|
||||
) const& noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create a Translation</h3>
|
||||
<p>Translates audio into English. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response translate(
|
||||
const std::filesystem::path& file,
|
||||
const std::string& model,
|
||||
std::optional<std::string> prompt = std::nullopt,
|
||||
std::optional<std::string> response_format = std::nullopt,
|
||||
std::optional<float> temperature = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create a Translation (async)</h3>
|
||||
<p>Asynchronously translates audio into English. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse translate_async(
|
||||
const std::filesystem::path& file,
|
||||
const std::string& model,
|
||||
std::optional<std::string> prompt = std::nullopt,
|
||||
std::optional<std::string> response_format = std::nullopt,
|
||||
std::optional<float> temperature = std::nullopt
|
||||
) const& noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Text to Speech</h3>
|
||||
<p>Turn text into lifelike spoken audio. Returns a <code>liboai::Response</code> containing response data. The audio data is in the <code>content</code> field of the <code>liboai::Response</code></p>
|
||||
|
||||
```cpp
|
||||
liboai::Response speech(
|
||||
const std::string& model,
|
||||
const std::string& voice,
|
||||
const std::string& input,
|
||||
std::optional<std::string> response_format = std::nullopt,
|
||||
std::optional<float> speed = std::nullopt
|
||||
) const& noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Text to Speech (async)</h3>
|
||||
<p>Asynchronously turn text into lifelike spoken audio. Returns a <code>liboai::FutureResponse</code> containing response data. The audio data is in the <code>content</code> field of the <code>liboai::Response</code></p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse speech_async(
|
||||
const std::string& model,
|
||||
const std::string& voice,
|
||||
const std::string& input,
|
||||
std::optional<std::string> response_format = std::nullopt,
|
||||
std::optional<float> speed = std::nullopt
|
||||
) const& noexcept(false);
|
||||
```
|
||||
|
||||
<p>All function parameters marked <code>optional</code> are not required and are resolved on OpenAI's end if not supplied.</p>
|
||||
|
||||
<br>
|
||||
<h2>Example Usage</h2>
|
||||
<p>For example usage of the above function(s), please refer to the <a href="./examples">examples</a> folder.
|
||||
@ -0,0 +1,10 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
|
||||
project(audio)
|
||||
|
||||
add_basic_example(create_speech)
|
||||
add_basic_example(create_speech_async)
|
||||
add_basic_example(create_transcription)
|
||||
add_basic_example(create_transcription_async)
|
||||
add_basic_example(create_translation)
|
||||
add_basic_example(create_translation_async)
|
||||
@ -0,0 +1,24 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
Response res = oai.Audio->speech(
|
||||
"tts-1",
|
||||
"alloy",
|
||||
"Today is a wonderful day to build something people love!"
|
||||
);
|
||||
std::ofstream ocout("demo.mp3", std::ios::binary);
|
||||
ocout << res.content;
|
||||
ocout.close();
|
||||
std::cout << res.content.size() << std::endl;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
auto fut = oai.Audio->speech_async(
|
||||
"tts-1",
|
||||
"alloy",
|
||||
"Today is a wonderful day to build something people love!"
|
||||
);
|
||||
// do other work...
|
||||
|
||||
// check if the future is ready
|
||||
fut.wait();
|
||||
|
||||
// get the contained response
|
||||
auto res = fut.get();
|
||||
std::ofstream ocout("demo.mp3", std::ios::binary);
|
||||
ocout << res.content;
|
||||
ocout.close();
|
||||
std::cout << res.content.size() << std::endl;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,20 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
Response res = oai.Audio->transcribe(
|
||||
"C:/some/folder/audio.mp3",
|
||||
"whisper-1"
|
||||
);
|
||||
std::cout << res["text"].get<std::string>() << std::endl;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,30 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
// call async method; returns a future
|
||||
auto fut = oai.Audio->transcribe_async(
|
||||
"C:/some/folder/file.mp3",
|
||||
"whisper-1"
|
||||
);
|
||||
|
||||
// do other work...
|
||||
|
||||
// check if the future is ready
|
||||
fut.wait();
|
||||
|
||||
// get the contained response
|
||||
auto response = fut.get();
|
||||
|
||||
// print some response data
|
||||
std::cout << response["text"].get<std::string>() << std::endl;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,20 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
Response res = oai.Audio->translate(
|
||||
"C:/some/folder/file.mp3",
|
||||
"whisper-1"
|
||||
);
|
||||
std::cout << res["text"] << std::endl;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,30 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
try {
|
||||
// call async method; returns a future
|
||||
auto fut = oai.Audio->translate_async(
|
||||
"C:/some/folder/file.mp3",
|
||||
"whisper-1"
|
||||
);
|
||||
|
||||
// do other work...
|
||||
|
||||
// check if the future is ready
|
||||
fut.wait();
|
||||
|
||||
// get the contained response
|
||||
auto response = fut.get();
|
||||
|
||||
// print some response data
|
||||
std::cout << response["text"].get<std::string>() << std::endl;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,177 @@
|
||||
<h1>Authorization</h1>
|
||||
<p>The <code>Authorization</code> class is defined in <code>authorization.h</code> at <code>liboai::Authorization</code>. This class is responsible for sharing all set authorization information with all component classes in <code>liboai</code>.
|
||||
|
||||
All authorization information should be set prior to the calling of any component methods such as <code>Images</code>, <code>Embeddings</code>, and so on. Failure to do so will result in a <code>liboai::OpenAIException</code> due to authorization failure on OpenAI's end.</p>
|
||||
|
||||
<br>
|
||||
<h2>Methods</h2>
|
||||
<p>This document covers the method(s) located in <code>authorization.h</code>. You can find their function signature(s) below.</p>
|
||||
|
||||
<h3>Get Authorizer</h3>
|
||||
<p>Returns a reference to the <code>liboai::Authorization</code> singleton shared among all components.</p>
|
||||
|
||||
```cpp
|
||||
static Authorization& Authorizer() noexcept;
|
||||
```
|
||||
|
||||
<h3>Set API Key</h3>
|
||||
<p>Sets the API key to use in subsequent component calls.</p>
|
||||
|
||||
```cpp
|
||||
bool SetKey(std::string_view key) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Azure API Key</h3>
|
||||
<p>Sets the Azure API key to use in subsequent component calls.</p>
|
||||
|
||||
```cpp
|
||||
bool SetAzureKey(std::string_view key) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Active Directory Azure API Key</h3>
|
||||
<p>Sets the Active Directory Azure API key to use in subsequent component calls.</p>
|
||||
|
||||
```cpp
|
||||
bool SetAzureKeyAD(std::string_view key) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set API Key (File)</h3>
|
||||
<p>Sets the API key to use in subsequent component calls from data found in file at path.</p>
|
||||
|
||||
```cpp
|
||||
bool SetKeyFile(const std::filesystem::path& path) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Azure API Key (File)</h3>
|
||||
<p>Sets the Azure API key to use in subsequent component calls from data found in file at path.</p>
|
||||
|
||||
```cpp
|
||||
bool SetAzureKeyFile(const std::filesystem::path& path) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Active Directory Azure API Key (File)</h3>
|
||||
<p>Sets the Active Directory Azure API key to use in subsequent component calls from data found in file at path.</p>
|
||||
|
||||
```cpp
|
||||
bool SetAzureKeyFileAD(const std::filesystem::path& path) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set API Key (Environment Variable)</h3>
|
||||
<p>Sets the API key to use in subsequent component calls from an environment variable.</p>
|
||||
|
||||
```cpp
|
||||
bool SetKeyEnv(std::string_view var) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Azure API Key (Environment Variable)</h3>
|
||||
<p>Sets the Azure API key to use in subsequent component calls from an environment variable.</p>
|
||||
|
||||
```cpp
|
||||
bool SetAzureKeyEnv(std::string_view var) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Active Directory Azure API Key (Environment Variable)</h3>
|
||||
<p>Sets the Active Directory Azure API key to use in subsequent component calls from an environment variable.</p>
|
||||
|
||||
```cpp
|
||||
bool SetAzureKeyEnvAD(std::string_view var) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Organization ID</h3>
|
||||
<p>Sets the organization ID to send in subsequent component calls.</p>
|
||||
|
||||
```cpp
|
||||
bool SetOrganization(std::string_view org) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Organization ID (File)</h3>
|
||||
<p>Sets the organization ID to send in subsequent component calls from data found in file at path.</p>
|
||||
|
||||
```cpp
|
||||
bool SetOrganizationFile(const std::filesystem::path& path) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Organization ID (Environment Variable)</h3>
|
||||
<p>Sets the organization ID to send in subsequent component calls from an environment variable.</p>
|
||||
|
||||
```cpp
|
||||
bool SetOrganizationEnv(std::string_view var) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Proxies</h3>
|
||||
<p>Sets the proxy, or proxies, to use in subsequent component calls.</p>
|
||||
|
||||
```cpp
|
||||
void SetProxies(const std::initializer_list<std::pair<const std::string, std::string>>& hosts) noexcept;
|
||||
void SetProxies(std::initializer_list<std::pair<const std::string, std::string>>&& hosts) noexcept;
|
||||
void SetProxies(const std::map<std::string, std::string>& hosts) noexcept;
|
||||
void SetProxies(std::map<std::string, std::string>&& hosts) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Proxy Authentication</h3>
|
||||
<p>Sets the username and password to use when using a certain proxy protocol.</p>
|
||||
|
||||
```cpp
|
||||
void SetProxyAuth(const std::map<std::string, netimpl::components::EncodedAuthentication>& proto_up) noexcept;
|
||||
```
|
||||
|
||||
<h3>Set Timeout</h3>
|
||||
<p>Sets the timeout in milliseconds for the library to use in component calls.</p>
|
||||
|
||||
```cpp
|
||||
void SetMaxTimeout(int32_t ms) noexcept;
|
||||
```
|
||||
|
||||
<h3>Get Key</h3>
|
||||
<p>Returns the currently set API key.</p>
|
||||
|
||||
```cpp
|
||||
constexpr const std::string& GetKey() const noexcept;
|
||||
```
|
||||
|
||||
<h3>Get Organization ID</h3>
|
||||
<p>Returns the currently set organization ID.</p>
|
||||
|
||||
```cpp
|
||||
constexpr const std::string& GetOrganization() const noexcept;
|
||||
```
|
||||
|
||||
|
||||
<h3>Get Proxies</h3>
|
||||
<p>Returns the currently set proxies.</p>
|
||||
|
||||
```cpp
|
||||
netimpl::components::Proxies GetProxies() const noexcept;
|
||||
```
|
||||
|
||||
<h3>Get Proxy Authentication</h3>
|
||||
<p>Returns the currently set proxy authentication information.</p>
|
||||
|
||||
```cpp
|
||||
netimpl::components::ProxyAuthentication GetProxyAuth() const noexcept;
|
||||
```
|
||||
|
||||
<h3>Get Timeout</h3>
|
||||
<p>Returns the currently set timeout.</p>
|
||||
|
||||
```cpp
|
||||
netimpl::components::Timeout GetMaxTimeout() const noexcept;
|
||||
```
|
||||
|
||||
<h3>Get Authorization Headers</h3>
|
||||
<p>Returns the currently set authorization headers based on set information.</p>
|
||||
|
||||
```cpp
|
||||
constexpr const netimpl::components::Header& GetAuthorizationHeaders() const noexcept;
|
||||
```
|
||||
|
||||
<h3>Get Azure Authorization Headers</h3>
|
||||
<p>Returns the currently set Azure authorization headers based on set information.</p>
|
||||
|
||||
```cpp
|
||||
constexpr const netimpl::components::Header& GetAzureAuthorizationHeaders() const noexcept;
|
||||
```
|
||||
|
||||
<br>
|
||||
<h2>Example Usage</h2>
|
||||
<p>For example usage of the above function(s), please refer to the <a href="./examples">examples</a> folder.
|
||||
@ -0,0 +1,15 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
|
||||
project(authorization)
|
||||
|
||||
add_basic_example(set_azure_key)
|
||||
add_basic_example(set_azure_key_env)
|
||||
add_basic_example(set_azure_key_file)
|
||||
add_basic_example(set_key)
|
||||
add_basic_example(set_key_env_var)
|
||||
add_basic_example(set_key_file)
|
||||
add_basic_example(set_organization)
|
||||
add_basic_example(set_organization_env_var)
|
||||
add_basic_example(set_organization_file)
|
||||
add_basic_example(set_proxies)
|
||||
add_basic_example(set_proxy_auth)
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetAzureKey("hard-coded-key")) { // NOT recommended
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetAzureKeyFile("C:/some/folder/key.dat")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKey("hard-coded-key")) { // NOT recommended
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyFile("C:/some/folder/key.dat")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY") && oai.auth.SetOrganization("org-123")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY") && oai.auth.SetOrganizationEnv("OPENAI_ORG_ID")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY") && oai.auth.SetOrganizationFile("C:/some/folder/org.dat")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
/*
|
||||
Set some proxies:
|
||||
when we go to an http site, use fakeproxy1
|
||||
when we go to an https site, use fakeproxy2
|
||||
*/
|
||||
oai.auth.SetProxies({
|
||||
{ "http", "http://www.fakeproxy1.com" },
|
||||
{ "https", "https://www.fakeproxy2.com" }
|
||||
});
|
||||
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
#include "liboai.h"
|
||||
|
||||
using namespace liboai;
|
||||
|
||||
int main() {
|
||||
OpenAI oai;
|
||||
|
||||
/*
|
||||
Set some proxies:
|
||||
when we go to an http site, use fakeproxy1
|
||||
when we go to an https site, use fakeproxy2
|
||||
*/
|
||||
oai.auth.SetProxies({
|
||||
{ "http", "http://www.fakeproxy1.com" },
|
||||
{ "https", "https://www.fakeproxy2.com" }
|
||||
});
|
||||
|
||||
/*
|
||||
Set the per-protocol proxy auth info:
|
||||
when we go to an http site, use fakeuser1 and fakepass1
|
||||
when we go to an https site, use fakeuser2 and fakepass2
|
||||
*/
|
||||
oai.auth.SetProxyAuth({
|
||||
{"http", {"fakeuser1", "fakepass1"}},
|
||||
{"https", {"fakeuser2", "fakepass2"}},
|
||||
});
|
||||
|
||||
if (oai.auth.SetKeyEnv("OPENAI_API_KEY")) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
204
packages/media/cpp/packages/liboai/documentation/azure/README.md
Normal file
204
packages/media/cpp/packages/liboai/documentation/azure/README.md
Normal file
@ -0,0 +1,204 @@
|
||||
<h1>Azure</h1>
|
||||
<p>The <code>Azure</code> class is defined in <code>azure.h</code> at <code>liboai::Azure</code>, and its interface can ideally be accessed through a <code>liboai::OpenAI</code> object.
|
||||
|
||||
This class and its associated <code>liboai::OpenAI</code> interface allow access to the <a href="https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference">Azure</a> OpenAI API components.
|
||||
|
||||
<br>
|
||||
<h2>Methods</h2>
|
||||
<p>This document covers the method(s) located in <code>azure.h</code>. You can find their function signature(s) below.</p>
|
||||
|
||||
<h3>Create a Completion</h3>
|
||||
<p>Given a prompt, the model will return one or more predicted completions. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response create_completion(
|
||||
const std::string& resource_name,
|
||||
const std::string& deployment_id,
|
||||
const std::string& api_version,
|
||||
std::optional<std::string> prompt = std::nullopt,
|
||||
std::optional<std::string> suffix = std::nullopt,
|
||||
std::optional<uint16_t> max_tokens = std::nullopt,
|
||||
std::optional<float> temperature = std::nullopt,
|
||||
std::optional<float> top_p = std::nullopt,
|
||||
std::optional<uint16_t> n = std::nullopt,
|
||||
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
|
||||
std::optional<uint8_t> logprobs = std::nullopt,
|
||||
std::optional<bool> echo = std::nullopt,
|
||||
std::optional<std::vector<std::string>> stop = std::nullopt,
|
||||
std::optional<float> presence_penalty = std::nullopt,
|
||||
std::optional<float> frequency_penalty = std::nullopt,
|
||||
std::optional<uint16_t> best_of = std::nullopt,
|
||||
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
|
||||
std::optional<std::string> user = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create a Completion (async)</h3>
|
||||
<p>Given a prompt, the model will asynchronously return one or more predicted completions. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse create_completion_async(
|
||||
const std::string& resource_name,
|
||||
const std::string& deployment_id,
|
||||
const std::string& api_version,
|
||||
std::optional<std::string> prompt = std::nullopt,
|
||||
std::optional<std::string> suffix = std::nullopt,
|
||||
std::optional<uint16_t> max_tokens = std::nullopt,
|
||||
std::optional<float> temperature = std::nullopt,
|
||||
std::optional<float> top_p = std::nullopt,
|
||||
std::optional<uint16_t> n = std::nullopt,
|
||||
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
|
||||
std::optional<uint8_t> logprobs = std::nullopt,
|
||||
std::optional<bool> echo = std::nullopt,
|
||||
std::optional<std::vector<std::string>> stop = std::nullopt,
|
||||
std::optional<float> presence_penalty = std::nullopt,
|
||||
std::optional<float> frequency_penalty = std::nullopt,
|
||||
std::optional<uint16_t> best_of = std::nullopt,
|
||||
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
|
||||
std::optional<std::string> user = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create an Embedding</h3>
|
||||
<p>Creates an embedding vector representing the input text. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response create_embedding(
|
||||
const std::string& resource_name,
|
||||
const std::string& deployment_id,
|
||||
const std::string& api_version,
|
||||
const std::string& input,
|
||||
std::optional<std::string> user = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create an Embedding (async)</h3>
|
||||
<p>Asynchronously creates an embedding vector representing the input text. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse create_embedding_async(
|
||||
const std::string& resource_name,
|
||||
const std::string& deployment_id,
|
||||
const std::string& api_version,
|
||||
const std::string& input,
|
||||
std::optional<std::string> user = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create a Chat Completion</h3>
|
||||
<p>Creates a completion for the chat message. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response create_chat_completion(
|
||||
const std::string& resource_name,
|
||||
const std::string& deployment_id,
|
||||
const std::string& api_version,
|
||||
const Conversation& conversation,
|
||||
std::optional<float> temperature = std::nullopt,
|
||||
std::optional<uint16_t> n = std::nullopt,
|
||||
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
|
||||
std::optional<std::vector<std::string>> stop = std::nullopt,
|
||||
std::optional<uint16_t> max_tokens = std::nullopt,
|
||||
std::optional<float> presence_penalty = std::nullopt,
|
||||
std::optional<float> frequency_penalty = std::nullopt,
|
||||
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
|
||||
std::optional<std::string> user = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Create a Chat Completion (async)</h3>
|
||||
<p>Asynchronously creates a completion for the chat message. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse create_chat_completion_async(
|
||||
const std::string& resource_name,
|
||||
const std::string& deployment_id,
|
||||
const std::string& api_version,
|
||||
const Conversation& conversation,
|
||||
std::optional<float> temperature = std::nullopt,
|
||||
std::optional<uint16_t> n = std::nullopt,
|
||||
std::optional<std::function<bool(std::string, intptr_t)>> stream = std::nullopt,
|
||||
std::optional<std::vector<std::string>> stop = std::nullopt,
|
||||
std::optional<uint16_t> max_tokens = std::nullopt,
|
||||
std::optional<float> presence_penalty = std::nullopt,
|
||||
std::optional<float> frequency_penalty = std::nullopt,
|
||||
std::optional<std::unordered_map<std::string, int8_t>> logit_bias = std::nullopt,
|
||||
std::optional<std::string> user = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Request an Image Generation</h3>
|
||||
<p>Generate a batch of images from a text caption. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response request_image_generation(
|
||||
const std::string& resource_name,
|
||||
const std::string& api_version,
|
||||
const std::string& prompt,
|
||||
std::optional<uint8_t> n = std::nullopt,
|
||||
std::optional<std::string> size = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Request an Image Generation (async)</h3>
|
||||
<p>Asynchronously generate a batch of images from a text caption. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse request_image_generation_async(
|
||||
const std::string& resource_name,
|
||||
const std::string& api_version,
|
||||
const std::string& prompt,
|
||||
std::optional<uint8_t> n = std::nullopt,
|
||||
std::optional<std::string> size = std::nullopt
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Get a Previously Generated Image</h3>
|
||||
<p>Retrieve the results (URL) of a previously called image generation operation. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response get_generated_image(
|
||||
const std::string& resource_name,
|
||||
const std::string& api_version,
|
||||
const std::string& operation_id
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Get a Previously Generated Image (async)</h3>
|
||||
<p>Asynchronously retrieve the results (URL) of a previously called image generation operation. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse get_generated_image_async(
|
||||
const std::string& resource_name,
|
||||
const std::string& api_version,
|
||||
const std::string& operation_id
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Delete a Previously Generated Image</h3>
|
||||
<p>Deletes the corresponding image from the Azure server. Returns a <code>liboai::Response</code> containing response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::Response delete_generated_image(
|
||||
const std::string& resource_name,
|
||||
const std::string& api_version,
|
||||
const std::string& operation_id
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<h3>Delete a Previously Generated Image (async)</h3>
|
||||
<p>Asynchronously deletes the corresponding image from the Azure server. Returns a <code>liboai::FutureResponse</code> containing future response data.</p>
|
||||
|
||||
```cpp
|
||||
liboai::FutureResponse delete_generated_image_async(
|
||||
const std::string& resource_name,
|
||||
const std::string& api_version,
|
||||
const std::string& operation_id
|
||||
) const & noexcept(false);
|
||||
```
|
||||
|
||||
<p>All function parameters marked <code>optional</code> are not required and are resolved on OpenAI's end if not supplied.</p>
|
||||
|
||||
<br>
|
||||
<h2>Example Usage</h2>
|
||||
<p>For example usage of the above function(s), please refer to the <a href="./examples">examples</a> folder.
|
||||
@ -0,0 +1,16 @@
|
||||
# Build configuration for the Azure API example programs.
cmake_minimum_required(VERSION 3.13)

project(azure)

# add_example(<target> <source>) / add_basic_example(<name>) are helper
# macros — presumably defined in a parent CMakeLists.txt; confirm there.
# add_example is used where the target name must differ from the source
# file name (to avoid clashing with same-named non-Azure examples).
add_example(create_chat_completion_azure "create_chat_completion.cpp")
add_example(create_chat_completion_async_azure "create_chat_completion_async.cpp")

add_basic_example(create_completion)
add_basic_example(create_completion_async)

add_example(create_embedding_azure "create_embedding.cpp")
add_example(create_embedding_async_azure "create_embedding_async.cpp")

add_basic_example(delete_generated_image)
add_basic_example(delete_generated_image_async)
add_basic_example(get_generated_image)
add_basic_example(get_generated_image_async)
add_basic_example(request_image_generation)
add_basic_example(request_image_generation_async)
|
||||
@ -0,0 +1,28 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: synchronous chat completion against an Azure OpenAI deployment.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    // Build a conversation holding a single user message.
    Conversation convo;
    convo.AddUserData("Hi, how are you?");

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->create_chat_completion(
                "resource", "deploymentID", "api_version",
                convo
            );

            // update the conversation with the response
            convo.Update(res);

            // print the response from the API
            std::cout << convo.GetLastResponse() << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,37 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: asynchronous chat completion against an Azure OpenAI deployment.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    // Build a conversation holding a single user message.
    Conversation convo;
    convo.AddUserData("Hi, how are you?");

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            // call async method; returns a future
            auto fut = oai.Azure->create_chat_completion_async(
                "resource", "deploymentID", "api_version",
                convo
            );

            // do other work...

            // block until the future is ready
            fut.wait();

            // get the contained response
            auto res = fut.get();

            // update the conversation with the response
            convo.Update(res);

            // print the response from the API
            std::cout << convo.GetLastResponse() << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,21 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>
#include <string>

using namespace liboai;

// Example: synchronous text completion against an Azure OpenAI deployment.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->create_completion(
                "resource", "deploymentID", "api_version",
                "Write a short poem about a snowman."
            );

            // print the first completion choice's text
            std::cout << res["choices"][0]["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,29 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>
#include <string>

using namespace liboai;

// Example: asynchronous text completion against an Azure OpenAI deployment.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->create_completion_async(
                "resource", "deploymentID", "api_version",
                "Write a short poem about a snowman."
            );

            // do other stuff

            // wait for the future to be ready
            fut.wait();

            // get the result
            auto res = fut.get();

            // print the first completion choice's text
            std::cout << res["choices"][0]["text"].get<std::string>() << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,21 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: synchronous embedding request against an Azure OpenAI deployment.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->create_embedding(
                "resource", "deploymentID", "api_version",
                "String to get embedding for"
            );

            // output the full response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,27 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: asynchronous embedding request against an Azure OpenAI deployment.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->create_embedding_async(
                "resource", "deploymentID", "api_version",
                "String to get embedding for"
            );

            // do other work

            // get() blocks until the future is ready
            auto res = fut.get();

            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,22 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: delete a previously generated image from the Azure server,
// identified by its operation ID. Reads the API key from the AZURE_API_KEY
// environment variable; does nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->delete_generated_image(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"  // operation ID
            );

            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,30 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: asynchronously delete a previously generated image from the
// Azure server, identified by its operation ID. Reads the API key from the
// AZURE_API_KEY environment variable; does nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->delete_generated_image_async(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"  // operation ID
            );

            // do other work

            // wait for the future to complete
            fut.wait();

            // get the result
            auto res = fut.get();

            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,22 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: retrieve the result (URL) of a previously requested image
// generation operation, identified by its operation ID. Reads the API key
// from the AZURE_API_KEY environment variable; does nothing if unset.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            Response res = oai.Azure->get_generated_image(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"  // operation ID
            );

            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
@ -0,0 +1,30 @@
|
||||
#include "liboai.h"

#include <exception>
#include <iostream>

using namespace liboai;

// Example: asynchronously retrieve the result (URL) of a previously
// requested image generation operation, identified by its operation ID.
// Reads the API key from the AZURE_API_KEY environment variable; does
// nothing if the variable is not set.
int main() {
    OpenAI oai;

    if (oai.auth.SetAzureKeyEnv("AZURE_API_KEY")) {
        try {
            auto fut = oai.Azure->get_generated_image_async(
                "resource", "api_version",
                "f508bcf2-e651-4b4b-85a7-58ad77981ffa"  // operation ID
            );

            // do other work

            // wait for the future to complete
            fut.wait();

            // get the result
            auto res = fut.get();

            // output the response
            std::cout << res << std::endl;
        }
        catch (const std::exception& e) {  // catch by const reference
            std::cout << e.what() << std::endl;
        }
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user