diff --git a/packages/kbot/.gitignore b/packages/kbot/.gitignore index 9b65460b..af423dcc 100644 --- a/packages/kbot/.gitignore +++ b/packages/kbot/.gitignore @@ -32,3 +32,4 @@ systems/code-server-defaults systems/.code-server tests/assets/ packages/kbot/systems/gptr/gpt-researcher +.env diff --git a/packages/kbot/cpp/a.json b/packages/kbot/cpp/a.json deleted file mode 100644 index 916c8840..00000000 --- a/packages/kbot/cpp/a.json +++ /dev/null @@ -1,112 +0,0 @@ -{ - "items": [ - { - "label": "3D printing service", - "distance": 6.0 - }, - { - "label": "Drafting service", - "distance": 7.0 - }, - { - "label": "Engraver", - "distance": 6.5 - }, - { - "label": "Furniture maker", - "distance": 7.5 - }, - { - "label": "Industrial engineer", - "distance": 7.0 - }, - { - "label": "Industrial equipment supplier", - "distance": 5.5 - }, - { - "label": "Laser cutting service", - "distance": 4.5 - }, - { - "label": "Machine construction", - "distance": 3.0 - }, - { - "label": "Machine repair service", - "distance": 2.5 - }, - { - "label": "Machine shop", - "distance": 0.2 - }, - { - "label": "Machine workshop", - "distance": 0.0 - }, - { - "label": "Machinery parts manufacturer", - "distance": 2.0 - }, - { - "label": "Machining manufacturer", - "distance": 1.5 - }, - { - "label": "Manufacturer", - "distance": 6.0 - }, - { - "label": "Mechanic", - "distance": 5.0 - }, - { - "label": "Mechanical engineer", - "distance": 6.5 - }, - { - "label": "Mechanical plant", - "distance": 3.5 - }, - { - "label": "Metal fabricator", - "distance": 2.0 - }, - { - "label": "Metal heat treating service", - "distance": 3.5 - }, - { - "label": "Metal machinery supplier", - "distance": 5.0 - }, - { - "label": "Metal working shop", - "distance": 1.0 - }, - { - "label": "Metal workshop", - "distance": 1.2 - }, - { - "label": "Novelty store", - "distance": 10.0 - }, - { - "label": "Plywood supplier", - "distance": 9.5 - }, - { - "label": "Sign shop", - "distance": 7.5 - }, - { - "label": "Tool manufacturer", - "distance": 3.0 - }, - { - "label": "Trophy shop", - "distance": 8.0 - } - ] -} \ No newline at end of file diff --git a/packages/kbot/cpp/polymech.md b/packages/kbot/cpp/polymech.md deleted file mode 100644 index d79ee001..00000000 --- a/packages/kbot/cpp/polymech.md +++ /dev/null @@ -1,315 +0,0 @@ -# Polymech C++ Gridsearch Worker — Design - -## Goal - -Port the [gridsearch-worker.ts](../src/products/locations/gridsearch-worker.ts) pipeline to native C++, running as a **CLI subcommand** (`polymech-cli gridsearch`) while keeping all logic in internal libraries under `packages/`. The worker communicates progress via the [IPC framing protocol](./packages/ipc/) and writes results to Supabase via the existing [postgres](./packages/postgres/) package. - ---- - -## Status - -| Package | Status | Tests | Assertions | -|---------|--------|-------|------------| -| `geo` | ✅ Done | 23 | 77 | -| `gadm_reader` | ✅ Done | 18 | 53 | -| `grid` | ✅ Done | 13 | 105 | -| `search` | ✅ Done | 8 | 13 | -| CLI `gridsearch` | ✅ Done | — | dry-run verified (3ms) | -| IPC `gridsearch` | ✅ Done | 1 | 30 | -| **Total** | | **63** | **278** | - ---- - -## Existing C++ Inventory - -| Package | Provides | -|---------|----------| -| `ipc` | Length-prefixed JSON over stdio | -| `postgres` | Supabase PostgREST: `query`, `insert` | -| `http` | libcurl `GET`/`POST` | -| `json` | RapidJSON validate/prettify | -| `logger` | spdlog (stdout or **stderr** in worker mode) | -| `html` | HTML parser | - ---- - -## TypeScript Pipeline (Reference) - -``` -GADM Resolve → Grid Generate → SerpAPI Search → Enrich → Supabase Upsert -``` - -| Phase | Input | Output | Heavy work | -|-------|-------|--------|------------| -| **1. GADM Resolve** | GID list + target level | `GridFeature[]` (GeoJSON polygons with GHS props) | Read pre-cached JSON files from `cache/gadm/boundary_{GID}_{LEVEL}.json` | -| **2. Grid Generate** | `GridFeature[]` + settings | `GridSearchHop[]` (waypoints: lat/lng/radius) | Centroid, bbox, distance, area, point-in-polygon, cell sorting | -| **3. Search** | Waypoints + query + SerpAPI key | Place results (JSON) | HTTP calls to `serpapi.com`, per-waypoint caching | -| **4. Enrich** | Place results | Enriched data (emails, pages) | HTTP scraping | -| **5. Persist** | Enriched places | Supabase `places` + `grid_search_runs` | PostgREST upsert | - ---- - -## Implemented Packages - -### 1. `packages/geo` — Geometry primitives ✅ - -Header + `.cpp`, no external deps. Implements the **turf.js subset** used by the grid generator. - -```cpp -namespace geo { - -struct Coord { double lon, lat; }; -struct BBox { double minLon, minLat, maxLon, maxLat; }; - -BBox bbox(const std::vector& ring); -Coord centroid(const std::vector& ring); -double area_sq_m(const std::vector& ring); -double distance_km(Coord a, Coord b); -bool point_in_polygon(Coord pt, const std::vector& ring); - -std::vector square_grid(BBox extent, double cellSizeKm); -std::vector hex_grid(BBox extent, double cellSizeKm); -std::vector buffer_circle(Coord center, double radiusKm, int steps = 6); -} // namespace geo -``` - -**Rationale**: ~200 lines avoids pulling GEOS/Boost.Geometry. Adopts `pip.h` ray-casting pattern from `packages/gadm/cpp/` without the GDAL/GEOS/PROJ dependency (~700MB). - ---- - -### 2. `packages/gadm_reader` — Boundary resolver ✅ - -Reads pre-cached GADM boundary JSON from disk. No network calls. - -```cpp -namespace gadm { - -struct Feature { - std::string gid, name; - int level; - std::vector> rings; - double ghsPopulation, ghsBuiltWeight; - geo::Coord ghsPopCenter, ghsBuiltCenter; - std::vector> ghsPopCenters; // [lon, lat, weight] - std::vector> ghsBuiltCenters; - double areaSqKm; -}; - -BoundaryResult load_boundary(const std::string& gid, int targetLevel, - const std::string& cacheDir = "cache/gadm"); -} // namespace gadm -``` - -Handles `Polygon`/`MultiPolygon`, GHS enrichment fields, fallback resolution by country code prefix. - ---- - -### 3. `packages/grid` — Grid generator ✅ - -Direct port of [grid-generator.ts](../../shared/src/products/places/grid-generator.ts). - -```cpp -namespace grid { - -struct Waypoint { int step; double lng, lat, radius_km; }; -struct GridOptions { - std::string gridMode; // "hex", "square", "admin", "centers" - double cellSize; // km - double cellOverlap, centroidOverlap; - int maxCellsLimit; - double maxElevation, minDensity, minGhsPop, minGhsBuilt; - std::string ghsFilterMode; // "AND" | "OR" - bool allowMissingGhs, bypassFilters; - std::string pathOrder; // "zigzag", "snake", "spiral-out", "spiral-in", "shortest" - bool groupByRegion; -}; -struct GridResult { std::vector waypoints; int validCells, skippedCells; std::string error; }; - -GridResult generate(const std::vector& features, const GridOptions& opts); -} // namespace grid -``` - -**4 modes**: `admin` (centroid + radius), `centers` (GHS deduplicated), `hex`, `square` (tessellation + PIP) -**5 sort algorithms**: `zigzag`, `snake`, `spiral-out`, `spiral-in`, `shortest` (greedy NN) - ---- - -### 4. `packages/search` — SerpAPI client + config ✅ - -```cpp -namespace search { - -struct Config { - std::string serpapi_key, geocoder_key, bigdata_key; - std::string postgres_url, supabase_url, supabase_service_key; -}; - -Config load_config(const std::string& path = "config/postgres.toml"); - -struct SearchOptions { - std::string query; - double lat, lng; - int zoom = 13, limit = 20; - std::string engine = "google_maps", hl = "en", google_domain = "google.com"; -}; - -struct MapResult { - std::string title, place_id, data_id, address, phone, website, type; - std::vector types; - double rating; int reviews; - GpsCoordinates gps; -}; - -SearchResult search_google_maps(const Config& cfg, const SearchOptions& opts); -} // namespace search -``` - -Reads `[services].SERPAPI_KEY`, `GEO_CODER_KEY`, `BIG_DATA_KEY` from `config/postgres.toml`. HTTP pagination via `http::get()`, JSON parsing with RapidJSON. - ---- - -## CLI Subcommands ✅ - -### 1. `gridsearch` (One-shot execution) - -``` -polymech-cli gridsearch [OPTIONS] - -Positionals: - GID GADM GID (e.g. ESP.1.1_1) — ignored when --settings is used - QUERY Search query — ignored when --settings is used - -Options: - -l, --level INT Target GADM level (default: 0) - -m, --mode TEXT Grid mode: hex|square|admin|centers (default: hex) - -s, --cell-size FLOAT Cell size in km (default: 5.0) - --limit INT Max results per area (default: 20) - -z, --zoom INT Google Maps zoom (default: 13) - --sort TEXT Path order: snake|zigzag|spiral-out|spiral-in|shortest - -c, --config TEXT TOML config path (default: config/postgres.toml) - --cache-dir TEXT GADM cache directory (default: cache/gadm) - --settings TEXT JSON settings file (matches TypeScript GuidedPreset shape) - --enrich Run enrichment pipeline (meta + email) after search - --persistence-postgres Persist run data natively via Postgres - -o, --output TEXT Output JSON file (default: gridsearch-HH-MM.json in cwd) - --dry-run Generate grid only, skip SerpAPI search -``` - -### 2. `worker` (IPC Daemon execution) - -``` -polymech-cli worker [OPTIONS] - -Options: - --daemon Run persistent daemon pool (tier-based) - -c, --config TEXT TOML config path (default: config/postgres.toml) - --user-uid TEXT User ID to bind this daemon to (needed for place owner) - --uds TEXT Run over Unix Domain Socket / Named Pipe (TCP on Windows) at the given path -``` - -### Execution flow - -``` -1. load_config(configPath) → Config (TOML) -2. gadm::load_boundary(gid, level) → features[] -3. grid::generate(features, opts) → waypoints[] -4. --dry-run → output JSON array and exit -5. For each waypoint → search::search_google_maps(cfg, sopts) -6. Stream JSON summary to stdout -``` - -### Example - -```bash -polymech-cli gridsearch ABW "recycling" --dry-run -# → [{"step":1,"lat":12.588582,"lng":-70.040465,"radius_km":3.540}, ...] -# [info] Dry-run complete in 3ms -``` - -### IPC worker mode - -The `worker` subcommand natively routes multiplexed asynchronous `gridsearch` payloads. When launched via `--uds `, it provisions a high-performance Asio streaming server (AF_UNIX sockets on POSIX, TCP sockets on Windows). Event frames (`grid-ready`, `waypoint-start`, `location`, `node`, etc) emit bi-directionally utilizing the IPC bridging protocol, dropping locking blockades completely. - ---- - -## Exposed Configuration / Tuning Parameters - -As we integrate deeper with the core business logic, the Node orchestrator and internal services should configure and enforce limits on the underlying C++ concurrent engine. Relevant configuration surfaces we need to expose for the primary ecosystem libraries include: - -### 1. Taskflow (`https://github.com/taskflow/taskflow`) -- **`executor_threads` (`num_workers`)**: The size of the `tf::Executor` thread pool. As Gridsearch is heavily I/O network bound (HTTP calls for search/enrichment), setting this significantly higher than `std::thread::hardware_concurrency()` may aggressively improve HTTP ingestion throughput globally. -- **`max_concurrent_jobs_per_user`**: A structural limit dictating how many concurrent gridsearch invocation graphs a single tenant/user can enqueue and run actively to prevent monopolization. -- **`http_concurrency_throttle`**: Task limits enforced upon node scraping or SerpAPI requests per-pipeline graph to avoid widespread `429 Too Many Requests` bans. - -### 2. Moodycamel ConcurrentQueue (`https://github.com/cameron314/concurrentqueue`) -- **`queue_depth_max` / `backpressure`**: Since Moodycamel queue memory allocates dynamically and lock-free to any capacity, we must mandate a hard software ceiling/backpressure limit over the Node-to-C++ IPC layer. If Node blindly streams jobs faster than Taskflow can execute them, the daemon will eventually OOM. -- **`bulk_dequeue_size`**: Exposing tuning parameters for the dispatch thread on how many concurrent IPC tasks should be sucked out of the queue simultaneously. - -### 3. Boost.Asio (`https://github.com/chriskohlhoff/asio`) -- **`ipc_timeout_ms` (Read/Write)**: Mandatory timeouts for the IPC socket layer. If the orchestrator stalls, crashes, or goes silent, Asio must reap the connection and automatically GC the in-flight tasks to prevent Zombie worker processes. -- **`max_ipc_connections`**: Absolute limit on simultaneous orchestration pipelines dialing into a single Worker Pod. -- **`buffer_size_max`**: Soft constraints on async payload allocations so a malformed 200MB JSON frame from Node.js doesn't memory-spike the `asio::read` operations abruptly. - ---- - -## Build Integration - -### Dependency graph - -``` - ┌──────────┐ - │ polymech │ (the lib) - │ -cli │ (the binary) - └────┬─────┘ - ┌────────────┼────────────────┐ - ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────┐ - │ search │ │ grid │ │ ipc │ - └────┬─────┘ └────┬─────┘ └──────────┘ - │ │ - ▼ ▼ - ┌──────────┐ ┌───────────────┐ - │ http │ │ gadm_reader │ - └──────────┘ └────┬──────────┘ - ▼ - ┌──────────┐ - │ geo │ ← no deps (math only) - └──────────┘ - ┌──────────┐ - │ json │ ← RapidJSON - └──────────┘ -``` - -All packages depend on `logger` and `json` implicitly. - ---- - -## Testing - -### Unit tests (Catch2) - -Catch2 targets live in `tests/CMakeLists.txt` (e.g. `test_logger`, `test_html`, `test_postgres`, `test_json`, `test_http`, `test_polymech`, `test_cmd_kbot`, `test_ipc`, `test_functional`, e2e targets). The old geo / gadm_reader / grid / search / enrichers / `test_postgres_live` suites were removed with those package implementations. - -### Integration test (Node.js) - -- Existing `orchestrator/test-ipc.mjs` validates spawn/lifecycle/ping/job -- `orchestrator/test-gridsearch-ipc.mjs` validates full pipeline via IPC (8 event types + job result) -- `orchestrator/test-gridsearch-ipc-uds.mjs` validates high-throughput Unix Domain Sockets mapping, backpressure boundaries, and soft cancellation injections utilizing `action: cancel` frames mid-flight. - ---- - -## IPC Cancellation & Dynamic Job Tuning - -The high-performance UDS daemon now natively tracks and intercepts JSON `action: cancel` frames referencing specific `jobId`s to gracefully exit Taskflow jobs mid-flight. -Dynamic tuning limits, such as memory buffering boundaries or threading capacities, are inherently validated and bound by hard ceilings established inside the `[system]` constraint block of `config/postgres.toml`. - ---- - -## Deferred (Phase 2) - -| Item | Reason | -|------|--------| -| SerpAPI response caching | State store managed by orchestrator for now | -| Protobuf framing | JSON IPC sufficient for current throughput | -| Multi-threaded search | Sequential is fine for SerpAPI rate limits | -| GEOS integration | Custom geo is sufficient for grid math | diff --git a/packages/kbot/logs/uds.json b/packages/kbot/logs/uds.json index 96908e75..cc9745dc 100644 --- a/packages/kbot/logs/uds.json +++ b/packages/kbot/logs/uds.json @@ -659,3 +659,33 @@ [2026-03-30 17:28:11.067] [info] LLMClient: ChatCompletion returned (HTTP 200) [2026-03-30 17:28:11.070] [info] Wrote completion to C:\Users\zx\Desktop\polymech\polymech-mono\packages\kbot\cpp\a.json [2026-03-30 17:28:11.072] [info] KBot UDS: shutdown requested +[2026-04-15 11:32:11.482] [info] Worker mode: listening on stdin +[2026-04-15 11:32:11.483] [info] Worker mode: UDS Server active on 4001 +[2026-04-15 11:32:11.483] [info] Starting KBot UDS on 4001 +[2026-04-15 11:32:11.483] [info] KBot UDS: bound TCP 127.0.0.1:4001 +[2026-04-15 11:32:11.485] [info] KBot UDS ready; waiting for connections… +[2026-04-15 11:32:11.867] [info] KBot UDS client connected +[2026-04-15 11:32:11.881] [info] Receiving AI task over IPC... job: 2c385b83-538a-47e9-b320-1089f11f721f +[2026-04-15 11:32:11.881] [info] LLMClient: calling ChatCompletion (prompt chars=60) +[2026-04-15 11:32:12.127] [error] LLMClient::execute_chat: Exception caught: liboai::Response::CheckResponse(): User not found. (E_APIERROR:0x02) +[2026-04-15 11:32:12.128] [error] AI Task Failed: liboai::Response::CheckResponse(): User not found. (E_APIERROR:0x02) +[2026-04-15 11:33:56.320] [info] Worker mode: listening on stdin +[2026-04-15 11:33:56.320] [info] Worker mode: UDS Server active on 4001 +[2026-04-15 11:33:56.320] [info] Starting KBot UDS on 4001 +[2026-04-15 11:33:56.321] [info] KBot UDS: bound TCP 127.0.0.1:4001 +[2026-04-15 11:33:56.322] [info] KBot UDS ready; waiting for connections… +[2026-04-15 11:33:56.707] [info] KBot UDS client connected +[2026-04-15 11:33:56.717] [info] Receiving AI task over IPC... job: d59f02a1-0608-4de5-980d-8425a8408a35 +[2026-04-15 11:33:56.718] [info] LLMClient: calling ChatCompletion (prompt chars=60) +[2026-04-15 11:33:56.955] [error] LLMClient::execute_chat: Exception caught: liboai::Response::CheckResponse(): User not found. (E_APIERROR:0x02) +[2026-04-15 11:33:56.955] [error] AI Task Failed: liboai::Response::CheckResponse(): User not found. (E_APIERROR:0x02) +[2026-04-15 11:34:24.126] [info] Worker mode: listening on stdin +[2026-04-15 11:34:24.126] [info] Worker mode: UDS Server active on 4001 +[2026-04-15 11:34:24.126] [info] Starting KBot UDS on 4001 +[2026-04-15 11:34:24.127] [info] KBot UDS: bound TCP 127.0.0.1:4001 +[2026-04-15 11:34:24.129] [info] KBot UDS ready; waiting for connections… +[2026-04-15 11:34:24.515] [info] KBot UDS client connected +[2026-04-15 11:34:24.528] [info] Receiving AI task over IPC... job: 01a52aac-1f7d-4f88-b038-2aed372d5399 +[2026-04-15 11:34:24.529] [info] LLMClient: calling ChatCompletion (prompt chars=60) +[2026-04-15 11:34:24.618] [error] LLMClient::execute_chat: Exception caught: liboai::Response::CheckResponse(): User not found. (E_APIERROR:0x02) +[2026-04-15 11:34:24.618] [error] AI Task Failed: liboai::Response::CheckResponse(): User not found. (E_APIERROR:0x02) diff --git a/packages/media/cpp/dist/pm-image.exe b/packages/media/cpp/dist/pm-image.exe new file mode 100644 index 00000000..bac983ee Binary files /dev/null and b/packages/media/cpp/dist/pm-image.exe differ