Compare commits

..

1169 Commits

Author SHA1 Message Date
argenis de la rosa
3663de7d2a fix(tools): harden channel_ack_config function schema 2026-03-05 11:15:42 -05:00
Argenis
6e8d41e042
Merge pull request #2813 from zeroclaw-labs/replay/pr-2809-main-20260305
chore: remove Linear and Hetzner integrations
2026-03-05 02:18:01 -05:00
argenis de la rosa
2dba3b5e57 chore: remove Linear and Hetzner integrations (replay #2809) 2026-03-05 02:17:32 -05:00
Argenis
5e40c7bc2d
Merge pull request #2812 from zeroclaw-labs/replay/pr-2797-main-20260305
fix(cron,security,onboarding): unify shell policy and custom-home-safe persistence
2026-03-05 02:14:07 -05:00
argenis de la rosa
d8c716f99a fix(cron,security,onboarding): unify shell policy and custom-home-safe persistence (replay #2797) 2026-03-05 02:13:43 -05:00
Argenis
87524eb153
Merge pull request #2811 from zeroclaw-labs/replay/pr-2709-main-20260305
feat(integrations): support lmstudio custom connector endpoint
2026-03-05 02:08:54 -05:00
argenis de la rosa
f6870ff733 feat(integrations): support lmstudio custom connector endpoint (replay #2709) 2026-03-05 02:07:10 -05:00
Argenis
bf701498de
Merge pull request #2756 from zeroclaw-labs/chore/main-codeowners-tri-owner-20260304
chore(codeowners): align main with dev tri-owner approver routing
2026-03-05 01:54:26 -05:00
Argenis
66f4369813
Merge pull request #2712 from zeroclaw-labs/chore_landing_readme_i18n_20260304
feat(site): add es/pt/it locale support with UI selector
2026-03-05 01:53:51 -05:00
Argenis
7c22088807
Merge pull request #2708 from zeroclaw-labs/issue-2679-audit-log-init
fix(audit): initialize log file when audit logging is enabled
2026-03-05 01:53:47 -05:00
Argenis
50d5199caa
Merge pull request #2681 from zeroclaw-labs/issue-2590-semantic-vectordb-guard
feat(security): add semantic vectordb guard and corpus updater
2026-03-05 01:53:39 -05:00
Argenis
dc514cf5ef
Merge pull request #2592 from zeroclaw-labs/issue-2472-enhanced-recall-safe
feat(memory): multi-query expansion with error-safe recall
2026-03-05 01:53:35 -05:00
Argenis
1c8392561d
Merge pull request #2577 from zeroclaw-labs/issue-2503-napcat-channel
feat(channels): add napcat/onebot onboarding and config UI
2026-03-05 01:53:31 -05:00
Argenis
f10ad8ed69
Merge pull request #2576 from zeroclaw-labs/issue-2510-activation-warning
fix(config): report initialized state correctly on load
2026-03-05 01:53:28 -05:00
Argenis
26cdebff5a
Merge pull request #2574 from zeroclaw-labs/issue-2460-group-sender-identity
fix(channels): include telegram sender identity in group LLM prompts
2026-03-05 01:53:25 -05:00
Argenis
84e530b54a
Merge pull request #2565 from zeroclaw-labs/issue-2551-codex-ws-sse-fallback
fix(provider): fallback to sse on codex websocket no-response
2026-03-05 01:53:20 -05:00
Argenis
4e552654b9
Merge pull request #2496 from zeroclaw-labs/issue-2486-skill-invocation-links
fix(skills): allow safe cross-skill markdown references
2026-03-05 01:53:17 -05:00
Argenis
63acfefe30
Merge pull request #2345 from zeroclaw-labs/issue-2274-crm-heartbeat
feat(heartbeat): add lightweight proactive task dedupe and per-tick caps
2026-03-05 01:53:13 -05:00
Argenis
d22657fac0
Merge pull request #2323 from zeroclaw-labs/pr-1837-s34-main-rebased
feat(hardware): replay pico toolchain + prompt wiring on main [RMN-1837]
2026-03-05 01:53:06 -05:00
Argenis
e160922872
Merge pull request #2288 from zeroclaw-labs/pr-2049-mainfix
fix(security): deny approval-required tools on non-CLI channels
2026-03-05 01:53:03 -05:00
Argenis
709411d533
Merge pull request #2804 from zeroclaw-labs/wt-electric-blue-live
feat(web): Electric Blue dashboard with polished UI
2026-03-05 01:33:38 -05:00
Argenis
35c21c4fdf
Merge branch 'main' into wt-electric-blue-live 2026-03-05 01:27:09 -05:00
argenis de la rosa
05f52fdab5 docs: resolve remaining main merge collision in troubleshooting 2026-03-05 01:26:29 -05:00
argenis de la rosa
0ce9434a09 fix(pr-2804): resolve main merge conflicts for dashboard release 2026-03-05 01:23:45 -05:00
JordanTheJet
83767cbacc
Merge branch 'main' into chore_landing_readme_i18n_20260304 2026-03-04 14:48:50 -05:00
Preventnetworkhacking
070a7ffeac fix(gateway): preserve handler Cache-Control + add error response test
- Use entry().or_insert() for Cache-Control so SSE no-cache is preserved
- Add security_headers_are_set_on_error_responses test
- Addresses CodeRabbit review feedback on #2476
2026-03-04 13:46:16 -05:00
Preventnetworkhacking
a4e04d0f93 feat(gateway): add security response headers 2026-03-04 13:46:16 -05:00
argenis de la rosa
848f36c371 fix(nextcloud): support Activity Streams 2.0 Talk webhooks
(cherry picked from commit 30fe8c7685)
2026-03-04 13:44:12 -05:00
argenis de la rosa
edf43d681f fix(discord): treat application/ogg as audio for transcription 2026-03-04 13:36:18 -05:00
argenis de la rosa
877f94990e fix(channels,runtime): backport discord transcription and WSL shell guard 2026-03-04 13:36:18 -05:00
xj
7e8dcd3e5a docs(governance): align PR gate policy with hardened protection 2026-03-04 13:34:35 -05:00
argenis de la rosa
41b3db39b7 fix(matrix): break OTK conflict retry loop
(cherry picked from commit 851a3e339b)
2026-03-04 13:33:35 -05:00
argenis de la rosa
9fd73601c8 feat(skills): load skill bodies on demand in compact mode 2026-03-04 13:30:15 -05:00
Argenis
2e2045b53d chore(codeowners): align main with dev tri-owner approver routing 2026-03-04 11:38:58 -05:00
Argenis
69f6c846bf
Merge pull request #2738 from zeroclaw-labs/codex/raw-refs-heads-installer-url
installer: use raw refs/heads/main bootstrap URL
2026-03-04 09:40:58 -05:00
argenis de la rosa
1f2a44bee2 installer: use raw refs/heads/main bootstrap URL 2026-03-04 09:38:27 -05:00
Argenis
25e0465d12
Merge pull request #2737 from zeroclaw-labs/codex/installer-tty-tui-default
installer: keep TUI onboarding default for curl|bash flows
2026-03-04 09:33:36 -05:00
argenis de la rosa
7ed4c32359 installer: default to interactive onboarding under piped installs 2026-03-04 09:31:32 -05:00
Argenis
56ff9711ee
Merge pull request #2736 from zeroclaw-labs/cherry-css
feat(web): add polished dashboard styles
2026-03-04 09:27:20 -05:00
argenis de la rosa
2ee9efb82e feat(web): add polished dashboard styles
Add production-ready CSS styling for the embedded web dashboard
with electric theme, collapsible sections, and responsive layout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 09:26:47 -05:00
Argenis
f9b9e95b15
Merge pull request #2729 from zeroclaw-labs/fix/tui-force-overwrite-unlock
fix(onboarding): unlock overwrite toggle in TUI flow
2026-03-04 06:58:11 -05:00
argenis de la rosa
35811c837e fix(onboarding): unlock overwrite toggle in TUI flow 2026-03-04 06:57:15 -05:00
Argenis
fe96edf53f
Merge pull request #2727 from zeroclaw-labs/codex/tui-onboarding-clean
feat(onboarding): make TUI wizard default one-click flow
2026-03-04 06:44:36 -05:00
argenis de la rosa
5ac14e28b2 feat(onboarding): make TUI wizard default one-click flow 2026-03-04 06:38:35 -05:00
Argenis
7b7be365c0
Merge pull request #2718 from zeroclaw-labs/feat/readme-i18n-es-pt-it
docs(i18n): add Spanish, Portuguese, and Italian README translations
2026-03-04 06:02:53 -05:00
argenis de la rosa
7c48d364ab docs(i18n): add Spanish, Portuguese, and Italian README translations
- Add language links to main README for es/pt/it
- Create docs/i18n/es/README.md with Spanish translation
- Create docs/i18n/pt/README.md with Portuguese translation
- Create docs/i18n/it/README.md with Italian translation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 06:00:18 -05:00
argenis de la rosa
82b715304f docs(config): clarify fallback_api_keys contract 2026-03-04 05:52:37 -05:00
argenis de la rosa
d595e2f692 fix(ci): fail hard on required rust components
- validate reliability.fallback_api_keys mappings and values

- assert primary mock in fallback API key regression test

- avoid logging tainted fallback entries
2026-03-04 05:52:37 -05:00
argenis de la rosa
29fc58f63a ci: scope rust tooling checks by job type 2026-03-04 05:52:37 -05:00
argenis de la rosa
312fe8121a ci: ensure rustfmt and rustdoc in toolchain setup 2026-03-04 05:52:37 -05:00
argenis de la rosa
5232e3e1d9 feat(reliability): support per-fallback API keys for custom endpoints 2026-03-04 05:52:37 -05:00
argenis de la rosa
ea9cda1083 docs(config): clarify custom auth_header compatibility contract 2026-03-04 05:38:56 -05:00
argenis de la rosa
b31491bf38 fix(providers): harden custom auth_header resolution 2026-03-04 05:38:56 -05:00
argenis de la rosa
3a8e7d6edf feat(providers): support custom auth_header for custom endpoints 2026-03-04 05:38:56 -05:00
argenis de la rosa
c09b1c0aaa test(channels): ensure runtime config cleanup before assert 2026-03-04 05:37:33 -05:00
argenis de la rosa
2f19d5ec49 fix(channels): use routed provider for channel startup
Initialize channel runtime providers through routed provider construction so model_routes, hint defaults, and route-scoped credentials are honored.

Add a regression test that verifies start_channels succeeds when global provider credentials are absent but route-level config is present.

Refs #2537
2026-03-04 05:37:33 -05:00
argenis de la rosa
8dc4f3722b fix(daemon): add shutdown grace window and signal hint parity 2026-03-04 05:37:30 -05:00
argenis de la rosa
9c2f8efa70 fix(daemon): handle sigterm shutdown signal
Wait for either SIGINT or SIGTERM on Unix so daemon mode behaves correctly under container and process-manager termination flows.

Record signal-specific shutdown reasons and add unit tests for shutdown signal labeling.

Refs #2529
2026-03-04 05:37:30 -05:00
argenis de la rosa
b3887d7ddf feat(site): add es/pt/it locale support with UI selector
- Expand runtime locale support from 2 to 5 locales (en, zh, es, pt, it)
- Add locale normalization/cycling logic and button labels
- Add Spanish/Portuguese/Italian UI text bundles
- Update localized labels for journey/audience/kind, reading paths,
  command-lane hints, and engineering pillars
- Rework locale rendering with fallback helper for untranslated fields
- Update language toggle behavior in command palette
- Add localized README files (es, pt, it)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 04:50:31 -05:00
argenis de la rosa
4b45802bf7 fix(audit): initialize log file when audit logging is enabled 2026-03-04 04:18:24 -05:00
argenis de la rosa
26cda3dd6b chore(ci): retrigger flaky workflow lanes 2026-03-03 23:38:34 -05:00
argenis de la rosa
860a646c70 chore(ci): rerun PR checks after main hotfix 2026-03-03 22:13:17 -05:00
killf
02cf1a558a
Merge pull request #2680 from zeroclaw-labs/fix/skill-location-path-normalization
fix(skills): normalize path separators to forward slashes for XML output
2026-03-04 10:51:05 +08:00
Argenis
289b406d0d
Merge pull request #2675 from zeroclaw-labs/hotfix/main-ci-red-20260303
fix(ci): unblock main lint and codeql regressions
2026-03-03 21:08:19 -05:00
argenis de la rosa
f2e23b35fc feat(security): add semantic vectordb guard and corpus updater 2026-03-03 20:31:26 -05:00
killf
c697497e34 fix(skills): normalize path separators to forward slashes for XML output
On Windows, file paths use backslashes (\) but the test expected forward
slashes (/). The render_skill_location function now normalizes all path
separators to forward slashes for consistent XML output across platforms.

This fixes the failing test:
prompt_skills_compact_mode_omits_instructions_and_tools

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-04 09:20:47 +08:00
argenis de la rosa
8ac7879d43 fix(skills): avoid merge conflicts by fully qualifying symlink test refs 2026-03-03 20:17:20 -05:00
killf
535f23b5c3
Merge pull request #2677 from zeroclaw-labs/fix/one-time-approval-bypass-token
fix(agent): one-time non-cli allow-all token now bypasses all approvals
2026-03-04 09:08:01 +08:00
argenis de la rosa
8a5e17d6f3 fix(skills): keep symlink test helpers in scope on merge 2026-03-03 20:07:36 -05:00
killf
d286ecf820 fix(test): align session grant test with actual behavior
The test `run_tool_call_loop_uses_non_cli_session_grant_without_waiting_for_prompt`
was expecting session grant to bypass interactive approval for shell tool, but
session grant only bypasses non-interactive approvals by design.

Fix: add shell to auto_approve list in test config so echo hi doesn't require
interactive approval, allowing session grant to work as intended.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-04 09:05:16 +08:00
Argenis
e1fd9296ee
Merge pull request #1853 from xuhao1/feat/feishu-doc-tool
feat(tools): add Feishu document operation tool with 13 actions (RMN-294)
2026-03-03 19:57:43 -05:00
killf
4dbfd171f6
Merge pull request #2676 from zeroclaw-labs/fix/one-time-approval-bypass-token
fix(agent): one-time non-cli allow-all token now bypasses all approvals
2026-03-04 08:56:40 +08:00
killf
5d38843f38 fix(agent): one-time non-cli allow-all token now bypasses all approvals
Previously, the one-time `non_cli_allow_all_once` token only bypassed
non-interactive approvals due to a condition that required
`!requires_interactive_approval`. This caused tools like `shell`
(which require interactive approval in supervised mode) to be blocked
even when the one-time bypass token was consumed.

Fix: Separate the bypass logic:
- One-time bypass token: bypass ALL approvals (including interactive)
- Session grant: bypass only non-interactive approvals (unchanged)

This fixes the failing test
`run_tool_call_loop_consumes_one_time_non_cli_allow_all_token`.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-04 08:55:29 +08:00
argenis de la rosa
cc53ba6685 fix(gateway): clarify public-bind warning wording 2026-03-03 19:54:57 -05:00
Argenis
1a371c9ca9
Merge pull request #1754 from LupoGrigi0/fix/release-v0.1.8-build-errors
fix(build): remove duplicate ModelProviderConfig and fix App.tsx destructure (RMN-298)
2026-03-03 19:48:55 -05:00
killf
ea629a31f2
Merge pull request #2673 from zeroclaw-labs/fix/test-compilation-warnings
fix(test): resolve compilation errors and warnings
2026-03-04 08:36:54 +08:00
killf
f6fdf0545b fix(test): resolve compilation errors and warnings
- Fix missing canary_tokens_enabled parameter in run_tool_call_loop_with_non_cli_approval_context test call
- Remove unused imports in symlink_tests.rs (Config, handle_command, load_skills_with_config, SkillCommands)
- Remove unused import async_trait in plugins/loader.rs tests
- Prefix unused approval_manager variables with underscore in 5 channel test functions

Resolves compilation errors that prevented cargo test from completing and clears all unused import/variable warnings.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-04 08:34:48 +08:00
argenis de la rosa
6bdcb4417c fix(ci): unblock main lint and codeql gates 2026-03-03 19:03:29 -05:00
argenis de la rosa
4df1487e28 fix(http_request): hard-fail empty credential env values 2026-03-03 18:18:58 -05:00
argenis de la rosa
37d2244070 fix(http_request): stabilize credential_profile env resolution 2026-03-03 18:18:58 -05:00
argenis de la rosa
e7c388b695 fix(telegram): handle native draft stream fallback edges 2026-03-03 18:15:45 -05:00
argenis de la rosa
135e4ed730 feat(telegram): add StreamMode::On native draft streaming 2026-03-03 18:15:45 -05:00
Argenis
978cbdc7a1
Merge pull request #2664 from zeroclaw-labs/issue-2641-post-turn-memory-extraction
feat(memory): add post-turn durable fact extraction across entry points
2026-03-03 18:15:25 -05:00
Argenis
403fd2dc2b
Merge pull request #2663 from zeroclaw-labs/issue-2651-agent-allowed-denied-tools
feat(agent): add primary allowed_tools/denied_tools filtering
2026-03-03 18:15:23 -05:00
argenis de la rosa
0947a928a5 fix(agent): expose test_locks module for binary test builds 2026-03-03 17:55:21 -05:00
argenis de la rosa
7ba6e1ff83 fix(repo): update stale ZeroClaw GitHub URLs 2026-03-03 17:36:54 -05:00
argenis de la rosa
5471be7c14 ci(docker): publish GHCR image built with all cargo features 2026-03-03 17:36:52 -05:00
argenis de la rosa
9eb0b2c2f3 fix(cron): preserve active model for custom endpoint jobs 2026-03-03 17:36:29 -05:00
argenis de la rosa
5b59aee016 fix(skills): broaden ClawhHub URL detection for installer 2026-03-03 17:35:27 -05:00
argenis de la rosa
1162df77f2 fix(mcp): persist and forward Mcp-Session-Id for HTTP transport 2026-03-03 17:34:53 -05:00
argenis de la rosa
3702d224e9 feat(security): add canary token exfiltration guard 2026-03-03 17:27:19 -05:00
argenis de la rosa
429ea06d69 feat(approval): add command-level shell approval rules 2026-03-03 17:20:06 -05:00
argenis de la rosa
0140d6920c chore(lockfile): refresh Cargo.lock for locked CI jobs 2026-03-03 17:20:05 -05:00
argenis de la rosa
f4997cef91 test(config): cover slack group_reply nested table parsing 2026-03-03 17:20:05 -05:00
argenis de la rosa
09d7684cfa feat(memory): add post-turn durable fact extraction across all agent entry points 2026-03-03 16:20:45 -05:00
argenis de la rosa
696a0c5432 feat(agent): add primary tool allowlist and denylist filtering 2026-03-03 16:19:28 -05:00
xj
3141e9a585 Revert "chore(release): prepare v0.2.0 changelog"
This reverts commit a7a3c99e80.
2026-03-03 13:11:46 -08:00
xj
a2d842fae2 Revert "chore(release): bump version to 0.2.0"
This reverts commit f3999ab476.
2026-03-03 13:11:46 -08:00
argenis de la rosa
8579ae5c07 chore(ci): retrigger checks for codex fallback branch 2026-03-03 16:11:22 -05:00
xj
f3999ab476 chore(release): bump version to 0.2.0 2026-03-03 13:03:26 -08:00
xj
a7a3c99e80 chore(release): prepare v0.2.0 changelog 2026-03-03 12:56:58 -08:00
argenis de la rosa
f2ba33fce8 ci: enforce cargo component in provenance job 2026-03-03 14:00:29 -05:00
argenis de la rosa
eefeb347b3 ci: skip pub release job for prerelease tags 2026-03-03 13:46:32 -05:00
argenis de la rosa
1c0e5d957a ci: stabilize release/provenance workflow execution 2026-03-03 13:35:12 -05:00
argenis de la rosa
1bdf4abd1c chore: bump version to 0.1.8 alpha track 2026-03-03 12:10:14 -05:00
argenis de la rosa
e2ca22052f ci: scope release tests to quick sanity 2026-03-03 12:10:14 -05:00
argenis de la rosa
d689dd7e8f ci: align release quality gate with repo baseline 2026-03-03 12:10:14 -05:00
Argenis
62fdddc690 ci: activate toolchain PATH for cargo fmt/clippy in release build 2026-03-03 09:53:29 -05:00
argenis de la rosa
d214ebfa1a ci: ensure rustfmt/clippy components in production release build 2026-03-03 09:44:44 -05:00
argenis de la rosa
cc13e88c8b style: fix rustfmt drift blocking production release build 2026-03-03 09:40:16 -05:00
argenis de la rosa
21616689f8 ci: add Blacksmith production release build workflow 2026-03-03 09:06:38 -05:00
Argenis
ad6a10a903 ci: align Blacksmith runner label with repository policy 2026-03-03 09:02:11 -05:00
blacksmith-sh[bot]
426fee3568 Migrate workflows to Blacksmith 2026-03-03 09:02:11 -05:00
killf
c89fc6efa9
Merge pull request #2636 from zeroclaw-labs/refactor/update-remove-unused-import
refactor(update): remove unused ErrorKind import
2026-03-03 20:31:38 +08:00
killf
46d087eb8f refactor(update): remove unused ErrorKind import
Remove unused `std::io::ErrorKind` import and use fully qualified path instead.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-03 20:29:33 +08:00
killf
33eca48c6f
Merge pull request #2632 from zeroclaw-labs/fix/cron-past-time-validation
fix(agent): track tool execution success status correctly
2026-03-03 18:59:00 +08:00
killf
a6f25d8796 fix(agent): track tool execution success status correctly
Track actual success status from tool execution results instead of
assuming all tool calls succeed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-03 18:49:21 +08:00
xj
2749c77625
Merge pull request #2627 from zeroclaw-labs/fix/ci-docker-api-autodetect
fix(ci): auto-detect Docker API version for Buildx
2026-03-03 02:12:13 -08:00
xj
da2d0aee08 fix(ci): detect docker api version before buildx 2026-03-03 02:11:49 -08:00
xj
b0dab4ee1b
Merge pull request #2626 from zeroclaw-labs/fix/release-docker-manual-tag-publish
fix(release): allow manual GHCR publish for existing tags
2026-03-03 02:09:29 -08:00
xj
426b3b01c6 fix(release): enable manual GHCR publish for tagged releases 2026-03-03 02:09:04 -08:00
xj
4f66ecbb1d
Merge pull request #2625 from zeroclaw-labs/fix/release-apt-lock-timeout
fix(release): harden apt lock handling in pub-release
2026-03-03 01:32:05 -08:00
xj
93963566d6 fix(release): add apt lock timeout and retries in pub-release 2026-03-03 01:31:27 -08:00
xj
ede33ff9ed
Merge pull request #2624 from zeroclaw-labs/fix/release-build-blockers-rustup-isolation
fix(release): isolate rust toolchain homes in pub-release matrix
2026-03-03 01:26:38 -08:00
xj
07ba229a46 fix(release): harden pub-release cross and apt reliability 2026-03-03 01:25:18 -08:00
xj
02f6a5cb98 fix(release): isolate rust toolchain homes in pub-release 2026-03-03 01:19:31 -08:00
killf
e8e7e787f4
Merge pull request #2306 from killf/chore/gitignore-add-claude-dir
chore: add .claude to .gitignore
2026-03-03 16:47:37 +08:00
killf
39e7c4fa79
Merge pull request #2538 from killf/fix/feishu-announcement-lark-fallback
fix(cron): support feishu announcement fallback to lark config
2026-03-03 16:45:21 +08:00
killf
4059640436
Merge pull request #2621 from zeroclaw-labs/fix/windows-stack-size
fix(windows): increase stack size to resolve runtime overflow
2026-03-03 16:44:42 +08:00
xj
32c6cdb5c0
Merge pull request #2622 from zeroclaw-labs/fix/release-build-blockers
fix(ci): bump macOS binary size limit to 22MB
2026-03-03 00:44:31 -08:00
xj
8d41442fa1 fix(ci): bump macOS binary size hard limit from 20MB to 22MB
macOS x86_64 release binary is 20.4MB, exceeding the 20MB safeguard.
2026-03-03 00:43:19 -08:00
xj
49efa16418
Merge pull request #2618 from zeroclaw-labs/fix/release-build-blockers
fix(ci): pass GH_TOKEN to release trigger guard step
2026-03-03 00:21:32 -08:00
xj
e77d9cf8fb fix(ci): pass GH_TOKEN to release trigger guard validation step
The gh CLI was installed with GH_TOKEN but the validate step that
actually calls it was missing the env var, causing auth failure.
2026-03-03 00:19:25 -08:00
xj
9e8672c435
Merge pull request #2616 from zeroclaw-labs/fix/release-build-blockers
fix(ci): propagate checkout auth to release trigger guard
2026-03-03 00:15:15 -08:00
xj
0dcad871ab fix(ci): propagate checkout auth to release trigger guard git operations
The release trigger guard uses git ls-remote and git fetch against a
bare HTTPS URL, which fails on private repos (403). Use the checkout
directory's configured auth headers instead.
2026-03-02 23:57:04 -08:00
xj
03875c6aa2
Merge pull request #2615 from gh-xj/fix/release-build-blockers
fix(ci): install gh CLI for release trigger guard on self-hosted runners
2026-03-02 23:51:18 -08:00
xj
a1306384b9 fix(ci): install gh CLI on self-hosted runners for release trigger guard
The release_trigger_guard.py requires gh CLI to verify CI Required Gate
status in publish mode. Self-hosted hetzner runners don't have gh
pre-installed, causing the guard to fail with exit code 3.

Add a gh CLI install step before the guard runs, with a skip if gh is
already available.
2026-03-02 23:50:38 -08:00
xj
312f30f25f
Merge pull request #2614 from gh-xj/fix/release-build-blockers
fix(ci): unblock release builds — binary size limit + cross-compile headers
2026-03-02 23:46:51 -08:00
xj
dcfb23d711 fix(ci): unblock release builds — bump binary size limit and add cross-compile headers
Two pre-existing issues blocking Pub Release builds:

1. x86_64-unknown-linux-gnu binary grew to 24MB, exceeding the 23MB
   hard limit. Bump Linux safeguard from 23MB to 26MB to accommodate
   recent feature growth. Binary size investigation deferred to follow-up.

2. armv7-unknown-linux-gnueabihf fails compiling ring/aws-lc-sys due to
   missing libc6-dev-armhf-cross headers. Add libc dev package install
   for armv7 and aarch64 cross-compile targets.
2026-03-02 23:45:00 -08:00
xj
bb8979d7a1
Merge pull request #2613 from gh-xj/feat/release-safety-gates
fix(ci): restore GitHub-hosted runner labels for macOS/Windows release builds
2026-03-02 23:35:05 -08:00
argenis de la rosa
b89a3cd6d3 feat(dashboard): update installer, pairing flow, and branding
- Switch one-click bootstrap docs and scripts to the new zeroclawlabs.ai install.sh domain-based installer
- Refine dashboard pairing UX, sidebar branding, and layout; wire Vite proxy to gateway /pair on the default port for reliable 6-digit pairing

Made-with: Cursor
2026-03-03 02:32:39 -05:00
argenis de la rosa
7c7ff66751 refactor(web): remove dashboard mock mode for ship 2026-03-03 02:32:39 -05:00
argenis de la rosa
d56ad644af feat(dashboard): add localized mock dashboard and mobile smoke coverage 2026-03-03 02:32:39 -05:00
xj
8c1366dc00 fix(ci): restore GitHub-hosted runner labels for macOS and Windows release builds
The release safety gates branch inadvertently replaced all matrix os
labels with self-hosted Linux runner arrays, including macOS and Windows
targets that require GitHub-hosted runners. This caused all three
cross-platform builds to fail: macOS builds attempted C compilation with
GNU cc (missing -arch flag), and Windows MSVC builds failed without
lib.exe.

Restore the original GitHub-hosted labels:
- macos-15-intel for x86_64-apple-darwin
- macos-14 for aarch64-apple-darwin
- windows-latest for x86_64-pc-windows-msvc
2026-03-02 23:28:30 -08:00
xj
a300878f39
Merge pull request #2610 from gh-xj/feat/release-safety-gates
ci: ensure cargo component before cache and e2e tests
2026-03-02 22:58:40 -08:00
xj
776e15e381 ci: enforce strict cargo component check for pinned toolchains 2026-03-02 22:45:48 -08:00
xj
8f4a400b60 ci: ensure cargo component before cache and e2e tests 2026-03-02 22:27:54 -08:00
killf
1e4fc3c287 fix(windows): increase stack size to resolve runtime overflow
Windows platforms have a default stack size (1-2MB) that is too small
for the heavy JsonSchema derives in config/schema.rs (133 derives).
This causes "thread 'main' has overflowed its stack" on startup.

Changes:
- Increase stack size to 8MB for x86_64-pc-windows-msvc
- Increase stack size to 8MB for aarch64-pc-windows-msvc
- Remove unused ErrorKind import in src/update.rs

Fixes: cargo run --bin zeroclaw stack overflow on Windows
2026-03-03 14:23:56 +08:00
xj
0a0433bae6
Merge pull request #2604 from gh-xj/feat/release-safety-gates
ci(release): add automated release safety gates
2026-03-02 21:10:32 -08:00
xj
0d96fcd352 fix: downgrade CI gate to warning when commit SHA not found on remote
The test environment uses local-only commits that don't exist on
GitHub, causing HTTP 422 "No commit found". Distinguish this from
real API failures by checking stderr for the specific error message
and downgrading to a warning. Real API errors still fail closed in
publish mode.
2026-03-02 21:05:46 -08:00
xj
6c7679c464 fix: fail closed on CI gate verification failure in publish mode
When publish_release=true, treat api_error and gh_not_found as
violations (blocking) instead of warnings. In verify mode, these
remain advisory warnings. This ensures publish cannot proceed without
verified CI status.

Addresses CodeRabbit review feedback on PR #2604.
2026-03-02 21:03:27 -08:00
xj
316e38546c fix: address Copilot review feedback on release safety gates
- Dry-run gate: use server-side query params instead of client-side jq
  filtering to avoid pagination issues
- Post-release validation: use artifact contract JSON for expected asset
  count instead of hardcoded magic number
- Post-release validation: use grep -Fq for fixed-string version match
  to avoid regex interpretation
- cut_release_tag.sh: clarify CI gate comment header
2026-03-02 20:53:50 -08:00
xj
d4c24f6a83 fix(ci): address coderabbit review findings
- Split GH_TOKEN away from binary smoke-test step to prevent token
  exfiltration via compromised release artifact
- Wrap gh subprocess calls in try/except FileNotFoundError so the
  guard degrades gracefully when gh CLI is not installed
- Remove stderr suppression from cargo check --locked so diagnostics
  are visible on failure
2026-03-02 20:40:13 -08:00
xj
0aabe5112a fix(ci): downgrade CI gate api_error to warning for test compatibility
The CI green gate queried gh api for check-run status, but in test
environments the commit SHA doesn't exist on GitHub, causing HTTP 422.
Downgrade api_error from violation to warning so the guard remains
functional in offline/test contexts while still blocking on real CI
failures (pending, not_found on actual repos, non-success conclusions).
2026-03-02 20:30:59 -08:00
xj
fbe7a7ed35 ci(release): add automated release safety gates
- release_trigger_guard.py: block publish if CI Required Gate hasn't
  passed on the tag commit; warn if no prior dry-run exists
- cut_release_tag.sh: check CI status via gh api before creating tag;
  run cargo check --locked to catch stale Cargo.lock locally
- ci-post-release-validation.yml: new workflow triggered on release
  publish — validates asset count, SHA256 checksums, and binary version
2026-03-02 20:21:05 -08:00
Chummy
10b7838bfa fix(ci): avoid login shell side effects in shell command execution 2026-03-03 10:12:29 +08:00
Chummy
a8958ca728 fix(ci): pin shell execution to /bin/sh for runtime and tests 2026-03-03 10:12:29 +08:00
Chummy
b22dc4875e ci: expose toolchain bin path before cargo test flake gate 2026-03-03 10:12:29 +08:00
Chummy
5a84b3a728 ci: make strict delta gate resilient to toolchain cargo path 2026-03-03 10:12:29 +08:00
Chummy
ffb05e7392 ci: expose toolchain bin on PATH for cargo fmt/clippy 2026-03-03 10:12:29 +08:00
Chummy
0453519e1c ci: run rust quality subcommands via rustup toolchain 2026-03-03 10:12:29 +08:00
Chummy
c1a230515e ci: self-heal rustfmt/clippy components in rust quality gate 2026-03-03 10:12:29 +08:00
Chummy
a62a55867d fix(test): sanitize shell ENV hooks for deterministic command execution 2026-03-03 10:12:29 +08:00
xj
deaa6f670f release: v0.2.0 2026-03-03 10:12:29 +08:00
killf
b9ae04667d fix(cron): support feishu announcement fallback to lark config
When using feishu with scheduled tasks, the system could not deliver
announcements because it only looked for [channels_config.feishu] and
did not fall back to [channels_config.lark] with use_feishu=true.

This change allows feishu announcements to use the lark config as a
fallback when use_feishu is enabled, fixing the delivery path for users
who configure under the lark section.

Fixes inability to send feishu messages when channel is configured as Lark.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-03 07:32:56 +08:00
argenis de la rosa
d80b535f5b fix(memory): propagate primary recall errors in enhanced retrieval 2026-03-02 17:53:43 -05:00
argenis de la rosa
ff8017a1f6 feat(memory): add multi-query keyword expansion 2026-03-02 17:53:36 -05:00
argenis de la rosa
631ec4baf4 fix(channels): include telegram group sender identity in llm prompt 2026-03-02 14:14:25 -05:00
argenis de la rosa
31ca8c2fed feat(channels): add napcat/onebot onboarding and config UI 2026-03-02 14:08:08 -05:00
argenis de la rosa
e946854758 fix(config): report initialized state correctly on load 2026-03-02 14:08:08 -05:00
argenis de la rosa
1707f974e6 fix(skills): constrain missing cross-skill link escapes 2026-03-02 14:06:23 -05:00
argenis de la rosa
0eab71eb73 fix(provider): fallback to sse on codex websocket no-response
Classify websocket idle/no-response conditions as transport-unavailable so Codex auto mode can fall back to SSE.

Keep partial-output timeout cases as stream errors and add focused regression tests for classification behavior.

Refs #2551
2026-03-02 13:32:22 -05:00
Chummy
b21a1a91ac ci: prioritize release branch runs across queue 2026-03-03 00:14:49 +08:00
Chummy
f4df039621 ci: prioritize release codeql with dedicated hetzner lane 2026-03-03 00:14:49 +08:00
Chummy
31426d66db ci: bind codeql to dedicated hetzner lane 2026-03-02 23:57:45 +08:00
Chummy
ca2eb0d466 ci: rebalance lightweight gates to aws-india lane 2026-03-02 23:17:02 +08:00
Chummy
c37ef88d5e ci: whitelist aws light runner labels in actionlint 2026-03-02 22:47:22 +08:00
Chummy
bdb873e743 ci: route lightweight jobs to aws-india cpu40 runners 2026-03-02 22:47:22 +08:00
Chummy
27341a067b ci: offload lightweight workflows from hetzner runner lane 2026-03-02 21:17:09 +08:00
Chummy
7382fda5e4 ci: recover cc shim in current step after bootstrap 2026-03-02 20:10:50 +08:00
Chummy
77bf7ff75a ci(security): harden C toolchain bootstrap for non-sudo runners 2026-03-02 20:10:50 +08:00
Chummy
5cac4f873f style(rustfmt): align orchestration config tests 2026-03-02 20:10:50 +08:00
Chummy
1697cc276d docs+tests(agent): complete orchestration runtime config coverage 2026-03-02 20:10:50 +08:00
Chummy
fd5187b754 fix(docker): include root build script in cached build context 2026-03-02 18:28:28 +08:00
Chummy
21b91dfea4 fix(docker): include workspace crate manifests in prebuild stage 2026-03-02 18:28:28 +08:00
Chummy
4443406311 ci: pin docker api level for self-hosted daemon compatibility 2026-03-02 18:28:28 +08:00
Chummy
04653366b2 ci: use system python on self-hosted runners 2026-03-02 18:28:28 +08:00
Chummy
1e6d4f17f5 ci: route workflows to hetzner self-hosted runner pool 2026-03-02 18:28:28 +08:00
Chummy
53c541547d fix(qq): enforce parsed https URLs for media upload requests 2026-03-02 18:03:55 +08:00
Chummy
61398eb900 fix(channels): robust qq/feishu image delivery and multimodal proxy fetch routing 2026-03-02 18:03:55 +08:00
xj
f18fac5b26
Merge pull request #2463 from gh-xj/fix/wati-webhook-auth
fix(gateway): require WATI webhook auth (RMN-323)
2026-03-02 00:58:58 -08:00
Chummy
9a0a1155ba fix(ci): keep scheduled rollback audits non-blocking 2026-03-02 16:35:23 +08:00
xj
b316a351cf fix(config): avoid secret-derived debug value in WATI redaction 2026-03-02 00:29:53 -08:00
xj
91d8abf723 feat(observability): add labeled WATI webhook auth failure metric 2026-03-02 00:29:53 -08:00
xj
4d75e84503 fix: address review findings on plugin/runtime and CI portability 2026-03-02 00:29:53 -08:00
xj
b171955335 fix(gateway): accept case-insensitive WATI Bearer token scheme 2026-03-02 00:29:52 -08:00
xj
a56479f15b fix: address coderabbit feedback for wati auth/docs 2026-03-02 00:29:52 -08:00
xj
b4087d547b fix(gateway): satisfy strict lint and codeql for wati tests 2026-03-02 00:29:52 -08:00
xj
ca99948993 fix(gateway): accept any valid WATI signature header 2026-03-02 00:29:52 -08:00
xj
d59b2cb13e fix(gateway): enforce WATI webhook authentication 2026-03-02 00:29:52 -08:00
Chummy
0633ed6ce3 fix(ci): keep scheduled canary audits non-blocking 2026-03-02 16:16:11 +08:00
Chummy
c4b79e98a1 fix(ci): align rollback guard defaults with trigger ref 2026-03-02 16:06:46 +08:00
Chummy
ad2010ef5d fix(ci): satisfy strict-delta lint for orchestration changes 2026-03-02 15:49:29 +08:00
Chummy
4635ece80b fix(agent): restore BuildHasher import after rebase 2026-03-02 15:49:29 +08:00
Chummy
0b5665ad9b feat(agent): add adaptive load balancing for teams and subagents 2026-03-02 15:49:29 +08:00
Chummy
49384b1678 feat(agent): intelligent team/subagent orchestration with hot config 2026-03-02 15:49:29 +08:00
Chummy
b17e788211 fix(runtime): harden windows shell fallback and doctor diagnostics 2026-03-02 15:49:25 +08:00
xj
d38a22bc3f
Merge pull request #2505 from gh-xj/fix/2441-repro-build-profile
ci: use release profile in reproducible build check
2026-03-01 23:00:30 -08:00
xj
7000b45e53
Merge branch 'main' into fix/2441-repro-build-profile 2026-03-01 22:41:56 -08:00
Chummy
9b64c44f78 fix: harden tool follow-through and workspace path resolution 2026-03-02 14:35:05 +08:00
xj
374990b351 ci: use release profile in reproducible build check 2026-03-01 22:20:52 -08:00
Chum Yin
af53996604
Merge pull request #2373 from gh-xj/feat/max-token-continuation-policy
feat(agent): add provider-agnostic max-token continuation policy
2026-03-02 13:40:35 +08:00
Chummy
a746b9a3ea
merge(main): sync upstream main and resolve CI workflow conflicts 2026-03-02 13:16:20 +08:00
Argenis
dd946c21c8
Merge pull request #2491 from zeroclaw-labs/issue-2470-windows-link-count
fix(security): avoid unstable windows link-count API
2026-03-01 23:41:28 -05:00
Argenis
b47efad471
Merge pull request #2490 from zeroclaw-labs/issue-2474-codex-oauth-docs-and-model
docs(codex): add oauth quickstart and gpt-5.3 model
2026-03-01 23:41:19 -05:00
Argenis
d941dcf1bf
Merge pull request #2489 from zeroclaw-labs/issue-2483-lark-image-download
fix(lark): fetch image messages via resource endpoint
2026-03-01 23:41:00 -05:00
argenis de la rosa
91a50d2478 fix(sec-audit): restore pinned cargo-deny action on ubuntu 2026-03-01 23:40:36 -05:00
argenis de la rosa
e95f731d05 fix(ci): provision ar and harden syft installer fallbacks 2026-03-01 23:40:36 -05:00
argenis de la rosa
665f7dfb5a fix(ci): normalize target triples in zig cc shim 2026-03-01 23:40:36 -05:00
argenis de la rosa
6a9d5a1f1d fix(ci): add zig-based cc fallback for locked-down runners 2026-03-01 23:40:36 -05:00
argenis de la rosa
8d238c1332 fix(ci): harden self-hosted tool bootstrap for sec-audit 2026-03-01 23:40:36 -05:00
argenis de la rosa
182d7742d1 fix(ci): bootstrap cc toolchain on self-hosted rust workflows 2026-03-01 23:40:36 -05:00
xj
e99ea1a6a3
fix(deps): bump rollup 4.57.1 → 4.59.0 to patch CVE-2026-27606 (#2492)
Fixes arbitrary file write via path traversal in rollup <4.59.0
(GHSA-mw96-cpmx-2vgc). Transitive dev dependency via vite in web/.

Supersedes #1883 (Dependabot could not rebase/recreate due to
removed dependabot.yml entry).

Co-authored-by: xj <gh-xj@users.noreply.github.com>
2026-03-01 20:12:55 -08:00
argenis de la rosa
dcba116a99 fix(skills): allow safe cross-skill markdown references 2026-03-01 22:51:44 -05:00
argenis de la rosa
5ad5a86c4f fix(security): avoid unstable windows link-count API 2026-03-01 22:37:09 -05:00
argenis de la rosa
8179a5ae87 docs(codex): add oauth quickstart and gpt-5.3 model 2026-03-01 22:37:09 -05:00
argenis de la rosa
84d6f8c472 fix(lark): fetch image messages via resource endpoint 2026-03-01 22:37:09 -05:00
Chum Yin
a8e1187ff3
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 11:12:03 +08:00
Chum Yin
ea7ad86a67
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 11:09:42 +08:00
xj
99f6b6664f
Merge pull request #2479 from gh-xj/fix/ci-runner-fallback-main-red
ci(runners): unblock main and add runner incident diagnostics
2026-03-01 19:09:21 -08:00
xj
e33af55436 ci(security): always report required gate on PRs 2026-03-01 19:06:14 -08:00
xj
8d6a425e51 docs(incident): redact runner host identifiers 2026-03-01 18:46:41 -08:00
xj
acbe9d1e7c fix(ci): address copilot review notes 2026-03-01 18:26:30 -08:00
xj
0dfdb578c1 ci(runners): unblock main with hosted fallback and health checks 2026-03-01 18:22:43 -08:00
xj
5a42c17548 Merge remote-tracking branch 'upstream/main' into feat/max-token-continuation-policy 2026-03-01 18:19:45 -08:00
Chum Yin
9abe9119c5
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 10:16:33 +08:00
Chum Yin
d4e13f775a
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 10:07:14 +08:00
xj
4b16ac9219
Merge pull request #2468 from gh-xj/review/pr-1363-plugin-hook-fixes
refactor(plugins): add validation profiles with strict runtime defaults
2026-03-01 18:04:39 -08:00
Chum Yin
7e68b84dd3
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 09:55:41 +08:00
xj
b8e5707d18
Merge pull request #2469 from gh-xj/fix/plugin-p0-audit
fix(plugins): harden approval provenance and dedupe crash path (RMN-270)
2026-03-01 17:54:23 -08:00
Chum Yin
b62aee65fb
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 09:51:11 +08:00
xj
e16bc37017 fix(plugins): block manifest auto-approve spoofing and discovery panic (RMN-270) 2026-03-01 17:28:54 -08:00
Chum Yin
b1c501fa59
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 09:26:42 +08:00
xj
64a2a271c7
Merge pull request #2464 from gh-xj/fix/web-fetch-html2md-wiring
fix(web-fetch): wire html2md feature dependency (RMN-310)
2026-03-01 17:25:12 -08:00
Chum Yin
081d5b99cb
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 09:24:27 +08:00
xj
704e481b79 Merge remote-tracking branch 'upstream/main' into feat/max-token-continuation-policy 2026-03-01 17:23:54 -08:00
xj
036d69daa2
Merge pull request #2341 from zeroclaw-labs/issue-2263-first-extraction
refactor(workspace): scaffold M4-5 PR-1 crate shells and CI lanes [RMN-248]
2026-03-01 17:21:59 -08:00
xj
ab3108c248 merge(main): sync upstream main and resolve CI threshold conflicts 2026-03-01 17:10:43 -08:00
xj
36b047179d fix(ci): format manifest profile regression tests 2026-03-01 17:09:58 -08:00
xj
0cc3144db5 ci(security): verify cargo-deny and enforce strict toolchain pin 2026-03-01 17:05:06 -08:00
xj
b0f2832b14 fix(ci): stabilize workspace/package gates for crate split 2026-03-01 17:05:03 -08:00
xj
362a81a3e5 refactor(plugins): add validation profiles with strict runtime defaults 2026-03-01 17:04:17 -08:00
xj
8441851dbc Merge remote-tracking branch 'upstream/main' into issue-2263-first-extraction 2026-03-01 16:39:16 -08:00
Chum Yin
f7a3c4b3cb
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 08:36:38 +08:00
Chum Yin
676eb0b201
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 08:36:34 +08:00
xj
b9ddd4a867
Merge pull request #2383 from gh-xj/review/pr-1363-plugin-hook-fixes
fix(plugins): align manifest tests with optional metadata (RMN-270)
2026-03-01 16:34:59 -08:00
xj
93010bf75b fix(web-fetch): wire html2md feature dependency 2026-03-01 15:27:11 -08:00
xj
c2380f8457 fix(ci): honor legacy MB binary size overrides 2026-03-01 15:08:31 -08:00
xj
dbd04574db merge(main): sync upstream main and resolve workflow/tooling conflicts 2026-03-01 14:48:14 -08:00
Chum Yin
86747c6e74
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 06:45:59 +08:00
xj
44d168e28e fix(ci): increase timeout budgets for heavy checks 2026-03-01 14:40:51 -08:00
xj
c695f1812e fix(ci): make binary size gate host-aware 2026-03-01 14:39:22 -08:00
xj
8c235ee7af fix(ci): satisfy lint gate and dedupe manifest validation 2026-03-01 14:37:02 -08:00
xj
f01a864481 fix(plugins): align manifest tests with optional metadata 2026-03-01 14:37:02 -08:00
xj
13feef9cee fix(plugins): address copilot review follow-ups 2026-03-01 14:37:02 -08:00
Chum Yin
ee780af047
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 06:02:34 +08:00
xj
61ee2a4664
Merge pull request #1365 from gh-xj/feat/wasm-plugin-runtime-exec
feat(plugins): add wasm runtime execution bridge, limits, and docs (part 2/2) (RMN-271)
2026-03-01 14:00:26 -08:00
Chum Yin
aa85ee47cf
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 05:43:03 +08:00
xj
e85cbd6f79 fix(ci): resolve strict-delta assertion and setup-python pin 2026-03-01 13:34:55 -08:00
xj
6a21ae6026 fix(ci): unblock lint and binary-size guard after main sync 2026-03-01 13:16:11 -08:00
xj
aa0e58aa3c Merge remote-tracking branch 'upstream/main' into feat/max-token-continuation-policy 2026-03-01 13:13:55 -08:00
xj
f547e4d966 merge(main): sync upstream main into feature branch 2026-03-01 13:02:09 -08:00
xj
64ece84706 fix(ci): sync main merge fallout and stabilize sec/test gates 2026-03-01 12:54:35 -08:00
Chum Yin
3082cee0ae
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 04:53:41 +08:00
argenis de la rosa
3726d82f08 feat(autonomy): expand practical defaults for real-world workflows 2026-03-01 15:51:49 -05:00
argenis de la rosa
3f6e192b14 feat(channel): add native Discord approval buttons and interactions 2026-03-01 15:51:27 -05:00
argenis de la rosa
13790ac2b9 fix(agent): refresh system prompt datetime across long-lived turns 2026-03-01 15:51:10 -05:00
xj
b145093488 Merge remote-tracking branch 'upstream/main' into tmp/pr1365-sync 2026-03-01 12:40:02 -08:00
Chum Yin
21eafc5980
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 04:34:58 +08:00
xj
1a52cc078c fix(ci): stabilize hosted-runner security and artifact checks 2026-03-01 12:33:37 -08:00
xj
2cc7589d1a fix(ci): run remaining sec-audit gates on ubuntu hosted 2026-03-01 12:30:02 -08:00
Chum Yin
d8f6820a4c
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 04:29:04 +08:00
Argenis
f7a2fa76e5
Merge pull request #2446 from zeroclaw-labs/issue-2377-non-hot-config-apply-startup
feat(channels): hot-reload startup-bound runtime defaults
2026-03-01 15:27:58 -05:00
Argenis
75078df36e
Merge pull request #2445 from zeroclaw-labs/issue-2378-core-memory-boost
feat(memory): boost Core memories in context retrieval
2026-03-01 15:27:47 -05:00
Argenis
7db3a57d04
Merge pull request #2444 from zeroclaw-labs/issue-2381-pre-compaction-memory-flush
feat(memory): pre-compaction durable fact flush
2026-03-01 15:27:39 -05:00
argenis de la rosa
9fb0e30dac feat(channels): hot-reload runtime tool-loop and memory defaults 2026-03-01 15:24:39 -05:00
argenis de la rosa
3fb11acade feat(memory): boost core memories during context retrieval 2026-03-01 15:24:39 -05:00
argenis de la rosa
2052c720cc feat(memory): flush durable facts before compaction 2026-03-01 15:24:38 -05:00
Chum Yin
888bd4101d
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 04:20:16 +08:00
Argenis
464f8e664b
Merge pull request #2437 from zeroclaw-labs/issue-2371-ui-config-editor
feat(web): add line numbers and TOML syntax highlighting to config editor
2026-03-01 15:19:44 -05:00
Argenis
1821065159
Merge pull request #2436 from zeroclaw-labs/issue-2369-noncli-process-default
feat(autonomy): exclude process by default for non-cli channels
2026-03-01 15:19:42 -05:00
Argenis
73504154da
Merge pull request #2435 from zeroclaw-labs/issue-2376-gemini-multimodal
feat(gemini): support multimodal inlineData in user messages
2026-03-01 15:19:39 -05:00
argenis de la rosa
f3205da359 feat(web): add line numbers and TOML syntax highlighting to config editor 2026-03-01 15:18:50 -05:00
argenis de la rosa
cc9ff1820b feat(autonomy): exclude process by default for non-cli channels 2026-03-01 15:18:48 -05:00
argenis de la rosa
b01462d7a9 feat(gemini): support multimodal inlineData in user messages 2026-03-01 15:18:46 -05:00
Chum Yin
87ae1c8ca6
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 04:14:16 +08:00
Argenis
0787a9cebe
Merge pull request #2387 from reidliu41/time-decay
feat(memory): add time-decay scoring with Core evergreen exemption
2026-03-01 15:12:28 -05:00
Argenis
617135fcda
Merge pull request #2439 from zeroclaw-labs/issue-2384-public-api
feat(agent): expose tool_specs and public run_tool_call_loop
2026-03-01 15:12:25 -05:00
Argenis
ddf0df569e
Merge pull request #2438 from zeroclaw-labs/issue-2391-streamable-http
fix(mcp): support streamable HTTP headers and SSE responses
2026-03-01 15:12:23 -05:00
xj
fd3944eaaa fix(ci): run rust-heavy workflows on github-hosted ubuntu 2026-03-01 12:11:53 -08:00
Chum Yin
0d988e21c6
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 04:05:59 +08:00
argenis de la rosa
12870dfe1f fix(ci): scope pr-auto-response concurrency by event action 2026-03-01 15:05:12 -05:00
argenis de la rosa
51627f7f67 fix(ci): prevent duplicate workflow runs for contributor tier job
The contributor-tier-issues job was triggering on both opened and labeled
events, while labeled-routes also triggers on labeled events. This caused
duplicate workflow runs with "Canceling since a higher priority waiting
request" message.

Fix by limiting contributor-tier-issues to only run for opened/reopened
events, since contributor tier evaluation should happen when the issue/PR
is first created, not when labels are added later.

Fixes #2352

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 15:05:12 -05:00
argenis de la rosa
a0bba8ee1b test(telegram): verify approval prompt markdown payload 2026-03-01 15:05:12 -05:00
argenis de la rosa
5dede160a2 fix(telegram): add Markdown parse_mode to approval prompts
The approval prompt message uses backticks for code formatting
but was missing the parse_mode field, Telegram displays the
backticks literally instead of rendering them as code.

Add "parse_mode": "Markdown" to the sendMessage request body
to enable proper formatting.

Fixes #2359

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 15:05:12 -05:00
argenis de la rosa
09d32dcd79 feat(security): add context-aware command allow rules 2026-03-01 15:05:12 -05:00
argenis de la rosa
f7b6295e9b feat(agent): expose tool_specs and public run_tool_call_loop 2026-03-01 14:56:09 -05:00
argenis de la rosa
c479ea7b10 fix(mcp): support streamable HTTP headers and SSE responses 2026-03-01 14:56:08 -05:00
xj
dd0e504db2 fix(ci): ensure C toolchain for self-hosted rust jobs 2026-03-01 11:49:13 -08:00
xj
0226dac67f fix(ci): make smoke binary size gate configurable on hosted runners 2026-03-01 11:47:40 -08:00
Chum Yin
ba42935923
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 03:44:19 +08:00
argenis de la rosa
890b5b86a9 fix(compatible): preserve native tool-call message sequencing 2026-03-01 14:43:13 -05:00
argenis de la rosa
32172f56da style(onboard): apply rustfmt in provider fallback assertions 2026-03-01 14:43:02 -05:00
argenis de la rosa
058ce1d1d7 fix(anthropic): ignore empty text content blocks 2026-03-01 14:43:02 -05:00
xj
3747f190ad fix(ci): route docs deploy workflow to ubuntu hosted 2026-03-01 11:26:59 -08:00
xj
05b14f56f6 fix(ci): use github context for rust path isolation 2026-03-01 11:25:59 -08:00
xj
06f3470701 fix(ci): run rust security gates on ubuntu hosted runners 2026-03-01 11:25:32 -08:00
xj
6c5c3927fb fix(ci): isolate rust homes and pin Linux self-hosted runners 2026-03-01 11:24:03 -08:00
xj
886d48ccbe fix(ci): isolate rust homes and harden self-hosted cache usage 2026-03-01 11:23:13 -08:00
Chummy
3f81157156
fix(ci): add stable fallback and portable cargo-deny install 2026-03-02 03:21:01 +08:00
Chum Yin
550a856670
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 02:56:06 +08:00
Chummy
05407c3cb4
fix(ci): stabilize cargo toolchain and remove docker deny dependency 2026-03-02 02:39:33 +08:00
Chum Yin
6a5b05fc75
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 02:23:36 +08:00
Argenis
561c4765e1
feat(providers): add responses-mode chat-completions fallback (#2417) 2026-03-01 13:22:55 -05:00
chumyin
d024877ba8 Revert "Feature/multitenant deployment enhancements (#2380)"
This reverts commit 404c43bbe3.
2026-03-02 02:19:47 +08:00
argenis de la rosa
fa2f902259 fix(approval): keep approve-allow one-shot for pending requests
Route /approve-allow through a runtime-only pending-request path so it does not
persist approval policy changes for normal tools.

This preserves always_ask semantics for Telegram inline HITL approvals while
keeping /approve and /approve-request + /approve-confirm as the persistent
approval flows. Update docs and regression assertions accordingly.
2026-03-01 13:17:30 -05:00
argenis de la rosa
c5d84ebc59 test(gateway): fill bluebubbles fields in AppState fixture
Add missing bluebubbles fixture fields to the node-control AppState test
initializer so lib tests compile after AppState struct expansion.
2026-03-01 13:16:47 -05:00
argenis de la rosa
deb13569df fix(config): simplify feishu legacy key detection 2026-03-01 13:16:33 -05:00
argenis de la rosa
d800b1caf5 fix(feishu): map legacy config keys and improve feature guidance 2026-03-01 13:16:33 -05:00
argenis de la rosa
fe3556da58 feat(file_edit): add whitespace-flexible fallback matching 2026-03-01 13:16:19 -05:00
argenis de la rosa
237845f490 feat(cli): include git short sha in version output 2026-03-01 13:15:53 -05:00
Chummy
0e9bd0589b
chore(fmt): align provider fallback assertions with rustfmt 2026-03-02 02:01:39 +08:00
xj
9bc0f24718 fix(ci): route security/repro jobs to Linux-labeled runners 2026-03-01 10:00:36 -08:00
xj
34adc04c7d fix(ci): self-heal when cargo is missing from rust toolchain 2026-03-01 09:58:52 -08:00
xj
9294d38eba Merge remote-tracking branch 'upstream/main' into tmp/pr1365-sync
# Conflicts:
#	.github/workflows/ci-run.yml
2026-03-01 09:57:27 -08:00
xj
c3507e8427 fix(ci): raise long-run job timeouts on self-hosted runners 2026-03-01 09:52:51 -08:00
Chummy
c1a400a859
fix(rebase): restore missing struct fields after main sync 2026-03-02 01:50:42 +08:00
Chum Yin
12578d78ba
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-02 01:47:16 +08:00
Chum Yin
4f222b953f
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 01:38:40 +08:00
Vernon Stinebaker
c66b05194d
fix(gateway): rebuild web/dist assets to fix dashboard WebSocket auth (#2169)
* fix(gateway): rebuild web/dist assets to fix dashboard WebSocket auth

Commit 2ecfa0d2 updated web/src/lib/ws.ts to use Sec-WebSocket-Protocol
for bearer token auth, but web/dist/ was never rebuilt. The embedded
assets still sent the token as a query parameter, which the server
rejects — breaking the Agent tab WebSocket connection for all users.

Rebuild web/dist from current web/src to align embedded assets with the
server-side WebSocket auth contract.

Closes #2168

* ci: retrigger stalled checks

---------

Co-authored-by: Chummy <chumyin0912@gmail.com>
Co-authored-by: chumyin <chumyin@users.noreply.github.com>
2026-03-01 12:36:29 -05:00
myhkstar
404c43bbe3
Feature/multitenant deployment enhancements (#2380)
* feat(deploy): Add initial multitenant deployment script

* feat(deploy): Harden security for multitenant script

- Change gateway host to 127.0.0.1 to prevent direct access.
- Integrate Certbot for automatic HTTPS configuration.
- Improve password handling by using htpasswd directly.
- Rename credentials file and add security warnings.
- Update firewall rules to only allow SSH and Nginx traffic.
- Add comments and improve script readability.

* feat(deploy): Migrate process management to systemd

- Add a systemd service template 'zeroclaw@.service' for robust process management.
- Rebuild 'zeroclaw-ctl' to use systemctl for start, stop, restart, enable, and disable actions.
- Redirect ZeroClaw logs to systemd-journald for centralized logging.
- Update 'zeroclaw-ctl logs' and 'zeroclaw-ctl pairing' to read from journalctl.
- Enhance security of the systemd service with PrivateTmp and ProtectSystem.
- Simplify user-facing commands in the management tool.

* fix(deploy): Improve robustness and user experience

- Add automatic creation of a swap file to prevent out-of-memory issues.
- Revamp 'zeroclaw-ctl status' to display a rich, formatted table including
  status, PID, memory usage, enabled state, and the latest pairing code.
- Confirm that the user ID format mismatch bug was resolved in prior stages.
- Add final polish and comments to the script.

* docs(deploy): Add detailed README for multitenant script

---------

Co-authored-by: GitHub Action <action@github.com>
2026-03-01 12:34:30 -05:00
killf
b0028a05fb
docs: fix web_fetch provider list to include tavily (#2178)
* docs: fix web_fetch provider list to include tavily

The code supports `tavily` as a web_fetch provider (see
src/tools/web_fetch.rs:376-377 and src/config/schema.rs:1603),
but the config reference was missing this provider in the list.

This updates the documentation to reflect the actual implementation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* ci: retrigger intake after metadata update

* ci: retrigger stalled checks

* ci: retrigger stale checks

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Chummy <chumyin0912@gmail.com>
2026-03-01 12:33:58 -05:00
killf
ccdbaf61ec
fix: resolve compilation errors and warnings (#2177)
* ci: retrigger intake after metadata update

* ci: retrigger checks after canceled runs

---------

Co-authored-by: Chummy <chumyin0912@gmail.com>
2026-03-01 12:33:10 -05:00
Chum Yin
dac7e06531
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 01:24:37 +08:00
Argenis
86c60909d0
fix(web): rebuild dist to match ws auth/session behavior (#2343)
- regenerate web/dist from current web/src with npm run build\n- fix AgentChat history typing so web build is type-clean\n- keep websocket auth via Sec-WebSocket-Protocol bearer token and session_id path parity\n\nCloses #2168
2026-03-01 12:22:29 -05:00
Chum Yin
ea938c10a3
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 01:17:02 +08:00
chumyin
528aed53e0 ci: skip docker smoke on self-hosted runners without docker 2026-03-02 01:11:59 +08:00
Chum Yin
3e7eeb3447
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 01:08:09 +08:00
chumyin
b0b747e9bc ci: fix queue hygiene auth for apply mode 2026-03-02 01:00:44 +08:00
Chummy
b07e2f52a2
chore(ci): retrigger workflows after cancelled runners 2026-03-02 00:55:01 +08:00
Chummy
0ffd395745
fix(agent): parse native tool args using normalized slice 2026-03-02 00:55:01 +08:00
chumyin
c691820fa8
test(agent): cover valid native max-tokens tool-call path 2026-03-02 00:55:01 +08:00
chumyin
49b447982f
fix(agent): prefer retry over hard-fail for truncated native calls 2026-03-02 00:55:01 +08:00
chumyin
5c0d66f967
fix(agent): fail closed on malformed native tool args 2026-03-02 00:55:01 +08:00
xj
ceb3aae654
fix(agent): fail closed on truncated native tool calls 2026-03-02 00:55:01 +08:00
xj
ad58bdf99e
fix(providers): harden continuation and gemini stop handling 2026-03-02 00:55:01 +08:00
xj
4f87e96b01
fix(bench): include stop-reason fields in chat responses 2026-03-02 00:55:01 +08:00
xj
f8fd241869
fix(agent): enforce post-merge continuation output cap 2026-03-02 00:55:01 +08:00
xj
f7167ea485
feat(agent): add normalized stop reasons and max-token continuation 2026-03-02 00:55:00 +08:00
Chum Yin
5cac79cbbe
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 00:36:30 +08:00
chumyin
e4fc97f5f2 ci: harden smoke build against transient runner termination 2026-03-02 00:27:25 +08:00
Chum Yin
b36a8d41a6
Merge branch 'main' into feat/feishu-doc-tool 2026-03-02 00:22:27 +08:00
Chummy
364ab048ac fix(security): harden non-local gateway auth boundaries 2026-03-02 00:21:19 +08:00
chumyin
1ab6d2db41 fix: restore security and stability scan gates 2026-03-02 00:21:16 +08:00
chumyin
0e54a64dfd docs(commands): include stepfun in models refresh support list 2026-03-02 00:21:12 +08:00
chumyin
feabd7e488 fix(onboard): honor provider fallback env keys for model discovery 2026-03-02 00:21:12 +08:00
chumyin
2630486ca8 feat(providers): add StepFun provider with onboarding and docs parity 2026-03-02 00:21:12 +08:00
chumyin
37b19365c8 fix: stabilize bedrock credential test and portable sha256 2026-03-02 00:21:08 +08:00
chumyin
69fbad0381 chore: drop markdown-only replay artifacts from backfill PR 2026-03-02 00:21:08 +08:00
Chummy
6d25a060c1 feat(skills): add trusted domain policy and transparent preloads 2026-03-02 00:21:08 +08:00
Chummy
afe615162a ci: remove dev-to-main promotion gate and align main flow 2026-03-02 00:21:08 +08:00
chumyin
3b2c601e6e providers: fallback native tools on 516 schema errors 2026-03-02 00:21:05 +08:00
chumyin
49a63d5e30 chore(pr-2394): remove internal docs/project artifacts 2026-03-02 00:20:46 +08:00
chumyin
7c8e4d115a fix(ci): resolve lint gate for orchestration PR 2026-03-02 00:20:46 +08:00
chumyin
479b7a9043 style: apply rustfmt to shared memory and xlsx modules 2026-03-02 00:20:46 +08:00
chumyin
be0f52fce7 feat(agent): add end-to-end team orchestration bundle 2026-03-02 00:20:46 +08:00
killf
6d8beb80be chore: add .claude to .gitignore
Add .claude directory to .gitignore to exclude Claude Code
configuration and cache files from version control.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-01 22:39:55 +08:00
Chum Yin
8a944fcdb0
Merge branch 'main' into feat/feishu-doc-tool 2026-03-01 22:25:50 +08:00
Chum Yin
a95f55c82b
Merge branch 'main' into fix/release-v0.1.8-build-errors 2026-03-01 22:25:38 +08:00
Chummy
b64cae9d3d docs(test): note Rust 1.88 alignment for release checks 2026-03-01 22:23:07 +08:00
Chummy
efcc4928ea docs(changelog): note agent session persistence rollout keys 2026-03-01 22:22:53 +08:00
Chummy
8724945742 docs(testing): add mention_only non-text regression check 2026-03-01 22:22:25 +08:00
Chummy
bf660f0b4c
docs(ci): clarify PR intake re-trigger semantics 2026-03-01 22:12:32 +08:00
Chummy
68c61564c6
ci: make PR intake Linear key advisory 2026-03-01 21:52:45 +08:00
argenis de la rosa
82692b38d9 chore(ci): trigger intake checks after template sync 2026-03-01 08:03:20 -05:00
chumyin
3cf66c281a
chore: trigger intake checks for RMN-298 2026-03-01 12:39:56 +00:00
chumyin
3236c92c93
chore: trigger intake checks for RMN linkage 2026-03-01 12:36:24 +00:00
chumyin
a3f22961ce
Merge origin/main into feat/feishu-doc-tool 2026-03-01 11:49:43 +00:00
chumyin
758072cf6e
Merge origin/main into fix/release-v0.1.8-build-errors 2026-03-01 11:48:21 +00:00
reidliu41
1431e9e864 feat(memory): add time-decay scoring with Core evergreen exemption 2026-03-01 19:25:54 +08:00
xj
0605f65ca8 style: apply rustfmt for CI lint gate 2026-03-01 03:11:24 -08:00
xj
62e1a123a0 fix(ci): stabilize self-hosted runner compatibility 2026-03-01 02:32:01 -08:00
xj
30bd2bac71 fix(plugins): satisfy strict-delta clippy on runtime 2026-03-01 01:41:48 -08:00
xj
ac27788a3b fix(plugins): wire provider routing and timeout permit release 2026-03-01 01:21:53 -08:00
xj
1da53f154c
Merge branch 'main' into feat/wasm-plugin-runtime-exec 2026-03-01 00:57:15 -08:00
argenis de la rosa
898e102510 feat(heartbeat): add dedupe and per-tick runtime caps 2026-03-01 00:50:51 -05:00
argenis de la rosa
4756d70d95 feat(workspace): scaffold M4-5 crate shells and CI package lanes 2026-03-01 00:38:20 -05:00
argenis de la rosa
4e70abf407 fix(cost): validate route_down hint against model routes 2026-03-01 00:17:27 -05:00
argenis de la rosa
4043056332 feat(cost): enforce preflight budget policy in agent loop 2026-03-01 00:17:27 -05:00
Preventnetworkhacking
9ef617289f fix(mcp): stdio transport reads server notifications as tool responses, registering 0 tools [CDV-2327]
Replace single read with deadline-bounded loop that skips JSON-RPC messages
where id is None (server notifications like notifications/initialized).

Some MCP servers send notifications/initialized after the initialize response
but before the tools/list response. The old code would read this notification
as the tools/list reply, see result: None, and report 0 tools registered.

The fix uses a deadline-bounded loop to skip any JSON-RPC message where
id is None while preserving the total timeout across all iterations.

Fixes: zeroclaw-labs/zeroclaw#2327
2026-03-01 00:15:41 -05:00
argenis de la rosa
20d4e1599a feat(skills): add trusted symlink roots for workspace skills 2026-03-01 00:03:53 -05:00
Argenis
305d9ccb7c
fix(docs): keep install guidance canonical in README/docs (#2335) 2026-02-28 23:54:26 -05:00
Argenis
0683467bc1
fix(channels): prompt non-CLI always_ask approvals (#2337)
* fix(channels): prompt non-cli always_ask approvals

* chore(ci): retrigger intake after PR template update
2026-02-28 23:53:59 -05:00
Argenis
f3c82cb13a
feat(tools): add xlsx_read tool for spreadsheet extraction (#2338)
* feat(tools): add xlsx_read tool for spreadsheet extraction

* chore(ci): retrigger intake after PR template update
2026-02-28 23:51:34 -05:00
argenis de la rosa
f83c9732ca chore(ci): keep gateway docs fix docs-only 2026-02-28 23:34:35 -05:00
argenis de la rosa
fb124b61d4 fix(docs): correct first-run gateway commands 2026-02-28 23:34:35 -05:00
argenis de la rosa
28eaef1782 fix(ci): reduce queue saturation via branch supersedence 2026-02-28 23:34:19 -05:00
argenis de la rosa
1ecace23a7 feat(update): add install-aware guidance and safer self-update 2026-02-28 23:28:06 -05:00
argenis de la rosa
0129b5da06 feat(onboard): add hybrid sqlite+qdrant memory option in wizard 2026-02-28 23:27:17 -05:00
argenis de la rosa
ddfbf3d9f8 fix(bootstrap): fallback when /dev/stdin is unreadable in guided mode 2026-02-28 23:24:11 -05:00
argenis de la rosa
08ce6fefd8 fix(plugins): align wasm runtime rebase with main schema 2026-02-28 21:33:11 -05:00
xj
db3a16c86a docs: fix markdown lint issues in wasm plugin docs 2026-02-28 21:33:11 -05:00
xj
bdcb8b6916 fix(runtime): resolve wasm store borrow and default impl conflicts 2026-02-28 21:33:11 -05:00
xj
2af737518b fix(security): upgrade wasmtime to rustsec-patched 36.0.5 2026-02-28 21:33:11 -05:00
xj
9b0aa53adf feat(plugins): enforce runtime limits and add echo plugin example 2026-02-28 21:33:11 -05:00
xj
6091553d12 test(plugins): add runtime abi and registry mapping unit tests 2026-02-28 21:33:11 -05:00
xj
5d181670ac docs(plugins): add experimental runtime contract and wasm abi guide 2026-02-28 21:33:11 -05:00
xj
05d36862c5 feat(plugins): add hot-reload state and activate observer bridge 2026-02-28 21:33:11 -05:00
xj
49a520df3e feat(plugins): execute wasm tools/providers via host abi bridge 2026-02-28 21:33:11 -05:00
Argenis
403e1e3e18
Merge branch 'main' into pr-1837-s34-main-rebased 2026-02-28 21:23:56 -05:00
argenis de la rosa
a1d51b6454 feat(agent): add ProgressTracker for in-place tool progress updates 2026-02-28 21:19:16 -05:00
argenis de la rosa
3bcce8b6fa feat(hardware): replay prompt wiring and shorthand tool-call parsing [RMN-1837] 2026-02-28 21:17:42 -05:00
argenis de la rosa
50372c116a feat(hardware): add pico flash and runtime code toolchain 2026-02-28 21:17:42 -05:00
Argenis
bcaf4c4156
docs: update description from "Operating System" to "Framework"
Update ZeroClaw's description to better reflect its role as a framework for building agentic workflows.
2026-02-28 21:11:25 -05:00
argenis de la rosa
250a2247cd feat(hardware): add gpio_read/gpio_write tool implementations 2026-02-28 21:09:59 -05:00
argenis de la rosa
8eeea3fca1 feat(hardware): add device registry and serial transport foundations 2026-02-28 21:09:59 -05:00
argenis de la rosa
bfacba20cb feat(config): add ProgressMode enum for streaming channel draft updates 2026-02-28 20:56:16 -05:00
Preventnetworkhacking
84b43ba4b2 feat(memory): add reindex command to rebuild embeddings [CDV-28]
Adds `zeroclaw memory reindex` CLI command to rebuild embeddings for all
stored memories. Use this after changing the embedding model/provider to
ensure vector search works correctly with the new embeddings.

Changes:
- Add `Reindex` variant to `MemoryCommands` enum (lib.rs, main.rs)
- Add `reindex` method to `Memory` trait with default not-supported impl
- Implement `reindex` in SqliteMemory:
  - Clears embedding_cache table
  - Iterates all memories and recomputes embeddings
  - Updates embedding column in memories table
- Add CLI handler with confirmation prompt and progress output

Usage:
  zeroclaw memory reindex        # Interactive confirmation
  zeroclaw memory reindex --yes  # Skip confirmation
  zeroclaw memory reindex --progress=false  # Hide progress

Fixes #2273
2026-02-28 20:56:03 -05:00
argenis de la rosa
6fa9dd013c docs(rfi): add F1-3 and Q0-3 state machine design docs 2026-02-28 20:39:16 -05:00
Chummy
7ea54caff5 docs(changelog): list feishu_doc tool actions 2026-03-01 09:11:33 +08:00
xj
11498ab099 fix(build): update history test call and apply rustfmt drift 2026-02-28 17:09:50 -08:00
xj
339cff20f8 test(session): deflake sqlite session expiry cleanup assertion 2026-02-28 17:00:19 -08:00
xj
236706a4ac style(security): apply rustfmt in policy tests 2026-02-28 17:00:18 -08:00
xj
9095a54de3 fix(main): restore rust gates after bluebubbles merge 2026-02-28 17:00:18 -08:00
argenis de la rosa
36a490388c fix(plugins): align hook config with rebased foundation 2026-02-28 19:51:37 -05:00
argenis de la rosa
0ccff1cd12 fix(plugins): preserve tool errors and support config-aware reinit 2026-02-28 19:51:37 -05:00
argenis de la rosa
f677367e4b style: apply rustfmt to agent-authored changes 2026-02-28 19:51:37 -05:00
argenis de la rosa
f90ac82d4c fix(security): add capability gating for hook tool-result modification
Add `capabilities()` method to HookHandler trait so the runner can check
whether a hook has ModifyToolResults permission before allowing it to
mutate tool results. Without this, any registered hook could flip success,
rewrite output, or suppress errors with no gate.
2026-02-28 19:51:37 -05:00
argenis de la rosa
467fea87c6 refactor(hooks): extract HookRunner factory and make plugin registry init idempotent
- Add HookRunner::from_config() factory that encapsulates hook construction
  from HooksConfig, replacing 3 duplicated blocks in agent/loop_, gateway,
  and channels modules.
- Make plugin registry initialize_from_config() idempotent: skip re-init
  if already initialized, log debug message instead of silently overwriting.
- Add capability gating for tool_result_persist hook modifications.
2026-02-28 19:51:37 -05:00
argenis de la rosa
52e8fd9cc3 fix(build): add missing mut binding and remove duplicated plugin tool block 2026-02-28 19:51:37 -05:00
argenis de la rosa
ddb88bb021 docs: fix markdown lint issues in wasm plugin design plans 2026-02-28 19:51:37 -05:00
argenis de la rosa
5bc98842b7 chore: refresh lockfile and apply rustfmt 2026-02-28 19:51:37 -05:00
argenis de la rosa
10b12ba2cb fix(build): restore rust 1.87 compatibility for plugin foundation 2026-02-28 19:51:37 -05:00
argenis de la rosa
ade0e91898 feat(plugins): route declared tools/providers through plugin registry 2026-02-28 19:51:37 -05:00
argenis de la rosa
1d6afe792b feat(plugins): scaffold wasm runtime and wire core hook lifecycle 2026-02-28 19:51:37 -05:00
argenis de la rosa
c3dbd9a7a7 fix(quality): remove infallible unwraps in sop and skillforge 2026-02-28 19:37:38 -05:00
argenis de la rosa
d9dba0c76f fix(observability): propagate prometheus metric registration failures 2026-02-28 19:34:19 -05:00
argenis de la rosa
5b6348d103 fix(telegram): deduplicate attachment markers in single reply 2026-02-28 19:24:39 -05:00
argenis de la rosa
9a16098f49 fix(gateway): pass session id in bluebubbles chat path 2026-02-28 19:23:30 -05:00
argenis de la rosa
c56c33d477 test(channels): add new runtime context fields in approval command tests 2026-02-28 19:23:30 -05:00
Chummy
cc80d51388 fix: align telegram ack constructor usage after rebase 2026-02-28 19:23:30 -05:00
Chummy
f1009c43a3 fix: resolve ack config rebase drift across telegram runtime 2026-02-28 19:23:30 -05:00
Chummy
762ca25e19 feat(channels): add chat-scoped ACK rules and simulation aggregates 2026-02-28 19:23:30 -05:00
argenis de la rosa
cd3c6375d7 fix(channels): resolve approval command merge conflict on main 2026-02-28 19:23:30 -05:00
Chummy
f594a233b0 feat(channels): enrich ack reaction policy with regex sampling and simulate 2026-02-28 19:23:30 -05:00
Chummy
8583f59066 feat(channels): add configurable ack reactions and channel ack config tool 2026-02-28 19:23:30 -05:00
argenis de la rosa
0b72b45d90 fix(providers): harden circuit breaker cooldown and validation 2026-02-28 19:22:09 -05:00
argenis de la rosa
c4458a3d5d fix(migration): harden openclaw onboarding and tool safety 2026-02-28 19:01:50 -05:00
Chummy
39daa626b4 fix(ci): align telegram/channel fixtures and strict-delta blockers 2026-02-28 19:01:50 -05:00
Chummy
d9d9bedf3e feat(migration): ship merge-first openclaw onboarding + agent tool 2026-02-28 19:01:50 -05:00
argenis de la rosa
df82a0ce64 fix(config): enforce ZEROCLAW_API_KEY precedence over regional aliases 2026-02-28 18:58:04 -05:00
Ignas Baranauskas
02e50f3b39 fix(config): prevent generic API_KEY env var from overriding configured api_key
The generic `API_KEY` environment variable unconditionally overwrote the
api_key loaded from config.toml, even when a valid key was already
configured. Since `API_KEY` is a very common env var name set by many
unrelated tools, this caused silent auth failures when the unrelated
value was sent to the configured provider.

Change the precedence so that `ZEROCLAW_API_KEY` always wins (explicit
intent), while `API_KEY` is only used as a fallback when the config has
no api_key set.
2026-02-28 18:58:04 -05:00
xj
09119d46ec
Merge pull request #2271 from maxtongwang/feat/channel-bluebubbles
feat(channel): add BlueBubbles iMessage channel
2026-02-28 15:56:36 -08:00
bevis
812c2f62f8 fix(channels): bound dingtalk rich text recursion 2026-02-28 18:50:59 -05:00
bevis
c2b361d093 fix(channels): accept richer dingtalk callback text payloads 2026-02-28 18:50:59 -05:00
argenis de la rosa
c3a6e8acfe chore(scripts): add REST-first PR verification helper 2026-02-28 18:48:08 -05:00
argenis de la rosa
7d6d90174f feat(channel): use DingTalk Open API for sending messages
- Switch from sessionWebhook to /v1.0/robot/oToMessages/batchSend API
- Add access_token caching with automatic refresh (60s buffer)
- Enable cron job delivery to DingTalk (no user interaction required)

This change allows DingTalk to actively send messages (e.g., cron
reminders) without requiring the user to send a message first.
2026-02-28 18:41:51 -05:00
ZeroClaw Bot
979b5fa791 fix(skills): harden tool_handler security per CodeRabbit review
- Validate Integer/Boolean parameter types before shell substitution
- Add SecurityPolicy checks (rate limit, command validation, action recording)
- Redact rendered command from debug logs (log template only)

Made-with: Cursor
2026-02-28 18:33:49 -05:00
ZeroClaw Bot
7672ca9044 feat(skills): add native tool handler for SKILL.toml-based skills
Add SkillToolHandler that converts SKILL.toml definitions into native
tool schemas, enabling skills to be invoked as standard tools through
the agent's tool-use protocol.

Made-with: Cursor
2026-02-28 18:33:49 -05:00
argenis de la rosa
4f32820cde fix(deps): align wasmtime/wasmtime-wasi 36.0.6 and update WASI pipe path 2026-02-28 18:33:48 -05:00
ZeroClaw Bot
630a52b397 fix(auth): harden OAuth UX per CodeRabbit review
- Replace brittle split("state=") with parse_query_params utility
- Use const PROFILE_MISMATCH_PREFIX with starts_with instead of fragile contains

Made-with: Cursor
2026-02-28 18:33:17 -05:00
ZeroClaw Bot
fd1a9b7a07 fix(auth): address CodeRabbit review feedback on OAuth UX
Made-with: Cursor
2026-02-28 18:33:17 -05:00
ZeroClaw Bot
2d5c0142d2 feat(auth): improve OAuth UX for server environments
Add stale pending login detection (auto-cleanup after 24h), improved
device-code flow error messages with Cloudflare/403 detection, shared
OAuth helpers, and Box::pin fixes for large async futures.

Made-with: Cursor
2026-02-28 18:33:17 -05:00
argenis de la rosa
7058b15cc4 fix(security): harden git config readonly checks 2026-02-28 18:23:37 -05:00
argenis de la rosa
4ce4ec5f34 feat(security): allow read-only git config operations
Previously, `is_args_safe()` blocked ALL `git config`, `git alias`,
and `git -c` subcommands unconditionally. This forced administrators
to pre-create `.gitconfig` files outside ZeroClaw.

Now allow read-only git config operations:
- `git config --get <key>` - read single value
- `git config --list` / `git config -l` - list all config
- `git config --get-all <key>` - get all values for key
- `git config --get-regexp <pattern>` - list matching keys
- `git config --get-urlmatch <pattern>` - URL matching

Write operations remain blocked:
- `git config user.name "value"` (plain write)
- `git config --unset <key>`
- `git config --add <key> <value>`
- `git config --global <key> <value>` (scoped write)
- `git config -e` / `--edit` (opens editor)
- `git alias.*` and `git -c` remain fully blocked

Security impact: Read operations have no side effects and cannot
be used for code execution. The dangerous keys (core.editor,
credential.helper, alias.*) remain protected since we only allow
explicitly read-only operations.

Fixes #1398

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 18:23:37 -05:00
argenis de la rosa
728782d369 fix(agent): close run() wrapper calls in replayed hook wiring 2026-02-28 18:19:48 -05:00
argenis de la rosa
408616b34e feat(agent): expose hooks parameter in public run() entry point
Add `hooks: Option<&crate::hooks::HookRunner>` as the last parameter
to the public `agent::run()` (re-exported from `loop_::run`).

This enables library consumers to inject custom HookHandler
implementations (before_tool_call, on_after_tool_call) without
patching the crate. The hooks are threaded through to
`run_tool_call_loop` which already accepts and dispatches them.

All existing call sites pass `None`, preserving backward compatibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 18:19:48 -05:00
argenis de la rosa
fe688d6b1a fix(agent): remove stale loop session imports 2026-02-28 18:18:43 -05:00
argenis de la rosa
b0a3fbd338 test(security): assert /mnt in default forbidden path checks 2026-02-28 18:09:35 -05:00
Antigravity Agent
28b9d81464 security: add /mnt to default forbidden_paths 2026-02-28 18:09:35 -05:00
argenis de la rosa
11c34fa7e6 fix(build): restore Catalina (macOS 10.15) compatibility
Two root causes were addressed:

1. `wasm-tools` (wasmtime 28 + cranelift JIT) was listed in `default`
   features. wasmtime's JIT backend has macOS version dependencies that
   break builds and/or runtime on Catalina. The feature is now opt-in;
   the default build is free of JIT dependencies and Catalina-safe.
   Users on macOS 11+ can still enable it with `--features wasm-tools`.

2. `.cargo/config.toml` had no macOS target entries, so the binary's
   minimum deployment version was left to toolchain defaults (which can
   be set to macOS 11+ on newer hosts). Added explicit
   `-mmacosx-version-min=10.15` for `x86_64-apple-darwin` and
   `-mmacosx-version-min=11.0` for `aarch64-apple-darwin` (no Catalina
   hardware exists for Apple Silicon).

Also added a "macOS Catalina (10.15) Compatibility" section to
`docs/troubleshooting.md` covering symptoms, root causes, and fixes.

https://claude.ai/code/session_01L2arD1QmRH1cRejbCmhyRf
2026-02-28 18:03:22 -05:00
xj
da54f8f85f fix(config): redact BlueBubbles server_url userinfo in Debug 2026-02-28 14:59:41 -08:00
argenis de la rosa
9ffe9c381b fix(tools): register bg_run tools in runtime registry 2026-02-28 17:54:22 -05:00
Daniel Willitzer
d943f9c28c feat(tools): add bg_run — background tool execution with security hardening
Adds async background tool execution with auto-injection of completed results:
- BgRunTool: Dispatches any tool in background, returns job_id immediately
- BgStatusTool: Queries job status by ID or lists all jobs
- BgJobStore: In-memory job tracking per session
- Auto-injection: Completed jobs appear as <bg_result> XML in agent history

Security hardening (Track C):
- MAX_CONCURRENT_JOBS=5 prevents resource exhaustion
- XML escaping prevents injection attacks in format_bg_result_for_injection
- Recursion guard blocks bg_run spawning itself or bg_status
- Hard 600s timeout per job guaranteed
- One-time delivery prevents duplicate injection
- 5-minute auto-expiry bounds memory growth

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 17:54:22 -05:00
xj
f2c7498345 Merge remote-tracking branch 'upstream/main' into feat/channel-bluebubbles
# Conflicts:
#	src/channels/mod.rs
2026-02-28 14:52:34 -08:00
Shadman Hossain
d5cea40fed fix(bedrock): auto-refresh AWS credentials before STS token expiry
Add CachedCredentials with 50-minute TTL that transparently refreshes
from the ECS container credential endpoint, env vars, or EC2 IMDS.

- Add from_ecs() to credential resolve chain for ECS/Fargate support
- Move streaming credential fetch into async context for TTL validation
- Remove sync credential fallback (all paths now use TTL-aware cache)
- Double-checked locking prevents thundering herd on refresh

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 17:51:57 -05:00
argenis de la rosa
61d538b6d6 feat(slack): support listening on multiple configured channel IDs 2026-02-28 17:48:20 -05:00
Argenis
e1b571140f
Merge pull request #2093 from VirtualHotBar/main
feat(session): Add channel session persistence support
2026-02-28 17:48:17 -05:00
argenis de la rosa
7f645e1a5c Merge remote-tracking branch 'origin/main' into pr2093-mainmerge 2026-02-28 17:43:48 -05:00
dependabot[bot]
c3ff4997fd chore(deps): bump tokio-postgres-rustls from 0.12.0 to 0.13.0
Bumps [tokio-postgres-rustls](https://github.com/jbg/tokio-postgres-rustls) from 0.12.0 to 0.13.0.
- [Release notes](https://github.com/jbg/tokio-postgres-rustls/releases)
- [Commits](https://github.com/jbg/tokio-postgres-rustls/compare/v0.12.0...v0.13.0)

---
updated-dependencies:
- dependency-name: tokio-postgres-rustls
  dependency-version: 0.13.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-02-28 17:42:56 -05:00
argenis de la rosa
c07314bd92 merge(main): resolve #2093 conflicts and restore session build/test parity 2026-02-28 17:42:37 -05:00
argenis de la rosa
b687678d9c fix(ci): pin GitHub Actions in deploy-web workflow 2026-02-28 17:41:58 -05:00
argenis de la rosa
d09bf17245 docs(readme): simplify to essential info only
- Remove detailed setup/docs sections (point to docs/ folder)
- Keep hero, features, benchmark, and essential footer
- Fix web App.tsx loading variable destructuring
- Add GitHub Pages deployment workflow
2026-02-28 17:41:58 -05:00
dependabot[bot]
e7b4cdc91f chore(deps): bump lycheeverse/lychee-action from 2.7.0 to 2.8.0 in the actions-all group
Dependabot couldn't find the original pull request head commit, 485163defac8130d7d3062144804df1c059c4c2b.
2026-02-28 17:39:30 -05:00
argenis de la rosa
e663ad274e fix(acp): harden process lifecycle and health validation 2026-02-28 17:36:39 -05:00
argenis de la rosa
1b8d747e1f fix(acp): stabilize send path, remove unreachable arms, and fix docs tables 2026-02-28 17:36:39 -05:00
feishiheng
579f0f3d9a feat(channels): add comprehensive ACP channel tests and fix implementation bugs (conflict resolution)
- Add 14 test functions for ACP channel (allowlist logic and JSON-RPC structures)
- Fix mutex guard across await in send() method using take() pattern
- Add acp: None default fields to ChannelsConfig in schema.rs
- Integrate ACP channel into channels/mod.rs and collect_configured_channels()
- Update channels-reference.md documentation
- Resolve merge conflicts with upstream/dev

All 17 ACP tests pass successfully.
2026-02-28 17:36:39 -05:00
argenis de la rosa
f7de9cda3a Merge remote-tracking branch 'origin/main' into pr2093-mainmerge 2026-02-28 17:33:17 -05:00
argenis de la rosa
52b5cf5c7e fix(web): stabilize config form state transitions and a11y 2026-02-28 17:30:21 -05:00
boengai
11b08d2184 feat(web): add data-driven config form editor with category navigation
Replace raw TOML textarea with a structured form editor supporting
48 config sections (~200 fields), masked sensitive values, category
pill navigation, two-column responsive grid, and search filtering.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 17:30:21 -05:00
argenis de la rosa
470af7051c fix(security): deny tool calls requiring approval on remote channels
Remote channels (Telegram, Discord, Slack, etc.) previously either
auto-approved tool calls requiring approval (in the else-branch) or
bypassed the approval check entirely (by passing None for the
ApprovalManager). Both paths allowed unapproved tool execution.

This fix:
- Wires ApprovalManager into ChannelRuntimeContext so remote channels
  actually enter the approval check
- Changes the non-CLI branch from auto-approve to deny-by-default
- Adds a tracing::warn log and descriptive error message guiding users
  to configure auto_approve or set autonomy level to Full
- Updates stale doc comment on prompt_cli

Co-authored-by: xj <gh-xj@users.noreply.github.com>
2026-02-28 17:27:45 -05:00
argenis de la rosa
17921f24fc fix(cron): require recurring opt-in for agent cron schedules 2026-02-28 17:25:22 -05:00
argenis de la rosa
9a6d7e03c4 fix(cron): require explicit opt-in for recurring agent jobs 2026-02-28 17:25:22 -05:00
argenis de la rosa
af314611af test(memory): verify cortex bridge failure preserves local sqlite writes 2026-02-28 17:25:03 -05:00
argenis de la rosa
f4d06a3a73 feat(memory): add optional cortex-mem backend profile and bridge 2026-02-28 17:25:03 -05:00
argenis de la rosa
9b9c36e84d test(channels): lock assistant autosave key namespace 2026-02-28 17:24:31 -05:00
argenis de la rosa
6a24f25f91 fix(memory): auto-save assistant responses alongside user messages 2026-02-28 17:24:31 -05:00
Argenis
dfeb891821
docs: update description from "AI assistant infrastructure" to "Operating System"
Update the project description to better reflect ZeroClaw's role as an operating system for agentic workflows rather than just assistant infrastructure.
2026-02-28 17:22:15 -05:00
maxtongwang
e057e17de5 fix(channel/bluebubbles): register service key + fix pre-existing fmt
- add "channel.bluebubbles" to SUPPORTED_PROXY_SERVICE_KEYS so proxy
  scope = "services" can target BlueBubbles via exact service key
  (addresses final CodeRabbit finding on PR #2271)
- apply cargo fmt to auth_profile.rs and quota_tools.rs (pre-existing
  formatting drift that would block cargo fmt --check in CI)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-28 13:37:18 -08:00
Preventnetworkhacking
276c470c1f docs(readme): restore Quick Start section [CDV-26]
Fixes #2275

The Quick Start section was previously removed, leaving a broken anchor link.
This restores essential installation instructions directly in the README:

- Homebrew install (one command)
- Clone + bootstrap (recommended for most users)
- Cargo install (for Rust developers)
- First run commands

Users no longer need to hunt through docs/ to find basic install steps.
2026-02-28 16:35:28 -05:00
Preventnetworkhacking
d89e7ab415 fix(utf8): prevent panic on CJK text truncation [CDV-27]
Fix UTF-8 boundary panics in two locations not covered by PR #2154:

1. telegram.rs:3112 - OTP/approval message preview truncation
   - Changed from byte-based `&raw_args[..260]` to char-based truncation
   - Uses existing `truncate_with_ellipsis` utility

2. detection.rs:222 - Tool output hash prefix
   - Changed from `&output[..4096]` to UTF-8-safe boundary
   - Uses existing `floor_utf8_char_boundary` utility

Added test: hash_output_utf8_boundary_safe() verifies no panic on CJK text.

Fixes #2276
2026-02-28 16:35:09 -05:00
argenis de la rosa
df9ebcb3d2 fix(model): resolve provider-aware fallback model IDs 2026-02-28 16:33:04 -05:00
argenis de la rosa
5d248bf6bf fix(build): restore gateway and cursor compile compatibility 2026-02-28 16:32:38 -05:00
argenis de la rosa
f9330750f0 chore(rfi): satisfy markdown quality gate for spike doc 2026-02-28 16:32:38 -05:00
argenis de la rosa
0321741b79 docs(project): add m4-5 workspace RFI baseline and benchmark harness 2026-02-28 16:32:38 -05:00
argenis de la rosa
9e4ecc0ee6 fix(slack): resolve sender display names with cache 2026-02-28 16:23:45 -05:00
argenis de la rosa
bd0a12ad3c fix(gateway): persist ws chat history by session 2026-02-28 16:23:15 -05:00
argenis de la rosa
a30869112e fix(lark,config): complete feishu dedupe and profile config compatibility 2026-02-28 16:17:24 -05:00
argenis de la rosa
4f32b6d8e4 fix(lark): dedupe ws and webhook events by event key 2026-02-28 16:17:24 -05:00
argenis de la rosa
991955ddce fix(gateway): pass optional session id in github webhook path 2026-02-28 16:01:42 -05:00
argenis de la rosa
4af196ab04 fix(routing): normalize hint whitespace for default_model routes 2026-02-28 16:01:42 -05:00
argenis de la rosa
12018b4a03 fix(provider): include quota metadata in cursor chat response 2026-02-28 16:01:42 -05:00
argenis de la rosa
2d91536f92 feat(routing): support hint default_model during startup 2026-02-28 16:01:42 -05:00
xj
11e68485e9
fix(fmt): correct remaining rustfmt violations on main (#2282)
Fixes formatting in auth_profile.rs and quota_tools.rs missed by #2280.

Co-authored-by: xj <gh-xj@users.noreply.github.com>
2026-02-28 13:01:21 -08:00
xj
d2d979738e
fix(fmt): correct rustfmt violations on main (#2280)
Fixes formatting issues in cron/store.rs, memory/sqlite.rs, and
tools/git_operations.rs that cause cargo fmt --check to fail.

Closes #2279

Co-authored-by: xj <gh-xj@users.noreply.github.com>
2026-02-28 12:56:54 -08:00
maxtongwang
32150c85fb fix(channel/bluebubbles): address CodeRabbit review findings
- typing_handle → typing_handles: per-recipient HashMap to prevent
  concurrent conversations from cancelling each other's typing loops
- add is_sender_ignored() method; enforce ignore_senders before allowlist
  evaluation in parse_webhook_payload (precedence: ignore > allow)
- wire BlueBubblesConfig.password and .webhook_secret into
  decrypt_channel_secrets / encrypt_channel_secrets in config/schema.rs
- add 3 unit tests covering is_sender_ignored edge cases
2026-02-28 12:20:47 -08:00
maxtongwang
e37a53c690
fix(web-fetch): remove dead feature gates and add noise stripping (#2262)
* fix(web-fetch): remove dead feature gates, add noise stripping, add docstrings

The nanohtml2text and fast_html2md providers were both guarded by
cfg(feature) checks for features (web-fetch-plaintext, web-fetch-html2md)
that are never declared in Cargo.toml. This caused every web_fetch call
to silently return an error instead of fetching content.

Changes:
- Add strip_noise_elements() which removes <script>, <style>, <nav>,
  <header>, <footer>, <aside>, <noscript>, <form>, <button> blocks
  before text extraction, eliminating menu/ad/boilerplate noise.
- Fix fast_html2md path: when web-fetch-html2md feature is not compiled
  in, fall through to nanohtml2text rather than returning an error.
- Fix nanohtml2text path: remove dead cfg(feature = "web-fetch-plaintext")
  gate; nanohtml2text is a direct dependency and needs no feature flag.
- Both previously gated tests (html_to_markdown_conversion_preserves_structure,
  html_to_plaintext_conversion_removes_html_tags) are now always-on.
  Added strip_noise_removes_nav_scripts_footer test.
- Add docstrings to all public/private methods to meet coverage threshold.

Tavily and firecrawl providers are unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(web-fetch): align default provider to nanohtml2text, remove dead feature

- Change empty-provider default from deprecated 'fast_html2md' to
  'nanohtml2text' to match WEB_FETCH_PROVIDER_HELP and PR description.
- Remove dead 'web-fetch-plaintext' feature from Cargo.toml (no code
  references it after the feature-gate removal).
- Apply cargo fmt to strip_noise_elements array formatting.

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: xj <gh-xj@users.noreply.github.com>
2026-02-28 12:19:40 -08:00
maxtongwang
4d195be713 feat(channel): add BlueBubbles iMessage channel
Adds a BlueBubbles channel so ZeroClaw can send and receive iMessages
via a locally-running BlueBubbles server on macOS.

What changed:
- src/channels/bluebubbles.rs — new BlueBubblesChannel implementing Channel
  - Webhook-based ingestion (push, no polling)
  - Allowlist + ignore_senders filtering
  - Rich text via iMessage Private API (attributedBody bold/italic/underline)
  - Typing indicator (start_typing / stop_typing) while LLM processes
  - Message effects ([EFFECT:confetti], [EFFECT:slam], etc.)
  - 500-entry fromMe FIFO cache for reply-context resolution
  - Attachment placeholder format matching OpenClaw (<media:image>)
  - 38 unit tests covering parsing, filtering, timestamps, effects
- src/config/schema.rs — BlueBubblesConfig struct + ChannelsConfig field
  - Fields: server_url, password, allowed_senders, webhook_secret, ignore_senders
  - Debug impl redacts password and webhook_secret
- src/gateway/mod.rs — POST /bluebubbles route + handler
  - Bearer token auth if webhook_secret is set
  - Typing indicator around LLM call
  - Memory auto-save on incoming messages
- src/channels/mod.rs — module + re-export + iMessage delivery instructions
- src/providers/cursor.rs — fix pre-existing missing quota_metadata field

Non-goals: BlueBubbles Private API pairing, polling mode, contact management.

Closes #2268
2026-02-28 12:08:24 -08:00
argenis de la rosa
3825eca3dc fix(memory): thread session IDs through runtime paths 2026-02-28 15:00:27 -05:00
argenis de la rosa
352adb5a82 fix(agent): avoid orphan tool messages after compaction 2026-02-28 14:41:13 -05:00
argenis de la rosa
9784e3bfc1 feat(channel): add github native channel MVP 2026-02-28 14:03:10 -05:00
argenis de la rosa
5aac1af065 feat(channel): support onebot aliases for napcat config 2026-02-28 14:00:35 -05:00
argenis de la rosa
aa319e71b0 test(cli): add coverage for config show/get/set 2026-02-28 14:00:35 -05:00
argenis de la rosa
20ed60d2a0 feat(config): add show/get/set subcommands for runtime config inspection and modification 2026-02-28 14:00:35 -05:00
dexter
f6278373cb
feat: add cursor headless cli support (#2195)
* Initial plan

* feat(providers): add Cursor headless CLI provider

Co-authored-by: langhuihui <3647405+langhuihui@users.noreply.github.com>

* fix(cursor): harden headless CLI invocation and safety guards

* chore(pr): retrigger intake after template and linear updates

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: langhuihui <3647405+langhuihui@users.noreply.github.com>
Co-authored-by: argenis de la rosa <theonlyhennygod@gmail.com>
2026-02-28 13:41:56 -05:00
argenis de la rosa
3f70cbbf9b feat(gateway): add paired devices API and dashboard tab 2026-02-28 13:34:33 -05:00
argenis de la rosa
0253752bc9 feat(memory): add observation memory tool 2026-02-28 13:32:13 -05:00
argenis de la rosa
e3e648eea7 fix(tools): harden pptx_read ordering and extraction limits 2026-02-28 13:29:05 -05:00
argenis de la rosa
5cc482ebe1 fix(pptx_read): handle empty a:t tags safely 2026-02-28 13:29:05 -05:00
reidliu41
cb1cd14cbb feat(tools): add pptx_read tool for PowerPoint text extraction
- Problem: Agent cannot read PPTX files — file_read returns garbled binary, making presentations inaccessible
  - Why it matters: PPTX is the last major Office format gap after pdf_read and docx_read; presentations are ubiquitous in
  business and education workflows
  - What changed: Added pptx_read tool using existing zip + quick-xml to extract plain text from all slides in order
  - What did not change: No changes to existing tools, agent loop, security policy, config schema, or dependencies

  Label Snapshot (required)

  - Risk label: risk: low
  - Size label: size: S
  - Scope labels: tool
  - Module labels: tool: pptx_read

  Change Metadata

  - Change type: feature
  - Primary scope: tool

  Linked Issue

  - Closes #

  Validation Evidence (required)

  cargo fmt --all -- --check   # pass
  cargo clippy --all-targets -- -D warnings   # pass (zero new warnings)
  cargo test pptx_read   # 14/14 passed

  - Evidence provided: test results

  Security Impact (required)

  - New permissions/capabilities? No
  - New external network calls? No
  - Secrets/tokens handling changed? No
  - File system access scope changed? No

  Privacy and Data Hygiene (required)

  - Data-hygiene status: pass
  - Redaction/anonymization notes: Test fixtures use neutral content ("Hello PPTX", "Slide One", "Slide Two")
  - Neutral wording confirmation: Yes

  Compatibility / Migration

  - Backward compatible? Yes
  - Config/env changes? No
  - Migration needed? No

  i18n Follow-Through

  - i18n follow-through triggered? No

  Human Verification (required)

  - Verified scenarios: Multi-slide extraction produces correct ordered text
  - Edge cases checked: invalid ZIP, missing slides, symlink escape, path traversal, rate limiting, truncation
  - What was not verified: encrypted PPTX (out of scope), speaker notes

  Side Effects / Blast Radius (required)

  - Affected subsystems/workflows: Tool registry only
  - Potential unintended effects: None — additive only
  - Guardrails/monitoring: Identical security chain as pdf_read/docx_read

  Rollback Plan (required)

  - Fast rollback command/path: git revert <commit>
  - Feature flags or config toggles: None needed
  - Observable failure symptoms: pptx_read tool missing from tool list

  Risks and Mitigations

  - Risk: None — zero new dependencies, follows established pattern exactly
    - Mitigation: N/A
2026-02-28 13:29:05 -05:00
argenis de la rosa
83f7399c72 fix(copilot): preserve first text while merging split tool calls 2026-02-28 13:29:04 -05:00
Tim Stewart
8b7b0b0776 fix(copilot): merge tool_calls from all response choices
The Copilot API proxy for Claude models (Opus 4.6, Opus 4.6-1m) splits
text content and tool_calls into separate choices. Previously only
choices[0] was read, causing all tool calls to be silently dropped
when they appeared in choices[1].

Merge text and tool_calls from all choices so tool calling works
regardless of how the proxy splits the response.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-28 13:29:04 -05:00
killf
dfe9b3d02d
[RFC] AWW: Agent Wide Web — A World Wide Web for AI Agent Experiences (#2189)
* docs(rfc): add AWW (Agent Wide Web) proposal

Add RFC 001 proposing AWW (Agent Wide Web), a decentralized
experience exchange network for AI Agents, analogous to the
World Wide Web for human knowledge sharing.

Key features:
- Structured experience pages (similar to HTML)
- AWP protocol (Agent Web Protocol, similar to HTTP)
- AWW URL format for experience addressing
- ZeroClaw integration with auto-publish/query
- Phased roadmap from protocol to ecosystem

Vision: Enable agents to learn from each other's experiences,
building collective intelligence over time.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* docs(rfc): fix lint and reference quality for AWW proposal

* chore(pr): retrigger intake after template and linear updates

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: argenis de la rosa <theonlyhennygod@gmail.com>
2026-02-28 13:25:20 -05:00
Argenis
a25ca6524f
feat(skills): support front-matter metadata and always-inject skills (#2248)
* feat(skills): support front matter always injection in compact mode

* chore(pr): retrigger intake after template and linear updates
2026-02-28 13:11:57 -05:00
argenis de la rosa
6500f048bc feat(email): add IMAP ID extension support 2026-02-28 13:05:49 -05:00
argenis de la rosa
cc0bc49b2f feat(channel): add napcat support for qq protocol 2026-02-28 13:02:55 -05:00
Chummy
6e444e0311 fix(zip): adapt test zip writers for zip 8.1 2026-02-28 12:33:32 -05:00
dependabot[bot]
31b328f754 chore(deps): bump zip from 0.6.6 to 8.1.0
Bumps [zip](https://github.com/zip-rs/zip2) from 0.6.6 to 8.1.0.
- [Release notes](https://github.com/zip-rs/zip2/releases)
- [Changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/zip-rs/zip2/commits/v8.1.0)

---
updated-dependencies:
- dependency-name: zip
  dependency-version: 8.1.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-02-28 12:33:32 -05:00
argenis de la rosa
5ce11b94e0 build(deps): bump debian from f6e2cfa to 1d3c811 2026-02-28 12:17:57 -05:00
Argenis
db16188c74
Merge pull request #2241 from zeroclaw-labs/supersede-pr-1923-v2-20260228
fix(channel:discord): robust inbound image marker detection
2026-02-28 12:10:41 -05:00
argenis de la rosa
376d965088 Merge remote-tracking branch 'origin/main' into supersede-pr-1923-mergefix 2026-02-28 12:05:28 -05:00
Argenis
2f1380a03f
Merge pull request #2064 from peitschie/fix/cron-matrix-delivery
RMN-218: feat(channels): add matrix integration for sovereign communication
2026-02-28 12:05:26 -05:00
weykon
9ecb8dffa6 feat(memory): add sqlite_journal_mode config for shared filesystem support
SQLite WAL mode requires shared-memory (mmap/shm) which is unavailable
on many network and virtual shared filesystems (NFS, SMB/CIFS,
UTM/VirtioFS, VirtualBox shared folders), causing xShmMap I/O errors
at startup.

Add `sqlite_journal_mode` config option under `[memory]` that accepts
"wal" (default) or "delete". When set to "delete", SQLite uses the
legacy DELETE journal mode and disables mmap, allowing ZeroClaw to run
with workspaces on shared/network filesystems.

Usage:
  [memory]
  sqlite_journal_mode = "delete"

Changes:
- config/schema.rs: Add sqlite_journal_mode field to MemoryConfig
- memory/sqlite.rs: Add with_options() supporting journal mode selection
- memory/mod.rs: Pass journal_mode from config to SqliteMemory
- onboard/wizard.rs: Include new field in default MemoryConfig
2026-02-28 12:04:46 -05:00
loydccc
2044e828de fix(channel): mark discord inbound image attachments as [IMAGE]
# Conflicts:
#	src/channels/discord.rs
2026-02-28 11:57:17 -05:00
argenis de la rosa
b287b2420a fix(cron): avoid merge conflict in matrix scheduler tests 2026-02-28 11:48:22 -05:00
Chummy
3aa1eb1fd5 chore(fmt): normalize rustfmt output to satisfy quality gate 2026-03-01 00:45:18 +08:00
argenis de la rosa
74c8cae95d fix(quota): wire provider quota modules on main replay 2026-02-28 11:45:02 -05:00
ZeroClaw Bot
5ac9c3e955 fix(quota): address CodeRabbit review feedback
- Fix low-quota warning format string readability (parenthesized percentage)
- Add QuotaFormat enum for CLI --format validation (fail-fast on invalid input)
- Fix backoff eviction strategy comments (soonest-to-expire, not LRU)
- Custom Default for ProviderUsageMetrics (last_reset_at = Utc::now())
- Fix fail_count==0 always-fail case in stress test
- Add providers-quota to commands-reference.md
- Document fresh ProviderHealthTracker intent in quota_tools.rs

Made-with: Cursor
2026-02-28 11:45:02 -05:00
ZeroClaw Bot
d5fe47acff feat(tools): wire auth_profile + quota tools into agent loop and persist switch_provider
- Register 4 new tools (ManageAuthProfileTool, CheckProviderQuotaTool,
  SwitchProviderTool, EstimateQuotaCostTool) in all_tools_with_runtime
- SwitchProviderTool now loads config from disk and calls save() to
  persist default_provider/default_model to config.toml
- Inject Provider & Budget Context section into system prompt when
  Config is available
- Remove emoji from tool output for cleaner parsing
- Replace format! push_str with std::fmt::Write for consistency

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:45:02 -05:00
ZeroClaw Bot
8c0be20422 feat(providers): add quota_metadata to ChatResponse across all providers
Wire QuotaMetadata into ChatResponse for all provider implementations,
enabling quota tracking data to flow from API responses through the
agent loop to quota monitoring tools.

Depends on: circuit breaker (#1842) + quota monitoring (#1904)

Made-with: Cursor
2026-02-28 11:45:02 -05:00
ZeroClaw Bot
247d89e39e feat(providers): implement quota monitoring system with CLI and agent tools
Add comprehensive quota monitoring: QuotaMetadata types, quota-aware
agent loop with proactive warnings, CLI providers-quota command, and
3 built-in tools (check_provider_quota, switch_provider,
estimate_quota_cost).

Depends on: circuit breaker + provider health (#1842)

Made-with: Cursor
2026-02-28 11:45:02 -05:00
ZeroClaw Bot
d2b0338afd feat(providers): implement circuit breaker with provider health tracking
Add ProviderHealthTracker and BackoffStore for circuit breaker pattern
that tracks provider failures, enforces cooldown periods, and enables
automatic fallback to healthy providers.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-28 11:45:02 -05:00
argenis de la rosa
e854238a39 fix(channels): resolve main drift for matrix cron delivery 2026-02-28 11:44:21 -05:00
Chummy
ab325e5dad fix(ci): align telegram test constructor and declare cfg features 2026-02-28 23:41:33 +08:00
Chummy
cf59171937 test: align channel context defaults and de-flake proxy assertion 2026-02-28 23:01:53 +08:00
Chummy
a89f5c25be fix: resolve rebase drift in channel approval runtime 2026-02-28 23:01:53 +08:00
Chummy
42471f4d3e fix: restore ws query-token fallback and telegram test fixtures 2026-02-28 23:01:53 +08:00
Chummy
be8f7efe82 test: stabilize flaky threshold assertions 2026-02-28 23:01:53 +08:00
Chummy
5ee6024914 Fix channel turn persistence and low-sensitivity leak detection 2026-02-28 23:01:53 +08:00
argenis de la rosa
4cc156611c fix(channel:discord): robust image marker detection for inbound attachments 2026-02-28 09:56:35 -05:00
Tim Stewart
f1adc79f38 fix(browser): add return before snapshot IIFE in rust_native backend
WebDriver's execute() wraps the script as a function body. The snapshot
script used an IIFE without a top-level return, so the IIFE's return
value was discarded and the WebDriver function returned undefined (null).

All other execute() calls in the file (scroll, scrollIntoView, click)
correctly use explicit return statements.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-28 09:27:42 -05:00
argenis de la rosa
b9d9798774 fix: resolve compilation errors in channels, tools, and gateway modules 2026-02-28 09:18:30 -05:00
argenis de la rosa
bb25e5fbf6 fix(tests): align channel runtime fixtures for main compatibility 2026-02-28 09:17:35 -05:00
Chummy
5cced82e3f fix(channels): import ApprovalResponse for runtime approval handling 2026-02-28 21:58:52 +08:00
argenis de la rosa
1484b238e5 fix(docs): restore canonical README content 2026-02-28 08:57:46 -05:00
ake117
c6e1324f84 Update README.md 2026-02-28 08:57:46 -05:00
ake117
69c3ac7355 Update README.md 2026-02-28 08:57:46 -05:00
ake117
3ae2e63ac5 docs(rpi): clarify gnueabihf toolchain workaround for musl builds
Explain why gcc-arm-linux-gnueabihf is installed for musleabihf builds:
- Pure arm-linux-musleabihf-gcc not available in standard repos
- Use gnueabihf linker as tool with Rust target spec
- Static linking via -C link-arg=-static produces portable musl binary

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:57:46 -05:00
argenis de la rosa
57fd23c381 fix: resolve 3 compilation errors 2026-02-28 08:54:29 -05:00
argenis de la rosa
a029c720a6 feat(security): add safety heartbeat reinjection with cadence fixes 2026-02-28 08:52:16 -05:00
Chummy
3341608d52 fix(channels): remove duplicate perplexity snapshot helper 2026-02-28 21:49:11 +08:00
Chummy
aa401f29c3 fix(channels): restore pending approval flow and docx tool export 2026-02-28 21:49:11 +08:00
Chummy
1509cc5b69 fix(telegram): unify mention gate and typing target handling 2026-02-28 21:49:11 +08:00
argenis de la rosa
62b719c447 fix(gateway): allow ws query fallback without subprotocol header 2026-02-28 08:36:18 -05:00
Chummy
4c0fa1c1d4 ci(security): add governance metadata for RUSTSEC-2024-0436 2026-02-28 21:30:37 +08:00
Chummy
d9cdaa0757 fix: resolve post-rebase compile and test stability issues 2026-02-28 21:30:37 +08:00
Chummy
51ad52d0e8 security: harden sensitive I/O and outbound leak controls 2026-02-28 21:30:37 +08:00
ake117
6ed02e5351 docs(rpi): expand cross-compilation guide with static linking details
- Add build time comparison (native vs cross-compile)
- Note that .cargo/config.toml is now included in repo
- Add static linking benefits table
- Include verification commands for static binaries
- Add cross-platform prerequisites (Linux, macOS, Windows)
- Add workflow diagram for cross-compilation process

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:23:04 -05:00
ake117
d3b81ce68b build(cargo): add ARMv6 musl target config for Raspberry Pi Zero W
- Add armv6l-unknown-linux-musleabihf target to .cargo/config.toml
- Add target spec JSON for cross-compilation support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:23:04 -05:00
ake117
87fa327e0d feat(telegram): add ack_enabled option to control emoji reactions
Add configuration option to enable/disable Telegram emoji reaction
acknowledgments (️, 👌, 👀, 🔥, 👍) sent to incoming messages.

Changes:
- Add ack_enabled field to TelegramConfig (default: true)
- Add ack_enabled field to TelegramChannel struct
- Add with_ack_enabled() builder method
- Conditionally send reactions in try_add_ack_reaction_nonblocking()
- Update all call sites and tests
- Update documentation with usage example

Usage:
  [channels_config.telegram]
  ack_enabled = false  # Disable emoji reactions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:23:04 -05:00
ake117
f89e99b7f9 docs(hardware): add Raspberry Pi Zero W build guide
Add comprehensive step-by-step guide for compiling ZeroClaw on
Raspberry Pi Zero W (512MB RAM, ARMv6). Includes:

- Target ABI comparison (gnueabihf vs musleabihf)
- Native compilation instructions with swap setup
- Cross-compilation from more powerful hosts
- systemd service configuration
- Troubleshooting for constrained devices

musleabihf is recommended for smaller static binaries and better
portability across Raspberry Pi OS versions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:23:04 -05:00
Chummy
61e738287b chore(ws): mention query token auth in unauthorized hint 2026-02-28 21:21:33 +08:00
Chummy
2a865ac713 chore(gateway): remove unused ws chat artifacts 2026-02-28 21:21:33 +08:00
Chummy
32205fb038 fix(gateway): accept websocket token query fallback 2026-02-28 21:21:33 +08:00
Chummy
5ca656be07 fix(qq): keep sender-level history across messages 2026-02-28 08:19:23 -05:00
Argenis
58cf5b4cf3
Merge pull request #2197 from zeroclaw-labs/issue-2175
fix(build): restore missing runtime approval and docx symbols
2026-02-28 08:17:22 -05:00
Chummy
cb46084111 docs(android): fix heading spacing for docs quality gate 2026-02-28 21:16:52 +08:00
Chummy
5d2472bd56 feat(android): add strict self-check mode with warning gates 2026-02-28 21:16:52 +08:00
Chummy
664dcdcb82 feat(android): standardize self-check error codes and offline diagnostics 2026-02-28 21:16:52 +08:00
Chummy
48cba9e076 feat(android): add structured error codes and stdout JSON mode 2026-02-28 21:16:52 +08:00
Chummy
88f7d842e5 feat(android): add JSON self-check report and regression tests 2026-02-28 21:16:52 +08:00
Chummy
424f67d948 feat(android): support offline log diagnosis and tests 2026-02-28 21:16:52 +08:00
Chummy
3b8fbcaa38 feat(android): auto-diagnose cargo check toolchain failures 2026-02-28 21:16:52 +08:00
Chummy
e5aacec1a5 feat(android): add mode-aware source-build self-check 2026-02-28 21:16:52 +08:00
Chummy
bebb881b5b fix(android): harden Termux source-build and wasm-tools fallback 2026-02-28 21:16:52 +08:00
argenis de la rosa
40de96ed77 fix(build): resolve main conflict for runtime approval wiring 2026-02-28 08:14:34 -05:00
argenis de la rosa
f0a5bbdb1b feat(http_request): add env credential profiles and onboarding guards 2026-02-28 08:07:37 -05:00
Chummy
46b50cbb49 fix: reconcile supersede replay with current main runtime 2026-02-28 20:56:09 +08:00
Jaime Linares
a88d37f3cb fix(whatsapp-web): strip MIME parameters before matching in audio_mime_to_filename
MIME strings like 'audio/webm; codecs=opus' were incorrectly matched by
the 'opus' branch (contains-check) before reaching the 'webm' branch,
returning 'voice.opus' instead of 'voice.webm'. This could cause the
Groq Whisper API to reject or misidentify the file format.

Fix: split on ';' to extract only the base MIME type, then match
exhaustively. Also add 'audio/x-wav' as a wav alias.

Adds a regression test: audio_mime_to_filename('audio/webm; codecs=opus')
must return 'voice.webm'.

Reported by CodeRabbit in PR review.

(cherry picked from commit 84861c727a)
2026-02-28 20:56:09 +08:00
Jaime Linares
f8eef67a03 feat(whatsapp-web): transcribe voice messages via Groq Whisper
Audio/voice messages on the WhatsApp Web channel were silently dropped
because `text_content()` returns an empty string for non-text messages
and no transcription path existed (unlike the Telegram channel which
already uses `transcription::transcribe_audio()`).

Changes:

- **Cargo.toml**: Move `qrcode` and all `wa-rs-*` crates out of the
  `[target.'cfg(any(linux|macos|windows))'.dependencies]` section into
  the unconditional `[dependencies]` section.  All affected crates are
  `optional = true`, so they add no compile cost unless
  `--features whatsapp-web` is active.  The previous placement caused
  Cargo to exclude them when targeting `android` (target_os = "android"
  does not match the cfg predicate), producing E0433 unresolved-crate
  errors for every wa-rs import in `whatsapp_web.rs` and
  `whatsapp_storage.rs` on Android cross-compilation.

- **whatsapp_web.rs**:
  - Add `transcription: Option<TranscriptionConfig>` field.
  - Add `with_transcription()` builder (mirrors `TelegramChannel`).
  - Add `audio_mime_to_filename()` helper mapping WhatsApp MIME types
    (e.g. `audio/ogg; codecs=opus`) to filenames the Groq Whisper API
    accepts.
  - Extend `Event::Message` handler: when text is empty, check
    `msg.audio_message`; download and decrypt audio via
    `client.download(audio_msg.as_ref())` (`.as_ref()` required because
    prost boxes nested proto fields as `Box<AudioMessage>`, which does
    not itself implement `Downloadable`); forward decrypted bytes to
    `transcription::transcribe_audio()`.
  - Add three unit tests: builder enable/disable guard and MIME mapping.

- **mod.rs**: Chain `.with_transcription(config.transcription.clone())`
  onto `WhatsAppWebChannel::new(...)` in the `"web"` factory branch so
  transcription is active whenever the global `[transcription]` section
  is enabled.

Activation: set `[transcription] enabled = true` and export
`GROQ_API_KEY` in the environment.

(cherry picked from commit 325241aeb6)
2026-02-28 20:56:09 +08:00
Chummy
6fd69f05e0
chore(pr): retrigger intake checks with updated PR template 2026-02-28 12:54:53 +00:00
Chummy
6b89446b46
test(tools): guard docx_read registration in all_tools 2026-02-28 12:51:17 +00:00
Chummy
da2bb5825f Fix shellcheck redirect style in queue hygiene workflow 2026-02-28 20:48:22 +08:00
Chummy
6a0b367535 Add scheduled CI queue hygiene automation 2026-02-28 20:48:22 +08:00
Chummy
733598a039 Deduplicate PR workflow triggers on feature branch pushes 2026-02-28 20:34:23 +08:00
argenis de la rosa
006eb4b9c2 fix(delivery): handle HEARTBEAT_OK sentinel case-insensitively 2026-02-28 07:31:45 -05:00
Chummy
ea9b0e7b68 Move maintenance workflows to hosted runners 2026-02-28 20:22:04 +08:00
Chummy
84d1e43c4b Run docs-quality on hosted runner 2026-02-28 20:00:54 +08:00
Chummy
ab28b02bb7 Remove CI owner approval gate and refresh workflow docs 2026-02-28 20:00:54 +08:00
Chummy
1d622b3b72 Move ci-run fast-path and gate jobs to hosted runners 2026-02-28 20:00:54 +08:00
cyberpapi
4fe18d3548 fix(telegram): redact raw response body in register_commands error log
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 19:59:22 +08:00
cyberpapi
1177a83e4a feat(telegram): register bot commands with setMyCommands on startup
Register /new, /model, and /models commands with Telegram's Bot API
on startup so they appear in the command menu for users. Registration
is non-fatal — if the API call fails, a warning is logged and the
bot continues listening normally.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 19:59:22 +08:00
cyberpapi
9125651775 fix(telegram): log specific deleteMessage failure reason in finalize_draft
Split the catch-all `_` match arm on the deleteMessage result into
separate `Ok(r)` and `Err(e)` arms so that HTTP status codes and
network errors are logged individually. The response body is not
logged (security policy).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 19:59:20 +08:00
cyberpapi
b2fc063d88 fix(telegram): prevent duplicate messages in finalize_draft fallback
When editMessageText returns 'message is not modified', the draft
already contains the correct content from update_draft. Detect this
Telegram API response and treat it as success rather than falling
through to the delete+send fallback, which would create a visible
duplicate message.

Also guard the final fallback: only send a new message after
successfully deleting the draft. If deleteMessage fails, the draft
still shows the response text, so sending would create a duplicate.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 19:59:20 +08:00
Chummy
1872078ce8 Fix hosted workflow lint and python runtime regressions 2026-02-28 19:47:47 +08:00
Chummy
13c5fa581f ci: move lightweight PR/workflow checks to github-hosted runners 2026-02-28 19:47:47 +08:00
Chummy
dea0d5e447
fix(build): restore missing runtime approval and docx symbols 2026-02-28 11:37:46 +00:00
VirtualHotBar
1ac510283f Fix tests after AutonomyLevel move 2026-02-28 19:16:14 +08:00
VirtualHotBar
857cb3c054 Merge remote-tracking branch 'origin/main' 2026-02-28 19:00:27 +08:00
chumyin
7b241ec739
chore: refresh head for legacy promotion-check cleanup (#1754) 2026-02-28 10:58:54 +00:00
chumyin
ab6c4cfcab
chore: refresh head for legacy promotion-check cleanup (#2064) 2026-02-28 10:55:53 +00:00
VirtualHotBar
0f321994c5 session: make get_history side-effect free 2026-02-28 18:51:30 +08:00
Chummy
0373582415 ci: increase supply-chain provenance timeout to 60m 2026-02-28 18:50:53 +08:00
VirtualHotBar
0a357064d9 session/history: allowlist persistable roles (user, assistant) via ROLE_* constants; unify filtering in channel + agent; memory/session: reduce read contention with RwLock+AtomicI64 and refresh updated_at on get_history; providers: export role constants and helper; security: switch HMAC verifications to ring::hmac for Linq/Nextcloud/WhatsApp; channels tests: auto-approve mock_price to avoid non-CLI approval dead-wait; misc: ignore target_ci/.idea; main: use local rag module. 2026-02-28 18:40:48 +08:00
VirtualHotBar
824ce19622 Share SessionManager across runtime 2026-02-28 17:45:29 +08:00
Chummy
4fb784e7e1 ci(security): allow transitive paste advisory in cargo-deny 2026-02-28 17:44:47 +08:00
Chummy
9015eef883 ci(security): include android/template crates in unsafe debt audit 2026-02-28 17:44:47 +08:00
Chummy
4a6dc2dfc6 fix(security): patch wasmtime advisories by pinning 24.0.6 2026-02-28 17:44:47 +08:00
VirtualHotBar
2a4902c3a5 fix(qq): stabilize conversation history key 2026-02-28 17:34:00 +08:00
VirtualHotBar
fae10cd5c4 Merge remote-tracking branch 'origin/main'
# Conflicts:
#	src/channels/mod.rs
#	src/config/mod.rs
#	src/config/schema.rs
2026-02-28 16:12:02 +08:00
Chummy
7470bded5d fix: harden browser and web search validation paths 2026-02-28 15:50:06 +08:00
Chummy
ad432b9981 docs: expand runtime policy and web search configuration workflows 2026-02-28 15:50:06 +08:00
Chummy
6716391502 feat: harden web access policy and add flexible web search/runtime config 2026-02-28 15:50:06 +08:00
killf
955c572c02 feat(tools): add Chrome/Firefox/Edge support to browser_open tool
Add support for Chrome, Firefox, and Edge browsers to the browser_open tool,
which previously only supported Brave. Users can now specify the browser
via the browser_open config option.

Changes:
- Add browser_open config field: "disable" | "brave" | "chrome" | "firefox" | "edge" | "default"
- Implement platform-specific launch commands for Chrome, Firefox, and Edge
- When set to "disable", only the browser automation tool is registered, not the browser_open tool
- Update tool descriptions and error messages to reflect browser selection

Co-Authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-28 15:50:06 +08:00
VirtualHotBar
0a42329ca5 fix: session leftovers after db47f56
- Demote session normal flow logs to debug\n- Skip session operations when CHANNEL_SESSION_CONFIG is uninitialized\n- Add spawn_blocking panic context for SQLite session manager\n- Fix fmt/clippy regressions (Box::pin large futures, cfg features, misc lints)
2026-02-28 15:23:42 +08:00
xj
a381963623
fix(ci): stabilize post-2142 local quality gates (COM-2142) (#2171)
* chore(ci): retrigger PR checks after intake body update

* fix(ci): stabilize local quality gates on rebased main

---------

Co-authored-by: Chummy <chumyin0912@gmail.com>
Co-authored-by: xj <gh-xj@users.noreply.github.com>
2026-02-27 23:18:25 -08:00
Preventnetworkhacking
479b6da4ce feat(cost): wire provider token usage to cost tracking (#2111)
Implement CostObserver that intercepts LlmResponse observer events and
records token usage to the CostTracker with proper cost calculations.

Changes:
- Add src/observability/cost.rs: CostObserver implementation
  - Listens for LlmResponse events with token counts
  - Looks up model pricing from CostConfig (with fallback defaults)
  - Records usage via CostTracker.record_usage()
  - Includes model family matching for pricing lookups

- Update src/observability/mod.rs:
  - Export CostObserver
  - Add create_observer_with_cost_tracking() helper that wraps base
    observer with CostObserver when cost tracking is enabled

- Update src/gateway/mod.rs:
  - Use create_observer_with_cost_tracking() to wire cost observer
    into the gateway observer stack when config.cost.enabled is true

The /api/cost endpoint already exists and will now return accurate
session/daily/monthly cost data populated by the CostObserver.

Resolves #2111
2026-02-28 02:00:13 -05:00
Argenis
65967aedde
Merge pull request #2042 from Preventnetworkhacking/feat/android-phase3
feat(android): Phase 3 - WorkManager, Quick Settings, battery optimization [CDV-21]
2026-02-28 01:30:07 -05:00
Argenis
5a2f1902c4
Merge pull request #2103 from Preventnetworkhacking/docs/cron-scheduling-clean
docs: add cron/scheduling reference documentation [COM-2]
2026-02-28 01:30:02 -05:00
Argenis
43e3e9b897
Merge pull request #2134 from Preventnetworkhacking/feat/economic-agents-mvp
feat(economic): ZeroClaw Economic Agents - Phase 1 Foundation [CDV-20]
2026-02-28 01:29:57 -05:00
Argenis
7b1c63cf27
Merge pull request #2157 from zeroclaw-labs/feat/providers-sf-volcengine
feat: integrate Volcengine ARK and SiliconFlow providers
2026-02-28 01:28:09 -05:00
Argenis
4ddc4fe1e7
Merge pull request #2127 from zeroclaw-labs/fix/ci-require-chumyin-approval
ci: require explicit @chumyin approval for CI/CD changes
2026-02-28 01:28:00 -05:00
Argenis
e620a63fee
Merge pull request #1973 from zeroclaw-labs/supersede-pr-1879-20260226144348-3162467-theirs
[supersede #1879] fix(approvals): clear non-cli exclusions when approving tools
2026-02-28 01:27:46 -05:00
Argenis
0d695746e6
Merge pull request #1968 from zeroclaw-labs/supersede-pr-1853-20260226143137-2982344-theirs
[supersede #1853] feat(tools): add Feishu document operation tool with 13 actions
2026-02-28 01:27:41 -05:00
argenis de la rosa
f0d091d348 fix(cron): filter NO_REPLY sentinel in delivery paths 2026-02-28 01:26:43 -05:00
Preventnetworkhacking
a2f1f09364 feat(economic): add TaskClassifier for BLS occupation-based task valuation
Phase 2 of ClawWork integration. Implements:
- TaskClassifier with 44 BLS occupations and wage data
- OccupationCategory enum (Tech/Business/Healthcare/Legal)
- Keyword-based classification with confidence scoring
- Hours estimation based on instruction complexity
- Fuzzy matching for occupation lookup

Reference: ClawWork/clawmode_integration/task_classifier.py
2026-02-27 22:26:04 -08:00
Chummy
79f86223a5 test(ci): assert checkout commands in scope tests 2026-02-28 14:06:08 +08:00
Chummy
e3ffe57162 ci: align main-first policy wording and harden add assertions 2026-02-28 14:06:08 +08:00
Chummy
c275663c8f ci: harden scope tests and align main-first policy text 2026-02-28 14:06:08 +08:00
Chummy
9e298f8e90 ci: fix pull_request scope detection for merge refs 2026-02-28 14:06:08 +08:00
Chummy
cd70a18fa0 ci: remove dev-to-main promotion gate and align main flow 2026-02-28 14:06:08 +08:00
Argenis
b00066b7e3
Merge pull request #1867 from zeroclaw-labs/test-runner-workflow
Test self-hosted runner
2026-02-28 01:04:50 -05:00
reidliu41
6662601a6c feat(agent): add result-aware loop detection for tool-call loop 2026-02-28 00:58:31 -05:00
Chummy
e57173333f test(dingtalk): add onboarding regression coverage 2026-02-28 00:58:30 -05:00
Chummy
9d0ea63ace
docs(i18n): sync vi provider docs for Volcengine/SiliconFlow 2026-02-28 05:55:35 +00:00
Chummy
da1f805cf7
test(providers): harden Volcengine/SiliconFlow alias coverage 2026-02-28 05:55:31 +00:00
Chum Yin
ca8ef10dcf
Merge branch 'main' into fix/ci-require-chumyin-approval 2026-02-28 13:49:32 +08:00
Chummy
48e084dd58 ci: relax codex workflow timeouts for heavy jobs 2026-02-28 13:48:41 +08:00
Chummy
6ce4192b31 ci: remove human review approval gate from ci run 2026-02-28 13:48:41 +08:00
Chummy
1b82597eac fix(codex): harden transport validation and fallback 2026-02-28 13:48:41 +08:00
Chummy
81387f9896 fix(codex): preserve transport overrides across runtimes 2026-02-28 13:48:41 +08:00
Chummy
b721754ead feat(codex): add websocket-first transport selection 2026-02-28 13:48:41 +08:00
Preventnetworkhacking
769538ad23 fix: address CodeRabbit review comments for cron docs
- Fix MD036: convert bold text to proper heading at line 197
- Add Vietnamese localization (cron-scheduling.md)
- Update Vietnamese SUMMARY.md with cron-scheduling link
2026-02-27 21:43:51 -08:00
Preventnetworkhacking
842c2a93fc chore: retrigger CI checks 2026-02-27 21:42:48 -08:00
Preventnetworkhacking
dd88bb254e chore: retrigger CI checks 2026-02-27 21:42:43 -08:00
Preventnetworkhacking
055e9b7b31 fix: remove trailing whitespace from economic module 2026-02-27 21:42:19 -08:00
Chummy
f3f44c48f4
feat(providers): integrate Volcengine ARK and SiliconFlow
Add SiliconFlow provider factory support and alias/env handling.

Normalize onboarding UX to volcengine while preserving doubao/ark runtime aliases.

Add integration registry entries and provider resolution coverage tests.

Expand provider and command docs with setup and validation examples.
2026-02-28 05:25:15 +00:00
Preventnetworkhacking
8b5bbf9456 fix: remove trailing whitespace from android-bridge 2026-02-27 20:56:12 -08:00
VirtualHotBar
db47f569ce channels: persist sessions via SessionManager
Fix channel runtime history persistence (load/seed/update) and remove duplicate agent turn call in process_message.
2026-02-28 12:51:03 +08:00
VirtualHotBar
fa8a98113e Merge remote-tracking branch 'origin/main' 2026-02-28 12:06:01 +08:00
VirtualHotBar
32dc3a460a fix(mcp): 修复 SSE 传输并兼容 endpoint/messages
- 修复 /sse/message 404:支持 legacy SSE endpoint 事件与单请求 SSE 响应两种模式

- 新增 mcp_smoke 二进制用于联通性验证

- Windows 增大默认栈并忽略 otp-secret
2026-02-28 12:01:51 +08:00
argenis de la rosa
1a0bb175f2 fix(agent): retry deferred tool follow-through in CJK contexts 2026-02-27 22:48:51 -05:00
argenis de la rosa
684503f5fc feat(onboard): add GitHub Copilot to interactive wizard 2026-02-27 21:34:40 -05:00
Argenis
aa4c7f0d4c
docs: update hardware references
Update hardware references to be more inclusive.
2026-02-27 20:18:47 -05:00
Argenis
7d65b290e7
docs: streamline README by consolidating detailed sections
Remove detailed installation, configuration, and development sections from root README. These comprehensive sections are better maintained in the dedicated documentation system (docs/README.md, docs/SUMMARY.md, docs/commands-reference.md, etc.). This change makes the README more focused and accessible as a project overview while keeping detailed information in the proper documentation locations.

## Changes

- Removed: Prerequisites, Quick Start (Homebrew, bootstrap, pre-built binaries)
- Removed: Subscription Auth section
- Removed: Architecture, Runtime support, Memory System details
- Removed: Security checklist and detailed channel allowlists
- Removed: Configuration examples (Ollama, llama.cpp, vLLM, Osaurus, etc.)
- Removed: Gateway API, Commands, Service Management sections
- Removed: Development, Open-Skills Opt-In sections
- Removed: Collaboration & Docs sections, Support ZeroClaw

Kept essential overview elements: features, benchmark, license, contributing links, and key navigation pointers to the docs hub.
2026-02-27 20:13:15 -05:00
Preventnetworkhacking
b238e8fd5e feat(config): add economic agent configuration schema
Adds EconomicConfig to config schema:

- enabled: bool (default false, opt-in)
- initial_balance: f64 (default 1000.0)
- signature: String (agent identifier)
- data_path: PathBuf (persistence location)
- token_pricing: EconomicTokenPricing
  - input_price_per_million: f64 (default 3.0)
  - output_price_per_million: f64 (default 15.0)
- min_evaluation_threshold: f64 (default 0.6)

Integrates with existing config loading and onboard wizard.

Relates to: CDV-20
2026-02-27 15:54:58 -08:00
Preventnetworkhacking
9798b34f8d feat(economic): add ClawWork economic tracking module
Phase 1 of ZeroClaw Economic Agents (CDV-20):

## New Module: src/economic/

- **tracker.rs** (995 lines): EconomicTracker with balance management,
  token cost tracking, income ledger, and JSONL persistence
- **status.rs** (212 lines): SurvivalStatus enum with 5 states
  (Thriving/Stable/Struggling/Critical/Bankrupt)
- **costs.rs** (369 lines): TokenCostAccumulator for multi-channel
  cost tracking (LLM, search, OCR, etc.)
- **mod.rs** (81 lines): Module exports with comprehensive docs

## Features

- Track agent balance and economic health
- Configurable token pricing (input/output per million)
- Task-level cost isolation with start_task/end_task
- Work income with quality threshold (0.6 default)
- Persistent ledger (balance.jsonl, token_costs.jsonl)
- Daily cost aggregation

## Next Steps

- Phase 2: Task classifier (44 occupations)
- Phase 3: Work evaluator (LLM scoring)
- Phase 4: Economic tools (decide_activity, submit_work, etc.)

Relates to: CDV-20, Discussion #2132
2026-02-27 15:54:47 -08:00
Preventnetworkhacking
2e06953550 fix: Android CI blockers - trailing whitespace and Cargo.toml deps
- Remove trailing whitespace in build.gradle.kts, SIZE.md, README.md
- Add rt-multi-thread feature to tokio dependency
- Add tracing-subscriber for logging support

Part of Android Phase 3 integration work.
2026-02-27 15:21:05 -08:00
Chummy
b6290e041c
ci: require explicit @chumyin approval for CI/CD changes 2026-02-27 22:11:27 +00:00
Chummy
55cabbb70c ci: narrow feature-matrix push scope and add queue hygiene tool 2026-02-28 05:28:25 +08:00
Chummy
467888b872 ci: tighten low-signal triggers and gate e2e by paths 2026-02-28 05:13:09 +08:00
Chummy
e0c1b67929 ops(ci): add self-hosted runner remediation tooling 2026-02-28 05:00:32 +08:00
Chummy
a5cd875f1b ci: reduce non-blocking workflow load 2026-02-28 05:00:19 +08:00
Chummy
6da4a321b8 ci: remove blacksmith action dependencies 2026-02-28 05:00:01 +08:00
Chummy
211bff082b
perf(ci): optimize CI/CD pipeline critical path and runner utilization
Reduce PR CI critical path from ~80min to ~30min and total runner
pressure by ~40% through job parallelization, deduplication, and
cache unification.

Changes:
- ci-run.yml: run lint, test, and build in parallel (remove lint→test
  serial dependency); merge flake-probe retry logic into test job;
  unify rust-cache prefix-key for lint/test; simplify ci-required gate
- ci-build-fast.yml: delete redundant workflow (cargo build --release
  with lto=fat, codegen-units=1 duplicated ci-run.build's release-fast)
- feature-matrix.yml: skip 4 compilation lanes on PRs by default (only
  trigger on ci:full or ci:feature-matrix label); remove unnecessary
  fetch-depth: 0 from checkout
- docs/ci-map.md: update documentation to reflect new job topology,
  parallel execution model, and label-gated feature matrix behavior
2026-02-27 20:52:23 +00:00
argenis de la rosa
498fca9d08 feat(memory): add sqlite+qdrant hybrid backend 2026-02-27 15:28:56 -05:00
argenis de la rosa
1a0372709d feat(whatsapp): support heartbeat and cron delivery for whatsapp_web 2026-02-27 15:28:48 -05:00
argenis de la rosa
8004260e4d fix(agent): retry deferred-action replies missing tool calls 2026-02-27 15:28:38 -05:00
argenis de la rosa
281236a94d feat(identity): add openclaw extra_files support 2026-02-27 15:15:39 -05:00
argenis de la rosa
5981e50514 fix(agent): avoid anthropic vision preflight false negatives 2026-02-27 15:15:17 -05:00
argenis de la rosa
b8de8ce8b9 feat(transcription): support config-level api_key 2026-02-27 15:15:02 -05:00
Preventnetworkhacking
a5348b60c0 fix(android): Address CodeRabbit HIGH and MEDIUM severity issues
HIGH severity fixes:
- SettingsRepository: Add IOException handling for DataStore corruption
- BootReceiver: Remove double pendingResult.finish() call
- ShareHandler: Add text/uri-list MIME type routing
- ZeroClawTileService: Add API 34+ PendingIntent overload for startActivityAndCollapse
- proguard-rules.pro: Remove dangerous Kotlin Intrinsics null check stripping

MEDIUM severity fixes:
- SettingsScreen: Refresh isOptimized state on lifecycle resume
- HeartbeatWorker: Enforce 15-min minimum, use UPDATE policy for config changes
- ZeroClawApp: Add settings change listener to update heartbeat schedule

Formatting:
- Remove trailing whitespace from all Kotlin files

Also adds PR_DESCRIPTION_UPDATE.md with required sections:
- Validation Evidence
- Security Impact
- Privacy and Data Hygiene
- Rollback Plan
2026-02-27 11:46:38 -08:00
argenis de la rosa
a09f146145 feat(security): add role-policy and otp challenge foundations 2026-02-27 14:29:31 -05:00
argenis de la rosa
cb67aac265 fix(telegram): redact bot token and reduce transient poll noise 2026-02-27 14:29:17 -05:00
Tom Cr00se
d41936ac05
Merge branch 'main' into docs/cron-scheduling-clean 2026-02-27 11:24:02 -08:00
Preventnetworkhacking
80ce59fa6c docs: add cron/scheduling reference documentation
Closes #88 (partial - cron section)

Comprehensive documentation for ZeroClaw's cron system:

- Schedule types: cron, at, every
- Job types: shell, agent
- Session targeting: main vs isolated
- Delivery configuration
- CLI commands reference
- Configuration file examples
- Tool integration (cron_add, cron_list, etc.)
- Migration guide from OpenClaw
- Best practices and troubleshooting

This addresses the 'Cron / Scheduling System' section of issue #88
which asked for documentation of these features.
2026-02-27 09:36:59 -08:00
VirtualHotBar
e8310a7841 fix(pr-review): address code review comments for session persistence 2026-02-28 00:41:31 +08:00
VirtualHotBar
0d68992fb7 feat(session): Add channel session persistence 2026-02-28 00:41:30 +08:00
VirtualHotBar
5619aac366 fix(mcp): add SSE Accept header and parse data: prefix for阿里云MCP 2026-02-28 00:41:30 +08:00
argenis de la rosa
ff1f2d6c1a feat(gateway): add streaming mode for webhook responses 2026-02-27 11:32:48 -05:00
argenis de la rosa
1ebaa673b5 feat(gateway): add streaming mode for webhook responses 2026-02-27 11:28:48 -05:00
argenis de la rosa
eed4f0651d fix(telegram): redact bot token and reduce transient poll noise 2026-02-27 11:24:44 -05:00
argenis de la rosa
60b73b6cd3 feat(slack): add socket mode listener fallback 2026-02-27 11:15:27 -05:00
xuhao
af731149b5
fix(tools): address remaining fourth-round review findings for feishu_doc
Addresses 4 findings from CodeRabbit's fourth review that were not
covered by the maintainer's commit 7ef075e:

1. [Major] http_client() per-call allocation: cache reqwest::Client in
   FeishuDocTool struct field, return &reqwest::Client. Enables
   connection pooling across all API calls.

2. [Major] SSRF bypass via HTTP redirects: download_media now uses a
   no-redirect reqwest client (Policy::none()) to prevent attackers
   from using a public URL that 301/302-redirects to internal IPs.

3. [Minor] Missing empty-conversion guard in action_upload_image:
   added converted.is_empty() check consistent with all other
   convert_markdown_blocks callers.

4. [Minor] Schema description for link_share stale: updated from
   'default: true' to 'default: false' to match actual behavior.

Validation:
- cargo check --features channel-lark 
- cargo clippy -p zeroclaw --lib --features channel-lark -- -D warnings 
- cargo test --features channel-lark -- feishu_doc  (7/7 tests pass)

(cherry picked from commit e846604a13)
2026-02-27 15:58:43 +00:00
argenis de la rosa
7dbdd030a1
fix(tools): address final feishu_doc review blockers
(cherry picked from commit 7ef075e6c9)
2026-02-27 15:58:43 +00:00
xuhao
a66866c367
fix(tools): address third-round code review findings for feishu_doc
Addresses all 5 findings from CodeRabbit's third review on PR #1853:

1. [Minor] action_append: add empty-conversion guard to prevent silent
   no-op (blocks_appended: 0). Consistent with action_write/update_block.

2. [Major] link_share default: change from opt-out (true) to opt-in
   (false). Documents no longer become publicly link-readable by default.
   Follows 'never silently broaden permissions' guideline.

3. [Minor] optional_usize: strict validation. Now returns Result and
   rejects invalid/negative/non-integer values with clear error instead
   of silently converting to None.

4. [Major] Media size bound: add MAX_MEDIA_BYTES (25 MiB) limit for
   both remote downloads (content-length pre-check + post-download
   check) and local file reads (metadata size check). Prevents memory
   exhaustion from oversized uploads.

5. [Major] Malformed JSON handling: parse_json_or_empty now returns
   Result (propagates parse errors instead of swallowing to {}).
   ensure_api_success now requires 'code' field presence instead of
   defaulting missing code to 0 (success). Prevents misclassifying
   malformed 2xx responses as success.

Validation:
- cargo check --features channel-lark 
- cargo clippy -p zeroclaw --lib --features channel-lark -- -D warnings 
- cargo test --features channel-lark -- feishu_doc  (7/7 tests pass)

(cherry picked from commit e9352b793e)
2026-02-27 15:58:43 +00:00
xuhao
b0f3c13db1
fix(tools): address second-round code review findings for feishu_doc
Addresses all 5 findings from CodeRabbit's second review on PR #1853:

1. [Major] list_all_blocks: add MAX_PAGES (200) hard cap to prevent
   unbounded pagination loops on misbehaving APIs or huge documents.

2. [Major] Empty conversion guard: action_write, action_update_block,
   and write_single_cell now bail with explicit error when
   convert_markdown_blocks returns empty results, preventing silent
   data loss (delete-then-write-nothing scenario).

3. [Minor] action_create: grant_owner_permission failure is now a soft
   warning instead of hard error. Document is already created and
   verified; permission failure is reported in the response JSON
   'warning' field instead of propagating as an error.

4. [Nitpick] extract_ttl_seconds: remove unreachable as_i64 fallback
   branch (as_u64 already covers all non-negative integers).

5. [Nitpick] Add unit tests: test_extract_ttl_seconds_defaults_and_clamps
   and test_write_rejects_empty_conversion.

Validation:
- cargo check --features channel-lark 
- cargo clippy -p zeroclaw --lib --features channel-lark -- -D warnings 
- cargo test --features channel-lark -- feishu_doc  (7/7 tests pass)

(cherry picked from commit 762e6082ec)
2026-02-27 15:58:43 +00:00
xuhao
a579cdfbec
fix(tools): address code review findings for feishu_doc
- Reorder convert-before-delete in action_write, action_update_block,
  and write_single_cell to prevent data loss if markdown conversion fails
- Separate create POST from verification retry loop in action_create
  to prevent duplicate document creation on retry
- Add resolve_doc_token to upload_image and upload_file so wiki
  node_token resolution works for upload actions
- Add SSRF protection to download_media: validate URL scheme (http/https
  only), block local/private hosts via existing url_validation module
- Guard empty credentials in mod.rs: skip FeishuDocTool registration
  when app_id or app_secret are empty/whitespace-only

(cherry picked from commit feb1d46f41)
2026-02-27 15:58:43 +00:00
xuhao
dcd712d825
feat(tools): add Feishu document operation tool with 13 actions
Add FeishuDocTool implementing the Tool trait with full Feishu/Lark
document API coverage: read, write, append, create, list_blocks,
get_block, update_block, delete_block, create_table, write_table_cells,
create_table_with_values, upload_image, and upload_file.

Key design decisions:
- Self-contained tenant_access_token auth with auto-refresh cache
- Feishu/Lark dual-domain support via use_feishu config flag
- Wiki node_token resolution for wiki-hosted documents
- Autonomy-level enforcement: read ops always allowed, write ops
  require Act permission
- Prompt-level behavioral rules in tool description for agent guidance
- Create verification with retry to prevent phantom document tokens

Gated behind existing channel-lark feature flag. Reads app_id and
app_secret from channels_config.feishu or channels_config.lark.

14/14 integration tests pass against live Feishu API.

(cherry picked from commit 6a228944ae)
2026-02-27 15:58:43 +00:00
argenis de la rosa
dcb3aaab91 fix(channels): avoid UTF-8 panic in approval prompt truncation 2026-02-27 10:50:46 -05:00
killf
2156763c6c chore: add .idea to .gitignore
Add JetBrains IDE configuration directory to gitignore.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 10:45:01 -05:00
argenis de la rosa
15457cc368 fix(agent): parse direct XML tool tags in web chat 2026-02-27 10:42:13 -05:00
argenis de la rosa
4631c344aa fix(ci): pin prerelease GNU build to ubuntu-22.04 2026-02-27 10:41:40 -05:00
reidliu41
df6f7455e7 feat(tools): add docx_read tool for DOCX text extraction
Summary

  - Problem: Agent cannot read DOCX files — file_read returns garbled binary/XML, making Word documents inaccessible to the
  agent
  - Why it matters: DOCX is the most common business document format; without native extraction, users must manually convert
  files, breaking autonomous workflows
  - What changed: Added docx_read tool using zip (existing) + quick-xml (new) to extract plain text from DOCX Office Open XML
  - What did not change: No changes to file_read, agent loop, security policy, config schema, or any existing tool behavior

  Label Snapshot (required)

  - Risk label: risk: low
  - Size label: size: S
  - Scope labels: tool
  - Module labels: tool: docx_read
  - If any auto-label is incorrect: N/A

  Change Metadata

  - Change type: feature
  - Primary scope: tool

  Linked Issue

  - Closes #(issue number)

  Validation Evidence (required)

  cargo fmt --all -- --check   # pass
  cargo clippy --all-targets -- -D warnings   # pass (zero new warnings)
  cargo test docx_read   # 14/14 passed

  - Evidence provided: test results, manual verification with zeroclaw agent -m against real DOCX file

  Security Impact (required)

  - New permissions/capabilities? No (mirrors existing pdf_read security model exactly)
  - New external network calls? No
  - Secrets/tokens handling changed? No
  - File system access scope changed? No

  Privacy and Data Hygiene (required)

  - Data-hygiene status: pass
  - Redaction/anonymization notes: Test fixtures use neutral content ("Hello DOCX", "First", "Second")
  - Neutral wording confirmation: Yes

  Compatibility / Migration

  - Backward compatible? Yes
  - Config/env changes? No
  - Migration needed? No

  i18n Follow-Through

  - i18n follow-through triggered? No (no docs or user-facing wording changes)

  Human Verification (required)

  - Verified scenarios: zeroclaw agent -m "read the file test-test.docx and output the content" — model selected docx_read,
  extracted text correctly
  - Edge cases checked: invalid ZIP, missing word/document.xml, symlink escape, path traversal, rate limiting, truncation
  - What was not verified: encrypted DOCX (out of scope), extremely large files (>50MB)

  Side Effects / Blast Radius (required)

  - Affected subsystems/workflows: Tool registry only — one new tool added
  - Potential unintended effects: None — additive only, no existing behavior changed
  - Guardrails/monitoring: Tool follows identical security chain as pdf_read

  Rollback Plan (required)

  - Fast rollback command/path: git revert <commit>
  - Feature flags or config toggles: None needed (always-on, like pdf_read)
  - Observable failure symptoms: docx_read tool missing from tool list

  Risks and Mitigations

  - Risk: quick-xml new dependency adds to compile time
    - Mitigation: quick-xml is lightweight pure Rust (~15K LOC), widely used (100M+ downloads), and will be shared when
  XLSX/PPTX tools are added later
2026-02-27 10:39:53 -05:00
argenis de la rosa
ffdf66b77e
fix(approvals): clear non-cli exclusions when approving tools
(cherry picked from commit a7557cd28d)
2026-02-27 15:27:26 +00:00
argenis de la rosa
c8980a047a feat(agent): add slash command tab completion and hints 2026-02-27 08:09:18 -05:00
argenis de la rosa
0683ca9416 fix(docker): support feature builds and include templates 2026-02-27 08:08:58 -05:00
argenis de la rosa
7307aab103 feat(tools): add Tavily provider and API-key round-robin 2026-02-27 06:37:57 -05:00
Philip Peitsch
276ff7bd42 feat(channels): add matrix integration for sovereign communication 2026-02-27 20:34:16 +11:00
argenis de la rosa
e2a08f2536 fix(lark): accept object-shaped websocket message content 2026-02-27 02:44:55 -05:00
Preventnetworkhacking
c1f255af96 perf(android): aggressive binary size optimization
Target: <10MB APK for arm64-v8a

Rust optimizations (Cargo.toml):
- opt-level = 'z' (size over speed)
- LTO enabled (dead code elimination)
- panic = 'abort' (no unwinding)
- strip = true (remove symbols)
- Minimal tokio/serde features

Android optimizations (build.gradle.kts):
- Split APKs by ABI (users get only their arch)
- Remove material-icons-extended (-5MB)
- Remove unused serialization (-300KB)
- Remove debug symbols in release

ProGuard (proguard-rules.pro):
- Strip Log.v/d/i in release
- Remove Kotlin null checks
- Aggressive repackaging
- 5 optimization passes

Added SIZE.md with optimization strategy.

Expected sizes:
- Native lib: <3MB per ABI
- APK (arm64): <10MB
- APK (universal): <20MB
2026-02-26 22:10:43 -08:00
Preventnetworkhacking
8bf7d042e5 chore: remove install scripts - APK is the product
Removed:
- scripts/android/termux-install.sh
- scripts/android/adb-install.sh
- site/android-install.html

The native APK is the install experience. No need for CLI complexity.
2026-02-26 22:04:26 -08:00
Preventnetworkhacking
8a1dea306e feat(android): Phase 4 - Widget, accessibility, one-liner installers
Phase 4 polish features:

Widget:
- ZeroClawWidget for home screen
- Shows agent status (running/stopped)
- Toggle button to start/stop
- Tap to open app
- Material 3 styling with rounded corners

Accessibility:
- AccessibilityUtils for TalkBack support
- Content descriptions for all UI elements
- Screen reader detection
- Live region announcements
- ContentDescriptions constants

Install Scripts:
- termux-install.sh - One-liner for Termux users
- adb-install.sh - Install from computer via USB
- android-install.html - Web installer page with:
  - Platform detection (Android vs desktop)
  - Direct APK download
  - QR code for desktop users
  - Step-by-step instructions
  - Copy-to-clipboard for commands

Files:
- widget/ZeroClawWidget.kt (128 lines)
- accessibility/AccessibilityUtils.kt (123 lines)
- res/layout/widget_zeroclaw.xml
- res/xml/widget_info.xml
- res/drawable/widget_*.xml
- scripts/android/*.sh
- site/android-install.html

Total: +799 lines across 12 files
2026-02-26 21:34:06 -08:00
Preventnetworkhacking
ec22b33de6 fix(android): correct CI action name and settings recomposition
- Fix dtolnay/rust-action -> dtolnay/rust-toolchain
- Remove unnecessary remember{} wrapper for battery check
2026-02-26 21:29:43 -08:00
Preventnetworkhacking
da899a3046 feat(android): Phase 3 - WorkManager, tiles, battery optimization
Complete Phase 3 integration features:

WorkManager:
- HeartbeatWorker for periodic agent checks
- Cron job scheduling support
- Respects Doze mode and battery optimization

Data Persistence:
- SettingsRepository with DataStore
- Encrypted API key storage (Android Keystore)
- Settings flow for reactive UI

Quick Settings:
- ZeroClawTileService for notification shade
- Toggle agent on/off
- Shows running status

Share Intent:
- ShareHandler parses incoming content
- Supports text, URLs, images
- Generates agent prompts

Battery Optimization:
- BatteryUtils for exemption requests
- Manufacturer-specific handling (Xiaomi, Huawei, etc.)
- Settings UI shows optimization status

Other:
- Updated BootReceiver with settings integration
- CI workflow for Android builds (ci-android.yml)
- Updated README with Phase 3 completion

Total: ~950 new lines across 11 files
2026-02-26 21:20:04 -08:00
Argenis
7f84b5508d Merge pull request #2037 from reidliu41/remove-duplicate-define 2026-02-27 05:14:49 +00:00
Argenis
79c4277506
Merge pull request #2040 from Preventnetworkhacking/feat/android-client
feat(android): Native Android client with UniFFI bridge
2026-02-27 00:13:45 -05:00
Preventnetworkhacking
943c464201 docs(android): update README with accurate phase status 2026-02-26 21:06:04 -08:00
Preventnetworkhacking
ed80ffb59a fix(android): cleanup and add missing resources
- Remove UDL file (using proc-macro UniFFI instead)
- Remove build.rs (not needed with setup_scaffolding!)
- Comment out zeroclaw dep temporarily (standalone build)
- Add placeholder launcher icon drawable
- Add notification icon drawable
- Fix service notification to use hardcoded strings

The UniFFI proc-macro approach is cleaner and doesn't require
separate UDL definitions - the Rust types are annotated directly.
2026-02-26 21:05:39 -08:00
Argenis
e356c42b4f fix(config): remove duplicate default_otp_enabled definition
Resolves compile error E0428 reported in #2038.
2026-02-27 05:02:45 +00:00
Argenis
93f052778f fix(memory): restore tokio-postgres-rustls dependency for postgres backend
- wire tokio-postgres-rustls as optional dependency\n- include it in memory-postgres feature set\n\nRefs: #2038
2026-02-27 05:02:16 +00:00
Preventnetworkhacking
dd94cac1bd feat(android): Phase 2 - UniFFI bridge and settings UI
- Add android-bridge crate with UniFFI bindings
- ZeroClawController interface for Kotlin
- AgentStatus, ChatMessage, ZeroClawConfig types
- Settings screen with provider/model selection
- API key storage via Android Keystore ready
- Gradle task for native lib build

Part of Android Phase 2 - Core Features
2026-02-26 21:00:48 -08:00
reidliu41
80293cf976 fix(config): remove duplicate default_otp_enabled function 2026-02-27 12:57:32 +08:00
Preventnetworkhacking
b2462585b7 feat(android): add Android client foundation
Initial Android client implementation for ZeroClaw:

Structure:
- clients/android/ - Self-contained Android project
- Kotlin/Jetpack Compose UI
- Material 3 with dynamic theming

Components:
- MainActivity with chat UI scaffold
- ZeroClawService (foreground service)
- ZeroClawBridge (JNI stub for Rust FFI)
- BootReceiver (auto-start on boot)
- Theme with ZeroClaw brand colors

Technical:
- Target SDK 34, Min SDK 26
- arm64-v8a, armeabi-v7a, x86_64 ABIs
- WorkManager ready for background tasks
- Android Keystore ready for secrets

TODO:
- UniFFI bindings generation
- Cargo NDK build integration
- Native library loading
- Config persistence (DataStore)

Part of Android ownership initiative - Phase 1
2026-02-26 20:52:43 -08:00
Argenis
c0e78ba2bd
Merge pull request #1980 from zeroclaw-labs/supersede-pr-1872-20260226154155-3787543-theirs
[supersede #1872] feat(discord): forward inbound image attachments as markers
2026-02-26 23:48:05 -05:00
Argenis
d1c44f71bc
Merge branch 'main' into supersede-pr-1872-20260226154155-3787543-theirs 2026-02-26 23:44:57 -05:00
argenis de la rosa
b63dfb8985 fix(config): resolve windows compile blockers (#2032) 2026-02-26 23:44:35 -05:00
argenis de la rosa
5ecea422c7 feat(security): enable otp by default in quick setup
(cherry picked from commit 9319ba2521)
2026-02-26 23:43:31 -05:00
Samy
860e8b2442 fix: address remaining CodeRabbit review items
- Fix convert_channels() return type: dict → tuple[dict, list[str]]
- Remove unnecessary f-string prefixes on plain string literals
- Add blank lines after markdown headings (MD022 compliance)
- Handle triple-quote edge case in TOML multiline string output
- Add handler-level validation tests: missing message rejection,
  empty messages detection, whitespace-only user message, and
  context extraction correctness

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 04:43:16 +00:00
sudomove
0aa2994423 fix: address PR review issues — auth guard, doc refs, TOML escaping
- Add non-loopback auth guard to /v1/chat/completions (matching /api/chat)
- Fix migration guide references to non-existent files (api_chat.rs,
  openai_compat_shim.rs, mod_patch.rs) — endpoints live in openclaw_compat.rs
- Remove phantom `provider` field from /api/chat response docs
- Add TOML string escaping to config converter to handle special chars
- Add proper JSON parse error handling in config converter
- Update deployment checklist and troubleshooting to match actual file layout

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 04:43:16 +00:00
Samy
dde8b82ea0 feat(gateway): add OpenClaw migration compat layer with /api/chat and tools-enabled /v1/chat/completions
Add a complete OpenClaw → ZeroClaw migration toolkit:

- POST /api/chat: ZeroClaw-native endpoint with full agent loop (tools, memory,
  context enrichment). Supports session_id scoping and context[] injection for
  conversation history. Same code path as Linq/WhatsApp/Nextcloud handlers.

- POST /v1/chat/completions: OpenAI-compatible shim that routes through
  run_gateway_chat_with_tools instead of the simple provider.chat_with_history
  path. Extracts last user message + up to 10 messages of conversation context
  from the messages[] array. Supports streaming (simulated SSE). Drop-in
  replacement for OpenClaw callers with zero code changes.

Both endpoints include full observability instrumentation (AgentStart, LlmRequest,
LlmResponse, RequestLatency, AgentEnd), auth (pairing + webhook secret), rate
limiting, auto-save to memory, and response sanitization.

Also adds:
- scripts/convert-openclaw-config.py: Converts openclaw.json → config.toml with
  provider mapping, channel detection, and migration notes
- docs/migration/openclaw-migration-guide.md: Full migration walkthrough with
  endpoint reference, config mapping, callsite examples, and deployment checklist

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 04:43:16 +00:00
Argenis
96de49d57b
Merge branch 'main' into supersede-pr-1872-20260226154155-3787543-theirs 2026-02-26 23:43:00 -05:00
argenis de la rosa
e3e4878ade fix(gateway): add explicit webhook usage and empty-message errors 2026-02-26 23:24:35 -05:00
argenis de la rosa
cec99ffacb docs(security): codify official anti-fraud channels statement 2026-02-26 22:57:46 -05:00
argenis de la rosa
bfe3e4295d feat(security): add opt-in perplexity adversarial suffix filter 2026-02-26 22:55:23 -05:00
argenis de la rosa
6e8b95d709 fix(slack): use lossless cast for retry jitter 2026-02-26 22:55:22 -05:00
argenis de la rosa
2f250bfbf7 fix(slack): retry history requests after rate limits 2026-02-26 22:55:22 -05:00
argenis de la rosa
90c82dc6b1 docs(structure): add function-oriented navigation map 2026-02-26 22:54:31 -05:00
argenis de la rosa
21e13c8ae5 fix(qq): add sandbox mode and passive msg id fallback 2026-02-26 22:53:06 -05:00
argenis de la rosa
bde9d45ead feat(cron): add lark and feishu delivery targets 2026-02-26 22:52:46 -05:00
argenis de la rosa
96d941f83a feat(discord): forward inbound image attachments as markers 2026-02-26 22:50:45 -05:00
argenis de la rosa
d1eccd4928 fix(approvals): clear non-cli exclusions when approving tools 2026-02-26 22:50:32 -05:00
argenis de la rosa
cd26886f15 fix(multimodal): optimize image markers for prompt budget 2026-02-26 22:49:21 -05:00
argenis de la rosa
34852919da feat(onboard): support identity backend selection and AIEOS scaffolding 2026-02-26 22:48:11 -05:00
argenis de la rosa
77c6aba24c feat(provider): add qwen-coding-plan endpoint alias 2026-02-26 22:40:07 -05:00
argenis de la rosa
a258741e2f feat(security): enable otp by default in quick setup 2026-02-26 22:23:23 -05:00
Argenis
7f3b7302b1 fix(config): resolve env credential reporting and safer compaction default
- report api_key_configured via provider credential resolution (env + overrides)\n- set agent.compact_context default to true for new configs\n- align docs and tests with the new default\n\nRefs: #1983\nRefs: #1984\nContext: #1358\n\nCo-authored-by: Argenis <144828210+theonlyhennygod@users.noreply.github.com>
2026-02-27 03:15:55 +00:00
Argenis
4fa8206332
Merge pull request #2013 from zeroclaw-labs/issue-1380-mcp-main
feat(mcp): add external MCP server support on main
2026-02-26 22:14:33 -05:00
argenis de la rosa
5f29e96187 fix(telegram): suppress unauthorized bind prompts for non-mentioned group messages 2026-02-26 22:13:56 -05:00
argenis de la rosa
6186b34903 refactor(mcp): use schema paths to avoid config re-export conflicts 2026-02-26 22:13:27 -05:00
argenis de la rosa
970ef57f21 feat(security): add aho-corasick and entropy leak heuristics 2026-02-26 22:10:31 -05:00
argenis de la rosa
8180e7dc82 feat(skills): add WASM skill engine with secure registry install 2026-02-26 22:09:24 -05:00
argenis de la rosa
d63a6a8ceb feat(security): unify URL validation with configurable CIDR/domain allowlist 2026-02-26 22:07:07 -05:00
argenis de la rosa
6ed7248d65 refactor(config): split mcp re-exports to avoid main merge conflict 2026-02-26 21:59:34 -05:00
argenis de la rosa
992ecd9aee fix(config): include plugin exports to keep mcp branch mergeable 2026-02-26 21:56:40 -05:00
argenis de la rosa
1fd0645fe3 fix(config): update plugin test config initializers 2026-02-26 21:49:10 -05:00
argenis de la rosa
b5292f54aa feat: plugin system
Implements a plugin system for ZeroClaw modeled after OpenClaw's architecture.

Key components:
- Plugin trait and PluginApi for registering tools/hooks
- Plugin manifest (zeroclaw.plugin.toml) for metadata
- Plugin discovery from bundled, global, and workspace directories
- PluginRegistry managing loaded plugins, tools, and hooks
- Error isolation via panic catching in register()
- Config integration via [plugins] section

Example plugin included in extensions/hello-world/.

Closes #1414

# Conflicts:
#	src/config/mod.rs
#	src/config/schema.rs
2026-02-26 21:49:10 -05:00
argenis de la rosa
48cce73f88 fix(onboard): resolve borrow after move error
The memory_config value is moved into Config at line 512, but was
borrowed at line 547. Use config.memory.backend instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 21:48:35 -05:00
argenis de la rosa
e7e513d7ec fix(onboard): tailor memory scaffolding by backend 2026-02-26 21:48:35 -05:00
argenis de la rosa
4f8c9d2066 feat(mcp): add external MCP server support on main 2026-02-26 21:43:54 -05:00
argenis de la rosa
4196fd32a4 fix(gateway): align webchat system prompt with tool protocol 2026-02-26 21:43:43 -05:00
argenis de la rosa
ffaf927690 fix(web): improve web access guidance and search failure diagnostics 2026-02-26 21:42:13 -05:00
argenis de la rosa
6ce47af3d6 fix(web): add mobile sidebar toggle and responsive layout offset 2026-02-26 21:32:50 -05:00
argenis de la rosa
6aa2164d16 fix(web): advertise browser automation tool in prompts 2026-02-26 21:30:32 -05:00
argenis de la rosa
a851d1bd2f feat(skills): add configurable script-file audit override 2026-02-26 21:23:27 -05:00
argenis de la rosa
fb3b7b8edf fix(security): apply LeakDetector in channel outbound sanitization 2026-02-26 21:18:54 -05:00
argenis de la rosa
a9bd880a4f fix(security): use expect_err in role cycle test 2026-02-26 21:11:41 -05:00
argenis de la rosa
7aee6d9dc7 feat(security): add role-policy and otp challenge foundations 2026-02-26 21:11:41 -05:00
argenis de la rosa
b27b44829a chore: promote dev snapshot to main (resolve #1978/#1970) 2026-02-26 21:09:33 -05:00
argenis de la rosa
b355956400 fix(model-routing): detect env-backed provider credentials
Compute api_key_configured through provider credential resolution so env-variable credentials are reported correctly for scenarios and delegate agents.

Closes #1983
2026-02-26 21:07:07 -05:00
argenis de la rosa
779b193de6 fix(config): default compact_context to true
Set AgentConfig compact_context default to true and align config defaults/tests/docs so daemon conversations recover from context pressure out of the box.

Closes #1984
2026-02-26 21:07:05 -05:00
argenis de la rosa
dedb59a4ef fix(agent): stop converting plain URLs into shell calls 2026-02-26 21:04:32 -05:00
argenis de la rosa
36d5d2f3f8 feat(skills): seed bundled zeroclaw skill on startup 2026-02-26 21:04:13 -05:00
argenis de la rosa
1e70c23c11 fix(bootstrap): initialize container arrays under set -u 2026-02-26 21:04:04 -05:00
Argenis
7bea2b89d4 fix(channels): resolve #1959 build break and #1960 mention-only noise
- restore Lark constructor wiring and platform-specific builders used by channel-lark builds\n- re-export syscall anomaly detector types from security module\n- suppress unauthorized Telegram prompts for unmentioned group messages when mention_only=true (still allow /bind)\n\nRefs: #1959\nRefs: #1960
2026-02-27 01:50:34 +00:00
Crossing-2d23
cce80971a3
fix(build): remove duplicate ModelProviderConfig and fix App.tsx destructure
Two build errors on release/v0.1.8:

1. `src/config/schema.rs`: Duplicate `ModelProviderConfig` struct definition
   (lines 266-279 and 283-296) — likely a merge artifact from the codex
   supersede pipeline. Removed the second identical copy.

2. `web/src/App.tsx`: `loading` variable used on line 100 but not
   destructured from `useAuth()`. Added `loading` to the destructure
   on line 83.

Both prevent `cargo build` and `npm run build` respectively.

Signed-off-by: Crossing-2d23 <crossing-2d23@smoothcurves.nexus>
On-behalf-of: Lupo <lupo@smoothcurves.nexus>
2026-02-26 17:02:10 +00:00
Chummy
c54a30f68c supersede: file-replay changes from #1897
Automated conflict recovery via changed-file replay on latest main.
2026-02-26 16:37:17 +00:00
argenis de la rosa
e92a976226
feat(discord): forward inbound image attachments as markers
(cherry picked from commit a37cebd33a)
2026-02-26 15:41:58 +00:00
argenis de la rosa
a9e592828e fix(discord): infer text attachments when content type is missing 2026-02-26 23:09:14 +08:00
Yihen
26b2a70069
docs: add quick-reference, workspace, feature flags, and architecture overview to CLAUDE.md (#1940)
Credit: @theonlyhennygod for coordinating low-risk merge flow.
2026-02-26 09:54:47 -05:00
Reid
f836291200
fix(gateway): acknowledge WebSocket subprotocol to unblock agent chat (#1954) 2026-02-26 09:25:35 -05:00
Argenis
3ac53297ca
Merge pull request #1936 from zeroclaw-labs/release/1927-toolcall-dashboard
fix(web/gateway): backport empty tool-call response fix to main
2026-02-26 09:07:46 -05:00
argenis de la rosa
7b20697d93 fix(security): harden env passthrough validation coverage 2026-02-26 20:34:46 +08:00
argenis de la rosa
eeaa010f1a fix(security): allow configured shell env expansions 2026-02-26 20:34:46 +08:00
Chummy
6d4291d958 ci: pin ci-run jobs to linux x64 self-hosted runners 2026-02-26 20:31:54 +08:00
Chummy
26ae6f71a1 ci: harden change-scope checkout for old git runners 2026-02-26 20:31:54 +08:00
Chummy
3afbad1e89 ci: stabilize main checks for docs and e2e scope 2026-02-26 20:31:54 +08:00
Chummy
7bf3efcf88 ci(build-fast): run fast build only for rust-surface changes 2026-02-26 20:14:36 +08:00
Chummy
bf81986abc docs(policy): align PR routing and merge flow to main 2026-02-26 20:14:36 +08:00
Chummy
b081077b9a ci(sec-audit): scope heavy checks to rust-surface changes 2026-02-26 20:14:36 +08:00
Chummy
c9201ccfd0 ci(runners): support root self-hosted apt installs 2026-02-26 20:14:36 +08:00
Chummy
0c8529fc47 fix(ci): regenerate Cargo.lock for locked security tests 2026-02-26 20:14:36 +08:00
Chummy
0f9c86eb6d ci: pin rust toolchain before cargo-audit action 2026-02-26 20:14:36 +08:00
Chummy
64d317ac2a fix(ci): restore crate unsafe guard in main crate 2026-02-26 20:14:36 +08:00
Chummy
bbe0e8a18c ci(runners): use self-hosted labels and python3 sanity check 2026-02-26 20:14:36 +08:00
Chummy
a570df7f34 ci(policy): drop dev-retarget enforcement for main PRs 2026-02-26 20:14:36 +08:00
Chummy
102af29f97 ci(intake): remove Linear dependency from public PR flow 2026-02-26 20:14:36 +08:00
Chummy
51bf76448f ci(intake): make Linear key check advisory only 2026-02-26 20:14:36 +08:00
Argenis
f220973192 fix(web/gateway): prevent empty dashboard replies after tool calls (#1930)
* fix(gateway): prevent empty websocket tool-call responses

* fix(web): render fallback for empty done messages
2026-02-26 04:51:05 -05:00
Tom Cr00se
8b097ec965
fix: enable native-tls feature for reqwest to respect system certificates (#1921)
Fixes #1794

Adds native-tls feature to reqwest dependency, allowing ZeroClaw to use
certificates trusted by the system's native TLS implementation. This
enables proper certificate validation for users with custom CA roots
or corporate PKI infrastructure.

Co-authored-by: Preventnetworkhacking <preventnetworkhacking@users.noreply.github.com>
2026-02-26 04:45:32 -05:00
Chum Yin
9b0e70b2f2
supersede: file-replay changes from #1895 (#1926)
Automated conflict recovery via changed-file replay on latest main.
2026-02-26 04:15:47 -05:00
Chummy
d9b3d6f3e5 feat(site): deepen docs IA with pathways and taxonomy 2026-02-26 15:22:59 +08:00
Chummy
9fbab15222 feat(site): ship full-docs reader with generated manifest 2026-02-26 15:01:14 +08:00
Chummy
829dd7af77 feat(site): shift docs UI to vercel-style engineering language 2026-02-26 14:35:12 +08:00
Chummy
e07c4d29cd feat(site): redesign docs hub with in-page markdown reader 2026-02-26 14:28:15 +08:00
Chummy
b0c86b47a4 fix(site): simplify page title to ZeroClaw 2026-02-26 14:18:58 +08:00
Chummy
44bcb4cd6b feat(site): launch responsive docs hub and pages deploy 2026-02-26 14:08:37 +08:00
Chummy
07848ac1c4 ci: install native build tools for self-hosted codeql build 2026-02-26 13:44:48 +08:00
Chummy
caae81b3c9 ci: remove blacksmith runner/actions and use self-hosted labels 2026-02-26 13:33:15 +08:00
Chummy
63fcd7dd54 feat(telegram): support custom Bot API base_url
(cherry picked from commit 3ea7b6a996)
2026-02-26 12:59:23 +08:00
Chummy
ec16c387b5 fix(update): simplify version check branch for clippy
(cherry picked from commit 1e2d203535)
2026-02-26 12:59:23 +08:00
Chummy
3c3e4d3d9e style(update): format self-update command implementation
(cherry picked from commit 12c007f895)
2026-02-26 12:59:23 +08:00
argenis de la rosa
390373dbcb feat(cli): add self-update command
Implements self-update functionality that downloads the latest release
from GitHub and replaces the current binary.

Features:
- `zeroclaw update` - downloads and installs latest version
- `zeroclaw update --check` - checks for updates without installing
- `zeroclaw update --force` - forces update even if already latest
- Cross-platform support (Linux, macOS, Windows)
- Atomic binary replacement on Unix, rename+copy on Windows
- Platform-specific archive handling (.tar.gz on Unix, .zip on Windows)

Closes #1352

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit c4ba69b6bf)
2026-02-26 12:59:23 +08:00
Chummy
a38d2c5676 test(telegram): satisfy strict-delta lint in mention-only cases
(cherry picked from commit ddaab9250a)
2026-02-26 12:59:23 +08:00
argenis de la rosa
7ca24775ac fix(channels/telegram): respect mention_only for non-text messages in groups
When mention_only=true is set, the bot should not respond to non-text
messages (photos, documents, videos, stickers, voice) in group chats
unless the caption contains a bot mention.

Changes:
- Add mention_only check in try_parse_attachment_message() for group messages
  - Check if caption contains bot mention before processing
  - Skip attachment if no caption or no mention
- Add mention_only check in try_parse_voice_message() for group messages
  - Voice messages cannot contain mentions, so always skip in groups
- Add unit tests for the new behavior

Fixes #1662

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit 419376b1f1)
2026-02-26 12:59:23 +08:00
Chummy
dfc8c239ef fix(apply-patch): avoid format_push_string on logs
(cherry picked from commit 873ebce6b3)
2026-02-26 12:59:23 +08:00
Chummy
13d53f388e style(tools): rustfmt apply_patch implementation
(cherry picked from commit 17a3a4a3b0)
2026-02-26 12:59:23 +08:00
hopesojourner
fbb3c6aee0 feat(tools): add apply_patch tool and update tests
(cherry picked from commit 8594ad98ae)
2026-02-26 12:59:23 +08:00
hopesojourner
ff64980599 fix(agent): parse tool-call tag variants in XML dispatcher
(cherry picked from commit b7c0a6d6b2)
2026-02-26 12:59:23 +08:00
Chummy
c993ee80c7
docs: publish pages with html index landing 2026-02-26 04:00:04 +00:00
Chummy
d3af83db63
fix: align merged code with dev to restore build 2026-02-26 03:47:01 +00:00
Chummy
e0f6f24a5e
merge: promote dev into main (dev-first conflict resolution) 2026-02-26 03:41:59 +00:00
Chummy
b25e4018ed
ci: gate release docker publish with env approval 2026-02-26 03:38:04 +00:00
Chummy
d41ddf74ea
ci: require release env approval for publish 2026-02-26 03:35:05 +00:00
Argenis
83dfb38fe5
Merge pull request #1860 from zeroclaw-labs/issue-1836-session-context-iteration
fix(agent): improve iteration-limit recovery and continuity
2026-02-25 22:08:18 -05:00
Argenis
8d9222ebd8
Merge pull request #1859 from zeroclaw-labs/issue-1845-linq-v3-webhook
fix(linq): support current v3 webhook payload format
2026-02-25 22:08:17 -05:00
Argenis
c27fd2c6b3
Merge pull request #1858 from zeroclaw-labs/issue-1854-glibc-baseline
fix(release): restore GNU Linux GLIBC compatibility baseline
2026-02-25 22:08:08 -05:00
ZeroClaw Runner
4815d06bf9 Revert test runner change 2026-02-26 01:23:55 +00:00
ZeroClaw Runner
f4420202c3 test: use self-hosted runner for e2e 2026-02-26 01:22:59 +00:00
ZeroClaw Runner
a567c7cc8d Add test workflow for self-hosted runner 2026-02-26 01:16:42 +00:00
argenis de la rosa
e071a9722d fix(release): pin GNU Linux builds to ubuntu-22.04 2026-02-25 17:51:11 -05:00
xuhao
e846604a13 fix(tools): address remaining fourth-round review findings for feishu_doc
Addresses 4 findings from CodeRabbit's fourth review that were not
covered by the maintainer's commit 7ef075e:

1. [Major] http_client() per-call allocation: cache reqwest::Client in
   FeishuDocTool struct field, return &reqwest::Client. Enables
   connection pooling across all API calls.

2. [Major] SSRF bypass via HTTP redirects: download_media now uses a
   no-redirect reqwest client (Policy::none()) to prevent attackers
   from using a public URL that 301/302-redirects to internal IPs.

3. [Minor] Missing empty-conversion guard in action_upload_image:
   added converted.is_empty() check consistent with all other
   convert_markdown_blocks callers.

4. [Minor] Schema description for link_share stale: updated from
   'default: true' to 'default: false' to match actual behavior.

Validation:
- cargo check --features channel-lark 
- cargo clippy -p zeroclaw --lib --features channel-lark -- -D warnings 
- cargo test --features channel-lark -- feishu_doc  (7/7 tests pass)
2026-02-26 06:40:56 +08:00
argenis de la rosa
1e8c09d34a fix(agent): improve iteration-limit recovery and defaults 2026-02-25 17:33:32 -05:00
argenis de la rosa
ae0159bad6 fix(linq): support current v3 webhook payload shape 2026-02-25 17:25:08 -05:00
argenis de la rosa
7ef075e6c9 fix(tools): address final feishu_doc review blockers 2026-02-25 17:06:27 -05:00
xuhao
e9352b793e fix(tools): address third-round code review findings for feishu_doc
Addresses all 5 findings from CodeRabbit's third review on PR #1853:

1. [Minor] action_append: add empty-conversion guard to prevent silent
   no-op (blocks_appended: 0). Consistent with action_write/update_block.

2. [Major] link_share default: change from opt-out (true) to opt-in
   (false). Documents no longer become publicly link-readable by default.
   Follows 'never silently broaden permissions' guideline.

3. [Minor] optional_usize: strict validation. Now returns Result and
   rejects invalid/negative/non-integer values with clear error instead
   of silently converting to None.

4. [Major] Media size bound: add MAX_MEDIA_BYTES (25 MiB) limit for
   both remote downloads (content-length pre-check + post-download
   check) and local file reads (metadata size check). Prevents memory
   exhaustion from oversized uploads.

5. [Major] Malformed JSON handling: parse_json_or_empty now returns
   Result (propagates parse errors instead of swallowing to {}).
   ensure_api_success now requires 'code' field presence instead of
   defaulting missing code to 0 (success). Prevents misclassifying
   malformed 2xx responses as success.

Validation:
- cargo check --features channel-lark 
- cargo clippy -p zeroclaw --lib --features channel-lark -- -D warnings 
- cargo test --features channel-lark -- feishu_doc  (7/7 tests pass)
2026-02-26 05:58:16 +08:00
xuhao
762e6082ec fix(tools): address second-round code review findings for feishu_doc
Addresses all 5 findings from CodeRabbit's second review on PR #1853:

1. [Major] list_all_blocks: add MAX_PAGES (200) hard cap to prevent
   unbounded pagination loops on misbehaving APIs or huge documents.

2. [Major] Empty conversion guard: action_write, action_update_block,
   and write_single_cell now bail with explicit error when
   convert_markdown_blocks returns empty results, preventing silent
   data loss (delete-then-write-nothing scenario).

3. [Minor] action_create: grant_owner_permission failure is now a soft
   warning instead of hard error. Document is already created and
   verified; permission failure is reported in the response JSON
   'warning' field instead of propagating as an error.

4. [Nitpick] extract_ttl_seconds: remove unreachable as_i64 fallback
   branch (as_u64 already covers all non-negative integers).

5. [Nitpick] Add unit tests: test_extract_ttl_seconds_defaults_and_clamps
   and test_write_rejects_empty_conversion.

Validation:
- cargo check --features channel-lark 
- cargo clippy -p zeroclaw --lib --features channel-lark -- -D warnings 
- cargo test --features channel-lark -- feishu_doc  (7/7 tests pass)
2026-02-26 05:19:55 +08:00
xuhao
feb1d46f41 fix(tools): address code review findings for feishu_doc
- Reorder convert-before-delete in action_write, action_update_block,
  and write_single_cell to prevent data loss if markdown conversion fails
- Separate create POST from verification retry loop in action_create
  to prevent duplicate document creation on retry
- Add resolve_doc_token to upload_image and upload_file so wiki
  node_token resolution works for upload actions
- Add SSRF protection to download_media: validate URL scheme (http/https
  only), block local/private hosts via existing url_validation module
- Guard empty credentials in mod.rs: skip FeishuDocTool registration
  when app_id or app_secret are empty/whitespace-only
2026-02-26 05:04:44 +08:00
xuhao
6a228944ae feat(tools): add Feishu document operation tool with 13 actions
Add FeishuDocTool implementing the Tool trait with full Feishu/Lark
document API coverage: read, write, append, create, list_blocks,
get_block, update_block, delete_block, create_table, write_table_cells,
create_table_with_values, upload_image, and upload_file.

Key design decisions:
- Self-contained tenant_access_token auth with auto-refresh cache
- Feishu/Lark dual-domain support via use_feishu config flag
- Wiki node_token resolution for wiki-hosted documents
- Autonomy-level enforcement: read ops always allowed, write ops
  require Act permission
- Prompt-level behavioral rules in tool description for agent guidance
- Create verification with retry to prevent phantom document tokens

Gated behind existing channel-lark feature flag. Reads app_id and
app_secret from channels_config.feishu or channels_config.lark.

14/14 integration tests pass against live Feishu API.
2026-02-26 04:27:21 +08:00
Chummy
8888dc6bc5 fix(codeql): avoid logging raw matrix error payloads 2026-02-26 02:19:14 +08:00
Chummy
f0774d75f7 fix(ci): align feishu gateway test fixtures with schema defaults 2026-02-26 02:19:14 +08:00
Chummy
2958ff417f fix(codeql): sanitize matrix error logs and clear note alert 2026-02-26 02:19:14 +08:00
Chummy
134850733d fix(tests): align channel runtime context mutex types 2026-02-26 02:19:14 +08:00
Chummy
410ece8458 fix(ci): resolve strict-delta clippy regressions 2026-02-26 02:19:14 +08:00
Chummy
1ad2d71c9b feat(approval): add one-time all-tools non-cli approval flow 2026-02-26 02:19:14 +08:00
Chummy
fd86e67d67 fix: restore config reexports after dev rebase 2026-02-26 02:19:14 +08:00
Chummy
d8a1d1d14c fix: reconcile non-cli approval governance with current dev APIs 2026-02-26 02:19:14 +08:00
Chummy
1fcf2df28b feat: harden non-CLI approval governance and runtime policy sync 2026-02-26 02:19:14 +08:00
Chummy
5ac885de7b fix(subagent): avoid lossy signed-to-unsigned cast 2026-02-26 02:14:20 +08:00
dave
c90853ba99 fix: address CodeRabbit review — race condition, UTF-8 safety, cast
Fixes all 4 issues from CodeRabbit review:

1. Race condition in spawn: replaced separate running_count() check +
   insert() with atomic try_insert(session, max) that holds the write
   lock for both the count check and insertion.

2. UTF-8 byte slice panic in subagent_manage: output truncation now
   uses char_indices().nth(500) to find a safe byte boundary.

3. UTF-8 byte slice panic in truncate_task: now uses chars().count()
   for length check and char_indices().nth() for safe slicing.
   Added truncate_task_multibyte_safe test with emoji input.

4. cast_unsigned() replaced with 'as u64' — standard Rust cast for
   duration milliseconds.

Test count: 57 (56 + 1 new multibyte safety test).
2026-02-26 02:14:20 +08:00
dave
90289ccc91 docs: add module-level and item-level docstrings for subagent tools
Improve docstring coverage to meet the 80% threshold required
by CI. Adds //! module docs and /// item docs to all public
types and functions in the subagent tool modules.
2026-02-26 02:14:20 +08:00
dave
067eb8a188 feat(tools): add sub-agent orchestration (spawn, list, manage)
Add background sub-agent orchestration tools that extend the existing
delegate tool with async execution, session tracking, and lifecycle
management.

New tools:
- subagent_spawn: Spawn delegate agents in background via tokio::spawn,
  returns session_id immediately. Respects security policy, depth limits,
  rate limits, and configurable concurrent session cap.
- subagent_list: List running/completed/failed/killed sessions with
  status filtering. Read-only, allowed in all autonomy modes.
- subagent_manage: Kill running sessions via CancellationToken or
  query status with partial output. Enforces Act policy for kill.

Shared state:
- SubAgentRegistry: Thread-safe session store using
  Arc<parking_lot::RwLock<HashMap>> with lazy cleanup of sessions
  older than 1 hour. Tracks session metadata, status, timing, and
  results.

Test coverage: 56 tests across all 4 modules covering happy paths,
error handling, security enforcement, concurrency, parameter
validation, and edge cases.

No new dependencies added. No existing tests broken.
2026-02-26 02:14:20 +08:00
Chummy
f47af0a850 style(cron): apply rustfmt for scheduler tests 2026-02-26 02:01:43 +08:00
Jeff Lee
66ee2eb17e test(security): stabilize prompt guard and scheduler assertions 2026-02-26 02:01:43 +08:00
Jeff Lee
56d4b7c25e fix(integrations): resolve CodeRabbit concurrency and provider-alias findings 2026-02-26 02:01:43 +08:00
Jeff Lee
03bf3f105d feat(integrations): enhance integrations settings UX and provider metadata
Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
2026-02-26 02:01:43 +08:00
Chummy
c6b9469b10 fix(goals): use schema GoalLoopConfig path in tests 2026-02-26 01:50:24 +08:00
Chummy
ac036a3525 style(goals): apply rustfmt for lint gate 2026-02-26 01:50:24 +08:00
Allen Huang
6064890415 feat: goals engine, heartbeat delivery, daemon improvements, and cron consolidation
- goals: add autonomous goal loop engine for long-term goal execution
- goals: add goal-level reflection for stalled goals
- goals: make GoalStatus and StepStatus deserialization self-healing
- goals: remove initiative planning from Rust, use cron job instead
- daemon: add PID lock and goal-loop supervisor
- daemon: add per-task failure tracking and auto-disable for heartbeat
- daemon: deliver heartbeat results to configured channels
- cron: add nightly consolidation cron job
- cron: set delete_after_run for one-shot shell jobs
- cron: add session_source to agent prompt building
- service: forward provider env vars into generated service files
- agent: add reflection flywheel — cron context injection, tool audit, nightly consolidation
- agent: make state reconciliation opt-in per call site

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-26 01:50:24 +08:00
Chummy
4eddc70ae4 fix(test): align draft update mock return type with Channel trait 2026-02-26 01:39:47 +08:00
Chummy
21696e1956 fix(lark): add new draft config fields in tests 2026-02-26 01:21:32 +08:00
Chummy
4e9752f5da fix(channels): align draft update signatures with lark config defaults 2026-02-26 01:21:32 +08:00
Allen Huang
cc8aac5918 feat: channel improvements (Lark rich-text, WhatsApp QR, draft config)
- lark: convert send to rich-text post format with markdown parsing
- lark: add draft edit throttling and shell polling guidance
- lark: auto-detect receive_id_type from recipient prefix
- lark: deliver heartbeat as interactive card
- lark: use valid Feishu API emoji_type keys for ack reactions
- lark: handle flat post format from WS and add diagnostic logging
- lark: replace unsupported code_inline tag and strip leaked tool blocks
- lark: gate LarkChannel behind channel-lark feature flag
- whatsapp: render WhatsApp Web pairing QR in terminal
- channels: update_draft returns Option<String> for new draft IDs
- config: add draft_update_interval_ms and max_draft_edits to Lark/FeishuConfig

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-26 01:21:32 +08:00
Chummy
16961bab84 feat(channels): hide internal tool progress unless explicitly requested 2026-02-26 01:00:06 +08:00
Chummy
42f280abf4 fix(ci): satisfy strict-delta clippy manual_string_new 2026-02-26 00:05:32 +08:00
Chummy
a9e8526d67 feat(channels): add unified group-reply policy and sender overrides 2026-02-26 00:05:32 +08:00
Chummy
11b9fe759f style(ci): apply rustfmt for lint-gate compatibility 2026-02-25 23:43:42 +08:00
Chummy
de6f572051 fix(ci): align onboard + web search tests with current APIs 2026-02-25 23:43:42 +08:00
Chummy
1410ca0be5 fix(onboard): restore missing web tool helper functions 2026-02-25 23:43:42 +08:00
Ricardo Magaña
da62bd172f feat(tools): add user_agent config and setup_web_tools wizard step
Ports remaining changes from feat/unify-web-fetch-providers that were
not yet integrated into dev:

- config/schema.rs: add `user_agent` field (default "ZeroClaw/1.0") to
  HttpRequestConfig, WebFetchConfig, and WebSearchConfig, with a shared
  default_user_agent() helper. Field is serde-default so existing configs
  remain backward compatible.

- tools/http_request.rs: accept user_agent in constructor; pass it to
  reqwest::Client via .user_agent() replacing the implicit default.

- tools/web_fetch.rs: accept user_agent in constructor; replace hardcoded
  "ZeroClaw/0.1 (web_fetch)" in build_http_client with the configured value.

- tools/web_search_tool.rs: accept user_agent in constructor; replace
  hardcoded Chrome UA string in search_duckduckgo and add .user_agent()
  to the Brave and Firecrawl client builders.

- tools/mod.rs: wire user_agent from each config struct into the
  corresponding tool constructor (HttpRequestTool, WebFetchTool,
  WebSearchTool).

- onboard/wizard.rs: add setup_web_tools() as wizard Step 6 "Web &
  Internet Tools" (total steps bumped from 9 to 10). Configures
  WebSearchConfig, WebFetchConfig, and HttpRequestConfig interactively
  with provider selection and optional API key/URL prompts. Step 5
  setup_tool_mode() http_request and web_search outputs are now discarded
  (_, _) since step 6 owns that configuration. Uses dev's generic
  api_key/api_url schema fields unchanged.

Co-authored-by: Cursor <cursoragent@cursor.com>
(cherry picked from commit fb83da8db021903cf5844852bdb67b9b259941d7)
2026-02-25 23:43:42 +08:00
Chummy
584af05020 fix(coordination): satisfy strict-delta clippy gates 2026-02-25 23:16:27 +08:00
Chummy
938d900106 fix(build): include coordination module in binary crate 2026-02-25 23:16:27 +08:00
Chummy
c692ff98c1 fix(coordination): harden delegate key parser and overflow correlation consistency 2026-02-25 23:16:27 +08:00
Chummy
82bc66bc9b fix(coordination): enforce delegate context correlation invariants
- normalize correlation matching in inbox filtered peek path\n- reject delegate context patches with invalid key shape\n- require correlation_id for delegate context patches\n- reject delegate context patches when key correlation != envelope correlation\n- expose delegate context count fields in status output for clearer semantics\n- add regression coverage for new validation and normalized correlation behavior
2026-02-25 23:16:27 +08:00
Chummy
856afe8780 feat(coordination): deep-complete agent coordination message bus
- add typed coordination protocol envelopes/payload validation and deterministic in-memory bus\n- integrate delegate runtime lifecycle tracing with shared coordination bus\n- add delegate_coordination_status read-only observability tool\n- add config/onboarding wiring and coordination enable/limits controls\n- harden retention/memory bounds with inbox/dead-letter/context/dedupe caps\n- add runtime metrics and pagination/offset metadata for status inspection\n- add correlation-scoped fast-path indexes for context/dead-letter/inbox queries\n- expand unit/integration tests for ordering, idempotency, conflict handling, paging, and filters
2026-02-25 23:16:27 +08:00
Chummy
c52603305c docs(ci): align nightly governance docs with active matrix profile 2026-02-25 23:01:49 +08:00
Chummy
c53e023b81 feat(ci): add nightly profile retries and trend snapshot evidence 2026-02-25 23:01:49 +08:00
Chummy
3d86fde6f2 fix(ci): allow wasm security bool config lint 2026-02-25 22:49:57 +08:00
Chummy
163f2fb524 feat(wasm): harden module integrity and symlink policy 2026-02-25 22:49:57 +08:00
Mike-7777777
0b172c4554 docs(config): add [agents_ipc] section to config-reference
Document the agents_ipc configuration section (enabled, db_path,
staleness_secs) and the five IPC tools registered when enabled.
Closes the documentation gap from PR #1668 (agents IPC feature).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 22:36:34 +08:00
Chummy
9769822dc8 docs(ci): harden matrix/nightly gate mapping and escalation runbooks 2026-02-25 22:29:26 +08:00
Chummy
d9a81409fb feat(ci): formalize canary cohorts and observability policy 2026-02-25 22:29:26 +08:00
Rui Chen
7d07e46798 ci: remove Homebrew core publishing flow
Remove the manual Homebrew-core publishing workflow and related docs references.

Signed-off-by: Rui Chen <rui@chenrui.dev>
(cherry picked from commit bc8b721b7e)
2026-02-25 22:28:23 +08:00
reidliu41
47ad3d010b feat(integrations): add list and search subcommands 2026-02-25 22:06:10 +08:00
Chummy
17c606205b docs(ci): document docs deploy promotion and rollback policy 2026-02-25 21:55:13 +08:00
Chummy
b1a9fbe894 test(ci): cover docs deploy guard policy behavior 2026-02-25 21:55:13 +08:00
Chummy
4e7c3dcc13 feat(ci): enforce docs deploy promotion and rollback contract 2026-02-25 21:55:13 +08:00
Chummy
cbbce330bb fix(ci): remove wasmi advisory and lint regression 2026-02-25 21:46:05 +08:00
Chummy
604f64f3e7 feat(runtime): add configurable wasm security runtime and tooling 2026-02-25 21:46:05 +08:00
Chummy
e3c9bd9189 docs(i18n): consolidate localized readmes under docs/i18n 2026-02-25 21:37:51 +08:00
Chummy
53829623fa docs(release): document GHCR vulnerability gate policy 2026-02-25 21:35:57 +08:00
Chummy
7bfd17e69d test(ci): cover GHCR vulnerability gate guard behavior 2026-02-25 21:35:57 +08:00
Chummy
7849d10a69 feat(ci): add GHCR vulnerability gate policy and audit traceability 2026-02-25 21:35:57 +08:00
Chummy
1189ff59b8 docs(security): standardize private vuln workflow and SLA templates 2026-02-25 21:32:32 +08:00
Chummy
fe48240e41 fix(ci): satisfy actionlint output redirect guard 2026-02-25 21:10:19 +08:00
Chummy
84e3e02e0a docs(release): document GHCR tag immutability contract 2026-02-25 21:10:19 +08:00
Chummy
b1327ec3f1 test(ci): cover GHCR publish contract guard behavior 2026-02-25 21:10:19 +08:00
Chummy
e5d5a49857 feat(ci): enforce GHCR publish tag contract and rollback mapping 2026-02-25 21:10:19 +08:00
Chummy
efdd40787c feat(config): add deprecated runtime reasoning_level compatibility alias 2026-02-25 21:00:59 +08:00
Chummy
cfe1e578bf feat(security): add and harden syscall anomaly detection 2026-02-25 20:43:38 +08:00
Chummy
268b01fcf0 hardening(security): sanitize upstream error bodies across channels 2026-02-25 20:41:51 +08:00
Chummy
0134a11697 docs(release): map release-notes supply-chain flow 2026-02-25 20:38:51 +08:00
Chummy
a28b213334 test(ci): cover release notes supply-chain references 2026-02-25 20:38:51 +08:00
Chummy
fcc3d0e93a feat(release): automate supply-chain release notes preface 2026-02-25 20:38:51 +08:00
Chummy
076444ce50 docs(release): document artifact contract guard flow 2026-02-25 20:16:35 +08:00
Chummy
49b4efc6c4 test(ci): cover release artifact guard contract checks 2026-02-25 20:16:35 +08:00
Chummy
629253f63e feat(release): enforce artifact contract guard 2026-02-25 20:16:35 +08:00
Chummy
495d7717c7 hardening(logging): sanitize channel API error bodies 2026-02-25 19:59:31 +08:00
Chummy
b50e66731a docs(ci): document release trigger guardrails 2026-02-25 19:54:17 +08:00
Chummy
7de007dbf9 test(ci): cover release trigger guard paths 2026-02-25 19:54:17 +08:00
Chummy
5e91f074a8 feat(ci): add release trigger authorization guard 2026-02-25 19:54:17 +08:00
Chummy
1f257d7bf8 Sanitize WebSocket chat done responses to prevent tool artifact leaks 2026-02-25 19:54:09 +08:00
Chummy
3b6786d0d7 Fix tool-call artifact leaks across channel and gateway replies 2026-02-25 19:54:09 +08:00
Sir Wesley
38585a8e00 docs(channels): improve Lark config placeholder values
Replace vague placeholders with descriptive ones:
- cli_xxx → your_lark_app_id
- xxx → your_lark_app_secret

Makes it clearer what values users need to substitute.
2026-02-25 19:40:42 +08:00
Chummy
006a4db7a0 fix(ci): satisfy actionlint output redirection rule 2026-02-25 19:30:11 +08:00
Chummy
9e7f3cbe81 docs(ci): document stage matrix and history audit outputs 2026-02-25 19:30:11 +08:00
Chummy
c468fea7db test(ci): expand prerelease guard transition coverage 2026-02-25 19:30:11 +08:00
Chummy
c2fd20cf25 feat(ci): harden prerelease stage matrix and transition audit 2026-02-25 19:30:11 +08:00
Chummy
667c7a4c2f hardening(deps): govern matrix indexeddb derivative advisory 2026-02-25 19:23:53 +08:00
donghao
26d2de7db5 chore: add Asia/Shanghai to wizard timezone setup 2026-02-25 19:16:55 +08:00
Chummy
14f3c2678f hardening: eliminate cleartext secret logging paths flagged by codeql 2026-02-25 18:58:48 +08:00
Chummy
bf48bd9cec fix(ci): correct CodeRabbit config schema for reviews.poem 2026-02-25 18:42:49 +08:00
Chummy
d579fb9c3c feat(ci): bridge canary abort to rollback guard dispatch 2026-02-25 18:39:11 +08:00
Chummy
976e50a1cb ci: add security regression gate and focused test suite 2026-02-25 18:33:28 +08:00
Chummy
346f58a6a1 hardening: strengthen tool policy enforcement and sandbox defaults 2026-02-25 18:33:28 +08:00
Chummy
d5cd65bc4f hardening: tighten gateway auth and secret lifecycle handling 2026-02-25 18:33:28 +08:00
Chummy
2ecfa0d269 hardening: enforce channel tool boundaries and websocket auth 2026-02-25 18:33:28 +08:00
Chummy
1941906169 style(channels): apply rustfmt for query classification routing 2026-02-25 18:07:37 +08:00
argenis de la rosa
883f92409e feat(channels): add query classification routing with logging for channels
Add query classification support to channel message processing (Telegram,
Discord, Slack, etc.). When query_classification is enabled with model_routes,
each incoming message is now classified and routed to the appropriate model
with an INFO-level log line.

Changes:
- Add query_classification and model_routes fields to ChannelRuntimeContext
- Add classify_message_route function that logs classification decisions
- Update process_channel_message to try classification before default routing
- Initialize new fields in channel runtime context
- Update all test contexts with new fields

The logging matches the existing agent.rs implementation:
- target: "query_classification"
- fields: hint, model, rule_priority, message_length
- level: INFO

Closes #1367

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 18:07:37 +08:00
Chummy
6fdeea84f7
fix(peripherals): import Peripheral trait for all-features build 2026-02-25 09:57:35 +00:00
Chummy
343bfc02cb fix(ci): satisfy actionlint for feature-matrix lane exit handling 2026-02-25 17:51:04 +08:00
Chummy
701f293785 test(runtime): fix postgres and browser test compatibility after rebase 2026-02-25 17:51:04 +08:00
Chummy
3aed919c47 docs(ci): add runbooks and required-check mapping for new lanes 2026-02-25 17:51:04 +08:00
Chummy
83d5421368 feat(ci): add release/canary/nightly automation and governance guards 2026-02-25 17:51:04 +08:00
Chummy
7ffb91105b style: apply rustfmt for reasoning-level changes 2026-02-25 17:51:00 +08:00
Chummy
aa743786c7 fix(config): wire provider reasoning overrides in schema 2026-02-25 17:51:00 +08:00
argenis de la rosa
aac87ca437 feat(provider): add reasoning level override
(cherry picked from commit 8d46469c40)
2026-02-25 17:51:00 +08:00
FlashFamily
931cf40636 fix: resolve all clippy warnings across codebase
Fix all clippy errors reported by `cargo clippy --all-targets -- -D warnings`
on Rust 1.93, covering both the original codebase and upstream dev changes.

Changes by category:
- format!() appended to String → write!/writeln! (telegram, discord)
- Redundant field names, unnecessary boolean not (agent/loop_)
- Long numeric literals (wati, nextcloud, telegram, gemini)
- Wildcard match on single variant (security/leak_detector)
- Derivable Default impls (config/schema)
- &Option<T> → Option<&T> or allow (config/schema, config/mod, gateway/api)
- Identical match arms merged (gateway/ws, observability, providers, main, onboard)
- Cast truncation allowed with rationale (discord, lark)
- Unnecessary borrows/returns removed (multiple files)
- Unused imports removed (channels/mod, peripherals/mod, tests)
- MSRV-gated APIs allowed locally (memory/hygiene, tools/shell, tools/screenshot)
- Unnecessary .get().is_none() → !contains_key() (gemini)
- Explicit iteration → reference loop (gateway/api)
- Test-only: useless vec!, field_reassign_with_default, doc indentation

Validated: cargo fmt, cargo clippy --all-targets -- -D warnings, cargo test
Co-authored-by: Cursor <cursoragent@cursor.com>
(cherry picked from commit 49e90cf3e4)
2026-02-25 17:50:56 +08:00
Chummy
864684a5d0 feat(ci): add MUSL static binaries for release artifacts 2026-02-25 17:50:52 +08:00
argenis de la rosa
f386f50456 fix(build): add explicit [[bin]] configuration to prevent target inference conflicts
This addresses the Windows build issues reported in #1654:
- Adds explicit [[bin]] configuration for the zeroclaw binary
- Prevents potential silent build failures when src/lib.rs and src/main.rs coexist
- The raw string syntax issues in leak_detector.rs and Deserialize imports
  were already fixed in previous commits

Closes #1654

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit a2c032fe51)
2026-02-25 17:50:49 +08:00
Chummy
d4e5cb73e3 fix(channels): support /clear alias and cross-channel history reset 2026-02-25 17:50:45 +08:00
Chummy
afc49486f3 supersede: replay changes from #1664
Automated conflict recovery onto latest dev.
2026-02-25 17:50:41 +08:00
Chummy
8bbf256fa9 supersede: replay changes from #1661
Automated conflict recovery onto latest dev.
2026-02-25 17:39:37 +08:00
Chum Yin
db175c3690
[supersede #1545] feat(providers): implement Qwen OAuth quota tracking (#1746)
* feat(providers): implement Qwen OAuth quota tracking

Add static quota display for Qwen OAuth provider (portal.qwen.ai).
Qwen OAuth API does not return rate-limit headers, so this provides
a static quota indicator based on known OAuth free-tier limits.

Changes:
- Add QwenQuotaExtractor in quota_adapter.rs
  - Parses rate-limit errors for retry backoff
  - Registered for all Qwen aliases (qwen, qwen-code, dashscope, etc.)
- Add Qwen OAuth detection in quota_cli.rs
  - Auto-detects ~/.qwen/oauth_creds.json
  - Displays static quota: ?/1000 (unknown remaining, 1000/day total)
- Improve quota display formatting
  - Shows "?/total" when only total limit is known
- Add comprehensive test report and testing scripts
  - Full integration test report: docs/qwen-provider-test-report.md
  - Model availability, context window, and latency tests
  - Reusable test scripts in scripts/ directory

Test results:
- Available model: qwen3-coder-plus (verified)
- Context window: ~32K tokens
- Average latency: ~2.8s
- All 15 quota tests passing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
(cherry picked from commit fa91b6a170)

* docs: satisfy markdownlint spacing in qwen docs

---------

Co-authored-by: ZeroClaw Bot <zeroclaw_bot@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-25 03:09:38 -05:00
Chum Yin
9a407690b6
supersede: file-replay changes from #1595 (#1728)
Automated conflict recovery via changed-file replay on latest dev.
2026-02-25 02:49:23 -05:00
Argenis
fa6790b35b
Merge pull request #1720 from zeroclaw-labs/chore/blacksmith-ci
chore(ci): lock workflow ownership and use blacksmith runners
2026-02-25 00:11:48 -05:00
argenis de la rosa
20b9ff4602 chore(ci): lock workflow ownership and use blacksmith runners 2026-02-24 23:34:10 -05:00
Chum Yin
b38fad2035
Merge pull request #1716 from zeroclaw-labs/codex/supersede-pr-1639-20260225021812-271412-files
[supersede #1639] [supersede #1617] [supersede #1263] feat(agent): add research phase for proactive information gathering
2026-02-25 11:37:19 +08:00
Chum Yin
6a057bf7d7
Merge branch 'dev' into codex/supersede-pr-1639-20260225021812-271412-files 2026-02-25 11:27:53 +08:00
Chummy
a797b5456c test(onboard): isolate quick setup env vars in tests 2026-02-25 11:17:11 +08:00
Chummy
97bd12c26a fix(onboard): resolve strict clippy blockers in wizard 2026-02-25 11:17:11 +08:00
Chummy
6f34f4e2c8 fix(lark): include mention_only in wizard config init 2026-02-25 11:17:11 +08:00
Chummy
479df22ea7 supersede: file-replay changes from #1622
Automated conflict recovery via changed-file replay on latest dev.
2026-02-25 11:17:11 +08:00
Chum Yin
dc7cf36a0f
Merge branch 'dev' into codex/supersede-pr-1639-20260225021812-271412-files 2026-02-25 11:06:52 +08:00
Chummy
cd4d816a83 fix(providers): keep runtime options backward compatible 2026-02-25 10:56:31 +08:00
reidliu41
3a38c80c05 feat(config): add model_support_vision override for per-model vision control
`supports_vision` is currently hardcoded per-provider. The same Ollama instance can run `llava` (vision) or
  `codellama` (no vision), but the code fixes vision support at the provider level with no user override.

  This adds a top-level `model_support_vision: Option<bool>` config key — tri-state:
  - **Unset (default):** provider's built-in value, zero behavior change
  - **`true`:** force vision on (e.g. Ollama + llava)
  - **`false`:** force vision off

  Follows the exact same pattern as `reasoning_enabled`. Override is applied at the wrapper layer (`ReliableProvider` /
   `RouterProvider`) — no concrete provider code is touched.

  ## Changes

  **Config surface:**
  - Top-level `model_support_vision` field in `Config` struct with `#[serde(default)]`
  - Env override: `ZEROCLAW_MODEL_SUPPORT_VISION` / `MODEL_SUPPORT_VISION`

  **Provider wrappers (core logic):**
  - `ReliableProvider`: `vision_override` field + `with_vision_override()` builder + `supports_vision()` override
  - `RouterProvider`: same pattern

  **Wiring (1-line each):**
  - `ProviderRuntimeOptions` struct + factory functions
  - 5 construction sites: `loop_.rs`, `channels/mod.rs`, `gateway/mod.rs`, `tools/mod.rs`, `onboard/wizard.rs`

  **Docs (i18n parity):**
  - `config-reference.md` — Core Keys table
  - `providers-reference.md` — new "Ollama Vision Override" section
  - Vietnamese sync: `docs/i18n/vi/` + `docs/vi/` (4 files)

  ## Non-goals

  - Does not change any concrete provider implementation
  - Does not auto-detect model vision capability

  ## Test plan

  - [x] `cargo fmt --all -- --check`
  - [x] `cargo clippy --all-targets -- -D warnings` (no new errors)
  - [x] 5 new tests passing:
    - `model_support_vision_deserializes` — TOML parse + default None
    - `env_override_model_support_vision` — env var override + invalid value ignored
    - `vision_override_forces_true` — ReliableProvider override
    - `vision_override_forces_false` — ReliableProvider override
    - `vision_override_none_defers_to_provider` — passthrough behavior

  ## Risk and Rollback

  - **Risk:** Low. `None` default = zero behavior change for existing users.
  - **Rollback:** Revert commit. Field is `#[serde(default)]` so old configs without it will deserialize fine.

(cherry picked from commit a1b8dee785)
2026-02-25 10:56:31 +08:00
Chummy
bfe87b1c55 fix: resolve supersede 1267 CI failures 2026-02-25 10:45:00 +08:00
Chummy
b5ec2dce88 supersede: replay changes from #1267
Automated replay on latest dev.
2026-02-25 10:45:00 +08:00
Chummy
f750db1b6d
style(config): apply rustfmt for module exports 2026-02-25 02:44:22 +00:00
Chummy
a43cfba154
fix(config): restore IPC and web tool compatibility in research supersede 2026-02-25 02:32:22 +00:00
Chum Yin
6bf8578d75
Merge branch 'dev' into codex/supersede-pr-1639-20260225021812-271412-files 2026-02-25 10:25:01 +08:00
Chummy
3bf5e34232 supersede: replay changes from #1413
Force repo-owned branch so CI Required Gate can run.
2026-02-25 10:22:35 +08:00
Chummy
c293561be2
supersede: file-replay changes from #1639
Automated conflict recovery via changed-file replay on latest dev.
2026-02-25 02:18:16 +00:00
dependabot[bot]
cae645707f
chore(deps): bump the rust-all group across 1 directory with 4 updates (#1689)
Bumps the rust-all group with 4 updates in the / directory: [shellexpand](https://gitlab.com/ijackson/rust-shellexpand), [chrono](https://github.com/chronotope/chrono), [rustls](https://github.com/rustls/rustls) and [tempfile](https://github.com/Stebalien/tempfile).


Updates `shellexpand` from 3.1.1 to 3.1.2
- [Commits](https://gitlab.com/ijackson/rust-shellexpand/compare/shellexpand-3.1.1...shellexpand-3.1.2)

Updates `chrono` from 0.4.43 to 0.4.44
- [Release notes](https://github.com/chronotope/chrono/releases)
- [Changelog](https://github.com/chronotope/chrono/blob/main/CHANGELOG.md)
- [Commits](https://github.com/chronotope/chrono/compare/v0.4.43...v0.4.44)

Updates `rustls` from 0.23.36 to 0.23.37
- [Release notes](https://github.com/rustls/rustls/releases)
- [Changelog](https://github.com/rustls/rustls/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rustls/rustls/compare/v/0.23.36...v/0.23.37)

Updates `tempfile` from 3.25.0 to 3.26.0
- [Changelog](https://github.com/Stebalien/tempfile/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Stebalien/tempfile/commits/v3.26.0)

---
updated-dependencies:
- dependency-name: shellexpand
  dependency-version: 3.1.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: rust-all
- dependency-name: chrono
  dependency-version: 0.4.44
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: rust-all
- dependency-name: rustls
  dependency-version: 0.23.37
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: rust-all
- dependency-name: tempfile
  dependency-version: 3.26.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: rust-all
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Argenis <theonlyhennygod@gmail.com>
2026-02-24 19:23:35 -05:00
guitaripod
0a7931e73e
fix(agent): add channel media markers to system prompt (#1697)
The system prompt had no documentation of channel media markers
([Voice], [IMAGE:], [Document:]), causing the LLM to misinterpret
transcribed voice messages as unprocessable audio attachments instead
of responding to the transcribed text content.

Co-authored-by: Argenis <theonlyhennygod@gmail.com>
2026-02-24 19:07:27 -05:00
Argenis
8541aa1bd3
docs: add Docker setup guide (#1690)
Add comprehensive Docker documentation covering:
- Bootstrap and onboarding in Docker mode
- Running ZeroClaw as a daemon or interactively
- Common commands and troubleshooting
- Environment variables and configuration options

This addresses user confusion where `zeroclaw` commands don't work
on the host after Docker bootstrap, and no containers are started.

Closes #1364

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 18:40:43 -05:00
Chum Yin
9a9b73e3db
supersede: replay changes from #1648 (#1666)
Force repo-owned branch so CI Required Gate can run.

Co-authored-by: argenis de la rosa <theonlyhennygod@gmail.com>
2026-02-24 18:37:24 -05:00
Argenis
9ed863584a
fix(channels): add wildcard pattern for non-exhaustive Relation enum in matrix channel (#1702)
The Relation enum in the Matrix SDK is marked as non-exhaustive,
causing a compilation error when building with the channel-matrix feature.
Add a wildcard pattern to handle any future relation types.

Fixes #1693

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 18:33:38 -05:00
Chummy
83ef0a3cf6 fix(tools): address codeql api key handling alerts 2026-02-25 03:30:45 +08:00
Chummy
ffe340f849 fix(tools): satisfy strict delta lint for firecrawl dispatch 2026-02-25 03:30:45 +08:00
Chummy
b4df1dc30d feat(tools): add web_fetch provider dispatch and shared URL validation 2026-02-25 03:30:45 +08:00
Chummy
523fecac0f refactor(agent): satisfy strict lint delta for loop split 2026-02-25 02:09:23 +08:00
Chummy
1b12f60e05 refactor(agent): split loop loop_ concerns into focused submodules 2026-02-25 02:09:23 +08:00
Chummy
788437c15c docs(readme): add ZeroClaw Views ecosystem entry 2026-02-25 01:28:36 +08:00
Mike-7777777
0e14c199af refactor(tools): deduplicate IpcDb initialization and simplify inbox
Extract shared init logic (pragmas, schema creation, agent registration)
into IpcDb::init(), eliminating ~45 lines of duplication between open()
and open_with_id(). Extract SQL strings into PRAGMA_SQL and SCHEMA_SQL
constants for single source of truth. Remove unused (i64, Value) tuple
in AgentsInboxTool by collecting directly into Vec<Value>.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 01:14:47 +08:00
Mike-7777777
ed67184c7a feat(tools): add inter-process communication tools
Add 5 LLM-callable IPC tools (agents_list, agents_send, agents_inbox,
state_get, state_set) backed by a shared SQLite database, enabling
independent ZeroClaw processes on the same host to discover and
communicate with each other. Gated behind [agents_ipc] enabled = true.

Related #88 (item 3: Sessions / Sub-Agent Orchestration)
Related #1518 (design spec)
2026-02-25 01:14:47 +08:00
Chummy
2dc9d081e4 fix(shell): recover command args from malformed tool payloads 2026-02-25 01:00:13 +08:00
Argenis
a066eaaadc
Merge pull request #1659 from zeroclaw-labs/fix/issue-1469-voice-log
fix(telegram): add debug logging for voice transcription skip reasons
2026-02-24 11:46:50 -05:00
Argenis
51073af2d7
Merge branch 'dev' into fix/issue-1469-voice-log 2026-02-24 11:37:12 -05:00
Chummy
f00db63598 fix(telegram): infer audio filename for transcription fallback 2026-02-25 00:35:25 +08:00
Argenis
0935e5620e
Merge branch 'dev' into fix/issue-1469-voice-log 2026-02-24 11:26:13 -05:00
Chummy
79c3c6ac50 fix(matrix): avoid logging user/device identifiers in cleartext 2026-02-25 00:23:22 +08:00
Chummy
46c9f0fb45 feat(matrix): add mention_only gate for group messages 2026-02-25 00:23:22 +08:00
Argenis
09f401183d
Merge branch 'dev' into fix/issue-1469-voice-log 2026-02-24 11:13:58 -05:00
Chummy
4893ffebad docs(i18n): unify greek localization and docs structure parity 2026-02-25 00:08:28 +08:00
Chummy
817f783881 feat(agent): inject shell allowlist policy into system prompt 2026-02-25 00:01:49 +08:00
argenis de la rosa
b545d17ed0 fix(telegram): add debug logging for voice transcription skip reasons
Voice messages were being silently ignored when transcription was disabled
or user was unauthorized, making it difficult to diagnose configuration
issues. This change adds:

- Debug log when voice/audio message received but transcription disabled
- Debug log when voice message skipped due to unauthorized user
- Info log on successful voice transcription

Closes #1469

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 10:58:12 -05:00
Chummy
432ba603c2 chore(onboard): silence intentional capability-probe bool aggregate lint 2026-02-24 23:46:04 +08:00
Chummy
eb904c3625 fix(onboard): align wizard defaults with current config schema 2026-02-24 23:46:04 +08:00
Chummy
bf1d7ac928 supersede: file-replay changes from #1317
Automated conflict recovery via changed-file replay on latest dev.
2026-02-24 23:46:04 +08:00
Chummy
040bd95d84 fix(reliable): remap model fallbacks per provider 2026-02-24 23:21:39 +08:00
Allen Huang
b36dd3aa81 feat(logging): use local timezone for log timestamps
Replace default UTC timer with ChronoLocal::rfc_3339() so daemon and
CLI log lines display the operator's local time, making correlation
with external events easier.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-24 23:02:13 +08:00
guitaripod
b556a4bdce fix(telegram): handle brackets in attachment filenames
parse_attachment_markers used .find(']') which returns the first ']', so
filenames containing brackets (e.g. yt-dlp output 'Video [G4PvTrTp7Tc].mp4')
were truncated at the inner bracket, producing a wrong path and a send failure.

Replace the naive search with find_matching_close, a depth-tracking scanner
that correctly skips nested '[...]' pairs and returns the index of the
outermost closing bracket.

Adds regression tests for the bracket-in-filename case and for the
unclosed-bracket fallback (no match → message passed through unchanged).
2026-02-24 22:48:26 +08:00
zhzy0077
b228800e9e feat(web): add zh-CN locale support
- add zh-CN translations and locale normalization in i18n\n- type locale context/state and support three-language cycle in header

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
(cherry picked from commit 4814e80479)
2026-02-24 22:33:15 +08:00
Shadman Hossain
a22244d266 fix: stream_chat_with_history delegates to stream_chat_with_system
The default trait implementation returned a single error chunk that the
SSE mapper silently converted to `data: [DONE]`, producing empty
streaming responses from the OpenAI-compatible endpoint. Mirror the
non-streaming chat_with_history pattern: extract system + last user
message and delegate to stream_chat_with_system, which all providers
already implement.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 22:22:16 +08:00
Shadman Hossain
d6824afd21 style: fix clippy warnings and cargo fmt in new code
- Add underscores to long numeric literals (1234567890 → 1_234_567_890)
- Allow cast_possible_truncation for rough token estimates
- Replace loop/match with while-let for event stream parsing
- Merge identical match arms for event types
- Add #[allow(clippy::cast_possible_truncation)] on test helper

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 22:22:16 +08:00
Shadman Hossain
14bd06fab3 feat: add streaming support for AWS Bedrock ConverseStream API
Implement the streaming provider trait methods for Bedrock, enabling
real-time token-by-token responses via the ConverseStream endpoint.

Key implementation details:
- Uses /model/{id}/converse-stream endpoint with SigV4 signing
- Parses AWS binary event-stream format (application/vnd.amazon.eventstream)
  with a minimal parser (~60 lines) — no new crate dependencies needed
- Handles contentBlockDelta events for text extraction, plus error and
  exception events
- Uses mpsc channel + stream::unfold pattern (matching compatible.rs)
- Clones credentials for async task ownership

The binary event-stream parser extracts frame lengths, header sections
(looking for :event-type), and payload bytes. CRC validation is skipped
since TLS already provides integrity guarantees.

Includes 10 new tests for URL formatting, binary parsing, and
deserialization.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 22:22:16 +08:00
Shadman Hossain
18780b27fe feat: add OpenAI-compatible /v1/chat/completions and /v1/models endpoints
Add an OpenAI-compatible API surface to the gateway so that standard
OpenAI client libraries can interact with ZeroClaw directly.

Endpoints:
- POST /v1/chat/completions — supports both streaming (SSE) and
  non-streaming responses, bearer token auth, rate limiting
- GET /v1/models — returns the gateway's configured model

The chat completions endpoint accepts the standard OpenAI request format
(model, messages, temperature, stream) and returns responses in the
OpenAI envelope format. Streaming uses SSE with delta chunks and a
[DONE] sentinel. A 512KB body limit is applied (vs 64KB default) since
chat histories can be large.

When the underlying provider doesn't support native streaming, the
handler falls back to wrapping the non-streaming response in a single
SSE chunk for transparent compatibility.

Includes 8 unit tests for request/response serialization.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 22:22:16 +08:00
Chummy
d6ca79a52e fix(gateway): fill qq fields in node control test AppState 2026-02-24 22:03:53 +08:00
Chummy
5baca2c38d fix(node-control): derive default config for clippy strict-delta 2026-02-24 22:03:53 +08:00
Chummy
c876a03819 feat(gateway): add experimental node-control scaffold API 2026-02-24 22:03:53 +08:00
reidliu41
56ffcd4477 feat(tool): add background process management tool (spawn/list/output/kill) 2026-02-24 21:53:23 +08:00
Chummy
30ab6c14fe ci: enforce unsafe debt audit and policy governance 2026-02-24 21:36:47 +08:00
Chummy
225137c972 docs: make contributors badge dynamic across README locales 2026-02-24 21:30:23 +08:00
Chummy
f31a8efd7b supersede: replay changes from #1247
Automated replay on latest dev.
2026-02-24 21:18:50 +08:00
dependabot[bot]
cc961ec0a8 chore(deps): bump actions/upload-artifact from 4.6.2 to 6.0.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.6.2 to 6.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v4.6.2...b7c566a772e6b6bfb58ed0dc250532a479d7789f)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 6.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-02-24 21:10:39 +08:00
Chummy
1028b736c4 chore(unsafe-debt): enforce strict full crate coverage defaults (RMN-54) 2026-02-24 21:00:46 +08:00
reidliu41
d6d32400fa feat(tool): add session-scoped task_plan tool for multi-step work tracking
- Base branch target: dev
  - Problem: ZeroClaw agents have no structured way to decompose complex tasks into trackable steps, falling behind
  every comparable agent runtime
  - Why it matters: Without task tracking, multi-step work is fragile (lost on context compression), invisible to users
   (no progress signal), and error-prone (agent loses track of what's done vs. pending)
  - What changed: Added a session-scoped task_plan tool with create/add/update/list/delete actions, integrated with
  SecurityPolicy, registered in the tool factory
  - What did not change: No config schema changes, no persistence layer, no CLI subcommand, no changes to agent loop or
   any other subsystem

  Label Snapshot

  - Risk label: risk: low
  - Size label: size: S
  - Scope labels: tool
  - Module labels: tool: task_plan
  - Contributor tier label: (auto-managed)
  - If any auto-label is incorrect: N/A

  Change Metadata

  - Change type: feature
  - Primary scope: tool

  Linked Issue

  - Closes #(issue number)
  - Related: N/A
  - Depends on: N/A
  - Supersedes: N/A

  Supersede Attribution

  N/A — no superseded PRs.

  Validation Evidence

  cargo fmt --all -- --check    # pass (no output)
  cargo clippy --all-targets -- -D warnings  # task_plan.rs: 0 warnings (pre-existing warnings in other files
  unrelated)
  cargo test --lib tools::task_plan  # 15/15 passed

  - Evidence provided: test output (15 passed, 0 failed)
  - If any command is intentionally skipped: cargo clippy reports pre-existing warnings in unrelated files
  (onboard/wizard.rs etc.); task_plan.rs itself has zero clippy warnings

  Security Impact

  - New permissions/capabilities? No — uses existing ToolOperation::Act enforcement
  - New external network calls? No
  - Secrets/tokens handling changed? No
  - File system access scope changed? No

  Privacy and Data Hygiene

  - Data-hygiene status: pass
  - Redaction/anonymization notes: No identity data in code or tests. Test fixtures use neutral strings ("step one",
  "do thing", "first")
  - Neutral wording confirmation: All naming follows ZeroClaw/project-native conventions

  Compatibility / Migration

  - Backward compatible? Yes
  - Config/env changes? No
  - Migration needed? No

  i18n Follow-Through

  - i18n follow-through triggered? No — no docs or user-facing wording changes

  Human Verification

  - Verified scenarios: Ran ./target/debug/zeroclaw agent -m "调用 task_plan 工具,action=list" — agent correctly
  identified and called task_plan, returned "No tasks."
  - Edge cases checked: read-only mode blocks mutations, empty task list, invalid action names, missing required
  parameters, create replaces existing list, ID auto-increment after add
  - What was not verified: Behavior with non-CLI channels (Telegram, Discord); behavior with XML-fallback dispatcher
  (non-native-tool providers)

  Side Effects / Blast Radius

  - Affected subsystems/workflows: src/tools/ only — tool factory gains one additional entry
  - Potential unintended effects: Marginally increases tool spec payload size sent to LLM (one more tool definition).
  Could theoretically cause tool name confusion with schedule if LLM descriptions are ambiguous — mitigated by distinct
   naming (task_plan vs schedule) and different description wording.
  - Guardrails/monitoring for early detection: Standard tool dispatch logging. Tool is session-scoped so no persistent
  side effects on failure.

  Agent Collaboration Notes

  - Agent tools used: Claude Code for implementation assistance and review
  - Workflow/plan summary: Implement Tool trait → register in factory → validate with tests → manual agent session test
  - Verification focus: Security policy enforcement, parameter validation edge cases, all 5 action paths
  - Confirmation: naming + architecture boundaries followed (CLAUDE.md §6.3, §6.4, §7.3): Yes

  Rollback Plan

  - Fast rollback command/path: git revert <commit> — removes 3 lines from mod.rs and deletes task_plan.rs
  - Feature flags or config toggles: None needed — tool is stateless and session-scoped
  - Observable failure symptoms: Tool not appearing in agent tool list, or tool returning errors on valid input

  Risks and Mitigations

  - Risk: LLM may occasionally confuse task_plan (action: list) with schedule (action: list) due to similar parameter
  structure
    - Mitigation: Distinct tool names and descriptions; task_plan description emphasizes "session checklist" while
  schedule emphasizes "cron/recurring tasks"
2026-02-24 20:52:31 +08:00
guitaripod
bd924a90dd fix(telegram): route image-extension Documents through vision pipeline
format_attachment_content was matching only Photo for [IMAGE:] routing.
Documents with image extensions (jpg, png, gif, webp, bmp) were formatted as
[Document: name] /path, bypassing the multimodal pipeline entirely.

Extend the match arm to cover Document when is_image_extension returns true,
so both Photos and image Documents produce [IMAGE:/path] and reach the provider
as proper vision input blocks.

Adds regression tests covering Document+image extension → [IMAGE:] and
Document+non-image extension → [Document:] paths.
2026-02-24 20:41:34 +08:00
Chummy
f218a35ee5 feat(unsafe-debt): integrate policy-driven audit coverage (RMN-53) 2026-02-24 20:30:57 +08:00
guitaripod
d9c6dc4e04 fix(anthropic): send image content as proper API vision blocks
The Anthropic provider had no Image variant in NativeContentOut, so
[IMAGE:data:image/jpeg;base64,...] markers produced by the multimodal
pipeline were sent to the API as plain text. The API counted every
base64 character as a token, reliably exceeding the 200k token limit
for any real image (a typical Telegram-compressed photo produced
~130k tokens of base64 text alone).

Fix:
- Add ImageSource struct and Image variant to NativeContentOut that
  serializes to the Anthropic Messages API image content block format
- Add parse_inline_image() to decode data URI markers into Image blocks
- Add build_user_content_blocks() to split user message content into
  Text and Image blocks using the existing parse_image_markers helper
- Update convert_messages() user arm to use build_user_content_blocks()
- Handle Image in the apply_cache_to_last_message no-op arm

Fixes #1626
2026-02-24 20:28:15 +08:00
guitaripod
b61f7403bf fix(anthropic): implement capabilities() to enable vision support
Set vision: true so image inputs are accepted by the capability gate.
Set native_tool_calling: true to align capabilities() with the existing
supports_native_tools() which always returned true, eliminating the
silent inconsistency between the two.

Adds a unit test that fails if either capability regresses.
2026-02-24 20:08:36 +08:00
Chummy
011b379bec feat(unsafe-debt): deepen crate-root guard enforcement (RMN-52) 2026-02-24 19:48:28 +08:00
Chummy
54dd7a4a9b feat(qq): add webhook receive mode with challenge validation 2026-02-24 19:30:36 +08:00
Chummy
7f2ef13da1 fix(ci): keep lark default feature without matrix bloat 2026-02-24 19:19:10 +08:00
Chummy
51d9d0d9e8 fix(channels): enable matrix+lark in default build features 2026-02-24 19:19:10 +08:00
Chummy
0083aece57 fix(gateway): normalize masked reliability api_keys in config PUT 2026-02-24 19:03:50 +08:00
Chummy
99bf8f29be fix(unsafe-debt): remove runtime unsafe UID check and forbid unsafe code (RMN-37 RMN-38) 2026-02-24 18:30:36 +08:00
Chummy
30d8a8b33b feat(ci): add unsafe debt audit report script (RMN-44) 2026-02-24 18:30:36 +08:00
reidliu41
8f263cd336 feat(agent): add CLI parameters for runtime config overrides 2026-02-24 18:12:33 +08:00
Chum Yin
d2b0593be3
ci(release): fix armv7 Android compiler detection in pub-release (#1602)
* ci(release): install Android NDK via shell to satisfy action policy

* ci(release): fix armv7 android compiler detection in NDK setup

* ci: stabilize release gate and armv7 Android workflow lint
2026-02-24 16:52:56 +08:00
Chummy
d78a6712ef fix: stabilize UTF-8 truncation and dashboard message IDs (RMN-25 RMN-33) 2026-02-24 16:52:26 +08:00
Chummy
cf81c15f68 fix(ci): remove audit false positives and pass actionlint 2026-02-24 16:25:53 +08:00
Chummy
8f91f956fd feat(ci): complete security audit governance and resilient CI control lanes 2026-02-24 16:25:53 +08:00
Chum Yin
d3524494e5
ci(release): pin setup-ndk action to commit sha (#1600) 2026-02-24 16:17:31 +08:00
Chummy
36c4e923f1 chore: suppress strict-delta clippy bool-count lint on compatible provider 2026-02-24 15:59:49 +08:00
Chummy
5505465f93 chore: fix lint gate formatting and codex test runtime options 2026-02-24 15:59:49 +08:00
Chummy
b3b5055080 feat: replay custom provider api mode, route max_tokens, and lark image support 2026-02-24 15:59:49 +08:00
Chum Yin
c2a39e78ff chore(codeowner): add @theonlyhennygod to be default owner for all files 2026-02-24 15:22:24 +08:00
Chummy
d2bbe5ff56 chore(codeowners): require both @theonlyhennygod and @chumyin for memory 2026-02-24 15:22:24 +08:00
Chummy
676aa6a53d chore(codeowners): update reviewer ownership and remove @willsarg 2026-02-24 15:22:24 +08:00
Chummy
3d5a5c3d3c fix(clippy): satisfy strict delta in websocket url mapping 2026-02-24 15:08:03 +08:00
Chummy
57cbb49d65 fix(fmt): align compatible provider websocket changes 2026-02-24 15:08:03 +08:00
Chummy
666f1a7d10 feat(provider): add responses websocket transport fallback 2026-02-24 15:08:03 +08:00
Chummy
ffb5942e60 style(qq): format channel changes 2026-02-24 14:46:42 +08:00
Chummy
f72c87dd26 fix(qq): support passive replies and media image send 2026-02-24 14:46:42 +08:00
Chummy
81b4680173 ci: add provider connectivity probes matrix and runbook
Implements scheduled/manual connectivity probes with contract-driven provider matrix, categorized failure policy, CI artifacts, and operator runbook.\n\nRefs RMN-5\nRefs RMN-6
2026-02-24 14:38:08 +08:00
Chummy
57f8979df1 fix(test): serialize openai codex env variable tests 2026-02-24 14:32:01 +08:00
Chummy
04e5950020 fix(gateway): remove unused websocket sink import 2026-02-24 14:21:34 +08:00
Chummy
68f1ba1617 chore(fmt): normalize gateway import order for webchat fix 2026-02-24 14:21:34 +08:00
Preventnetworkhacking
35a5815513 fix(gateway): enable tool execution in web chat agent
Web chat was calling provider.chat_with_history() directly, bypassing
the agent loop. Tool calls were rendered as raw XML instead of executing.

Changes:
- Add tools_registry_exec to AppState for executable tools
- Replace chat_with_history with run_tool_call_loop in ws.rs
- Maintain conversation history per WebSocket session
- Add multimodal and max_tool_iterations config to AppState

Closes #1524
2026-02-24 14:21:34 +08:00
Chummy
e2f4163ed8 fix(ci): quote workflow env path writes for actionlint 2026-02-24 14:12:08 +08:00
Chummy
fb95fc61a0 fix(browser): harden rust_native interactability for click/fill/type 2026-02-24 14:12:08 +08:00
Chummy
1caed16099 docs(ci): document workflow owner default allowlist 2026-02-24 14:02:42 +08:00
Chummy
a1d5f2802b ci: allow maintainer-authored workflow PRs for owner gate 2026-02-24 14:02:42 +08:00
Chummy
b0f14cd311 ci: compute change scope from merge-base 2026-02-24 14:02:42 +08:00
Chummy
254f262aba ci: fix shellcheck quoting in release workflow 2026-02-24 14:02:42 +08:00
Chummy
72211e62d5 ci: enforce PR gate parity with push checks 2026-02-24 14:02:42 +08:00
InuDial
de6fcea363 use std::hint::black_box instead of deprecated criterion::black_box 2026-02-24 13:59:11 +08:00
Chummy
0377a35811 chore(fmt): fix loop_ test formatting after #1505 2026-02-24 13:51:43 +08:00
Chummy
8ab75fdda9 test: add regression coverage for provider parser cron and telegram 2026-02-24 13:45:13 +08:00
Chummy
15b54670ff fix: improve tool-call parsing and shell expansion checks 2026-02-24 13:45:13 +08:00
Preventnetworkhacking
82c7fe8d8b fix(telegram): populate thread_ts for per-topic session isolation
When a Telegram message originates from a forum topic, the thread_id was
extracted and used for reply routing but never stored in ChannelMessage.thread_ts.
This caused all messages from the same sender to share conversation history
regardless of which topic they were posted in.

Changes:
- Set thread_ts to the extracted thread_id in parse_update_message,
  try_parse_voice_message, and try_parse_attachment_message
- Use 'ref' in if-let patterns to avoid moving thread_id before it's assigned
- Update conversation_history_key() to include thread_ts when present,
  producing keys like 'telegram_<thread_id>_<sender>' for forum topics
- Update conversation_memory_key() to also include thread_ts for memory isolation

This enables proper per-topic session isolation in Telegram forum groups while
preserving existing behavior for regular groups and DMs (where thread_ts is None).

Closes #1532
2026-02-24 13:40:04 +08:00
Chummy
ace493b32f chore(fmt): format gateway api after dashboard-save fix 2026-02-24 13:30:43 +08:00
argenis de la rosa
9751433803 fix(gateway): preserve masked config values on dashboard save
Replace line-based TOML masking with structured config masking so secret fields keep their original types (including reliability.api_keys arrays).\nHydrate dashboard PUT payloads with runtime config_path/workspace_dir and restore masked secret placeholders from current config before validation/save.\nAlso allow GET on /api/doctor for dashboard/client compatibility to avoid 405 responses.
2026-02-24 13:22:07 +08:00
Chummy
3157867a71 test(file_read): align outside-workspace case with workspace_only=false policy 2026-02-24 13:12:03 +08:00
Chummy
5e581eabfe fix(security): preserve workspace allowlist before forbidden-root checks 2026-02-24 12:58:59 +08:00
Allen Huang
752877051c fix: security, config, and provider hardening
- security: honor explicit command paths in allowed_commands list
- security: respect workspace_only=false in resolved path checks
- config: enforce 0600 permissions on every config save (unix)
- config: reject temp-directory paths in active workspace marker
- provider: preserve reasoning_content in tool-call conversation history
- provider: add allow_user_image_parts parameter for minimax compatibility

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-24 12:58:59 +08:00
Chummy
705e5b5a80 fix(ci): align codex tests with provider runtime API 2026-02-24 12:47:26 +08:00
Chummy
f4f6f5f48a test(codex): align provider init with runtime option changes 2026-02-24 12:38:48 +08:00
Chummy
d4f5f2ce95 fix(security): tighten prompt-guard detection thresholds and phrases 2026-02-24 12:38:48 +08:00
argenis de la rosa
09b6a2db0b fix(providers): use native_tool_calling field in supports_native_tools
The supports_native_tools() method was hardcoded to return true,
but it should return the value of self.native_tool_calling to
properly disable native tool calling for providers like MiniMax.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 12:38:48 +08:00
Chummy
005cd38d27 fix(onboard): resolve rebase conflict in models command helpers 2026-02-24 12:24:51 +08:00
Chummy
1290b73faa fix: align codex provider runtime options with current interfaces 2026-02-24 12:24:51 +08:00
Chummy
59d4f7d36d feat: stabilize codex oauth and add provider model connectivity workflow 2026-02-24 12:24:51 +08:00
Chummy
fefd0a1cc8 style: apply rustfmt normalization 2026-02-24 12:02:18 +08:00
Dominik Horváth
b8e4f1f803 fix(channels,memory): Docker workspace path remapping, vision support, and Qdrant backend restore (#1)
* fix(channels,providers): remap Docker /workspace paths and enable vision for custom provider

Two fixes:

1. Telegram channel: when a Docker-containerised runtime writes a file to
   /workspace/<path>, the host-side sender couldn't find it because the
   container mount point differs from the host workspace dir. Remap
   /workspace/<rel> → <host_workspace_dir>/<rel> in send_attachment before
   the path-exists check so generated media is delivered correctly.

2. Provider factory: custom: provider was created with vision disabled,
   causing all image messages to be rejected with a capability error even
   though the underlying OpenAI-compatible endpoint supports vision. Switch
   to new_with_vision(..., true) so image inputs are forwarded correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(memory): restore Qdrant vector database backend

Re-adds the Qdrant memory backend that was removed from main in a
recent upstream merge. Restores:

- src/memory/qdrant.rs — full QdrantMemory implementation with lazy
  init, HTTP REST client, embeddings, and Memory trait
- src/memory/backend.rs — Qdrant variant in MemoryBackendKind, profile,
  classify and profile dispatch
- src/memory/mod.rs — module export, factory routing with build_qdrant_memory
- src/config/schema.rs — QdrantConfig struct and qdrant field on MemoryConfig
- src/config/mod.rs — re-export QdrantConfig
- src/onboard/wizard.rs — qdrant field in MemoryConfig initializer

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 12:02:18 +08:00
Mike Johnson-Maxted
d80a653552 fix(onboard): split device-flow hint — copilot auto-prompts, others use auth login
copilot is the only provider that performs a device-code flow automatically on
first run. openai-codex and gemini (when OAuth-backed) require an explicit
`zeroclaw auth login --provider <name>` step. Split the device-flow next-steps
block to reflect this distinction.

Addresses Copilot review comment on PR #1509.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 11:46:49 +08:00
Mike Johnson-Maxted
2f29ec75ef fix(onboard): use provider-aware env var hint in quick setup next steps
Replace hardcoded OPENROUTER_API_KEY hint with provider-aware logic:
- keyless local providers (ollama, llamacpp, etc.) show chat/gateway/status hints
- device-flow providers (copilot, gemini, openai-codex) show OAuth/first-run hint
- all other providers show the correct provider-specific env var via provider_env_var()

Also adds canonical alias "github-copilot" -> "copilot" in canonical_provider_name(),
and a new provider_supports_device_flow() helper with accompanying test.

Additionally fixes pre-existing compile blockers that prevented CI from running:
- fix(security): correct raw string literals in leak_detector.rs that terminated
  early due to unescaped " inside r"..." (use r#"..."# instead)
- fix(gateway): add missing wati: None in two test AppState initializations
- fix(gateway): use serde::Deserialize path on WatiVerifyQuery struct
- fix(security): add #[allow(unused_imports)] on new pub use re-exports in mod.rs
- fix(security): remove unused serde::{Deserialize, Serialize} import
- chore: apply cargo fmt to files that had pending formatting diffs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 11:46:49 +08:00
NB😈
5386414666 fix(cron): enable delivery for crons created from external channels
Scheduled jobs created via channel conversations (Discord, Telegram, etc.)
never delivered output back to the channel because:

1. The agent had no channel context (channel name + reply_target) in its
   system prompt, so it could not populate the delivery config.
2. The schedule tool only creates shell jobs with no delivery support,
   and the cron_add tool's delivery schema was opaque.
3. OpenAiCompatibleProvider was missing the native_tool_calling field,
   causing a compile error.

Changes:
- Inject channel context (channel name + reply_target) into the system
  prompt so the agent knows how to address delivery when scheduling.
- Improve cron_add tool description and delivery parameter schema to
  guide the agent toward correct delivery config.
- Update schedule tool description to warn that output is only logged
  and redirect to cron_add for channel delivery.
- Fix missing native_tool_calling field in OpenAiCompatibleProvider.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-24 11:34:12 +08:00
Adam Singer
388e168158 [bug] Regex build failure 2026-02-24 11:34:12 +08:00
Ali Zulfiqar
45636b966f docs: fix OAuth wording, binary size format, E.164 phone prefix, and grammar consistency 2026-02-24 11:24:09 +08:00
Argenis
9d5fecd691
Merge pull request #1517 from zeroclaw-labs/sync/main-into-dev-20260223
sync: merge main into dev — consolidate all upstream releases
2026-02-23 14:04:11 -05:00
argenis de la rosa
5c63ec380a Merge branch 'main' into dev — consolidate all upstream releases 2026-02-23 14:03:17 -05:00
Bojan Zivic
993ec3fba6
fix: always emit toolResult blocks for tool_use responses (#1476)
* ci(homebrew): prefer HOMEBREW_UPSTREAM_PR_TOKEN with fallback

* ci(homebrew): handle existing upstream remote and main base

* fix: always emit toolResult blocks for tool_use responses

The Bedrock Converse API requires that every toolUse block in an
assistant message has a corresponding toolResult block in the
subsequent user message. Two bugs caused violations of this contract:

1. When parse_tool_result_message failed (e.g. malformed JSON or
   missing tool_call_id), the fallback emitted a plain text user
   message instead of a toolResult block, causing Bedrock to reject
   the request with "Expected toolResult blocks at messages.N.content
   for the following Ids: ..."

2. When the assistant made multiple tool calls in a single turn, each
   tool result was pushed as a separate ConverseMessage with role
   "user". Bedrock expects all toolResult blocks for a turn to appear
   in a single user message.

Fix (1) by making the fallback construct a toolResult with status
"error" containing the raw content, and attempting to extract the
tool_use_id from the previous assistant message if JSON parsing fails.

Fix (2) by merging consecutive tool-result user messages into a single
ConverseMessage during convert_messages.

Also accept alternate field names (tool_use_id, toolUseId) in addition
to tool_call_id when parsing tool result messages.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Will Sarg <12886992+willsarg@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 07:55:38 -05:00
Chummy
994e6099d8
fix(provider): disable native tool calling for MiniMax (#1495)
MiniMax API does not support OpenAI-style native tool definitions
(`tools` parameter in chat completions). Sending them causes a 500
Internal Server Error with "unknown error (1000)" on every request.

Add a `native_tool_calling` field to `OpenAiCompatibleProvider` so each
constructor can declare its tool-calling capability independently.
MiniMax (via `new_merge_system_into_user`) now sets this to `false`,
causing the agent loop to inject tool instructions into the system
prompt as text instead of sending native JSON tool definitions.

Closes #1387


(cherry picked from commit 2b92a774fb)
(cherry picked from commit 1816e8a829)

Co-authored-by: keiten arch <tang.zhengliang@ivis-sh.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 07:53:22 -05:00
Adam Makhlouf
4ea238b18b
fix(channel): replace invalid Telegram ACK reaction emojis (#1477)
Replace 🙌 and 💪 with 🔥 and 👍 in the TELEGRAM_ACK_REACTIONS pool.
The removed emojis are not in Telegram's allowed reaction set, causing
~40% of ACK reactions to fail with REACTION_INVALID (400 Bad Request).

All replacements verified against the Telegram Bot API setMessageReaction
endpoint in a live private chat.

Closes #1475
2026-02-23 07:41:54 -05:00
Chummy
e6227d905a
[supersede #1354 v2] feat(composio): fix v3 compatibility with parameter discovery, NLP text execution, and error enrichment (#1493)
* feat(composio): fix v3 compatibility with parameter discovery, NLP text execution, and error enrichment

Three-layer fix for the Composio v3 API compatibility issue where the LLM
agent cannot discover parameter schemas, leading to repeated guessing and
execution failures.

Layer 1 – Surface parameter hints in list output:
  - Add input_parameters field to ComposioV3Tool and ComposioAction structs
  - Pass through input_parameters from v3 list response via map_v3_tools_to_actions
  - Add format_input_params_hint() to show required/optional param names in list output

Layer 2 – Support natural-language text execution:
  - Add text parameter to tool schema (mutually exclusive with params)
  - Thread text through execute handler → execute_action → execute_action_v3
  - Update build_execute_action_v3_request to send text instead of arguments
  - Skip v2 fallback when text-mode is used (v2 has no NLP support)

Layer 3 – Enrich execute errors with parameter schema:
  - Add get_tool_schema() to fetch full tool metadata from GET /api/v3/tools/{slug}
  - Add format_schema_hint() to render parameter names, types, and descriptions
  - On execute failure, auto-fetch schema and append to error message

Root cause: The v3 API returns input_parameters in list responses but
ComposioV3Tool was silently discarding them. The LLM had no way to discover
parameter schemas before calling execute, and error messages provided no
remediation guidance — creating an infinite guessing loop.

Co-Authored-By: unknown <>
(cherry picked from commit fd92cc5eb0)

* fix(composio): use floor_char_boundary for safe UTF-8 truncation in format_schema_hint

Co-Authored-By: unknown <>
(cherry picked from commit 18e72b6344)

* fix(composio): restore coherent v3 execute flow after replay

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2026-02-23 07:38:59 -05:00
Chummy
ad61a7fe24
supersede: file-replay changes from #1416 (#1494)
Automated conflict recovery via changed-file replay on latest dev.
2026-02-23 07:38:02 -05:00
Le Song
dc53f46946 fix(config): add test for 0600 permissions on config file save
(cherry picked from commit a50877dbd2)
2026-02-23 20:35:21 +08:00
Le Song
2bd04a53bf fix(config): chmod 0600 on newly created config
Apply 0600 when saving a new config file so onboarding-created
configs are not world-readable.

(cherry picked from commit e51a596581)
2026-02-23 20:35:21 +08:00
Chummy
dd2044e45d fix(config): re-export Feishu/Estop/Otp configs 2026-02-23 20:30:21 +08:00
reidliu41
d3f0a79fe9 Summary
- Problem: The existing http_request tool returns raw HTML/JSON, which is nearly unusable for LLMs to extract
  meaningful content from web pages.
- Why it matters: All mainstream AI agents (Claude Code, Gemini CLI, Aider) have dedicated web content extraction
  tools. ZeroClaw lacks this capability, limiting its ability to research and gather information from the web.
- What changed: Added a new web_fetch tool that fetches web pages and converts HTML to clean plain text using
  nanohtml2text. Includes domain allowlist/blocklist, SSRF protection, redirect following, and content-type aware
  processing.
- What did not change (scope boundary): http_request tool is untouched. No shared code extracted between http_request
   and web_fetch (DRY rule-of-three: only 2 callers). No changes to existing tool behavior or defaults.

Label Snapshot (required)

  - Risk label: risk: medium
  - Size label: size: M
  - Scope labels: tool, config
  - Module labels: tool: web_fetch
  - If any auto-label is incorrect, note requested correction: N/A

  Change Metadata

  - Change type: feature
  - Primary scope: tool

  Linked Issue

  - Closes #
  - Related #
  - Depends on #
  - Supersedes #

  Supersede Attribution (required when Supersedes # is used)

  N/A

  Validation Evidence (required)

  cargo fmt --all -- --check   # pass
  cargo clippy --all-targets -- -D warnings  # no new warnings (pre-existing warnings only)
  cargo test --lib -- web_fetch  # 26/26 passed
  cargo test --lib -- tools::tests  # 12/12 passed
  cargo test --lib -- config::schema::tests  # 134/134 passed

  - Evidence provided: unit test results (26 new tests), manual end-to-end test with Ollama + qwen2.5:72b
  - If any command is intentionally skipped, explain why: Full cargo clippy --all-targets has 43 pre-existing errors
  unrelated to this PR (e.g. await_holding_lock, format! appended to String). Zero errors from web_fetch code.

  Security Impact (required)

  - New permissions/capabilities? Yes — new web_fetch tool can make outbound HTTP GET requests
  - New external network calls? Yes — fetches web pages from allowed domains
  - Secrets/tokens handling changed? No
  - File system access scope changed? No
  - If any Yes, describe risk and mitigation:
    - Deny-by-default: enabled = false by default; tool is not registered unless explicitly enabled
    - Domain filtering: allowed_domains (default ["*"] = all public hosts) + blocked_domains (takes priority).
  Blocklist always wins over allowlist.
    - SSRF protection: Blocks localhost, private IPs (RFC 1918), link-local, multicast, reserved ranges, IPv4-mapped
  IPv6, .local TLD — identical coverage to http_request
    - Rate limiting: can_act() + record_action() enforce autonomy level and rate limits
    - Read-only mode: Blocked when autonomy is ReadOnly
    - Response size cap: 500KB default truncation prevents context window exhaustion
    - Proxy support: Honors [proxy] config via tool.web_fetch service key

  Privacy and Data Hygiene (required)

  - Data-hygiene status: pass
  - Redaction/anonymization notes: No personal data in code, tests, or fixtures
  - Neutral wording confirmation: All test identifiers use neutral project-scoped labels

  Compatibility / Migration

  - Backward compatible? Yes — new tool, no existing behavior changed
  - Config/env changes? Yes — new [web_fetch] section in config.toml (all fields have defaults)
  - Migration needed? No — #[serde(default)] on all fields; existing configs without [web_fetch] section work unchanged

  i18n Follow-Through (required when docs or user-facing wording changes)

  - i18n follow-through triggered? No — no docs or user-facing wording changes

  Human Verification (required)

  - Verified scenarios:
    - End-to-end test: zeroclaw agent with Ollama qwen2.5:72b successfully called web_fetch to fetch
  https://github.com/zeroclaw-labs/zeroclaw, returned clean plain text with project description, features, star count
    - Tool registration: tool_count increased from 22 to 23 when enabled = true
    - Config: enabled = false (default) → tool not registered; enabled = true → tool available
  - Edge cases checked:
    - Missing [web_fetch] section in existing config.toml → works (serde defaults)
    - Blocklist priority over allowlist
    - SSRF with localhost, private IPs, IPv6
  - What was not verified:
    - Proxy routing (no proxy configured in test environment)
    - Very large page truncation with real-world content

  Side Effects / Blast Radius (required)

  - Affected subsystems/workflows: all_tools_with_runtime() signature gained one parameter (web_fetch_config); all 5
  call sites updated
  - Potential unintended effects: None — new tool only, existing tools unchanged
  - Guardrails/monitoring for early detection: enabled = false default; tool_count in debug logs

  Agent Collaboration Notes (recommended)

  - Agent tools used: Claude Code (Opus 4.6)
  - Workflow/plan summary: Plan mode → approval → implementation → validation
  - Verification focus: Security (SSRF, domain filtering, rate limiting), config compatibility, tool registration
  - Confirmation: naming + architecture boundaries followed (CLAUDE.md + CONTRIBUTING.md): Yes — trait implementation +
   factory registration pattern, independent security helpers (DRY rule-of-three), deny-by-default config

  Rollback Plan (required)

  - Fast rollback command/path: git revert <commit>
  - Feature flags or config toggles: [web_fetch] enabled = false (default) disables completely
  - Observable failure symptoms: tool_count in debug logs drops by 1; LLM cannot call web_fetch

  Risks and Mitigations

  - Risk: SSRF bypass via DNS rebinding (attacker-controlled domain resolving to private IP)
    - Mitigation: Pre-request host validation blocks known private/local patterns. Same defense level as existing
  http_request tool. Full DNS-level protection would require async DNS resolution before connect, which is out of scope
   for this PR.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit 04597352cc)
2026-02-23 20:30:21 +08:00
Chummy
a9f0668649 fix(onboard): use is_feishu when constructing lark config 2026-02-23 20:25:06 +08:00
aricredemption-ai
f1ca0c05fd feat(lark): add mention_only group gating with bot open_id auto-discovery
(cherry picked from commit ef1f75640a)
2026-02-23 20:25:06 +08:00
Nils Fischer
1528121f67 fix(channel): normalize WhatsApp allowlist matching for LID senders
(cherry picked from commit 9545709231)
2026-02-23 20:17:13 +08:00
Ken Simpson
456b53d9d3 fix(tools): recover rust-native browser session on stale webdriver 2026-02-23 19:54:15 +08:00
Chummy
b7a5ef9d9d test(pairing): satisfy strict clippy delta on lockout sweep assertions 2026-02-23 19:49:10 +08:00
fettpl
99c4ae7200 fix(security): harden per-client lockout eviction and sweep
Addresses the unbounded-map gap left by #951: entries below the lockout
threshold (count < MAX_PAIR_ATTEMPTS, lockout = None) were never evicted,
allowing distributed brute-force (>1024 unique IPs, <5 attempts each) to
permanently fill the tracking map and disable accounting for new attackers.

Hardening delta on top of #951:

- Replace raw tuple with typed FailedAttemptState (count, lockout_until,
  last_attempt) for clarity and to enable retention-based sweep.
- Bump MAX_TRACKED_CLIENTS from 1024 to 10_000.
- Add 15-min retention sweep (prune_failed_attempts) on 5-min interval.
- Switch lockout from relative (locked_at + elapsed) to absolute
  (lockout_until) for simpler and monotonic comparison.
- Add LRU eviction fallback when map is at capacity after pruning.
- Add normalize_client_key() to sanitize whitespace/empty client IDs.
- Add 3 focused tests: per-client reset isolation, bounded map capacity,
  and sweep pruning of stale entries.

Supersedes:
- #670 by @fettpl (original hardening branch, rebased as delta)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 19:39:20 +08:00
Chummy
e4bedd4162 test(agent_e2e): allow datetime prefix when memory context is empty 2026-02-23 19:28:07 +08:00
Edvard
359cfb46ae feat(agent): inject current datetime into every user message
Prepends [YYYY-MM-DD HH:MM:SS TZ] to each user message before it
reaches the model. This gives the agent accurate temporal context
on every turn, not just session start.

Previously DateTimeSection only injected the time once when the
system prompt was built. Long conversations or cron jobs had
stale timestamps. Now every message carries the real time.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 19:16:34 +08:00
Nguyen Minh Thai
87ac60c71d
feat(tools): Use system default browser instead of hard-coded Brave Browser (#1453)
* ci(homebrew): prefer HOMEBREW_UPSTREAM_PR_TOKEN with fallback

* ci(homebrew): handle existing upstream remote and main base

* feat(tools): Use system default browser instead of hard-coded Brave Browser

---------

Co-authored-by: Will Sarg <12886992+willsarg@users.noreply.github.com>
2026-02-23 05:57:21 -05:00
Edvard Schøyen
e52a518b00
feat(channels): add /new command to clear conversation history (#1417)
Adds a `/new` runtime chat command for Telegram and Discord that clears
the sender's conversation history without changing provider or model.
Useful for starting a fresh session when stale context causes issues.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 05:52:36 -05:00
Amit Kotlovski
c370697b47 fix(providers): use /openai/v1 for Groq base URL 2026-02-23 17:32:31 +08:00
InuDial
a8e5606650 Add hardware feature conditional compile for hardware mods 2026-02-23 16:45:44 +08:00
Chummy
750bb6b3b5 test(gemini): restore oauth env vars in unit test 2026-02-23 16:15:17 +08:00
Chummy
5ac6490bf1 fix(ci): format openai codex vision e2e test for rust quality gate 2026-02-23 16:04:06 +08:00
reidliu41
a606e004e5 fix(gateway): skip pairing dialog in web UI when require_pairing is false 2026-02-23 15:01:46 +08:00
Kevin Syong
2d9bcaeac9 fix(scheduler): include failure reason in job failure warning
- Return output string from 'execute_and_persist_job' alongside job id and success flag.
- Include failure reason in 'tracing::warn' when a scheduler job fails.
- Makes failed cron job errors visible in logs without inspecting the database.
2026-02-23 14:55:37 +08:00
argenis de la rosa
cd8ab2b35f fix(gemini): derive OAuth refresh client id from Gemini CLI tokens
Gemini CLI oauth_creds.json can omit client_id/client_secret, causing refresh requests to fail with HTTP 400 invalid_request (could not determine client ID).

Parse id_token claims (aud/azp) as a client_id fallback, preserve env/file overrides, and keep refresh form logic explicit. Also add camelCase deserialization aliases and regression tests for refresh-form and id_token parsing edge cases.

Refs #1424
2026-02-23 14:55:34 +08:00
Ray Azrin Karim
0146bacbb3 fix(channel): remove unsupported Telegram reaction emojis
The previous emoji set included unsupported reactions (🦀, 👣) that Telegram API
rejects with REACTION_INVALID error in some chat contexts. Remove these while
keeping the working emojis.

Before: ["️", "🦀", "🙌", "💪", "👌", "👀", "👣"]
After:  ["️", "🙌", "💪", "👌", "👀"]

Fixes warning: REACTION_INVALID 400 Bad Request
2026-02-23 14:55:31 +08:00
Robert McGinley
7bea36532d fix(tool): treat max_response_size = 0 as unlimited
When max_response_size is set to 0, the condition `text.len() > 0` is
true for any non-empty response, causing all responses to be truncated
to empty strings. The conventional meaning of 0 for size limits is
"no limit" (matching ulimit, nginx client_max_body_size, curl, etc.).

Add an early return when max_response_size == 0 and update the doc
comment to document this behavior.
2026-02-23 14:55:27 +08:00
Aleksandr Prilipko
1ad5416611 feat(providers): normalize image paths to data URIs in OpenAI Codex
Fix OpenAI Codex vision support by converting file paths to data URIs
before sending requests to the API.

## Problem

OpenAI Codex API was rejecting vision requests with 400 error:
"Invalid 'input[0].content[1].image_url'. Expected a valid URL,
but got a value with an invalid format."

Root cause: provider was sending raw file paths (e.g. `/tmp/test.png`)
instead of data URIs (e.g. `data:image/png;base64,...`).

## Solution

Add image normalization in both `chat_with_system` and `chat_with_history`:
- Call `multimodal::prepare_messages_for_provider()` before building request
- Converts file paths to base64 data URIs
- Validates image size and MIME type
- Works with both local files and remote URLs

## Changes

- `src/providers/openai_codex.rs`:
  - Normalize images in `chat_with_system()`
  - Normalize images in `chat_with_history()`
  - Simplify `ResponsesInputContent.image_url` from nested object to String
  - Fix unit test assertion for flat image_url structure

- `tests/openai_codex_vision_e2e.rs`:
  - Add E2E test for second profile vision support
  - Validates capabilities, request success, and response content

## Verification

 Unit tests pass: `cargo test --lib openai_codex`
 E2E test passes: `cargo test openai_codex_second_vision -- --ignored`
 Second profile accepts vision requests (200 OK)
 Returns correct image descriptions

## Impact

- Enables vision support for all OpenAI Codex profiles
- Second profile works without rate limits
- Fallback chain: default → second → gemini
- No breaking changes to existing non-vision flows

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-23 14:55:24 +08:00
Aleksandr Prilipko
12a3fa707b feat(providers): add vision support to OpenAI Codex provider
- Add vision capability declaration (vision: true)
- Extend ResponsesInputContent to support image_url field
- Update build_responses_input() to parse [IMAGE:...] markers
- Add ImageUrlContent structure for data URI images
- Maintain backward compatibility with text-only messages
- Add comprehensive unit tests for image handling

Enables multimodal input for gpt-5.3-codex and similar models.
Image markers are parsed and sent as separate input_image content items.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-23 14:55:24 +08:00
Aleksandr Prilipko
3a4e55b68d feat(providers): auto-refresh expired Gemini OAuth tokens in warmup
Добавлен автоматический refresh протухших OAuth токенов Gemini при вызове warmup().

## Проблема

При использовании Gemini как fallback провайдера, OAuth токены могут протухнуть пока daemon работает. Это приводит к ошибкам при попытке переключения с OpenAI Codex на Gemini.

Сценарий:
1. Daemon работает, но не делает запросов к Gemini
2. OAuth токены Gemini истекают (TTL = 1 час)
3. Происходит ошибка на OpenAI Codex → fallback на Gemini
4. Gemini провайдер использует протухшие токены → запрос падает

## Решение

### Изменения в `GeminiProvider::warmup()`

Добавлена проверка и обновление токенов для `ManagedOAuth`:
- Вызывается `AuthService::get_valid_gemini_access_token()` который автоматически обновляет токены если нужно
- Для `OAuthToken` (CLI): пропускается (существующее поведение)
- Для API key: проверяется через публичный API (существующее поведение)

### Тесты

**Unit тесты** (`src/providers/gemini.rs`):
- `warmup_managed_oauth_requires_auth_service()` — проверка что ManagedOAuth требует auth_service
- `warmup_cli_oauth_skips_validation()` — проверка что CLI OAuth пропускает валидацию

**E2E тест** (`tests/gemini_fallback_oauth_refresh.rs`):
- `gemini_warmup_refreshes_expired_oauth_token()` — live тест с expired токеном и реальным refresh
- `gemini_warmup_with_valid_credentials()` — простой тест что warmup работает с валидными credentials

### Зависимости

Добавлена dev-зависимость `scopeguard = "1.2"` для безопасного восстановления файлов в тестах.

## Верификация

Проверено на live daemon с Telegram ботом:
- OpenAI Codex упал с 429 rate limit
- Fallback на Gemini сработал успешно
- Бот ответил через Gemini без ошибок

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-23 14:55:24 +08:00
NanFengCheong
d44efc7076 fix(telegram): send image attachments when finalizing draft messages
When using streaming mode with Telegram, the finalize_draft function
would only edit the message text and never send actual image attachments
marked with [IMAGE:path] syntax.

This fix:
- Parses attachment markers in finalize_draft
- Deletes the draft message when attachments are present
- Sends text and attachments as separate messages
- Maintains backward compatibility for text-only messages

Fixes: Telegram finalize_draft edit failed; falling back to sendMessage
2026-02-23 14:55:22 +08:00
Argenis
03a8ce36f3
Merge pull request #1451 from reidliu41/model-subcmd
feat(models): add list, set, and status subcommands
2026-02-23 00:28:04 -05:00
Argenis
15e136b87f
Merge pull request #1448 from zeroclaw-labs/dev-temp
fix(provider): disable native tool calling for MiniMax
2026-02-22 23:16:27 -05:00
Argenis
6826ed5162
Merge pull request #1461 from zeroclaw-labs/sync/dev-from-dev-temp-20260223
sync(dev): bring in missing commits from dev-temp
2026-02-22 23:06:02 -05:00
argenis de la rosa
10973eb075 fix(web): call doctor endpoint with authenticated POST 2026-02-22 21:32:34 -05:00
argenis de la rosa
55ded3ee16 feat(agent): log query classification route decisions 2026-02-22 21:32:34 -05:00
argenis de la rosa
95085a34f2 docs(structure): add language-part-function navigation map 2026-02-22 21:32:28 -05:00
argenis de la rosa
91758b96bf fix(ollama): handle blank responses without tool calls 2026-02-22 21:32:20 -05:00
Argenis
63c7d52430
Merge pull request #1449 from zeroclaw-labs/issue-1338-macos-docs
docs(macOS): add update and uninstall instructions
2026-02-22 21:20:22 -05:00
Argenis
319506c8f5
Merge pull request #1454 from zeroclaw-labs/issue-1387-minimax-native-tools
fix(provider): disable native tool calling for MiniMax
2026-02-22 21:20:20 -05:00
argenis de la rosa
1365ecc5a0 fix(provider): disable native tool calling for MiniMax 2026-02-22 21:10:54 -05:00
reidliu41
04e8eb2d8e feat(models): add list, set, and status subcommands 2026-02-23 08:09:28 +08:00
argenis de la rosa
5e2f3bf7db docs(macOS): add update and uninstall guide 2026-02-22 18:50:16 -05:00
argenis de la rosa
8af534f15f fix(provider): disable native tool calling for MiniMax 2026-02-22 17:59:55 -05:00
argenis de la rosa
0c532affe3 fix(ollama): handle blank responses without tool calls 2026-02-22 17:49:26 -05:00
Argenis
74581a3aa5
fix(agent): parse tool <name> markdown fence format (#1438)
Issue: #1420

Some LLM providers (e.g., xAI grok) output tool calls in the format:
```tool file_write
{"path": "...", "content": "..."}
```

Previously, ZeroClaw only matched:
- ```tool_call
- ```tool-call
- ```toolcall
- ```invoke

This caused silent failures where:
1. Tool calls were not parsed
2. Agent reported success but no tools executed
3. LLM hallucinated tool execution results

Fix:
1. Added new regex `MD_TOOL_NAME_RE` to match ` ```tool <name>` format
2. Parse the tool name from the code block header
3. Parse JSON arguments from the block content
4. Updated `detect_tool_call_parse_issue()` to include this format

Added 3 tests:
- parse_tool_calls_handles_tool_name_fence_format
- parse_tool_calls_handles_tool_name_fence_shell
- parse_tool_calls_handles_multiple_tool_name_fences

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 14:34:57 -05:00
Chummy
e9a0801a77 fix(provider): fallback native tools on parser-style 5xx 2026-02-23 01:34:20 +08:00
Argenis
8a1409135b
feat(config): warn on unknown config keys to prevent silent misconfig (#1410)
* ci(homebrew): prefer HOMEBREW_UPSTREAM_PR_TOKEN with fallback

* ci(homebrew): handle existing upstream remote and main base

* fix(skills): allow cross-skill references in open-skills audit

Issue: #1391

The skill audit was too strict when validating markdown links in
open-skills, causing many skills to fail loading with errors like:
- "absolute markdown link paths are not allowed (../other-skill/SKILL.md)"
- "markdown link points to a missing file (skill-name.md)"

Root cause:
1. `looks_like_absolute_path()` rejected paths starting with ".."
   before canonicalization could validate they stay within root
2. Missing file errors were raised for cross-skill references that
   are valid but point to skills not installed locally

Fix:
1. Allow ".." paths to pass through to canonicalization check which
   properly validates they resolve within the skill root
2. Treat cross-skill references (parent dir traversal or bare .md
   filenames) as non-fatal when pointing to missing files

Cross-skill references are identified by:
- Parent directory traversal: `../other-skill/SKILL.md`
- Bare skill filename: `other-skill.md`
- Explicit relative: `./other-skill.md`

Added 6 new tests to cover edge cases for cross-skill references.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat(config): warn on unknown config keys to prevent silent misconfig

Issue: #1304

When users configure `[providers.ollama]` with `api_url`, the setting is
silently ignored because `[providers.*]` sections don't exist in the
config schema. This causes Ollama to always use localhost:11434 regardless
of the configured URL.

Fix: Use serde_ignored to detect and warn about unknown config keys at
load time. This helps users identify misconfigurations like:
- `[providers.ollama]` (should be top-level `api_url`)
- Typos in section names
- Deprecated/removed options

The warning is non-blocking - config still loads, but users see:
```
WARN Unknown config key ignored: "providers". Check config.toml...
```

This follows the fail-fast/explicit errors principle (CLAUDE.md §3.5).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Will Sarg <12886992+willsarg@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 12:16:54 -05:00
Chummy
13469f0839 refactor(telegram): remove redundant else in startup probe 2026-02-23 01:10:19 +08:00
Chummy
19b957e915 style(telegram): format startup probe warning log 2026-02-23 01:10:19 +08:00
zeroclaw
8aab98a7d6 fix(telegram): add debug log at startup probe success
Add a debug-level log line confirming when the startup probe succeeds
and the main long-poll loop is entered. Aids diagnostics when
troubleshooting persistent 409s (e.g. from an external competing poller).

Note: persistent 409 despite the startup probe and 35s backoff indicates
an external process is actively polling the same bot token from another
host. In that case, rotating the bot token via @BotFather is the fix.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 01:10:19 +08:00
zeroclaw
ff213bac68 fix(telegram): add startup probe + extend 409 backoff to eliminate polling conflict
Every daemon restart produced a flood of 409 Telegram polling conflicts for
up to several minutes. Two changes fix this:

1. **Startup probe (retry loop):** Before entering the long-poll loop,
   repeatedly issue `getUpdates?timeout=0` until a 200 OK is received.
   This claims the Telegram getUpdates slot before the 30-second long-poll
   starts, preventing the first long-poll from racing a stale server-side
   session left by the previous daemon. The probe retries every 5 seconds
   until the slot is confirmed free.

2. **Extended 409 backoff:** Increased from 2 s → 35 s (> the 30-second
   poll timeout). If a 409 still occurs despite the probe (e.g. in a genuine
   dual-instance scenario), the retry now waits long enough for the competing
   session to expire naturally before the next attempt, instead of hammering
   Telegram with ~15 retries per minute.

Fixes #1281.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 01:10:19 +08:00
Chummy
d8eb789db4 fix(composio): harden v3 slug candidate and test coverage 2026-02-23 00:55:42 +08:00
Bogdan
0d24a54b90 fix tests 2026-02-23 00:43:54 +08:00
Bogdan
a6e53e6fcd feat(tools): stabilize composio slug resolution and drop v2 fallback
- add cache + candidate builder for Composio action/tool slugs so execute runs without manual priming @src/tools/composio.rs#285-320
- remove unused v2 execute/connect code paths and rely on HTTPS-only v3 endpoints @src/tools/composio.rs#339-502
- extend tooling tests to cover slug candidate generation variants @src/tools/composio.rs#1317-1324
2026-02-23 00:43:54 +08:00
Chummy
f47974d485 docs(readme): drop TG CN/RU badges and add Facebook group link 2026-02-23 00:42:41 +08:00
argenis de la rosa
880a975744 fix(skills): allow cross-skill references in open-skills audit
Issue: #1391

The skill audit was too strict when validating markdown links in
open-skills, causing many skills to fail loading with errors like:
- "absolute markdown link paths are not allowed (../other-skill/SKILL.md)"
- "markdown link points to a missing file (skill-name.md)"

Root cause:
1. `looks_like_absolute_path()` rejected paths starting with ".."
   before canonicalization could validate they stay within root
2. Missing file errors were raised for cross-skill references that
   are valid but point to skills not installed locally

Fix:
1. Allow ".." paths to pass through to canonicalization check which
   properly validates they resolve within the skill root
2. Treat cross-skill references (parent dir traversal or bare .md
   filenames) as non-fatal when pointing to missing files

Cross-skill references are identified by:
- Parent directory traversal: `../other-skill/SKILL.md`
- Bare skill filename: `other-skill.md`
- Explicit relative: `./other-skill.md`

Added 6 new tests to cover edge cases for cross-skill references.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 00:27:32 +08:00
Chummy
1ee57801c9 fix: route heartbeat outputs to configured channels 2026-02-23 00:18:12 +08:00
zhzy0077
b04bb9c19d fix(channels): expand lark ack reactions with valid emoji_type ids
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-23 00:11:40 +08:00
zhzy0077
2cefcc1908 fix(channels): use valid Feishu emoji_type for lark ack
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-23 00:11:40 +08:00
cee ray
62fef4accb fix(providers): disable Responses API fallback for NVIDIA NIM
NVIDIA's NIM API (integrate.api.nvidia.com) does not support the
OpenAI Responses API endpoint. When chat completions returns a
non-success status, the fallback to /v1/responses also fails with
404, producing a confusing double-failure error.

Use `new_no_responses_fallback()` for the NVIDIA provider, matching
the approach already used for GLM and other chat-completions-only
providers.

Fixes #1282
2026-02-23 00:11:21 +08:00
Chummy
2c57c89f9e fix(kimi-code): include empty reasoning_content in tool history 2026-02-22 22:22:52 +08:00
Chummy
09c3c2c844 chore(readme): delete the typo. 2026-02-22 21:24:25 +08:00
Liang Zhang
241bb54c66 更新 README.zh-CN.md,改进中文表达并更新最后对齐时间 2026-02-22 21:24:25 +08:00
Will Sarg
e30cd4ac67 ci(homebrew): handle existing upstream remote and main base 2026-02-22 21:24:25 +08:00
Will Sarg
f1d4d4fbaf ci(homebrew): prefer HOMEBREW_UPSTREAM_PR_TOKEN with fallback 2026-02-22 21:24:25 +08:00
Chummy
cc849c54a7 test(cron): add shell one-shot regression coverage 2026-02-22 18:21:08 +08:00
reidliu41
3283231e11 fix(cron): set delete_after_run for one-shot shell jobs 2026-02-22 18:21:08 +08:00
Chummy
a6034aef26 fix(discord): send attachment markers as files/urls 2026-02-22 18:14:19 +08:00
Chummy
3baa71ca43 fix(minimax): avoid parsing merged system image markers as vision parts 2026-02-22 17:59:45 +08:00
Chummy
491b29303e fix(channels): render WhatsApp Web pairing QR in terminal 2026-02-22 17:58:35 +08:00
Chummy
fab09d15cb fix(config): enforce 0600 on every config save 2026-02-22 17:58:21 +08:00
Chummy
ec6553384a fix(slack): bootstrap poll cursor to avoid replay 2026-02-22 17:57:11 +08:00
Chummy
35e9ef2496 fix(security): honor explicit command paths in allowed_commands 2026-02-22 17:50:24 +08:00
1600 changed files with 211935 additions and 203085 deletions

View File

@ -0,0 +1,19 @@
{
"arch": "arm",
"crt-static-defaults": true,
"data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64",
"emit-debug-gdb-scripts": false,
"env": "musl",
"executables": true,
"is-builtin": false,
"linker": "arm-linux-gnueabihf-gcc",
"linker-flavor": "gcc",
"llvm-target": "armv6-unknown-linux-musleabihf",
"max-atomic-width": 32,
"os": "linux",
"panic-strategy": "unwind",
"relocation-model": "static",
"target-endian": "little",
"target-pointer-width": "32",
"vendor": "unknown"
}

View File

@ -1,12 +0,0 @@
# cargo-audit configuration
# https://rustsec.org/
[advisories]
ignore = [
# wasmtime vulns via extism 1.13.0 — no upstream fix; plugins feature-gated
"RUSTSEC-2026-0006", # wasmtime f64.copysign segfault on x86-64
"RUSTSEC-2026-0020", # WASI guest-controlled resource exhaustion
"RUSTSEC-2026-0021", # WASI http fields panic
# instant crate unmaintained — transitive dep via nostr; no upstream fix
"RUSTSEC-2024-0384",
]

View File

@ -1,13 +1,33 @@
# macOS targets — pin minimum OS version so binaries run on supported releases.
# Intel (x86_64): target macOS 10.15 Catalina and later.
# Apple Silicon (aarch64): target macOS 11.0 Big Sur and later (no Catalina hardware exists).
[target.x86_64-apple-darwin]
rustflags = ["-C", "link-arg=-mmacosx-version-min=10.15"]
[target.aarch64-apple-darwin]
rustflags = ["-C", "link-arg=-mmacosx-version-min=11.0"]
[target.x86_64-unknown-linux-musl]
rustflags = ["-C", "link-arg=-static"]
[target.aarch64-unknown-linux-musl]
rustflags = ["-C", "link-arg=-static", "-C", "link-arg=-Wl,-z,stack-size=8388608"]
rustflags = ["-C", "link-arg=-static"]
# Android targets (NDK toolchain)
# ARMv6 musl (Raspberry Pi Zero W)
[target.armv6l-unknown-linux-musleabihf]
rustflags = ["-C", "link-arg=-static"]
# Android targets (Termux-native defaults).
# CI/NDK cross builds can override these via CARGO_TARGET_*_LINKER.
[target.armv7-linux-androideabi]
linker = "armv7a-linux-androideabi21-clang"
linker = "clang"
[target.aarch64-linux-android]
linker = "aarch64-linux-android21-clang"
rustflags = ["-C", "link-arg=-Wl,-z,stack-size=8388608"]
linker = "clang"
# Windows targets — increase stack size for large JsonSchema derives
[target.x86_64-pc-windows-msvc]
rustflags = ["-C", "link-args=/STACK:8388608"]
[target.aarch64-pc-windows-msvc]
rustflags = ["-C", "link-args=/STACK:8388608"]

View File

@ -1,133 +0,0 @@
# Skill: github-issue
File a structured GitHub issue (bug report or feature request) for ZeroClaw interactively from Claude Code.
## When to Use
Trigger when the user wants to file a GitHub issue, report a bug, or request a feature for ZeroClaw. Keywords: "file issue", "report bug", "feature request", "open issue", "create issue", "github issue".
## Instructions
You are filing a GitHub issue against the ZeroClaw repository using structured issue forms. Follow this workflow exactly.
### Step 1: Detect Issue Type and Read the Template
Determine from the user's message whether this is a **bug report** or **feature request**.
- If unclear, use AskUserQuestion to ask: "Is this a bug report or a feature request?"
Then read the corresponding issue template to understand the required fields:
- Bug report: `.github/ISSUE_TEMPLATE/bug_report.yml`
- Feature request: `.github/ISSUE_TEMPLATE/feature_request.yml`
Parse the YAML to extract:
- The `title` prefix (e.g. `[Bug]: `, `[Feature]: `)
- The `labels` array
- Each field in the `body` array: its `type` (dropdown, textarea, input, checkboxes, markdown), `id`, `attributes.label`, `attributes.options` (for dropdowns), `attributes.description`, `attributes.placeholder`, and `validations.required`
This is the source of truth for what fields exist, what they're called, what options are available, and which are required. Do not assume or hardcode any field names or options — always derive them from the template file.
### Step 2: Auto-Gather Context
Before asking the user anything, silently gather environment and repo context:
```bash
# Git context
git log --oneline -5
git status --short
git diff --stat HEAD~1 2>/dev/null
# For bug reports — environment detection
uname -s -r -m # OS info
sw_vers 2>/dev/null # macOS version
rustc --version 2>/dev/null # Rust version
cargo metadata --format-version=1 --no-deps 2>/dev/null | jq -r '.packages[] | select(.name=="zeroclaw") | .version' 2>/dev/null # ZeroClaw version
git rev-parse --short HEAD # commit SHA fallback
```
Also read recently changed files to infer the affected component and architecture impact.
### Step 3: Pre-Fill and Present the Form
Using the parsed template fields and gathered context, draft values for ALL fields from the template:
- **dropdown** fields: select the most likely option from `attributes.options` based on context. For dropdowns where you're uncertain, note your best guess and flag it for the user.
- **textarea** fields: draft content based on the user's description, git context, and the field's `attributes.description`/`attributes.placeholder` for guidance on what's expected.
- **input** fields: fill with auto-detected values (versions, OS) or draft from user context.
- **checkboxes** fields: auto-check all items (the skill itself ensures compliance with the stated checks).
- **markdown** fields: skip these — they're informational headers, not form inputs.
- **optional fields** (where `validations.required` is false): fill if there's enough context, otherwise note "(optional — not enough context to fill)".
Present the complete draft to the user in a clean readable format:
```
## Issue Draft: [Bug]: <title> / [Feature]: <title>
**Labels**: <from template>
### <Field Label>
<proposed value or selection>
### <Field Label>
<proposed value>
...
```
Use AskUserQuestion to ask the user to review:
- "Here's the pre-filled issue. Please review and let me know what to change, or say 'submit' to file it."
If the user requests changes, update the draft and re-present. Iterate until the user approves.
### Step 4: Scope Guard
Before final submission, analyze the collected content for scope creep:
- Does the bug report describe multiple independent defects?
- Does the feature request bundle unrelated changes?
If multi-concept issues are detected:
1. Inform the user: "This issue appears to cover multiple distinct topics. Focused, single-concept issues are strongly preferred and more likely to be accepted."
2. Break down the distinct groups found.
3. Offer to file separate issues for each group, reusing shared context (environment, etc.).
4. Let the user decide: proceed as-is or split.
### Step 5: Construct Issue Body
Build the issue body as markdown sections matching GitHub's form-field rendering format. GitHub renders form-submitted issues with `### <Field Label>` sections, so use that exact structure.
For each non-markdown field from the template, in order:
```markdown
### <attributes.label>
<value>
```
For optional fields with no content, use `_No response_` as the value (this matches GitHub's native rendering for empty optional fields).
For checkbox fields, render each option as:
```markdown
- [X] <option label text>
```
### Step 6: Final Preview and Submit
Show the final constructed issue (title + labels + full body) for one last confirmation.
Then submit using a HEREDOC for the body to preserve formatting:
```bash
gh issue create --title "<title prefix><user title>" --label "<label1>,<label2>" --body "$(cat <<'ISSUE_EOF'
<body content>
ISSUE_EOF
)"
```
Return the resulting issue URL to the user.
### Important Rules
- **Always read the template file** — never assume field names, options, or structure. The templates are the source of truth and may change over time.
- **Never include personal/sensitive data** in the issue. Redact secrets, tokens, emails, real names.
- **Use neutral project-scoped placeholders** per ZeroClaw's privacy contract.
- **One concept per issue** — enforce the scope guard.
- **Auto-detect, don't guess** — use real command output for environment fields.
- **Match GitHub's rendering** — use `### Field Label` sections so issues look consistent whether filed via web UI or this skill.

View File

@ -1,209 +0,0 @@
# Skill: github-pr
Open or update a GitHub Pull Request for ZeroClaw. Handles creating new PRs with a fully filled-out template body, and updating existing PRs (title, body sections, labels, comments). Use this skill whenever the user wants to open a PR, create a pull request, update a PR, edit PR description, add labels to a PR, or sync a PR after new commits — even if they don't say "PR" explicitly (e.g., "submit this for review", "push and open for merge").
## Instructions
This skill supports two modes: **Open** (create a new PR) and **Update** (edit an existing PR). Detect the mode from context — if there's already an open PR for the current branch and the user didn't say "open a new PR", default to update mode.
The PR template at `.github/pull_request_template.md` is the source of truth for the PR body structure. Read it every time — never assume or hardcode section names, fields, or their order. The template may change over time and the skill should always reflect its current state.
---
## Shared: Read the PR Template
Before opening or updating a PR body, read `.github/pull_request_template.md` and parse it to understand:
- The `## ` section headers (these are the top-level sections of the PR body)
- The bullet points, fields, and prompts within each section
- Which sections are marked `(required)` vs optional/recommended
- Any inline formatting conventions (backtick options, Yes/No fields, etc.)
This parsed structure drives how you fill, present, and edit the PR body.
---
## Mode: Open a New PR
### Step 1: Gather Context
Collect information to pre-fill the PR body. Run these in parallel:
```bash
# Branch and commit context
git branch --show-current
git log master..HEAD --oneline
git diff master...HEAD --stat
# Check if branch is pushed
git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null
# Environment (for validation evidence)
rustc --version 2>/dev/null
```
Also review the changed files and commit messages to understand the nature of the change (bug fix, feature, refactor, docs, chore, etc.) and which subsystems are affected.
### Step 2: Pre-Fill the Template
Using the parsed template structure and gathered context, draft a complete PR body:
- For each `## ` section from the template, fill in the bullet points and fields based on context from the commits, diff, and changed files.
- Use the field descriptions and placeholder text in the template as guidance for what each field expects.
- For Yes/No fields, infer from the diff (e.g., if no files in `src/security/` changed, security impact is likely all No).
- For required sections, always provide a substantive answer. For optional sections, fill if there's enough context, otherwise leave the template prompts in place.
- Draft a conventional commit-style PR title based on the changes (e.g., `feat(provider): add retry budget override`, `fix(channel): handle disconnect gracefully`, `chore(ci): update workflow targets`).
### Step 3: Present Draft for Review
Show the user the complete draft:
```
## PR Draft: <title>
**Branch**: <head> -> master
**Labels**: <suggested labels>
<full body with all sections filled>
```
Ask the user to review: "Here's the pre-filled PR. Review and let me know what to change, or say 'submit' to open it."
Iterate on changes until the user approves.
### Step 4: Push and Create
1. If the branch isn't pushed yet, push it:
```bash
git push -u origin <branch>
```
2. Create the PR using a HEREDOC for the body:
```bash
gh pr create --title "<title>" --base master --body "$(cat <<'PR_BODY_EOF'
<full body>
PR_BODY_EOF
)"
```
3. If labels were agreed on, add them:
```bash
gh pr edit <number> --add-label "<label1>,<label2>"
```
4. Return the PR URL to the user.
---
## Mode: Update an Existing PR
### Step 1: Identify the PR
1. **If a PR number or URL is given**: use that directly.
2. **If on a branch with an open PR**: auto-detect:
```bash
gh pr view --json number,title,body,labels,state,author,url,headRefName 2>/dev/null
```
3. **If neither**: ask the user for the PR number.
Verify the current user is the PR author:
```bash
CURRENT_USER=$(gh api user --jq '.login')
PR_AUTHOR=$(gh pr view <number> --json author --jq '.author.login')
```
If not the author, stop and inform the user.
### Step 2: Fetch Current State
```bash
gh pr view <number> --json number,title,body,labels,state,baseRefName,headRefName,url,author,reviewDecision,statusCheckRollup,commits
```
Display a summary:
```
## PR #<number>: <title>
**State**: <open/closed/merged>
**Branch**: <head> -> <base>
**Labels**: <label list>
**Checks**: <pass/fail/pending>
**URL**: <url>
```
### Step 3: Determine What to Update
Support these operations:
| Operation | How |
|---|---|
| **Edit title** | `gh pr edit <number> --title "<new title>"` |
| **Edit full body** | `gh pr edit <number> --body "<new body>"` |
| **Add labels** | `gh pr edit <number> --add-label "<label1>,<label2>"` |
| **Remove labels** | `gh pr edit <number> --remove-label "<label1>"` |
| **Edit specific section** | Parse body by `## ` headers, modify target section, re-submit full body |
| **Add a comment** | `gh pr comment <number> --body "<comment>"` |
| **Link an issue** | Edit the linked-issue section in the body |
| **Smart update after new commits** | Re-analyze and suggest section updates |
### Step 4: Handle Body Section Edits
When editing a specific section:
1. Parse the current PR body into sections by `## ` headers
2. Match the user's request to the corresponding section from the template
3. Show the current content of that section and the proposed replacement
4. On confirmation, modify only that section, reconstruct the full body, and submit
### Step 5: Smart Update After New Commits
When the user wants to sync the PR description after pushing new changes:
1. Identify new commits:
```bash
gh pr view <number> --json commits --jq '.commits[].messageHeadline'
git log <base>..<head> --oneline
git diff <base>...<head> --stat
```
2. Re-read the PR template. Analyze which sections are now stale based on the new changes — use the template's section names and field descriptions to identify what needs updating rather than relying on hardcoded assumptions.
3. Present proposed updates section-by-section and confirm before applying.
### Step 6: Apply Updates
For title/label changes, use direct `gh pr edit` flags.
For body edits, use a HEREDOC:
```bash
gh pr edit <number> --body "$(cat <<'PR_BODY_EOF'
<full updated body>
PR_BODY_EOF
)"
```
For comments:
```bash
gh pr comment <number> --body "$(cat <<'COMMENT_EOF'
<comment text>
COMMENT_EOF
)"
```
### Step 7: Confirm
Fetch and display the updated state:
```bash
gh pr view <number> --json number,title,labels,url
```
Return the PR URL.
---
## Important Rules
- **Always read `.github/pull_request_template.md`** before filling or editing a PR body. Never assume section names, fields, or structure — derive everything from the template. It's the source of truth and may change.
- **For updates, only modify requested sections.** Preserve everything else exactly as-is.
- **Always show diffs before applying body edits.** Present current vs proposed for each changed section.
- **Never include personal/sensitive data** in PR content per ZeroClaw's privacy contract.
- **For label changes**, only use labels that exist in the repository. Check with `gh label list` if unsure.
- **Fetch the latest body before editing** to avoid clobbering concurrent changes.
- **For new PRs**, push the branch before creating (with `-u` to set upstream tracking).

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,485 +0,0 @@
---
name: skill-creator
description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
---
# Skill Creator
A skill for creating new skills and iteratively improving them.
At a high level, the process of creating a skill goes like this:
- Decide what you want the skill to do and roughly how it should do it
- Write a draft of the skill
- Create a few test prompts and run claude-with-access-to-the-skill on them
- Help the user evaluate the results both qualitatively and quantitatively
- While the runs happen in the background, draft some quantitative evals if there aren't any (if there are some, you can either use as is or modify if you feel something needs to change about them). Then explain them to the user (or if they already existed, explain the ones that already exist)
- Use the `eval-viewer/generate_review.py` script to show the user the results for them to look at, and also let them look at the quantitative metrics
- Rewrite the skill based on feedback from the user's evaluation of the results (and also if there are any glaring flaws that become apparent from the quantitative benchmarks)
- Repeat until you're satisfied
- Expand the test set and try again at larger scale
Your job when using this skill is to figure out where the user is in this process and then jump in and help them progress through these stages. So for instance, maybe they're like "I want to make a skill for X". You can help narrow down what they mean, write a draft, write the test cases, figure out how they want to evaluate, run all the prompts, and repeat.
On the other hand, maybe they already have a draft of the skill. In this case you can go straight to the eval/iterate part of the loop.
Of course, you should always be flexible and if the user is like "I don't need to run a bunch of evaluations, just vibe with me", you can do that instead.
Then after the skill is done (but again, the order is flexible), you can also run the skill description improver, which we have a whole separate script for, to optimize the triggering of the skill.
Cool? Cool.
## Communicating with the user
The skill creator is liable to be used by people across a wide range of familiarity with coding jargon. If you haven't heard (and how could you, it's only very recently that it started), there's a trend now where the power of Claude is inspiring plumbers to open up their terminals, parents and grandparents to google "how to install npm". On the other hand, the bulk of users are probably fairly computer-literate.
So please pay attention to context cues to understand how to phrase your communication! In the default case, just to give you some idea:
- "evaluation" and "benchmark" are borderline, but OK
- for "JSON" and "assertion" you want to see serious cues from the user that they know what those things are before using them without explaining them
It's OK to briefly explain terms if you're in doubt, and feel free to clarify terms with a short definition if you're unsure if the user will get it.
---
## Creating a skill
### Capture Intent
Start by understanding the user's intent. The current conversation might already contain a workflow the user wants to capture (e.g., they say "turn this into a skill"). If so, extract answers from the conversation history first — the tools used, the sequence of steps, corrections the user made, input/output formats observed. The user may need to fill the gaps, and should confirm before proceeding to the next step.
1. What should this skill enable Claude to do?
2. When should this skill trigger? (what user phrases/contexts)
3. What's the expected output format?
4. Should we set up test cases to verify the skill works? Skills with objectively verifiable outputs (file transforms, data extraction, code generation, fixed workflow steps) benefit from test cases. Skills with subjective outputs (writing style, art) often don't need them. Suggest the appropriate default based on the skill type, but let the user decide.
### Interview and Research
Proactively ask questions about edge cases, input/output formats, example files, success criteria, and dependencies. Wait to write test prompts until you've got this part ironed out.
Check available MCPs - if useful for research (searching docs, finding similar skills, looking up best practices), research in parallel via subagents if available, otherwise inline. Come prepared with context to reduce burden on the user.
### Write the SKILL.md
Based on the user interview, fill in these components:
- **name**: Skill identifier
- **description**: When to trigger, what it does. This is the primary triggering mechanism - include both what the skill does AND specific contexts for when to use it. All "when to use" info goes here, not in the body. Note: currently Claude has a tendency to "undertrigger" skills -- to not use them when they'd be useful. To combat this, please make the skill descriptions a little bit "pushy". So for instance, instead of "How to build a simple fast dashboard to display internal Anthropic data.", you might write "How to build a simple fast dashboard to display internal Anthropic data. Make sure to use this skill whenever the user mentions dashboards, data visualization, internal metrics, or wants to display any kind of company data, even if they don't explicitly ask for a 'dashboard.'"
- **compatibility**: Required tools, dependencies (optional, rarely needed)
- **the rest of the skill :)**
### Skill Writing Guide
#### Anatomy of a Skill
```
skill-name/
├── SKILL.md (required)
│ ├── YAML frontmatter (name, description required)
│ └── Markdown instructions
└── Bundled Resources (optional)
├── scripts/ - Executable code for deterministic/repetitive tasks
├── references/ - Docs loaded into context as needed
└── assets/ - Files used in output (templates, icons, fonts)
```
#### Progressive Disclosure
Skills use a three-level loading system:
1. **Metadata** (name + description) - Always in context (~100 words)
2. **SKILL.md body** - In context whenever skill triggers (<500 lines ideal)
3. **Bundled resources** - As needed (unlimited, scripts can execute without loading)
These word counts are approximate and you can feel free to go longer if needed.
**Key patterns:**
- Keep SKILL.md under 500 lines; if you're approaching this limit, add an additional layer of hierarchy along with clear pointers about where the model using the skill should go next to follow up.
- Reference files clearly from SKILL.md with guidance on when to read them
- For large reference files (>300 lines), include a table of contents
**Domain organization**: When a skill supports multiple domains/frameworks, organize by variant:
```
cloud-deploy/
├── SKILL.md (workflow + selection)
└── references/
├── aws.md
├── gcp.md
└── azure.md
```
Claude reads only the relevant reference file.
#### Principle of Lack of Surprise
This goes without saying, but skills must not contain malware, exploit code, or any content that could compromise system security. A skill's contents should not surprise the user in their intent if described. Don't go along with requests to create misleading skills or skills designed to facilitate unauthorized access, data exfiltration, or other malicious activities. Things like a "roleplay as an XYZ" are OK though.
#### Writing Patterns
Prefer using the imperative form in instructions.
**Defining output formats** - You can do it like this:
```markdown
## Report structure
ALWAYS use this exact template:
# [Title]
## Executive summary
## Key findings
## Recommendations
```
**Examples pattern** - It's useful to include examples. You can format them like this (but if "Input" and "Output" are in the examples you might want to deviate a little):
```markdown
## Commit message format
**Example 1:**
Input: Added user authentication with JWT tokens
Output: feat(auth): implement JWT-based authentication
```
### Writing Style
Try to explain to the model why things are important in lieu of heavy-handed musty MUSTs. Use theory of mind and try to make the skill general and not super-narrow to specific examples. Start by writing a draft and then look at it with fresh eyes and improve it.
### Test Cases
After writing the skill draft, come up with 2-3 realistic test prompts — the kind of thing a real user would actually say. Share them with the user: [you don't have to use this exact language] "Here are a few test cases I'd like to try. Do these look right, or do you want to add more?" Then run them.
Save test cases to `evals/evals.json`. Don't write assertions yet — just the prompts. You'll draft assertions in the next step while the runs are in progress.
```json
{
"skill_name": "example-skill",
"evals": [
{
"id": 1,
"prompt": "User's task prompt",
"expected_output": "Description of expected result",
"files": []
}
]
}
```
See `references/schemas.md` for the full schema (including the `assertions` field, which you'll add later).
## Running and evaluating test cases
This section is one continuous sequence — don't stop partway through. Do NOT use `/skill-test` or any other testing skill.
Put results in `<skill-name>-workspace/` as a sibling to the skill directory. Within the workspace, organize results by iteration (`iteration-1/`, `iteration-2/`, etc.) and within that, each test case gets a directory (`eval-0/`, `eval-1/`, etc.). Don't create all of this upfront — just create directories as you go.
### Step 1: Spawn all runs (with-skill AND baseline) in the same turn
For each test case, spawn two subagents in the same turn — one with the skill, one without. This is important: don't spawn the with-skill runs first and then come back for baselines later. Launch everything at once so it all finishes around the same time.
**With-skill run:**
```
Execute this task:
- Skill path: <path-to-skill>
- Task: <eval prompt>
- Input files: <eval files if any, or "none">
- Save outputs to: <workspace>/iteration-<N>/eval-<ID>/with_skill/outputs/
- Outputs to save: <what the user cares about e.g., "the .docx file", "the final CSV">
```
**Baseline run** (same prompt, but the baseline depends on context):
- **Creating a new skill**: no skill at all. Same prompt, no skill path, save to `without_skill/outputs/`.
- **Improving an existing skill**: the old version. Before editing, snapshot the skill (`cp -r <skill-path> <workspace>/skill-snapshot/`), then point the baseline subagent at the snapshot. Save to `old_skill/outputs/`.
Write an `eval_metadata.json` for each test case (assertions can be empty for now). Give each eval a descriptive name based on what it's testing — not just "eval-0". Use this name for the directory too. If this iteration uses new or modified eval prompts, create these files for each new eval directory — don't assume they carry over from previous iterations.
```json
{
"eval_id": 0,
"eval_name": "descriptive-name-here",
"prompt": "The user's task prompt",
"assertions": []
}
```
### Step 2: While runs are in progress, draft assertions
Don't just wait for the runs to finish — you can use this time productively. Draft quantitative assertions for each test case and explain them to the user. If assertions already exist in `evals/evals.json`, review them and explain what they check.
Good assertions are objectively verifiable and have descriptive names — they should read clearly in the benchmark viewer so someone glancing at the results immediately understands what each one checks. Subjective skills (writing style, design quality) are better evaluated qualitatively — don't force assertions onto things that need human judgment.
Update the `eval_metadata.json` files and `evals/evals.json` with the assertions once drafted. Also explain to the user what they'll see in the viewer — both the qualitative outputs and the quantitative benchmark.
### Step 3: As runs complete, capture timing data
When each subagent task completes, you receive a notification containing `total_tokens` and `duration_ms`. Save this data immediately to `timing.json` in the run directory:
```json
{
"total_tokens": 84852,
"duration_ms": 23332,
"total_duration_seconds": 23.3
}
```
This is the only opportunity to capture this data — it comes through the task notification and isn't persisted elsewhere. Process each notification as it arrives rather than trying to batch them.
### Step 4: Grade, aggregate, and launch the viewer
Once all runs are done:
1. **Grade each run** — spawn a grader subagent (or grade inline) that reads `agents/grader.md` and evaluates each assertion against the outputs. Save results to `grading.json` in each run directory. The grading.json expectations array must use the fields `text`, `passed`, and `evidence` (not `name`/`met`/`details` or other variants) — the viewer depends on these exact field names. For assertions that can be checked programmatically, write and run a script rather than eyeballing it — scripts are faster, more reliable, and can be reused across iterations.
2. **Aggregate into benchmark** — run the aggregation script from the skill-creator directory:
```bash
python -m scripts.aggregate_benchmark <workspace>/iteration-N --skill-name <name>
```
This produces `benchmark.json` and `benchmark.md` with pass_rate, time, and tokens for each configuration, with mean ± stddev and the delta. If generating benchmark.json manually, see `references/schemas.md` for the exact schema the viewer expects.
Put each with_skill version before its baseline counterpart.
3. **Do an analyst pass** — read the benchmark data and surface patterns the aggregate stats might hide. See `agents/analyzer.md` (the "Analyzing Benchmark Results" section) for what to look for — things like assertions that always pass regardless of skill (non-discriminating), high-variance evals (possibly flaky), and time/token tradeoffs.
4. **Launch the viewer** with both qualitative outputs and quantitative data:
```bash
nohup python <skill-creator-path>/eval-viewer/generate_review.py \
<workspace>/iteration-N \
--skill-name "my-skill" \
--benchmark <workspace>/iteration-N/benchmark.json \
> /dev/null 2>&1 &
VIEWER_PID=$!
```
For iteration 2+, also pass `--previous-workspace <workspace>/iteration-<N-1>`.
**Cowork / headless environments:** If `webbrowser.open()` is not available or the environment has no display, use `--static <output_path>` to write a standalone HTML file instead of starting a server. Feedback will be downloaded as a `feedback.json` file when the user clicks "Submit All Reviews". After download, copy `feedback.json` into the workspace directory for the next iteration to pick up.
Note: please use generate_review.py to create the viewer; there's no need to write custom HTML.
5. **Tell the user** something like: "I've opened the results in your browser. There are two tabs — 'Outputs' lets you click through each test case and leave feedback, 'Benchmark' shows the quantitative comparison. When you're done, come back here and let me know."
### What the user sees in the viewer
The "Outputs" tab shows one test case at a time:
- **Prompt**: the task that was given
- **Output**: the files the skill produced, rendered inline where possible
- **Previous Output** (iteration 2+): collapsed section showing last iteration's output
- **Formal Grades** (if grading was run): collapsed section showing assertion pass/fail
- **Feedback**: a textbox that auto-saves as they type
- **Previous Feedback** (iteration 2+): their comments from last time, shown below the textbox
The "Benchmark" tab shows the stats summary: pass rates, timing, and token usage for each configuration, with per-eval breakdowns and analyst observations.
Navigation is via prev/next buttons or arrow keys. When done, they click "Submit All Reviews" which saves all feedback to `feedback.json`.
### Step 5: Read the feedback
When the user tells you they're done, read `feedback.json`:
```json
{
"reviews": [
{"run_id": "eval-0-with_skill", "feedback": "the chart is missing axis labels", "timestamp": "..."},
{"run_id": "eval-1-with_skill", "feedback": "", "timestamp": "..."},
{"run_id": "eval-2-with_skill", "feedback": "perfect, love this", "timestamp": "..."}
],
"status": "complete"
}
```
Empty feedback means the user thought it was fine. Focus your improvements on the test cases where the user had specific complaints.
Kill the viewer server when you're done with it:
```bash
kill $VIEWER_PID 2>/dev/null
```
---
## Improving the skill
This is the heart of the loop. You've run the test cases, the user has reviewed the results, and now you need to make the skill better based on their feedback.
### How to think about improvements
1. **Generalize from the feedback.** The big picture thing that's happening here is that we're trying to create skills that can be used a million times (maybe literally, maybe even more who knows) across many different prompts. Here you and the user are iterating on only a few examples over and over again because it helps move faster. The user knows these examples in and out and it's quick for them to assess new outputs. But if the skill you and the user are codeveloping works only for those examples, it's useless. Rather than put in fiddly overfitty changes, or oppressively constrictive MUSTs, if there's some stubborn issue, you might try branching out and using different metaphors, or recommending different patterns of working. It's relatively cheap to try and maybe you'll land on something great.
2. **Keep the prompt lean.** Remove things that aren't pulling their weight. Make sure to read the transcripts, not just the final outputs — if it looks like the skill is making the model waste a bunch of time doing things that are unproductive, you can try getting rid of the parts of the skill that are making it do that and seeing what happens.
3. **Explain the why.** Try hard to explain the **why** behind everything you're asking the model to do. Today's LLMs are *smart*. They have good theory of mind and when given a good harness can go beyond rote instructions and really make things happen. Even if the feedback from the user is terse or frustrated, try to actually understand the task and why the user is writing what they wrote, and what they actually wrote, and then transmit this understanding into the instructions. If you find yourself writing ALWAYS or NEVER in all caps, or using super rigid structures, that's a yellow flag — if possible, reframe and explain the reasoning so that the model understands why the thing you're asking for is important. That's a more humane, powerful, and effective approach.
4. **Look for repeated work across test cases.** Read the transcripts from the test runs and notice if the subagents all independently wrote similar helper scripts or took the same multi-step approach to something. If all 3 test cases resulted in the subagent writing a `create_docx.py` or a `build_chart.py`, that's a strong signal the skill should bundle that script. Write it once, put it in `scripts/`, and tell the skill to use it. This saves every future invocation from reinventing the wheel.
This task is pretty important (we are trying to create billions a year in economic value here!) and your thinking time is not the blocker; take your time and really mull things over. I'd suggest writing a draft revision and then looking at it anew and making improvements. Really do your best to get into the head of the user and understand what they want and need.
### The iteration loop
After improving the skill:
1. Apply your improvements to the skill
2. Rerun all test cases into a new `iteration-<N+1>/` directory, including baseline runs. If you're creating a new skill, the baseline is always `without_skill` (no skill) — that stays the same across iterations. If you're improving an existing skill, use your judgment on what makes sense as the baseline: the original version the user came in with, or the previous iteration.
3. Launch the reviewer with `--previous-workspace` pointing at the previous iteration
4. Wait for the user to review and tell you they're done
5. Read the new feedback, improve again, repeat
Keep going until:
- The user says they're happy
- The feedback is all empty (everything looks good)
- You're not making meaningful progress
---
## Advanced: Blind comparison
For situations where you want a more rigorous comparison between two versions of a skill (e.g., the user asks "is the new version actually better?"), there's a blind comparison system. Read `agents/comparator.md` and `agents/analyzer.md` for the details. The basic idea is: give two outputs to an independent agent without telling it which is which, and let it judge quality. Then analyze why the winner won.
This is optional, requires subagents, and most users won't need it. The human review loop is usually sufficient.
---
## Description Optimization
The description field in SKILL.md frontmatter is the primary mechanism that determines whether Claude invokes a skill. After creating or improving a skill, offer to optimize the description for better triggering accuracy.
### Step 1: Generate trigger eval queries
Create 20 eval queries — a mix of should-trigger and should-not-trigger. Save as JSON:
```json
[
{"query": "the user prompt", "should_trigger": true},
{"query": "another prompt", "should_trigger": false}
]
```
The queries must be realistic and something a Claude Code or Claude.ai user would actually type. Not abstract requests, but requests that are concrete and specific and have a good amount of detail. For instance, file paths, personal context about the user's job or situation, column names and values, company names, URLs. A little bit of backstory. Some might be in lowercase or contain abbreviations or typos or casual speech. Use a mix of different lengths, and focus on edge cases rather than making them clear-cut (the user will get a chance to sign off on them).
Bad: `"Format this data"`, `"Extract text from PDF"`, `"Create a chart"`
Good: `"ok so my boss just sent me this xlsx file (its in my downloads, called something like 'Q4 sales final FINAL v2.xlsx') and she wants me to add a column that shows the profit margin as a percentage. The revenue is in column C and costs are in column D i think"`
For the **should-trigger** queries (8-10), think about coverage. You want different phrasings of the same intent — some formal, some casual. Include cases where the user doesn't explicitly name the skill or file type but clearly needs it. Throw in some uncommon use cases and cases where this skill competes with another but should win.
For the **should-not-trigger** queries (8-10), the most valuable ones are the near-misses — queries that share keywords or concepts with the skill but actually need something different. Think adjacent domains, ambiguous phrasing where a naive keyword match would trigger but shouldn't, and cases where the query touches on something the skill does but in a context where another tool is more appropriate.
The key thing to avoid: don't make should-not-trigger queries obviously irrelevant. "Write a fibonacci function" as a negative test for a PDF skill is too easy — it doesn't test anything. The negative cases should be genuinely tricky.
### Step 2: Review with user
Present the eval set to the user for review using the HTML template:
1. Read the template from `assets/eval_review.html`
2. Replace the placeholders:
- `__EVAL_DATA_PLACEHOLDER__` → the JSON array of eval items (no quotes around it — it's a JS variable assignment)
- `__SKILL_NAME_PLACEHOLDER__` → the skill's name
- `__SKILL_DESCRIPTION_PLACEHOLDER__` → the skill's current description
3. Write to a temp file (e.g., `/tmp/eval_review_<skill-name>.html`) and open it: `open /tmp/eval_review_<skill-name>.html`
4. The user can edit queries, toggle should-trigger, add/remove entries, then click "Export Eval Set"
5. The file downloads to `~/Downloads/eval_set.json` — check the Downloads folder for the most recent version in case there are multiple (e.g., `eval_set (1).json`)
This step matters — bad eval queries lead to bad descriptions.
### Step 3: Run the optimization loop
Tell the user: "This will take some time — I'll run the optimization loop in the background and check on it periodically."
Save the eval set to the workspace, then run in the background:
```bash
python -m scripts.run_loop \
--eval-set <path-to-trigger-eval.json> \
--skill-path <path-to-skill> \
--model <model-id-powering-this-session> \
--max-iterations 5 \
--verbose
```
Use the model ID from your system prompt (the one powering the current session) so the triggering test matches what the user actually experiences.
While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like.
This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.
### How skill triggering works
Understanding the triggering mechanism helps design better eval queries. Skills appear in Claude's `available_skills` list with their name + description, and Claude decides whether to consult a skill based on that description. The important thing to know is that Claude only consults skills for tasks it can't easily handle on its own — simple, one-step queries like "read this PDF" may not trigger a skill even if the description matches perfectly, because Claude can handle them directly with basic tools. Complex, multi-step, or specialized queries reliably trigger skills when the description matches.
This means your eval queries should be substantive enough that Claude would actually benefit from consulting a skill. Simple queries like "read file X" are poor test cases — they won't trigger skills regardless of description quality.
### Step 4: Apply the result
Take `best_description` from the JSON output and update the skill's SKILL.md frontmatter. Show the user before/after and report the scores.
---
### Package and Present (only if `present_files` tool is available)
Check whether you have access to the `present_files` tool. If you don't, skip this step. If you do, package the skill and present the .skill file to the user:
```bash
python -m scripts.package_skill <path/to/skill-folder>
```
After packaging, direct the user to the resulting `.skill` file path so they can install it.
---
## Claude.ai-specific instructions
In Claude.ai, the core workflow is the same (draft → test → review → improve → repeat), but because Claude.ai doesn't have subagents, some mechanics change. Here's what to adapt:
**Running test cases**: No subagents means no parallel execution. For each test case, read the skill's SKILL.md, then follow its instructions to accomplish the test prompt yourself. Do them one at a time. This is less rigorous than independent subagents (you wrote the skill and you're also running it, so you have full context), but it's a useful sanity check — and the human review step compensates. Skip the baseline runs — just use the skill to complete the task as requested.
**Reviewing results**: If you can't open a browser (e.g., Claude.ai's VM has no display, or you're on a remote server), skip the browser reviewer entirely. Instead, present results directly in the conversation. For each test case, show the prompt and the output. If the output is a file the user needs to see (like a .docx or .xlsx), save it to the filesystem and tell them where it is so they can download and inspect it. Ask for feedback inline: "How does this look? Anything you'd change?"
**Benchmarking**: Skip the quantitative benchmarking — it relies on baseline comparisons which aren't meaningful without subagents. Focus on qualitative feedback from the user.
**The iteration loop**: Same as before — improve the skill, rerun the test cases, ask for feedback — just without the browser reviewer in the middle. You can still organize results into iteration directories on the filesystem if you have one.
**Description optimization**: This section requires the `claude` CLI tool (specifically `claude -p`) which is only available in Claude Code. Skip it if you're on Claude.ai.
**Blind comparison**: Requires subagents. Skip it.
**Packaging**: The `package_skill.py` script works anywhere with Python and a filesystem. On Claude.ai, you can run it and the user can download the resulting `.skill` file.
**Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. In this case:
- **Preserve the original name.** Note the skill's directory name and `name` frontmatter field -- use them unchanged. E.g., if the installed skill is `research-helper`, output `research-helper.skill` (not `research-helper-v2`).
- **Copy to a writeable location before editing.** The installed skill path may be read-only. Copy to `/tmp/skill-name/`, edit there, and package from the copy.
- **If packaging manually, stage in `/tmp/` first**, then copy to the output directory -- direct writes may fail due to permissions.
---
## Cowork-Specific Instructions
If you're in Cowork, the main things to know are:
- You have subagents, so the main workflow (spawn test cases in parallel, run baselines, grade, etc.) all works. (However, if you run into severe problems with timeouts, it's OK to run the test prompts in series rather than parallel.)
- You don't have a browser or display, so when generating the eval viewer, use `--static <output_path>` to write a standalone HTML file instead of starting a server. Then proffer a link that the user can click to open the HTML in their browser.
- For whatever reason, the Cowork setup seems to disincline Claude from generating the eval viewer after running the tests, so just to reiterate: whether you're in Cowork or in Claude Code, after running tests, you should always generate the eval viewer for the human to look at examples before revising the skill yourself and trying to make corrections, using `generate_review.py` (not writing your own boutique html code). Sorry in advance but I'm gonna go all caps here: GENERATE THE EVAL VIEWER *BEFORE* evaluating inputs yourself. You want to get them in front of the human ASAP!
- Feedback works differently: since there's no running server, the viewer's "Submit All Reviews" button will download `feedback.json` as a file. You can then read it from there (you may have to request access first).
- Packaging works — `package_skill.py` just needs Python and a filesystem.
- Description optimization (`run_loop.py` / `run_eval.py`) should work in Cowork just fine since it uses `claude -p` via subprocess, not a browser, but please save it until you've fully finished making the skill and the user agrees it's in good shape.
- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. Follow the update guidance in the claude.ai section above.
---
## Reference files
The agents/ directory contains instructions for specialized subagents. Read them when you need to spawn the relevant subagent.
- `agents/grader.md` — How to evaluate assertions against outputs
- `agents/comparator.md` — How to do blind A/B comparison between two outputs
- `agents/analyzer.md` — How to analyze why one version beat another
The references/ directory has additional documentation:
- `references/schemas.md` — JSON structures for evals.json, grading.json, etc.
---
Repeating one more time the core loop here for emphasis:
- Figure out what the skill is about
- Draft or edit the skill
- Run claude-with-access-to-the-skill on test prompts
- With the user, evaluate the outputs:
- Create benchmark.json and run `eval-viewer/generate_review.py` to help the user review them
- Run quantitative evals
- Repeat until you and the user are satisfied
- Package the final skill and return it to the user.
Please add steps to your TodoList, if you have such a thing, to make sure you don't forget. If you're in Cowork, please specifically put "Create evals JSON and run `eval-viewer/generate_review.py` so human can review test cases" in your TodoList to make sure it happens.
Good luck!

View File

@ -1,274 +0,0 @@
# Post-hoc Analyzer Agent
Analyze blind comparison results to understand WHY the winner won and generate improvement suggestions.
## Role
After the blind comparator determines a winner, the Post-hoc Analyzer "unblids" the results by examining the skills and transcripts. The goal is to extract actionable insights: what made the winner better, and how can the loser be improved?
## Inputs
You receive these parameters in your prompt:
- **winner**: "A" or "B" (from blind comparison)
- **winner_skill_path**: Path to the skill that produced the winning output
- **winner_transcript_path**: Path to the execution transcript for the winner
- **loser_skill_path**: Path to the skill that produced the losing output
- **loser_transcript_path**: Path to the execution transcript for the loser
- **comparison_result_path**: Path to the blind comparator's output JSON
- **output_path**: Where to save the analysis results
## Process
### Step 1: Read Comparison Result
1. Read the blind comparator's output at comparison_result_path
2. Note the winning side (A or B), the reasoning, and any scores
3. Understand what the comparator valued in the winning output
### Step 2: Read Both Skills
1. Read the winner skill's SKILL.md and key referenced files
2. Read the loser skill's SKILL.md and key referenced files
3. Identify structural differences:
- Instructions clarity and specificity
- Script/tool usage patterns
- Example coverage
- Edge case handling
### Step 3: Read Both Transcripts
1. Read the winner's transcript
2. Read the loser's transcript
3. Compare execution patterns:
- How closely did each follow their skill's instructions?
- What tools were used differently?
- Where did the loser diverge from optimal behavior?
- Did either encounter errors or make recovery attempts?
### Step 4: Analyze Instruction Following
For each transcript, evaluate:
- Did the agent follow the skill's explicit instructions?
- Did the agent use the skill's provided tools/scripts?
- Were there missed opportunities to leverage skill content?
- Did the agent add unnecessary steps not in the skill?
Score instruction following 1-10 and note specific issues.
### Step 5: Identify Winner Strengths
Determine what made the winner better:
- Clearer instructions that led to better behavior?
- Better scripts/tools that produced better output?
- More comprehensive examples that guided edge cases?
- Better error handling guidance?
Be specific. Quote from skills/transcripts where relevant.
### Step 6: Identify Loser Weaknesses
Determine what held the loser back:
- Ambiguous instructions that led to suboptimal choices?
- Missing tools/scripts that forced workarounds?
- Gaps in edge case coverage?
- Poor error handling that caused failures?
### Step 7: Generate Improvement Suggestions
Based on the analysis, produce actionable suggestions for improving the loser skill:
- Specific instruction changes to make
- Tools/scripts to add or modify
- Examples to include
- Edge cases to address
Prioritize by impact. Focus on changes that would have changed the outcome.
### Step 8: Write Analysis Results
Save structured analysis to `{output_path}`.
## Output Format
Write a JSON file with this structure:
```json
{
"comparison_summary": {
"winner": "A",
"winner_skill": "path/to/winner/skill",
"loser_skill": "path/to/loser/skill",
"comparator_reasoning": "Brief summary of why comparator chose winner"
},
"winner_strengths": [
"Clear step-by-step instructions for handling multi-page documents",
"Included validation script that caught formatting errors",
"Explicit guidance on fallback behavior when OCR fails"
],
"loser_weaknesses": [
"Vague instruction 'process the document appropriately' led to inconsistent behavior",
"No script for validation, agent had to improvise and made errors",
"No guidance on OCR failure, agent gave up instead of trying alternatives"
],
"instruction_following": {
"winner": {
"score": 9,
"issues": [
"Minor: skipped optional logging step"
]
},
"loser": {
"score": 6,
"issues": [
"Did not use the skill's formatting template",
"Invented own approach instead of following step 3",
"Missed the 'always validate output' instruction"
]
}
},
"improvement_suggestions": [
{
"priority": "high",
"category": "instructions",
"suggestion": "Replace 'process the document appropriately' with explicit steps: 1) Extract text, 2) Identify sections, 3) Format per template",
"expected_impact": "Would eliminate ambiguity that caused inconsistent behavior"
},
{
"priority": "high",
"category": "tools",
"suggestion": "Add validate_output.py script similar to winner skill's validation approach",
"expected_impact": "Would catch formatting errors before final output"
},
{
"priority": "medium",
"category": "error_handling",
"suggestion": "Add fallback instructions: 'If OCR fails, try: 1) different resolution, 2) image preprocessing, 3) manual extraction'",
"expected_impact": "Would prevent early failure on difficult documents"
}
],
"transcript_insights": {
"winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script -> Fixed 2 issues -> Produced output",
"loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods -> No validation -> Output had errors"
}
}
```
## Guidelines
- **Be specific**: Quote from skills and transcripts, don't just say "instructions were unclear"
- **Be actionable**: Suggestions should be concrete changes, not vague advice
- **Focus on skill improvements**: The goal is to improve the losing skill, not critique the agent
- **Prioritize by impact**: Which changes would most likely have changed the outcome?
- **Consider causation**: Did the skill weakness actually cause the worse output, or is it incidental?
- **Stay objective**: Analyze what happened, don't editorialize
- **Think about generalization**: Would this improvement help on other evals too?
## Categories for Suggestions
Use these categories to organize improvement suggestions:
| Category | Description |
|----------|-------------|
| `instructions` | Changes to the skill's prose instructions |
| `tools` | Scripts, templates, or utilities to add/modify |
| `examples` | Example inputs/outputs to include |
| `error_handling` | Guidance for handling failures |
| `structure` | Reorganization of skill content |
| `references` | External docs or resources to add |
## Priority Levels
- **high**: Would likely change the outcome of this comparison
- **medium**: Would improve quality but may not change win/loss
- **low**: Nice to have, marginal improvement
---
# Analyzing Benchmark Results
When analyzing benchmark results, the analyzer's purpose is to **surface patterns and anomalies** across multiple runs, not suggest skill improvements.
## Role
Review all benchmark run results and generate freeform notes that help the user understand skill performance. Focus on patterns that wouldn't be visible from aggregate metrics alone.
## Inputs
You receive these parameters in your prompt:
- **benchmark_data_path**: Path to the in-progress benchmark.json with all run results
- **skill_path**: Path to the skill being benchmarked
- **output_path**: Where to save the notes (as JSON array of strings)
## Process
### Step 1: Read Benchmark Data
1. Read the benchmark.json containing all run results
2. Note the configurations tested (with_skill, without_skill)
3. Understand the run_summary aggregates already calculated
### Step 2: Analyze Per-Assertion Patterns
For each expectation across all runs:
- Does it **always pass** in both configurations? (may not differentiate skill value)
- Does it **always fail** in both configurations? (may be broken or beyond capability)
- Does it **always pass with skill but fail without**? (skill clearly adds value here)
- Does it **always fail with skill but pass without**? (skill may be hurting)
- Is it **highly variable**? (flaky expectation or non-deterministic behavior)
### Step 3: Analyze Cross-Eval Patterns
Look for patterns across evals:
- Are certain eval types consistently harder/easier?
- Do some evals show high variance while others are stable?
- Are there surprising results that contradict expectations?
### Step 4: Analyze Metrics Patterns
Look at time_seconds, tokens, tool_calls:
- Does the skill significantly increase execution time?
- Is there high variance in resource usage?
- Are there outlier runs that skew the aggregates?
### Step 5: Generate Notes
Write freeform observations as a list of strings. Each note should:
- State a specific observation
- Be grounded in the data (not speculation)
- Help the user understand something the aggregate metrics don't show
Examples:
- "Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value"
- "Eval 3 shows high variance (50% ± 40%) - run 2 had an unusual failure that may be flaky"
- "Without-skill runs consistently fail on table extraction expectations (0% pass rate)"
- "Skill adds 13s average execution time but improves pass rate by 50%"
- "Token usage is 80% higher with skill, primarily due to script output parsing"
- "All 3 without-skill runs for eval 1 produced empty output"
### Step 6: Write Notes
Save notes to `{output_path}` as a JSON array of strings:
```json
[
"Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value",
"Eval 3 shows high variance (50% ± 40%) - run 2 had an unusual failure",
"Without-skill runs consistently fail on table extraction expectations",
"Skill adds 13s average execution time but improves pass rate by 50%"
]
```
## Guidelines
**DO:**
- Report what you observe in the data
- Be specific about which evals, expectations, or runs you're referring to
- Note patterns that aggregate metrics would hide
- Provide context that helps interpret the numbers
**DO NOT:**
- Suggest improvements to the skill (that's for the improvement step, not benchmarking)
- Make subjective quality judgments ("the output was good/bad")
- Speculate about causes without evidence
- Repeat information already in the run_summary aggregates

View File

@ -1,202 +0,0 @@
# Blind Comparator Agent
Compare two outputs WITHOUT knowing which skill produced them.
## Role
The Blind Comparator judges which output better accomplishes the eval task. You receive two outputs labeled A and B, but you do NOT know which skill produced which. This prevents bias toward a particular skill or approach.
Your judgment is based purely on output quality and task completion.
## Inputs
You receive these parameters in your prompt:
- **output_a_path**: Path to the first output file or directory
- **output_b_path**: Path to the second output file or directory
- **eval_prompt**: The original task/prompt that was executed
- **expectations**: List of expectations to check (optional - may be empty)
## Process
### Step 1: Read Both Outputs
1. Examine output A (file or directory)
2. Examine output B (file or directory)
3. Note the type, structure, and content of each
4. If outputs are directories, examine all relevant files inside
### Step 2: Understand the Task
1. Read the eval_prompt carefully
2. Identify what the task requires:
- What should be produced?
- What qualities matter (accuracy, completeness, format)?
- What would distinguish a good output from a poor one?
### Step 3: Generate Evaluation Rubric
Based on the task, generate a rubric with two dimensions:
**Content Rubric** (what the output contains):
| Criterion | 1 (Poor) | 3 (Acceptable) | 5 (Excellent) |
|-----------|----------|----------------|---------------|
| Correctness | Major errors | Minor errors | Fully correct |
| Completeness | Missing key elements | Mostly complete | All elements present |
| Accuracy | Significant inaccuracies | Minor inaccuracies | Accurate throughout |
**Structure Rubric** (how the output is organized):
| Criterion | 1 (Poor) | 3 (Acceptable) | 5 (Excellent) |
|-----------|----------|----------------|---------------|
| Organization | Disorganized | Reasonably organized | Clear, logical structure |
| Formatting | Inconsistent/broken | Mostly consistent | Professional, polished |
| Usability | Difficult to use | Usable with effort | Easy to use |
Adapt criteria to the specific task. For example:
- PDF form → "Field alignment", "Text readability", "Data placement"
- Document → "Section structure", "Heading hierarchy", "Paragraph flow"
- Data output → "Schema correctness", "Data types", "Completeness"
### Step 4: Evaluate Each Output Against the Rubric
For each output (A and B):
1. **Score each criterion** on the rubric (1-5 scale)
2. **Calculate dimension totals**: Content score, Structure score
3. **Calculate overall score**: Average of dimension scores, scaled to 1-10
### Step 5: Check Assertions (if provided)
If expectations are provided:
1. Check each expectation against output A
2. Check each expectation against output B
3. Count pass rates for each output
4. Use expectation scores as secondary evidence (not the primary decision factor)
### Step 6: Determine the Winner
Compare A and B based on (in priority order):
1. **Primary**: Overall rubric score (content + structure)
2. **Secondary**: Assertion pass rates (if applicable)
3. **Tiebreaker**: If truly equal, declare a TIE
Be decisive - ties should be rare. One output is usually better, even if marginally.
### Step 7: Write Comparison Results
Save results to a JSON file at the path specified (or `comparison.json` if not specified).
## Output Format
Write a JSON file with this structure:
```json
{
"winner": "A",
"reasoning": "Output A provides a complete solution with proper formatting and all required fields. Output B is missing the date field and has formatting inconsistencies.",
"rubric": {
"A": {
"content": {
"correctness": 5,
"completeness": 5,
"accuracy": 4
},
"structure": {
"organization": 4,
"formatting": 5,
"usability": 4
},
"content_score": 4.7,
"structure_score": 4.3,
"overall_score": 9.0
},
"B": {
"content": {
"correctness": 3,
"completeness": 2,
"accuracy": 3
},
"structure": {
"organization": 3,
"formatting": 2,
"usability": 3
},
"content_score": 2.7,
"structure_score": 2.7,
"overall_score": 5.4
}
},
"output_quality": {
"A": {
"score": 9,
"strengths": ["Complete solution", "Well-formatted", "All fields present"],
"weaknesses": ["Minor style inconsistency in header"]
},
"B": {
"score": 5,
"strengths": ["Readable output", "Correct basic structure"],
"weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"]
}
},
"expectation_results": {
"A": {
"passed": 4,
"total": 5,
"pass_rate": 0.80,
"details": [
{"text": "Output includes name", "passed": true},
{"text": "Output includes date", "passed": true},
{"text": "Format is PDF", "passed": true},
{"text": "Contains signature", "passed": false},
{"text": "Readable text", "passed": true}
]
},
"B": {
"passed": 3,
"total": 5,
"pass_rate": 0.60,
"details": [
{"text": "Output includes name", "passed": true},
{"text": "Output includes date", "passed": false},
{"text": "Format is PDF", "passed": true},
{"text": "Contains signature", "passed": false},
{"text": "Readable text", "passed": true}
]
}
}
}
```
If no expectations were provided, omit the `expectation_results` field entirely.
## Field Descriptions
- **winner**: "A", "B", or "TIE"
- **reasoning**: Clear explanation of why the winner was chosen (or why it's a tie)
- **rubric**: Structured rubric evaluation for each output
- **content**: Scores for content criteria (correctness, completeness, accuracy)
- **structure**: Scores for structure criteria (organization, formatting, usability)
- **content_score**: Average of content criteria (1-5)
- **structure_score**: Average of structure criteria (1-5)
- **overall_score**: Combined score scaled to 1-10
- **output_quality**: Summary quality assessment
- **score**: 1-10 rating (should match rubric overall_score)
- **strengths**: List of positive aspects
- **weaknesses**: List of issues or shortcomings
- **expectation_results**: (Only if expectations provided)
- **passed**: Number of expectations that passed
- **total**: Total number of expectations
- **pass_rate**: Fraction passed (0.0 to 1.0)
- **details**: Individual expectation results
## Guidelines
- **Stay blind**: DO NOT try to infer which skill produced which output. Judge purely on output quality.
- **Be specific**: Cite specific examples when explaining strengths and weaknesses.
- **Be decisive**: Choose a winner unless outputs are genuinely equivalent.
- **Output quality first**: Assertion scores are secondary to overall task completion.
- **Be objective**: Don't favor outputs based on style preferences; focus on correctness and completeness.
- **Explain your reasoning**: The reasoning field should make it clear why you chose the winner.
- **Handle edge cases**: If both outputs fail, pick the one that fails less badly. If both are excellent, pick the one that's marginally better.

View File

@ -1,223 +0,0 @@
# Grader Agent
Evaluate expectations against an execution transcript and outputs.
## Role
The Grader reviews a transcript and output files, then determines whether each expectation passes or fails. Provide clear evidence for each judgment.
You have two jobs: grade the outputs, and critique the evals themselves. A passing grade on a weak assertion is worse than useless — it creates false confidence. When you notice an assertion that's trivially satisfied, or an important outcome that no assertion checks, say so.
## Inputs
You receive these parameters in your prompt:
- **expectations**: List of expectations to evaluate (strings)
- **transcript_path**: Path to the execution transcript (markdown file)
- **outputs_dir**: Directory containing output files from execution
## Process
### Step 1: Read the Transcript
1. Read the transcript file completely
2. Note the eval prompt, execution steps, and final result
3. Identify any issues or errors documented
### Step 2: Examine Output Files
1. List files in outputs_dir
2. Read/examine each file relevant to the expectations. If outputs aren't plain text, use the inspection tools provided in your prompt — don't rely solely on what the transcript says the executor produced.
3. Note contents, structure, and quality
### Step 3: Evaluate Each Assertion
For each expectation:
1. **Search for evidence** in the transcript and outputs
2. **Determine verdict**:
- **PASS**: Clear evidence the expectation is true AND the evidence reflects genuine task completion, not just surface-level compliance
- **FAIL**: No evidence, or evidence contradicts the expectation, or the evidence is superficial (e.g., correct filename but empty/wrong content)
3. **Cite the evidence**: Quote the specific text or describe what you found
### Step 4: Extract and Verify Claims
Beyond the predefined expectations, extract implicit claims from the outputs and verify them:
1. **Extract claims** from the transcript and outputs:
- Factual statements ("The form has 12 fields")
- Process claims ("Used pypdf to fill the form")
- Quality claims ("All fields were filled correctly")
2. **Verify each claim**:
- **Factual claims**: Can be checked against the outputs or external sources
- **Process claims**: Can be verified from the transcript
- **Quality claims**: Evaluate whether the claim is justified
3. **Flag unverifiable claims**: Note claims that cannot be verified with available information
This catches issues that predefined expectations might miss.
### Step 5: Read User Notes
If `{outputs_dir}/user_notes.md` exists:
1. Read it and note any uncertainties or issues flagged by the executor
2. Include relevant concerns in the grading output
3. These may reveal problems even when expectations pass
### Step 6: Critique the Evals
After grading, consider whether the evals themselves could be improved. Only surface suggestions when there's a clear gap.
Good suggestions test meaningful outcomes — assertions that are hard to satisfy without actually doing the work correctly. Think about what makes an assertion *discriminating*: it passes when the skill genuinely succeeds and fails when it doesn't.
Suggestions worth raising:
- An assertion that passed but would also pass for a clearly wrong output (e.g., checking filename existence but not file content)
- An important outcome you observed — good or bad — that no assertion covers at all
- An assertion that can't actually be verified from the available outputs
Keep the bar high. The goal is to flag things the eval author would say "good catch" about, not to nitpick every assertion.
### Step 7: Write Grading Results
Save results to `{outputs_dir}/../grading.json` (sibling to outputs_dir).
## Grading Criteria
**PASS when**:
- The transcript or outputs clearly demonstrate the expectation is true
- Specific evidence can be cited
- The evidence reflects genuine substance, not just surface compliance (e.g., a file exists AND contains correct content, not just the right filename)
**FAIL when**:
- No evidence found for the expectation
- Evidence contradicts the expectation
- The expectation cannot be verified from available information
- The evidence is superficial — the assertion is technically satisfied but the underlying task outcome is wrong or incomplete
- The output appears to meet the assertion by coincidence rather than by actually doing the work
**When uncertain**: The burden of proof to pass is on the expectation.
### Step 8: Read Executor Metrics and Timing
1. If `{outputs_dir}/metrics.json` exists, read it and include in grading output
2. If `{outputs_dir}/../timing.json` exists, read it and include timing data
## Output Format
Write a JSON file with this structure:
```json
{
"expectations": [
{
"text": "The output includes the name 'John Smith'",
"passed": true,
"evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'"
},
{
"text": "The spreadsheet has a SUM formula in cell B10",
"passed": false,
"evidence": "No spreadsheet was created. The output was a text file."
},
{
"text": "The assistant used the skill's OCR script",
"passed": true,
"evidence": "Transcript Step 2 shows: 'Tool: Bash - python ocr_script.py image.png'"
}
],
"summary": {
"passed": 2,
"failed": 1,
"total": 3,
"pass_rate": 0.67
},
"execution_metrics": {
"tool_calls": {
"Read": 5,
"Write": 2,
"Bash": 8
},
"total_tool_calls": 15,
"total_steps": 6,
"errors_encountered": 0,
"output_chars": 12450,
"transcript_chars": 3200
},
"timing": {
"executor_duration_seconds": 165.0,
"grader_duration_seconds": 26.0,
"total_duration_seconds": 191.0
},
"claims": [
{
"claim": "The form has 12 fillable fields",
"type": "factual",
"verified": true,
"evidence": "Counted 12 fields in field_info.json"
},
{
"claim": "All required fields were populated",
"type": "quality",
"verified": false,
"evidence": "Reference section was left blank despite data being available"
}
],
"user_notes_summary": {
"uncertainties": ["Used 2023 data, may be stale"],
"needs_review": [],
"workarounds": ["Fell back to text overlay for non-fillable fields"]
},
"eval_feedback": {
"suggestions": [
{
"assertion": "The output includes the name 'John Smith'",
"reason": "A hallucinated document that mentions the name would also pass — consider checking it appears as the primary contact with matching phone and email from the input"
},
{
"reason": "No assertion checks whether the extracted phone numbers match the input — I observed incorrect numbers in the output that went uncaught"
}
],
"overall": "Assertions check presence but not correctness. Consider adding content verification."
}
}
```
## Field Descriptions
- **expectations**: Array of graded expectations
- **text**: The original expectation text
- **passed**: Boolean - true if expectation passes
- **evidence**: Specific quote or description supporting the verdict
- **summary**: Aggregate statistics
- **passed**: Count of passed expectations
- **failed**: Count of failed expectations
- **total**: Total expectations evaluated
- **pass_rate**: Fraction passed (0.0 to 1.0)
- **execution_metrics**: Copied from executor's metrics.json (if available)
- **output_chars**: Total character count of output files (proxy for tokens)
- **transcript_chars**: Character count of transcript
- **timing**: Wall clock timing from timing.json (if available)
- **executor_duration_seconds**: Time spent in executor subagent
- **total_duration_seconds**: Total elapsed time for the run
- **claims**: Extracted and verified claims from the output
- **claim**: The statement being verified
- **type**: "factual", "process", or "quality"
- **verified**: Boolean - whether the claim holds
- **evidence**: Supporting or contradicting evidence
- **user_notes_summary**: Issues flagged by the executor
- **uncertainties**: Things the executor wasn't sure about
- **needs_review**: Items requiring human attention
- **workarounds**: Places where the skill didn't work as expected
- **eval_feedback**: Improvement suggestions for the evals (only when warranted)
- **suggestions**: List of concrete suggestions, each with a `reason` and optionally an `assertion` it relates to
- **overall**: Brief assessment — can be "No suggestions, evals look solid" if nothing to flag
## Guidelines
- **Be objective**: Base verdicts on evidence, not assumptions
- **Be specific**: Quote the exact text that supports your verdict
- **Be thorough**: Check both transcript and output files
- **Be consistent**: Apply the same standard to each expectation
- **Explain failures**: Make it clear why evidence was insufficient
- **No partial credit**: Each expectation is pass or fail, not partial

View File

@ -1,146 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Eval Set Review - __SKILL_NAME_PLACEHOLDER__</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600&family=Lora:wght@400;500&display=swap" rel="stylesheet">
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: 'Lora', Georgia, serif; background: #faf9f5; padding: 2rem; color: #141413; }
h1 { font-family: 'Poppins', sans-serif; margin-bottom: 0.5rem; font-size: 1.5rem; }
.description { color: #b0aea5; margin-bottom: 1.5rem; font-style: italic; max-width: 900px; }
.controls { margin-bottom: 1rem; display: flex; gap: 0.5rem; }
.btn { font-family: 'Poppins', sans-serif; padding: 0.5rem 1rem; border: none; border-radius: 6px; cursor: pointer; font-size: 0.875rem; font-weight: 500; }
.btn-add { background: #6a9bcc; color: white; }
.btn-add:hover { background: #5889b8; }
.btn-export { background: #d97757; color: white; }
.btn-export:hover { background: #c4613f; }
table { width: 100%; max-width: 1100px; border-collapse: collapse; background: white; border-radius: 6px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.08); }
th { font-family: 'Poppins', sans-serif; background: #141413; color: #faf9f5; padding: 0.75rem 1rem; text-align: left; font-size: 0.875rem; }
td { padding: 0.75rem 1rem; border-bottom: 1px solid #e8e6dc; vertical-align: top; }
tr:nth-child(even) td { background: #faf9f5; }
tr:hover td { background: #f3f1ea; }
.section-header td { background: #e8e6dc; font-family: 'Poppins', sans-serif; font-weight: 500; font-size: 0.8rem; color: #141413; text-transform: uppercase; letter-spacing: 0.05em; }
.query-input { width: 100%; padding: 0.4rem; border: 1px solid #e8e6dc; border-radius: 4px; font-size: 0.875rem; font-family: 'Lora', Georgia, serif; resize: vertical; min-height: 60px; }
.query-input:focus { outline: none; border-color: #d97757; box-shadow: 0 0 0 2px rgba(217,119,87,0.15); }
.toggle { position: relative; display: inline-block; width: 44px; height: 24px; }
.toggle input { opacity: 0; width: 0; height: 0; }
.toggle .slider { position: absolute; inset: 0; background: #b0aea5; border-radius: 24px; cursor: pointer; transition: 0.2s; }
.toggle .slider::before { content: ""; position: absolute; width: 18px; height: 18px; left: 3px; bottom: 3px; background: white; border-radius: 50%; transition: 0.2s; }
.toggle input:checked + .slider { background: #d97757; }
.toggle input:checked + .slider::before { transform: translateX(20px); }
.btn-delete { background: #c44; color: white; padding: 0.3rem 0.6rem; border: none; border-radius: 4px; cursor: pointer; font-size: 0.75rem; font-family: 'Poppins', sans-serif; }
.btn-delete:hover { background: #a33; }
.summary { margin-top: 1rem; color: #b0aea5; font-size: 0.875rem; }
</style>
</head>
<body>
<h1>Eval Set Review: <span id="skill-name">__SKILL_NAME_PLACEHOLDER__</span></h1>
<p class="description">Current description: <span id="skill-desc">__SKILL_DESCRIPTION_PLACEHOLDER__</span></p>
<div class="controls">
<button class="btn btn-add" onclick="addRow()">+ Add Query</button>
<button class="btn btn-export" onclick="exportEvalSet()">Export Eval Set</button>
</div>
<table>
<thead>
<tr>
<th style="width:65%">Query</th>
<th style="width:18%">Should Trigger</th>
<th style="width:10%">Actions</th>
</tr>
</thead>
<tbody id="eval-body"></tbody>
</table>
<p class="summary" id="summary"></p>
<script>
const EVAL_DATA = __EVAL_DATA_PLACEHOLDER__;
let evalItems = [...EVAL_DATA];
function render() {
const tbody = document.getElementById('eval-body');
tbody.innerHTML = '';
// Sort: should-trigger first, then should-not-trigger
const sorted = evalItems
.map((item, origIdx) => ({ ...item, origIdx }))
.sort((a, b) => (b.should_trigger ? 1 : 0) - (a.should_trigger ? 1 : 0));
let lastGroup = null;
sorted.forEach(item => {
const group = item.should_trigger ? 'trigger' : 'no-trigger';
if (group !== lastGroup) {
const headerRow = document.createElement('tr');
headerRow.className = 'section-header';
headerRow.innerHTML = `<td colspan="3">${item.should_trigger ? 'Should Trigger' : 'Should NOT Trigger'}</td>`;
tbody.appendChild(headerRow);
lastGroup = group;
}
const idx = item.origIdx;
const tr = document.createElement('tr');
tr.innerHTML = `
<td><textarea class="query-input" onchange="updateQuery(${idx}, this.value)">${escapeHtml(item.query)}</textarea></td>
<td>
<label class="toggle">
<input type="checkbox" ${item.should_trigger ? 'checked' : ''} onchange="updateTrigger(${idx}, this.checked)">
<span class="slider"></span>
</label>
<span style="margin-left:8px;font-size:0.8rem;color:#b0aea5">${item.should_trigger ? 'Yes' : 'No'}</span>
</td>
<td><button class="btn-delete" onclick="deleteRow(${idx})">Delete</button></td>
`;
tbody.appendChild(tr);
});
updateSummary();
}
function escapeHtml(text) {
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}
function updateQuery(idx, value) { evalItems[idx].query = value; updateSummary(); }
function updateTrigger(idx, value) { evalItems[idx].should_trigger = value; render(); }
function deleteRow(idx) { evalItems.splice(idx, 1); render(); }
function addRow() {
evalItems.push({ query: '', should_trigger: true });
render();
const inputs = document.querySelectorAll('.query-input');
inputs[inputs.length - 1].focus();
}
function updateSummary() {
const trigger = evalItems.filter(i => i.should_trigger).length;
const noTrigger = evalItems.filter(i => !i.should_trigger).length;
document.getElementById('summary').textContent =
`${evalItems.length} queries total: ${trigger} should trigger, ${noTrigger} should not trigger`;
}
function exportEvalSet() {
const valid = evalItems.filter(i => i.query.trim() !== '');
const data = valid.map(i => ({ query: i.query.trim(), should_trigger: i.should_trigger }));
const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'eval_set.json';
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
}
render();
</script>
</body>
</html>

View File

@ -1,471 +0,0 @@
#!/usr/bin/env python3
"""Generate and serve a review page for eval results.
Reads the workspace directory, discovers runs (directories with outputs/),
embeds all output data into a self-contained HTML page, and serves it via
a tiny HTTP server. Feedback auto-saves to feedback.json in the workspace.
Usage:
python generate_review.py <workspace-path> [--port PORT] [--skill-name NAME]
python generate_review.py <workspace-path> --previous-feedback /path/to/old/feedback.json
No dependencies beyond the Python stdlib are required.
"""
import argparse
import base64
import json
import mimetypes
import os
import re
import signal
import subprocess
import sys
import time
import webbrowser
from functools import partial
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
# Files to exclude from output listings
METADATA_FILES = {"transcript.md", "user_notes.md", "metrics.json"}
# Extensions we render as inline text
TEXT_EXTENSIONS = {
".txt", ".md", ".json", ".csv", ".py", ".js", ".ts", ".tsx", ".jsx",
".yaml", ".yml", ".xml", ".html", ".css", ".sh", ".rb", ".go", ".rs",
".java", ".c", ".cpp", ".h", ".hpp", ".sql", ".r", ".toml",
}
# Extensions we render as inline images
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"}
# MIME type overrides for common types
MIME_OVERRIDES = {
".svg": "image/svg+xml",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
def get_mime_type(path: Path) -> str:
ext = path.suffix.lower()
if ext in MIME_OVERRIDES:
return MIME_OVERRIDES[ext]
mime, _ = mimetypes.guess_type(str(path))
return mime or "application/octet-stream"
def find_runs(workspace: Path) -> list[dict]:
"""Recursively find directories that contain an outputs/ subdirectory."""
runs: list[dict] = []
_find_runs_recursive(workspace, workspace, runs)
runs.sort(key=lambda r: (r.get("eval_id", float("inf")), r["id"]))
return runs
def _find_runs_recursive(root: Path, current: Path, runs: list[dict]) -> None:
if not current.is_dir():
return
outputs_dir = current / "outputs"
if outputs_dir.is_dir():
run = build_run(root, current)
if run:
runs.append(run)
return
skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"}
for child in sorted(current.iterdir()):
if child.is_dir() and child.name not in skip:
_find_runs_recursive(root, child, runs)
def build_run(root: Path, run_dir: Path) -> dict | None:
"""Build a run dict with prompt, outputs, and grading data."""
prompt = ""
eval_id = None
# Try eval_metadata.json
for candidate in [run_dir / "eval_metadata.json", run_dir.parent / "eval_metadata.json"]:
if candidate.exists():
try:
metadata = json.loads(candidate.read_text())
prompt = metadata.get("prompt", "")
eval_id = metadata.get("eval_id")
except (json.JSONDecodeError, OSError):
pass
if prompt:
break
# Fall back to transcript.md
if not prompt:
for candidate in [run_dir / "transcript.md", run_dir / "outputs" / "transcript.md"]:
if candidate.exists():
try:
text = candidate.read_text()
match = re.search(r"## Eval Prompt\n\n([\s\S]*?)(?=\n##|$)", text)
if match:
prompt = match.group(1).strip()
except OSError:
pass
if prompt:
break
if not prompt:
prompt = "(No prompt found)"
run_id = str(run_dir.relative_to(root)).replace("/", "-").replace("\\", "-")
# Collect output files
outputs_dir = run_dir / "outputs"
output_files: list[dict] = []
if outputs_dir.is_dir():
for f in sorted(outputs_dir.iterdir()):
if f.is_file() and f.name not in METADATA_FILES:
output_files.append(embed_file(f))
# Load grading if present
grading = None
for candidate in [run_dir / "grading.json", run_dir.parent / "grading.json"]:
if candidate.exists():
try:
grading = json.loads(candidate.read_text())
except (json.JSONDecodeError, OSError):
pass
if grading:
break
return {
"id": run_id,
"prompt": prompt,
"eval_id": eval_id,
"outputs": output_files,
"grading": grading,
}
def embed_file(path: Path) -> dict:
"""Read a file and return an embedded representation."""
ext = path.suffix.lower()
mime = get_mime_type(path)
if ext in TEXT_EXTENSIONS:
try:
content = path.read_text(errors="replace")
except OSError:
content = "(Error reading file)"
return {
"name": path.name,
"type": "text",
"content": content,
}
elif ext in IMAGE_EXTENSIONS:
try:
raw = path.read_bytes()
b64 = base64.b64encode(raw).decode("ascii")
except OSError:
return {"name": path.name, "type": "error", "content": "(Error reading file)"}
return {
"name": path.name,
"type": "image",
"mime": mime,
"data_uri": f"data:{mime};base64,{b64}",
}
elif ext == ".pdf":
try:
raw = path.read_bytes()
b64 = base64.b64encode(raw).decode("ascii")
except OSError:
return {"name": path.name, "type": "error", "content": "(Error reading file)"}
return {
"name": path.name,
"type": "pdf",
"data_uri": f"data:{mime};base64,{b64}",
}
elif ext == ".xlsx":
try:
raw = path.read_bytes()
b64 = base64.b64encode(raw).decode("ascii")
except OSError:
return {"name": path.name, "type": "error", "content": "(Error reading file)"}
return {
"name": path.name,
"type": "xlsx",
"data_b64": b64,
}
else:
# Binary / unknown — base64 download link
try:
raw = path.read_bytes()
b64 = base64.b64encode(raw).decode("ascii")
except OSError:
return {"name": path.name, "type": "error", "content": "(Error reading file)"}
return {
"name": path.name,
"type": "binary",
"mime": mime,
"data_uri": f"data:{mime};base64,{b64}",
}
def load_previous_iteration(workspace: Path) -> dict[str, dict]:
"""Load previous iteration's feedback and outputs.
Returns a map of run_id -> {"feedback": str, "outputs": list[dict]}.
"""
result: dict[str, dict] = {}
# Load feedback
feedback_map: dict[str, str] = {}
feedback_path = workspace / "feedback.json"
if feedback_path.exists():
try:
data = json.loads(feedback_path.read_text())
feedback_map = {
r["run_id"]: r["feedback"]
for r in data.get("reviews", [])
if r.get("feedback", "").strip()
}
except (json.JSONDecodeError, OSError, KeyError):
pass
# Load runs (to get outputs)
prev_runs = find_runs(workspace)
for run in prev_runs:
result[run["id"]] = {
"feedback": feedback_map.get(run["id"], ""),
"outputs": run.get("outputs", []),
}
# Also add feedback for run_ids that had feedback but no matching run
for run_id, fb in feedback_map.items():
if run_id not in result:
result[run_id] = {"feedback": fb, "outputs": []}
return result
def generate_html(
runs: list[dict],
skill_name: str,
previous: dict[str, dict] | None = None,
benchmark: dict | None = None,
) -> str:
"""Generate the complete standalone HTML page with embedded data."""
template_path = Path(__file__).parent / "viewer.html"
template = template_path.read_text()
# Build previous_feedback and previous_outputs maps for the template
previous_feedback: dict[str, str] = {}
previous_outputs: dict[str, list[dict]] = {}
if previous:
for run_id, data in previous.items():
if data.get("feedback"):
previous_feedback[run_id] = data["feedback"]
if data.get("outputs"):
previous_outputs[run_id] = data["outputs"]
embedded = {
"skill_name": skill_name,
"runs": runs,
"previous_feedback": previous_feedback,
"previous_outputs": previous_outputs,
}
if benchmark:
embedded["benchmark"] = benchmark
data_json = json.dumps(embedded)
return template.replace("/*__EMBEDDED_DATA__*/", f"const EMBEDDED_DATA = {data_json};")
# ---------------------------------------------------------------------------
# HTTP server (stdlib only, zero dependencies)
# ---------------------------------------------------------------------------
def _kill_port(port: int) -> None:
"""Kill any process listening on the given port."""
try:
result = subprocess.run(
["lsof", "-ti", f":{port}"],
capture_output=True, text=True, timeout=5,
)
for pid_str in result.stdout.strip().split("\n"):
if pid_str.strip():
try:
os.kill(int(pid_str.strip()), signal.SIGTERM)
except (ProcessLookupError, ValueError):
pass
if result.stdout.strip():
time.sleep(0.5)
except subprocess.TimeoutExpired:
pass
except FileNotFoundError:
print("Note: lsof not found, cannot check if port is in use", file=sys.stderr)
class ReviewHandler(BaseHTTPRequestHandler):
"""Serves the review HTML and handles feedback saves.
Regenerates the HTML on each page load so that refreshing the browser
picks up new eval outputs without restarting the server.
"""
def __init__(
self,
workspace: Path,
skill_name: str,
feedback_path: Path,
previous: dict[str, dict],
benchmark_path: Path | None,
*args,
**kwargs,
):
self.workspace = workspace
self.skill_name = skill_name
self.feedback_path = feedback_path
self.previous = previous
self.benchmark_path = benchmark_path
super().__init__(*args, **kwargs)
def do_GET(self) -> None:
if self.path == "/" or self.path == "/index.html":
# Regenerate HTML on each request (re-scans workspace for new outputs)
runs = find_runs(self.workspace)
benchmark = None
if self.benchmark_path and self.benchmark_path.exists():
try:
benchmark = json.loads(self.benchmark_path.read_text())
except (json.JSONDecodeError, OSError):
pass
html = generate_html(runs, self.skill_name, self.previous, benchmark)
content = html.encode("utf-8")
self.send_response(200)
self.send_header("Content-Type", "text/html; charset=utf-8")
self.send_header("Content-Length", str(len(content)))
self.end_headers()
self.wfile.write(content)
elif self.path == "/api/feedback":
data = b"{}"
if self.feedback_path.exists():
data = self.feedback_path.read_bytes()
self.send_response(200)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(data)))
self.end_headers()
self.wfile.write(data)
else:
self.send_error(404)
def do_POST(self) -> None:
if self.path == "/api/feedback":
length = int(self.headers.get("Content-Length", 0))
body = self.rfile.read(length)
try:
data = json.loads(body)
if not isinstance(data, dict) or "reviews" not in data:
raise ValueError("Expected JSON object with 'reviews' key")
self.feedback_path.write_text(json.dumps(data, indent=2) + "\n")
resp = b'{"ok":true}'
self.send_response(200)
except (json.JSONDecodeError, OSError, ValueError) as e:
resp = json.dumps({"error": str(e)}).encode()
self.send_response(500)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(resp)))
self.end_headers()
self.wfile.write(resp)
else:
self.send_error(404)
def log_message(self, format: str, *args: object) -> None:
# Suppress request logging to keep terminal clean
pass
def main() -> None:
parser = argparse.ArgumentParser(description="Generate and serve eval review")
parser.add_argument("workspace", type=Path, help="Path to workspace directory")
parser.add_argument("--port", "-p", type=int, default=3117, help="Server port (default: 3117)")
parser.add_argument("--skill-name", "-n", type=str, default=None, help="Skill name for header")
parser.add_argument(
"--previous-workspace", type=Path, default=None,
help="Path to previous iteration's workspace (shows old outputs and feedback as context)",
)
parser.add_argument(
"--benchmark", type=Path, default=None,
help="Path to benchmark.json to show in the Benchmark tab",
)
parser.add_argument(
"--static", "-s", type=Path, default=None,
help="Write standalone HTML to this path instead of starting a server",
)
args = parser.parse_args()
workspace = args.workspace.resolve()
if not workspace.is_dir():
print(f"Error: {workspace} is not a directory", file=sys.stderr)
sys.exit(1)
runs = find_runs(workspace)
if not runs:
print(f"No runs found in {workspace}", file=sys.stderr)
sys.exit(1)
skill_name = args.skill_name or workspace.name.replace("-workspace", "")
feedback_path = workspace / "feedback.json"
previous: dict[str, dict] = {}
if args.previous_workspace:
previous = load_previous_iteration(args.previous_workspace.resolve())
benchmark_path = args.benchmark.resolve() if args.benchmark else None
benchmark = None
if benchmark_path and benchmark_path.exists():
try:
benchmark = json.loads(benchmark_path.read_text())
except (json.JSONDecodeError, OSError):
pass
if args.static:
html = generate_html(runs, skill_name, previous, benchmark)
args.static.parent.mkdir(parents=True, exist_ok=True)
args.static.write_text(html)
print(f"\n Static viewer written to: {args.static}\n")
sys.exit(0)
# Kill any existing process on the target port
port = args.port
_kill_port(port)
handler = partial(ReviewHandler, workspace, skill_name, feedback_path, previous, benchmark_path)
try:
server = HTTPServer(("127.0.0.1", port), handler)
except OSError:
# Port still in use after kill attempt — find a free one
server = HTTPServer(("127.0.0.1", 0), handler)
port = server.server_address[1]
url = f"http://localhost:{port}"
print(f"\n Eval Viewer")
print(f" ─────────────────────────────────")
print(f" URL: {url}")
print(f" Workspace: {workspace}")
print(f" Feedback: {feedback_path}")
if previous:
print(f" Previous: {args.previous_workspace} ({len(previous)} runs)")
if benchmark_path:
print(f" Benchmark: {benchmark_path}")
print(f"\n Press Ctrl+C to stop.\n")
webbrowser.open(url)
try:
server.serve_forever()
except KeyboardInterrupt:
print("\nStopped.")
server.server_close()
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

View File

@ -1,430 +0,0 @@
# JSON Schemas
This document defines the JSON schemas used by skill-creator.
---
## evals.json
Defines the evals for a skill. Located at `evals/evals.json` within the skill directory.
```json
{
"skill_name": "example-skill",
"evals": [
{
"id": 1,
"prompt": "User's example prompt",
"expected_output": "Description of expected result",
"files": ["evals/files/sample1.pdf"],
"expectations": [
"The output includes X",
"The skill used script Y"
]
}
]
}
```
**Fields:**
- `skill_name`: Name matching the skill's frontmatter
- `evals[].id`: Unique integer identifier
- `evals[].prompt`: The task to execute
- `evals[].expected_output`: Human-readable description of success
- `evals[].files`: Optional list of input file paths (relative to skill root)
- `evals[].expectations`: List of verifiable statements
---
## history.json
Tracks version progression in Improve mode. Located at workspace root.
```json
{
"started_at": "2026-01-15T10:30:00Z",
"skill_name": "pdf",
"current_best": "v2",
"iterations": [
{
"version": "v0",
"parent": null,
"expectation_pass_rate": 0.65,
"grading_result": "baseline",
"is_current_best": false
},
{
"version": "v1",
"parent": "v0",
"expectation_pass_rate": 0.75,
"grading_result": "won",
"is_current_best": false
},
{
"version": "v2",
"parent": "v1",
"expectation_pass_rate": 0.85,
"grading_result": "won",
"is_current_best": true
}
]
}
```
**Fields:**
- `started_at`: ISO timestamp of when improvement started
- `skill_name`: Name of the skill being improved
- `current_best`: Version identifier of the best performer
- `iterations[].version`: Version identifier (v0, v1, ...)
- `iterations[].parent`: Parent version this was derived from
- `iterations[].expectation_pass_rate`: Pass rate from grading
- `iterations[].grading_result`: "baseline", "won", "lost", or "tie"
- `iterations[].is_current_best`: Whether this is the current best version
---
## grading.json
Output from the grader agent. Located at `<run-dir>/grading.json`.
```json
{
"expectations": [
{
"text": "The output includes the name 'John Smith'",
"passed": true,
"evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'"
},
{
"text": "The spreadsheet has a SUM formula in cell B10",
"passed": false,
"evidence": "No spreadsheet was created. The output was a text file."
}
],
"summary": {
"passed": 2,
"failed": 1,
"total": 3,
"pass_rate": 0.67
},
"execution_metrics": {
"tool_calls": {
"Read": 5,
"Write": 2,
"Bash": 8
},
"total_tool_calls": 15,
"total_steps": 6,
"errors_encountered": 0,
"output_chars": 12450,
"transcript_chars": 3200
},
"timing": {
"executor_duration_seconds": 165.0,
"grader_duration_seconds": 26.0,
"total_duration_seconds": 191.0
},
"claims": [
{
"claim": "The form has 12 fillable fields",
"type": "factual",
"verified": true,
"evidence": "Counted 12 fields in field_info.json"
}
],
"user_notes_summary": {
"uncertainties": ["Used 2023 data, may be stale"],
"needs_review": [],
"workarounds": ["Fell back to text overlay for non-fillable fields"]
},
"eval_feedback": {
"suggestions": [
{
"assertion": "The output includes the name 'John Smith'",
"reason": "A hallucinated document that mentions the name would also pass"
}
],
"overall": "Assertions check presence but not correctness."
}
}
```
**Fields:**
- `expectations[]`: Graded expectations with evidence
- `summary`: Aggregate pass/fail counts
- `execution_metrics`: Tool usage and output size (from executor's metrics.json)
- `timing`: Wall clock timing (from timing.json)
- `claims`: Extracted and verified claims from the output
- `user_notes_summary`: Issues flagged by the executor
- `eval_feedback`: (optional) Improvement suggestions for the evals, only present when the grader identifies issues worth raising
---
## metrics.json
Output from the executor agent. Located at `<run-dir>/outputs/metrics.json`.
```json
{
"tool_calls": {
"Read": 5,
"Write": 2,
"Bash": 8,
"Edit": 1,
"Glob": 2,
"Grep": 0
},
"total_tool_calls": 18,
"total_steps": 6,
"files_created": ["filled_form.pdf", "field_values.json"],
"errors_encountered": 0,
"output_chars": 12450,
"transcript_chars": 3200
}
```
**Fields:**
- `tool_calls`: Count per tool type
- `total_tool_calls`: Sum of all tool calls
- `total_steps`: Number of major execution steps
- `files_created`: List of output files created
- `errors_encountered`: Number of errors during execution
- `output_chars`: Total character count of output files
- `transcript_chars`: Character count of transcript
---
## timing.json
Wall clock timing for a run. Located at `<run-dir>/timing.json`.
**How to capture:** When a subagent task completes, the task notification includes `total_tokens` and `duration_ms`. Save these immediately — they are not persisted anywhere else and cannot be recovered after the fact.
```json
{
"total_tokens": 84852,
"duration_ms": 23332,
"total_duration_seconds": 23.3,
"executor_start": "2026-01-15T10:30:00Z",
"executor_end": "2026-01-15T10:32:45Z",
"executor_duration_seconds": 165.0,
"grader_start": "2026-01-15T10:32:46Z",
"grader_end": "2026-01-15T10:33:12Z",
"grader_duration_seconds": 26.0
}
```
---
## benchmark.json
Output from Benchmark mode. Located at `benchmarks/<timestamp>/benchmark.json`.
```json
{
"metadata": {
"skill_name": "pdf",
"skill_path": "/path/to/pdf",
"executor_model": "claude-sonnet-4-20250514",
"analyzer_model": "most-capable-model",
"timestamp": "2026-01-15T10:30:00Z",
"evals_run": [1, 2, 3],
"runs_per_configuration": 3
},
"runs": [
{
"eval_id": 1,
"eval_name": "Ocean",
"configuration": "with_skill",
"run_number": 1,
"result": {
"pass_rate": 0.85,
"passed": 6,
"failed": 1,
"total": 7,
"time_seconds": 42.5,
"tokens": 3800,
"tool_calls": 18,
"errors": 0
},
"expectations": [
{"text": "...", "passed": true, "evidence": "..."}
],
"notes": [
"Used 2023 data, may be stale",
"Fell back to text overlay for non-fillable fields"
]
}
],
"run_summary": {
"with_skill": {
"pass_rate": {"mean": 0.85, "stddev": 0.05, "min": 0.80, "max": 0.90},
"time_seconds": {"mean": 45.0, "stddev": 12.0, "min": 32.0, "max": 58.0},
"tokens": {"mean": 3800, "stddev": 400, "min": 3200, "max": 4100}
},
"without_skill": {
"pass_rate": {"mean": 0.35, "stddev": 0.08, "min": 0.28, "max": 0.45},
"time_seconds": {"mean": 32.0, "stddev": 8.0, "min": 24.0, "max": 42.0},
"tokens": {"mean": 2100, "stddev": 300, "min": 1800, "max": 2500}
},
"delta": {
"pass_rate": "+0.50",
"time_seconds": "+13.0",
"tokens": "+1700"
}
},
"notes": [
"Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value",
"Eval 3 shows high variance (50% ± 40%) - may be flaky or model-dependent",
"Without-skill runs consistently fail on table extraction expectations",
"Skill adds 13s average execution time but improves pass rate by 50%"
]
}
```
**Fields:**
- `metadata`: Information about the benchmark run
- `skill_name`: Name of the skill
- `timestamp`: When the benchmark was run
- `evals_run`: List of eval names or IDs
- `runs_per_configuration`: Number of runs per config (e.g. 3)
- `runs[]`: Individual run results
- `eval_id`: Numeric eval identifier
- `eval_name`: Human-readable eval name (used as section header in the viewer)
- `configuration`: Must be `"with_skill"` or `"without_skill"` (the viewer uses this exact string for grouping and color coding)
- `run_number`: Integer run number (1, 2, 3...)
- `result`: Nested object with `pass_rate`, `passed`, `total`, `time_seconds`, `tokens`, `errors`
- `run_summary`: Statistical aggregates per configuration
- `with_skill` / `without_skill`: Each contains `pass_rate`, `time_seconds`, `tokens` objects with `mean` and `stddev` fields
- `delta`: Difference strings like `"+0.50"`, `"+13.0"`, `"+1700"`
- `notes`: Freeform observations from the analyzer
**Important:** The viewer reads these field names exactly. Using `config` instead of `configuration`, or putting `pass_rate` at the top level of a run instead of nested under `result`, will cause the viewer to show empty/zero values. Always reference this schema when generating benchmark.json manually.
---
## comparison.json
Output from blind comparator. Located at `<grading-dir>/comparison-N.json`.
```json
{
"winner": "A",
"reasoning": "Output A provides a complete solution with proper formatting and all required fields. Output B is missing the date field and has formatting inconsistencies.",
"rubric": {
"A": {
"content": {
"correctness": 5,
"completeness": 5,
"accuracy": 4
},
"structure": {
"organization": 4,
"formatting": 5,
"usability": 4
},
"content_score": 4.7,
"structure_score": 4.3,
"overall_score": 9.0
},
"B": {
"content": {
"correctness": 3,
"completeness": 2,
"accuracy": 3
},
"structure": {
"organization": 3,
"formatting": 2,
"usability": 3
},
"content_score": 2.7,
"structure_score": 2.7,
"overall_score": 5.4
}
},
"output_quality": {
"A": {
"score": 9,
"strengths": ["Complete solution", "Well-formatted", "All fields present"],
"weaknesses": ["Minor style inconsistency in header"]
},
"B": {
"score": 5,
"strengths": ["Readable output", "Correct basic structure"],
"weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"]
}
},
"expectation_results": {
"A": {
"passed": 4,
"total": 5,
"pass_rate": 0.80,
"details": [
{"text": "Output includes name", "passed": true}
]
},
"B": {
"passed": 3,
"total": 5,
"pass_rate": 0.60,
"details": [
{"text": "Output includes name", "passed": true}
]
}
}
}
```
---
## analysis.json
Output from post-hoc analyzer. Located at `<grading-dir>/analysis.json`.
```json
{
"comparison_summary": {
"winner": "A",
"winner_skill": "path/to/winner/skill",
"loser_skill": "path/to/loser/skill",
"comparator_reasoning": "Brief summary of why comparator chose winner"
},
"winner_strengths": [
"Clear step-by-step instructions for handling multi-page documents",
"Included validation script that caught formatting errors"
],
"loser_weaknesses": [
"Vague instruction 'process the document appropriately' led to inconsistent behavior",
"No script for validation, agent had to improvise"
],
"instruction_following": {
"winner": {
"score": 9,
"issues": ["Minor: skipped optional logging step"]
},
"loser": {
"score": 6,
"issues": [
"Did not use the skill's formatting template",
"Invented own approach instead of following step 3"
]
}
},
"improvement_suggestions": [
{
"priority": "high",
"category": "instructions",
"suggestion": "Replace 'process the document appropriately' with explicit steps",
"expected_impact": "Would eliminate ambiguity that caused inconsistent behavior"
}
],
"transcript_insights": {
"winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script",
"loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods"
}
}
```

View File

@ -1,401 +0,0 @@
#!/usr/bin/env python3
"""
Aggregate individual run results into benchmark summary statistics.
Reads grading.json files from run directories and produces:
- run_summary with mean, stddev, min, max for each metric
- delta between with_skill and without_skill configurations
Usage:
python aggregate_benchmark.py <benchmark_dir>
Example:
python aggregate_benchmark.py benchmarks/2026-01-15T10-30-00/
The script supports two directory layouts:
Workspace layout (from skill-creator iterations):
<benchmark_dir>/
eval-N/
with_skill/
run-1/grading.json
run-2/grading.json
without_skill/
run-1/grading.json
run-2/grading.json
Legacy layout (with runs/ subdirectory):
<benchmark_dir>/
runs/
eval-N/
with_skill/
run-1/grading.json
without_skill/
run-1/grading.json
"""
import argparse
import json
import math
import sys
from datetime import datetime, timezone
from pathlib import Path
def calculate_stats(values: list[float]) -> dict:
"""Calculate mean, stddev, min, max for a list of values."""
if not values:
return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}
n = len(values)
mean = sum(values) / n
if n > 1:
variance = sum((x - mean) ** 2 for x in values) / (n - 1)
stddev = math.sqrt(variance)
else:
stddev = 0.0
return {
"mean": round(mean, 4),
"stddev": round(stddev, 4),
"min": round(min(values), 4),
"max": round(max(values), 4)
}
def load_run_results(benchmark_dir: Path) -> dict:
"""
Load all run results from a benchmark directory.
Returns dict keyed by config name (e.g. "with_skill"/"without_skill",
or "new_skill"/"old_skill"), each containing a list of run results.
"""
# Support both layouts: eval dirs directly under benchmark_dir, or under runs/
runs_dir = benchmark_dir / "runs"
if runs_dir.exists():
search_dir = runs_dir
elif list(benchmark_dir.glob("eval-*")):
search_dir = benchmark_dir
else:
print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}")
return {}
results: dict[str, list] = {}
for eval_idx, eval_dir in enumerate(sorted(search_dir.glob("eval-*"))):
metadata_path = eval_dir / "eval_metadata.json"
if metadata_path.exists():
try:
with open(metadata_path) as mf:
eval_id = json.load(mf).get("eval_id", eval_idx)
except (json.JSONDecodeError, OSError):
eval_id = eval_idx
else:
try:
eval_id = int(eval_dir.name.split("-")[1])
except ValueError:
eval_id = eval_idx
# Discover config directories dynamically rather than hardcoding names
for config_dir in sorted(eval_dir.iterdir()):
if not config_dir.is_dir():
continue
# Skip non-config directories (inputs, outputs, etc.)
if not list(config_dir.glob("run-*")):
continue
config = config_dir.name
if config not in results:
results[config] = []
for run_dir in sorted(config_dir.glob("run-*")):
run_number = int(run_dir.name.split("-")[1])
grading_file = run_dir / "grading.json"
if not grading_file.exists():
print(f"Warning: grading.json not found in {run_dir}")
continue
try:
with open(grading_file) as f:
grading = json.load(f)
except json.JSONDecodeError as e:
print(f"Warning: Invalid JSON in {grading_file}: {e}")
continue
# Extract metrics
result = {
"eval_id": eval_id,
"run_number": run_number,
"pass_rate": grading.get("summary", {}).get("pass_rate", 0.0),
"passed": grading.get("summary", {}).get("passed", 0),
"failed": grading.get("summary", {}).get("failed", 0),
"total": grading.get("summary", {}).get("total", 0),
}
# Extract timing — check grading.json first, then sibling timing.json
timing = grading.get("timing", {})
result["time_seconds"] = timing.get("total_duration_seconds", 0.0)
timing_file = run_dir / "timing.json"
if result["time_seconds"] == 0.0 and timing_file.exists():
try:
with open(timing_file) as tf:
timing_data = json.load(tf)
result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
result["tokens"] = timing_data.get("total_tokens", 0)
except json.JSONDecodeError:
pass
# Extract metrics if available
metrics = grading.get("execution_metrics", {})
result["tool_calls"] = metrics.get("total_tool_calls", 0)
if not result.get("tokens"):
result["tokens"] = metrics.get("output_chars", 0)
result["errors"] = metrics.get("errors_encountered", 0)
# Extract expectations — viewer requires fields: text, passed, evidence
raw_expectations = grading.get("expectations", [])
for exp in raw_expectations:
if "text" not in exp or "passed" not in exp:
print(f"Warning: expectation in {grading_file} missing required fields (text, passed, evidence): {exp}")
result["expectations"] = raw_expectations
# Extract notes from user_notes_summary
notes_summary = grading.get("user_notes_summary", {})
notes = []
notes.extend(notes_summary.get("uncertainties", []))
notes.extend(notes_summary.get("needs_review", []))
notes.extend(notes_summary.get("workarounds", []))
result["notes"] = notes
results[config].append(result)
return results
def aggregate_results(results: dict) -> dict:
"""
Aggregate run results into summary statistics.
Returns run_summary with stats for each configuration and delta.
"""
run_summary = {}
configs = list(results.keys())
for config in configs:
runs = results.get(config, [])
if not runs:
run_summary[config] = {
"pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
"time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
"tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0}
}
continue
pass_rates = [r["pass_rate"] for r in runs]
times = [r["time_seconds"] for r in runs]
tokens = [r.get("tokens", 0) for r in runs]
run_summary[config] = {
"pass_rate": calculate_stats(pass_rates),
"time_seconds": calculate_stats(times),
"tokens": calculate_stats(tokens)
}
# Calculate delta between the first two configs (if two exist)
if len(configs) >= 2:
primary = run_summary.get(configs[0], {})
baseline = run_summary.get(configs[1], {})
else:
primary = run_summary.get(configs[0], {}) if configs else {}
baseline = {}
delta_pass_rate = primary.get("pass_rate", {}).get("mean", 0) - baseline.get("pass_rate", {}).get("mean", 0)
delta_time = primary.get("time_seconds", {}).get("mean", 0) - baseline.get("time_seconds", {}).get("mean", 0)
delta_tokens = primary.get("tokens", {}).get("mean", 0) - baseline.get("tokens", {}).get("mean", 0)
run_summary["delta"] = {
"pass_rate": f"{delta_pass_rate:+.2f}",
"time_seconds": f"{delta_time:+.1f}",
"tokens": f"{delta_tokens:+.0f}"
}
return run_summary
def generate_benchmark(benchmark_dir: Path, skill_name: str = "", skill_path: str = "") -> dict:
"""
Generate complete benchmark.json from run results.
"""
results = load_run_results(benchmark_dir)
run_summary = aggregate_results(results)
# Build runs array for benchmark.json
runs = []
for config in results:
for result in results[config]:
runs.append({
"eval_id": result["eval_id"],
"configuration": config,
"run_number": result["run_number"],
"result": {
"pass_rate": result["pass_rate"],
"passed": result["passed"],
"failed": result["failed"],
"total": result["total"],
"time_seconds": result["time_seconds"],
"tokens": result.get("tokens", 0),
"tool_calls": result.get("tool_calls", 0),
"errors": result.get("errors", 0)
},
"expectations": result["expectations"],
"notes": result["notes"]
})
# Determine eval IDs from results
eval_ids = sorted(set(
r["eval_id"]
for config in results.values()
for r in config
))
benchmark = {
"metadata": {
"skill_name": skill_name or "<skill-name>",
"skill_path": skill_path or "<path/to/skill>",
"executor_model": "<model-name>",
"analyzer_model": "<model-name>",
"timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
"evals_run": eval_ids,
"runs_per_configuration": 3
},
"runs": runs,
"run_summary": run_summary,
"notes": [] # To be filled by analyzer
}
return benchmark
def generate_markdown(benchmark: dict) -> str:
"""Generate human-readable benchmark.md from benchmark data."""
metadata = benchmark["metadata"]
run_summary = benchmark["run_summary"]
# Determine config names (excluding "delta")
configs = [k for k in run_summary if k != "delta"]
config_a = configs[0] if len(configs) >= 1 else "config_a"
config_b = configs[1] if len(configs) >= 2 else "config_b"
label_a = config_a.replace("_", " ").title()
label_b = config_b.replace("_", " ").title()
lines = [
f"# Skill Benchmark: {metadata['skill_name']}",
"",
f"**Model**: {metadata['executor_model']}",
f"**Date**: {metadata['timestamp']}",
f"**Evals**: {', '.join(map(str, metadata['evals_run']))} ({metadata['runs_per_configuration']} runs each per configuration)",
"",
"## Summary",
"",
f"| Metric | {label_a} | {label_b} | Delta |",
"|--------|------------|---------------|-------|",
]
a_summary = run_summary.get(config_a, {})
b_summary = run_summary.get(config_b, {})
delta = run_summary.get("delta", {})
# Format pass rate
a_pr = a_summary.get("pass_rate", {})
b_pr = b_summary.get("pass_rate", {})
lines.append(f"| Pass Rate | {a_pr.get('mean', 0)*100:.0f}% ± {a_pr.get('stddev', 0)*100:.0f}% | {b_pr.get('mean', 0)*100:.0f}% ± {b_pr.get('stddev', 0)*100:.0f}% | {delta.get('pass_rate', '')} |")
# Format time
a_time = a_summary.get("time_seconds", {})
b_time = b_summary.get("time_seconds", {})
lines.append(f"| Time | {a_time.get('mean', 0):.1f}s ± {a_time.get('stddev', 0):.1f}s | {b_time.get('mean', 0):.1f}s ± {b_time.get('stddev', 0):.1f}s | {delta.get('time_seconds', '')}s |")
# Format tokens
a_tokens = a_summary.get("tokens", {})
b_tokens = b_summary.get("tokens", {})
lines.append(f"| Tokens | {a_tokens.get('mean', 0):.0f} ± {a_tokens.get('stddev', 0):.0f} | {b_tokens.get('mean', 0):.0f} ± {b_tokens.get('stddev', 0):.0f} | {delta.get('tokens', '')} |")
# Notes section
if benchmark.get("notes"):
lines.extend([
"",
"## Notes",
""
])
for note in benchmark["notes"]:
lines.append(f"- {note}")
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(
description="Aggregate benchmark run results into summary statistics"
)
parser.add_argument(
"benchmark_dir",
type=Path,
help="Path to the benchmark directory"
)
parser.add_argument(
"--skill-name",
default="",
help="Name of the skill being benchmarked"
)
parser.add_argument(
"--skill-path",
default="",
help="Path to the skill being benchmarked"
)
parser.add_argument(
"--output", "-o",
type=Path,
help="Output path for benchmark.json (default: <benchmark_dir>/benchmark.json)"
)
args = parser.parse_args()
if not args.benchmark_dir.exists():
print(f"Directory not found: {args.benchmark_dir}")
sys.exit(1)
# Generate benchmark
benchmark = generate_benchmark(args.benchmark_dir, args.skill_name, args.skill_path)
# Determine output paths
output_json = args.output or (args.benchmark_dir / "benchmark.json")
output_md = output_json.with_suffix(".md")
# Write benchmark.json
with open(output_json, "w") as f:
json.dump(benchmark, f, indent=2)
print(f"Generated: {output_json}")
# Write benchmark.md
markdown = generate_markdown(benchmark)
with open(output_md, "w") as f:
f.write(markdown)
print(f"Generated: {output_md}")
# Print summary
run_summary = benchmark["run_summary"]
configs = [k for k in run_summary if k != "delta"]
delta = run_summary.get("delta", {})
print(f"\nSummary:")
for config in configs:
pr = run_summary[config]["pass_rate"]["mean"]
label = config.replace("_", " ").title()
print(f" {label}: {pr*100:.1f}% pass rate")
print(f" Delta: {delta.get('pass_rate', '')}")
if __name__ == "__main__":
main()

View File

@ -1,326 +0,0 @@
#!/usr/bin/env python3
"""Generate an HTML report from run_loop.py output.
Takes the JSON output from run_loop.py and generates a visual HTML report
showing each description attempt with check/x for each test case.
Distinguishes between train and test queries.
"""
import argparse
import html
import json
import sys
from pathlib import Path
def generate_html(data: dict, auto_refresh: bool = False, skill_name: str = "") -> str:
"""Generate HTML report from loop output data. If auto_refresh is True, adds a meta refresh tag."""
history = data.get("history", [])
holdout = data.get("holdout", 0)
title_prefix = html.escape(skill_name + " \u2014 ") if skill_name else ""
# Get all unique queries from train and test sets, with should_trigger info
train_queries: list[dict] = []
test_queries: list[dict] = []
if history:
for r in history[0].get("train_results", history[0].get("results", [])):
train_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)})
if history[0].get("test_results"):
for r in history[0].get("test_results", []):
test_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)})
refresh_tag = ' <meta http-equiv="refresh" content="5">\n' if auto_refresh else ""
html_parts = ["""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
""" + refresh_tag + """ <title>""" + title_prefix + """Skill Description Optimization</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600&family=Lora:wght@400;500&display=swap" rel="stylesheet">
<style>
body {
font-family: 'Lora', Georgia, serif;
max-width: 100%;
margin: 0 auto;
padding: 20px;
background: #faf9f5;
color: #141413;
}
h1 { font-family: 'Poppins', sans-serif; color: #141413; }
.explainer {
background: white;
padding: 15px;
border-radius: 6px;
margin-bottom: 20px;
border: 1px solid #e8e6dc;
color: #b0aea5;
font-size: 0.875rem;
line-height: 1.6;
}
.summary {
background: white;
padding: 15px;
border-radius: 6px;
margin-bottom: 20px;
border: 1px solid #e8e6dc;
}
.summary p { margin: 5px 0; }
.best { color: #788c5d; font-weight: bold; }
.table-container {
overflow-x: auto;
width: 100%;
}
table {
border-collapse: collapse;
background: white;
border: 1px solid #e8e6dc;
border-radius: 6px;
font-size: 12px;
min-width: 100%;
}
th, td {
padding: 8px;
text-align: left;
border: 1px solid #e8e6dc;
white-space: normal;
word-wrap: break-word;
}
th {
font-family: 'Poppins', sans-serif;
background: #141413;
color: #faf9f5;
font-weight: 500;
}
th.test-col {
background: #6a9bcc;
}
th.query-col { min-width: 200px; }
td.description {
font-family: monospace;
font-size: 11px;
word-wrap: break-word;
max-width: 400px;
}
td.result {
text-align: center;
font-size: 16px;
min-width: 40px;
}
td.test-result {
background: #f0f6fc;
}
.pass { color: #788c5d; }
.fail { color: #c44; }
.rate {
font-size: 9px;
color: #b0aea5;
display: block;
}
tr:hover { background: #faf9f5; }
.score {
display: inline-block;
padding: 2px 6px;
border-radius: 4px;
font-weight: bold;
font-size: 11px;
}
.score-good { background: #eef2e8; color: #788c5d; }
.score-ok { background: #fef3c7; color: #d97706; }
.score-bad { background: #fceaea; color: #c44; }
.train-label { color: #b0aea5; font-size: 10px; }
.test-label { color: #6a9bcc; font-size: 10px; font-weight: bold; }
.best-row { background: #f5f8f2; }
th.positive-col { border-bottom: 3px solid #788c5d; }
th.negative-col { border-bottom: 3px solid #c44; }
th.test-col.positive-col { border-bottom: 3px solid #788c5d; }
th.test-col.negative-col { border-bottom: 3px solid #c44; }
.legend { font-family: 'Poppins', sans-serif; display: flex; gap: 20px; margin-bottom: 10px; font-size: 13px; align-items: center; }
.legend-item { display: flex; align-items: center; gap: 6px; }
.legend-swatch { width: 16px; height: 16px; border-radius: 3px; display: inline-block; }
.swatch-positive { background: #141413; border-bottom: 3px solid #788c5d; }
.swatch-negative { background: #141413; border-bottom: 3px solid #c44; }
.swatch-test { background: #6a9bcc; }
.swatch-train { background: #141413; }
</style>
</head>
<body>
<h1>""" + title_prefix + """Skill Description Optimization</h1>
<div class="explainer">
<strong>Optimizing your skill's description.</strong> This page updates automatically as Claude tests different versions of your skill's description. Each row is an iteration a new description attempt. The columns show test queries: green checkmarks mean the skill triggered correctly (or correctly didn't trigger), red crosses mean it got it wrong. The "Train" score shows performance on queries used to improve the description; the "Test" score shows performance on held-out queries the optimizer hasn't seen. When it's done, Claude will apply the best-performing description to your skill.
</div>
"""]
# Summary section
best_test_score = data.get('best_test_score')
best_train_score = data.get('best_train_score')
html_parts.append(f"""
<div class="summary">
<p><strong>Original:</strong> {html.escape(data.get('original_description', 'N/A'))}</p>
<p class="best"><strong>Best:</strong> {html.escape(data.get('best_description', 'N/A'))}</p>
<p><strong>Best Score:</strong> {data.get('best_score', 'N/A')} {'(test)' if best_test_score else '(train)'}</p>
<p><strong>Iterations:</strong> {data.get('iterations_run', 0)} | <strong>Train:</strong> {data.get('train_size', '?')} | <strong>Test:</strong> {data.get('test_size', '?')}</p>
</div>
""")
# Legend
html_parts.append("""
<div class="legend">
<span style="font-weight:600">Query columns:</span>
<span class="legend-item"><span class="legend-swatch swatch-positive"></span> Should trigger</span>
<span class="legend-item"><span class="legend-swatch swatch-negative"></span> Should NOT trigger</span>
<span class="legend-item"><span class="legend-swatch swatch-train"></span> Train</span>
<span class="legend-item"><span class="legend-swatch swatch-test"></span> Test</span>
</div>
""")
# Table header
html_parts.append("""
<div class="table-container">
<table>
<thead>
<tr>
<th>Iter</th>
<th>Train</th>
<th>Test</th>
<th class="query-col">Description</th>
""")
# Add column headers for train queries
for qinfo in train_queries:
polarity = "positive-col" if qinfo["should_trigger"] else "negative-col"
html_parts.append(f' <th class="{polarity}">{html.escape(qinfo["query"])}</th>\n')
# Add column headers for test queries (different color)
for qinfo in test_queries:
polarity = "positive-col" if qinfo["should_trigger"] else "negative-col"
html_parts.append(f' <th class="test-col {polarity}">{html.escape(qinfo["query"])}</th>\n')
html_parts.append(""" </tr>
</thead>
<tbody>
""")
# Find best iteration for highlighting
if test_queries:
best_iter = max(history, key=lambda h: h.get("test_passed") or 0).get("iteration")
else:
best_iter = max(history, key=lambda h: h.get("train_passed", h.get("passed", 0))).get("iteration")
# Add rows for each iteration
for h in history:
iteration = h.get("iteration", "?")
train_passed = h.get("train_passed", h.get("passed", 0))
train_total = h.get("train_total", h.get("total", 0))
test_passed = h.get("test_passed")
test_total = h.get("test_total")
description = h.get("description", "")
train_results = h.get("train_results", h.get("results", []))
test_results = h.get("test_results", [])
# Create lookups for results by query
train_by_query = {r["query"]: r for r in train_results}
test_by_query = {r["query"]: r for r in test_results} if test_results else {}
# Compute aggregate correct/total runs across all retries
def aggregate_runs(results: list[dict]) -> tuple[int, int]:
correct = 0
total = 0
for r in results:
runs = r.get("runs", 0)
triggers = r.get("triggers", 0)
total += runs
if r.get("should_trigger", True):
correct += triggers
else:
correct += runs - triggers
return correct, total
train_correct, train_runs = aggregate_runs(train_results)
test_correct, test_runs = aggregate_runs(test_results)
# Determine score classes
def score_class(correct: int, total: int) -> str:
if total > 0:
ratio = correct / total
if ratio >= 0.8:
return "score-good"
elif ratio >= 0.5:
return "score-ok"
return "score-bad"
train_class = score_class(train_correct, train_runs)
test_class = score_class(test_correct, test_runs)
row_class = "best-row" if iteration == best_iter else ""
html_parts.append(f""" <tr class="{row_class}">
<td>{iteration}</td>
<td><span class="score {train_class}">{train_correct}/{train_runs}</span></td>
<td><span class="score {test_class}">{test_correct}/{test_runs}</span></td>
<td class="description">{html.escape(description)}</td>
""")
# Add result for each train query
for qinfo in train_queries:
r = train_by_query.get(qinfo["query"], {})
did_pass = r.get("pass", False)
triggers = r.get("triggers", 0)
runs = r.get("runs", 0)
icon = "" if did_pass else ""
css_class = "pass" if did_pass else "fail"
html_parts.append(f' <td class="result {css_class}">{icon}<span class="rate">{triggers}/{runs}</span></td>\n')
# Add result for each test query (with different background)
for qinfo in test_queries:
r = test_by_query.get(qinfo["query"], {})
did_pass = r.get("pass", False)
triggers = r.get("triggers", 0)
runs = r.get("runs", 0)
icon = "" if did_pass else ""
css_class = "pass" if did_pass else "fail"
html_parts.append(f' <td class="result test-result {css_class}">{icon}<span class="rate">{triggers}/{runs}</span></td>\n')
html_parts.append(" </tr>\n")
html_parts.append(""" </tbody>
</table>
</div>
""")
html_parts.append("""
</body>
</html>
""")
return "".join(html_parts)
def main():
parser = argparse.ArgumentParser(description="Generate HTML report from run_loop output")
parser.add_argument("input", help="Path to JSON output from run_loop.py (or - for stdin)")
parser.add_argument("-o", "--output", default=None, help="Output HTML file (default: stdout)")
parser.add_argument("--skill-name", default="", help="Skill name to include in the report title")
args = parser.parse_args()
if args.input == "-":
data = json.load(sys.stdin)
else:
data = json.loads(Path(args.input).read_text())
html_output = generate_html(data, skill_name=args.skill_name)
if args.output:
Path(args.output).write_text(html_output)
print(f"Report written to {args.output}", file=sys.stderr)
else:
print(html_output)
if __name__ == "__main__":
main()

View File

@ -1,247 +0,0 @@
#!/usr/bin/env python3
"""Improve a skill description based on eval results.
Takes eval results (from run_eval.py) and generates an improved description
by calling `claude -p` as a subprocess (same auth pattern as run_eval.py
uses the session's Claude Code auth, no separate ANTHROPIC_API_KEY needed).
"""
import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from scripts.utils import parse_skill_md
def _call_claude(prompt: str, model: str | None, timeout: int = 300) -> str:
"""Run `claude -p` with the prompt on stdin and return the text response.
Prompt goes over stdin (not argv) because it embeds the full SKILL.md
body and can easily exceed comfortable argv length.
"""
cmd = ["claude", "-p", "--output-format", "text"]
if model:
cmd.extend(["--model", model])
# Remove CLAUDECODE env var to allow nesting claude -p inside a
# Claude Code session. The guard is for interactive terminal conflicts;
# programmatic subprocess usage is safe. Same pattern as run_eval.py.
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
result = subprocess.run(
cmd,
input=prompt,
capture_output=True,
text=True,
env=env,
timeout=timeout,
)
if result.returncode != 0:
raise RuntimeError(
f"claude -p exited {result.returncode}\nstderr: {result.stderr}"
)
return result.stdout
def improve_description(
skill_name: str,
skill_content: str,
current_description: str,
eval_results: dict,
history: list[dict],
model: str,
test_results: dict | None = None,
log_dir: Path | None = None,
iteration: int | None = None,
) -> str:
"""Call Claude to improve the description based on eval results."""
failed_triggers = [
r for r in eval_results["results"]
if r["should_trigger"] and not r["pass"]
]
false_triggers = [
r for r in eval_results["results"]
if not r["should_trigger"] and not r["pass"]
]
# Build scores summary
train_score = f"{eval_results['summary']['passed']}/{eval_results['summary']['total']}"
if test_results:
test_score = f"{test_results['summary']['passed']}/{test_results['summary']['total']}"
scores_summary = f"Train: {train_score}, Test: {test_score}"
else:
scores_summary = f"Train: {train_score}"
prompt = f"""You are optimizing a skill description for a Claude Code skill called "{skill_name}". A "skill" is sort of like a prompt, but with progressive disclosure -- there's a title and description that Claude sees when deciding whether to use the skill, and then if it does use the skill, it reads the .md file which has lots more details and potentially links to other resources in the skill folder like helper files and scripts and additional documentation or examples.
The description appears in Claude's "available_skills" list. When a user sends a query, Claude decides whether to invoke the skill based solely on the title and on this description. Your goal is to write a description that triggers for relevant queries, and doesn't trigger for irrelevant ones.
Here's the current description:
<current_description>
"{current_description}"
</current_description>
Current scores ({scores_summary}):
<scores_summary>
"""
if failed_triggers:
prompt += "FAILED TO TRIGGER (should have triggered but didn't):\n"
for r in failed_triggers:
prompt += f' - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n'
prompt += "\n"
if false_triggers:
prompt += "FALSE TRIGGERS (triggered but shouldn't have):\n"
for r in false_triggers:
prompt += f' - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n'
prompt += "\n"
if history:
prompt += "PREVIOUS ATTEMPTS (do NOT repeat these — try something structurally different):\n\n"
for h in history:
train_s = f"{h.get('train_passed', h.get('passed', 0))}/{h.get('train_total', h.get('total', 0))}"
test_s = f"{h.get('test_passed', '?')}/{h.get('test_total', '?')}" if h.get('test_passed') is not None else None
score_str = f"train={train_s}" + (f", test={test_s}" if test_s else "")
prompt += f'<attempt {score_str}>\n'
prompt += f'Description: "{h["description"]}"\n'
if "results" in h:
prompt += "Train results:\n"
for r in h["results"]:
status = "PASS" if r["pass"] else "FAIL"
prompt += f' [{status}] "{r["query"][:80]}" (triggered {r["triggers"]}/{r["runs"]})\n'
if h.get("note"):
prompt += f'Note: {h["note"]}\n'
prompt += "</attempt>\n\n"
prompt += f"""</scores_summary>
Skill content (for context on what the skill does):
<skill_content>
{skill_content}
</skill_content>
Based on the failures, write a new and improved description that is more likely to trigger correctly. When I say "based on the failures", it's a bit of a tricky line to walk because we don't want to overfit to the specific cases you're seeing. So what I DON'T want you to do is produce an ever-expanding list of specific queries that this skill should or shouldn't trigger for. Instead, try to generalize from the failures to broader categories of user intent and situations where this skill would be useful or not useful. The reason for this is twofold:
1. Avoid overfitting
2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description.
Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy. There is a hard limit of 1024 characters descriptions over that will be truncated, so stay comfortably under it.
Here are some tips that we've found to work well in writing these descriptions:
- The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does"
- The skill description should focus on the user's intent, what they are trying to achieve, vs. the implementation details of how the skill works.
- The description competes with other skills for Claude's attention — make it distinctive and immediately recognizable.
- If you're getting lots of failures after repeated attempts, change things up. Try different sentence structures or wordings.
I'd encourage you to be creative and mix up the style in different iterations since you'll have multiple opportunities to try different approaches and we'll just grab the highest-scoring one at the end.
Please respond with only the new description text in <new_description> tags, nothing else."""
text = _call_claude(prompt, model)
match = re.search(r"<new_description>(.*?)</new_description>", text, re.DOTALL)
description = match.group(1).strip().strip('"') if match else text.strip().strip('"')
transcript: dict = {
"iteration": iteration,
"prompt": prompt,
"response": text,
"parsed_description": description,
"char_count": len(description),
"over_limit": len(description) > 1024,
}
# Safety net: the prompt already states the 1024-char hard limit, but if
# the model blew past it anyway, make one fresh single-turn call that
# quotes the too-long version and asks for a shorter rewrite. (The old
# SDK path did this as a true multi-turn; `claude -p` is one-shot, so we
# inline the prior output into the new prompt instead.)
if len(description) > 1024:
shorten_prompt = (
f"{prompt}\n\n"
f"---\n\n"
f"A previous attempt produced this description, which at "
f"{len(description)} characters is over the 1024-character hard limit:\n\n"
f'"{description}"\n\n'
f"Rewrite it to be under 1024 characters while keeping the most "
f"important trigger words and intent coverage. Respond with only "
f"the new description in <new_description> tags."
)
shorten_text = _call_claude(shorten_prompt, model)
match = re.search(r"<new_description>(.*?)</new_description>", shorten_text, re.DOTALL)
shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"')
transcript["rewrite_prompt"] = shorten_prompt
transcript["rewrite_response"] = shorten_text
transcript["rewrite_description"] = shortened
transcript["rewrite_char_count"] = len(shortened)
description = shortened
transcript["final_description"] = description
if log_dir:
log_dir.mkdir(parents=True, exist_ok=True)
log_file = log_dir / f"improve_iter_{iteration or 'unknown'}.json"
log_file.write_text(json.dumps(transcript, indent=2))
return description
def main():
parser = argparse.ArgumentParser(description="Improve a skill description based on eval results")
parser.add_argument("--eval-results", required=True, help="Path to eval results JSON (from run_eval.py)")
parser.add_argument("--skill-path", required=True, help="Path to skill directory")
parser.add_argument("--history", default=None, help="Path to history JSON (previous attempts)")
parser.add_argument("--model", required=True, help="Model for improvement")
parser.add_argument("--verbose", action="store_true", help="Print thinking to stderr")
args = parser.parse_args()
skill_path = Path(args.skill_path)
if not (skill_path / "SKILL.md").exists():
print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
sys.exit(1)
eval_results = json.loads(Path(args.eval_results).read_text())
history = []
if args.history:
history = json.loads(Path(args.history).read_text())
name, _, content = parse_skill_md(skill_path)
current_description = eval_results["description"]
if args.verbose:
print(f"Current: {current_description}", file=sys.stderr)
print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)
new_description = improve_description(
skill_name=name,
skill_content=content,
current_description=current_description,
eval_results=eval_results,
history=history,
model=args.model,
)
if args.verbose:
print(f"Improved: {new_description}", file=sys.stderr)
# Output as JSON with both the new description and updated history
output = {
"description": new_description,
"history": history + [{
"description": current_description,
"passed": eval_results["summary"]["passed"],
"failed": eval_results["summary"]["failed"],
"total": eval_results["summary"]["total"],
"results": eval_results["results"],
}],
}
print(json.dumps(output, indent=2))
if __name__ == "__main__":
main()

View File

@ -1,136 +0,0 @@
#!/usr/bin/env python3
"""
Skill Packager - Creates a distributable .skill file of a skill folder
Usage:
python utils/package_skill.py <path/to/skill-folder> [output-directory]
Example:
python utils/package_skill.py skills/public/my-skill
python utils/package_skill.py skills/public/my-skill ./dist
"""
import fnmatch
import sys
import zipfile
from pathlib import Path
from scripts.quick_validate import validate_skill
# Patterns to exclude when packaging skills.
EXCLUDE_DIRS = {"__pycache__", "node_modules"}
EXCLUDE_GLOBS = {"*.pyc"}
EXCLUDE_FILES = {".DS_Store"}
# Directories excluded only at the skill root (not when nested deeper).
ROOT_EXCLUDE_DIRS = {"evals"}
def should_exclude(rel_path: Path) -> bool:
"""Check if a path should be excluded from packaging."""
parts = rel_path.parts
if any(part in EXCLUDE_DIRS for part in parts):
return True
# rel_path is relative to skill_path.parent, so parts[0] is the skill
# folder name and parts[1] (if present) is the first subdir.
if len(parts) > 1 and parts[1] in ROOT_EXCLUDE_DIRS:
return True
name = rel_path.name
if name in EXCLUDE_FILES:
return True
return any(fnmatch.fnmatch(name, pat) for pat in EXCLUDE_GLOBS)
def package_skill(skill_path, output_dir=None):
"""
Package a skill folder into a .skill file.
Args:
skill_path: Path to the skill folder
output_dir: Optional output directory for the .skill file (defaults to current directory)
Returns:
Path to the created .skill file, or None if error
"""
skill_path = Path(skill_path).resolve()
# Validate skill folder exists
if not skill_path.exists():
print(f"❌ Error: Skill folder not found: {skill_path}")
return None
if not skill_path.is_dir():
print(f"❌ Error: Path is not a directory: {skill_path}")
return None
# Validate SKILL.md exists
skill_md = skill_path / "SKILL.md"
if not skill_md.exists():
print(f"❌ Error: SKILL.md not found in {skill_path}")
return None
# Run validation before packaging
print("🔍 Validating skill...")
valid, message = validate_skill(skill_path)
if not valid:
print(f"❌ Validation failed: {message}")
print(" Please fix the validation errors before packaging.")
return None
print(f"{message}\n")
# Determine output location
skill_name = skill_path.name
if output_dir:
output_path = Path(output_dir).resolve()
output_path.mkdir(parents=True, exist_ok=True)
else:
output_path = Path.cwd()
skill_filename = output_path / f"{skill_name}.skill"
# Create the .skill file (zip format)
try:
with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
# Walk through the skill directory, excluding build artifacts
for file_path in skill_path.rglob('*'):
if not file_path.is_file():
continue
arcname = file_path.relative_to(skill_path.parent)
if should_exclude(arcname):
print(f" Skipped: {arcname}")
continue
zipf.write(file_path, arcname)
print(f" Added: {arcname}")
print(f"\n✅ Successfully packaged skill to: {skill_filename}")
return skill_filename
except Exception as e:
print(f"❌ Error creating .skill file: {e}")
return None
def main():
if len(sys.argv) < 2:
print("Usage: python utils/package_skill.py <path/to/skill-folder> [output-directory]")
print("\nExample:")
print(" python utils/package_skill.py skills/public/my-skill")
print(" python utils/package_skill.py skills/public/my-skill ./dist")
sys.exit(1)
skill_path = sys.argv[1]
output_dir = sys.argv[2] if len(sys.argv) > 2 else None
print(f"📦 Packaging skill: {skill_path}")
if output_dir:
print(f" Output directory: {output_dir}")
print()
result = package_skill(skill_path, output_dir)
if result:
sys.exit(0)
else:
sys.exit(1)
if __name__ == "__main__":
main()

View File

@ -1,103 +0,0 @@
#!/usr/bin/env python3
"""
Quick validation script for skills - minimal version
"""
import sys
import os
import re
import yaml
from pathlib import Path
def validate_skill(skill_path):
"""Basic validation of a skill"""
skill_path = Path(skill_path)
# Check SKILL.md exists
skill_md = skill_path / 'SKILL.md'
if not skill_md.exists():
return False, "SKILL.md not found"
# Read and validate frontmatter
content = skill_md.read_text()
if not content.startswith('---'):
return False, "No YAML frontmatter found"
# Extract frontmatter
match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL)
if not match:
return False, "Invalid frontmatter format"
frontmatter_text = match.group(1)
# Parse YAML frontmatter
try:
frontmatter = yaml.safe_load(frontmatter_text)
if not isinstance(frontmatter, dict):
return False, "Frontmatter must be a YAML dictionary"
except yaml.YAMLError as e:
return False, f"Invalid YAML in frontmatter: {e}"
# Define allowed properties
ALLOWED_PROPERTIES = {'name', 'description', 'license', 'allowed-tools', 'metadata', 'compatibility'}
# Check for unexpected properties (excluding nested keys under metadata)
unexpected_keys = set(frontmatter.keys()) - ALLOWED_PROPERTIES
if unexpected_keys:
return False, (
f"Unexpected key(s) in SKILL.md frontmatter: {', '.join(sorted(unexpected_keys))}. "
f"Allowed properties are: {', '.join(sorted(ALLOWED_PROPERTIES))}"
)
# Check required fields
if 'name' not in frontmatter:
return False, "Missing 'name' in frontmatter"
if 'description' not in frontmatter:
return False, "Missing 'description' in frontmatter"
# Extract name for validation
name = frontmatter.get('name', '')
if not isinstance(name, str):
return False, f"Name must be a string, got {type(name).__name__}"
name = name.strip()
if name:
# Check naming convention (kebab-case: lowercase with hyphens)
if not re.match(r'^[a-z0-9-]+$', name):
return False, f"Name '{name}' should be kebab-case (lowercase letters, digits, and hyphens only)"
if name.startswith('-') or name.endswith('-') or '--' in name:
return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens"
# Check name length (max 64 characters per spec)
if len(name) > 64:
return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."
# Extract and validate description
description = frontmatter.get('description', '')
if not isinstance(description, str):
return False, f"Description must be a string, got {type(description).__name__}"
description = description.strip()
if description:
# Check for angle brackets
if '<' in description or '>' in description:
return False, "Description cannot contain angle brackets (< or >)"
# Check description length (max 1024 characters per spec)
if len(description) > 1024:
return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."
# Validate compatibility field if present (optional)
compatibility = frontmatter.get('compatibility', '')
if compatibility:
if not isinstance(compatibility, str):
return False, f"Compatibility must be a string, got {type(compatibility).__name__}"
if len(compatibility) > 500:
return False, f"Compatibility is too long ({len(compatibility)} characters). Maximum is 500 characters."
return True, "Skill is valid!"
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python quick_validate.py <skill_directory>")
sys.exit(1)
valid, message = validate_skill(sys.argv[1])
print(message)
sys.exit(0 if valid else 1)

View File

@ -1,310 +0,0 @@
#!/usr/bin/env python3
"""Run trigger evaluation for a skill description.
Tests whether a skill's description causes Claude to trigger (read the skill)
for a set of queries. Outputs results as JSON.
"""
import argparse
import json
import os
import select
import subprocess
import sys
import time
import uuid
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
from scripts.utils import parse_skill_md
def find_project_root() -> Path:
"""Find the project root by walking up from cwd looking for .claude/.
Mimics how Claude Code discovers its project root, so the command file
we create ends up where claude -p will look for it.
"""
current = Path.cwd()
for parent in [current, *current.parents]:
if (parent / ".claude").is_dir():
return parent
return current
def run_single_query(
query: str,
skill_name: str,
skill_description: str,
timeout: int,
project_root: str,
model: str | None = None,
) -> bool:
"""Run a single query and return whether the skill was triggered.
Creates a command file in .claude/commands/ so it appears in Claude's
available_skills list, then runs `claude -p` with the raw query.
Uses --include-partial-messages to detect triggering early from
stream events (content_block_start) rather than waiting for the
full assistant message, which only arrives after tool execution.
"""
unique_id = uuid.uuid4().hex[:8]
clean_name = f"{skill_name}-skill-{unique_id}"
project_commands_dir = Path(project_root) / ".claude" / "commands"
command_file = project_commands_dir / f"{clean_name}.md"
try:
project_commands_dir.mkdir(parents=True, exist_ok=True)
# Use YAML block scalar to avoid breaking on quotes in description
indented_desc = "\n ".join(skill_description.split("\n"))
command_content = (
f"---\n"
f"description: |\n"
f" {indented_desc}\n"
f"---\n\n"
f"# {skill_name}\n\n"
f"This skill handles: {skill_description}\n"
)
command_file.write_text(command_content)
cmd = [
"claude",
"-p", query,
"--output-format", "stream-json",
"--verbose",
"--include-partial-messages",
]
if model:
cmd.extend(["--model", model])
# Remove CLAUDECODE env var to allow nesting claude -p inside a
# Claude Code session. The guard is for interactive terminal conflicts;
# programmatic subprocess usage is safe.
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
cwd=project_root,
env=env,
)
triggered = False
start_time = time.time()
buffer = ""
# Track state for stream event detection
pending_tool_name = None
accumulated_json = ""
try:
while time.time() - start_time < timeout:
if process.poll() is not None:
remaining = process.stdout.read()
if remaining:
buffer += remaining.decode("utf-8", errors="replace")
break
ready, _, _ = select.select([process.stdout], [], [], 1.0)
if not ready:
continue
chunk = os.read(process.stdout.fileno(), 8192)
if not chunk:
break
buffer += chunk.decode("utf-8", errors="replace")
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.strip()
if not line:
continue
try:
event = json.loads(line)
except json.JSONDecodeError:
continue
# Early detection via stream events
if event.get("type") == "stream_event":
se = event.get("event", {})
se_type = se.get("type", "")
if se_type == "content_block_start":
cb = se.get("content_block", {})
if cb.get("type") == "tool_use":
tool_name = cb.get("name", "")
if tool_name in ("Skill", "Read"):
pending_tool_name = tool_name
accumulated_json = ""
else:
return False
elif se_type == "content_block_delta" and pending_tool_name:
delta = se.get("delta", {})
if delta.get("type") == "input_json_delta":
accumulated_json += delta.get("partial_json", "")
if clean_name in accumulated_json:
return True
elif se_type in ("content_block_stop", "message_stop"):
if pending_tool_name:
return clean_name in accumulated_json
if se_type == "message_stop":
return False
# Fallback: full assistant message
elif event.get("type") == "assistant":
message = event.get("message", {})
for content_item in message.get("content", []):
if content_item.get("type") != "tool_use":
continue
tool_name = content_item.get("name", "")
tool_input = content_item.get("input", {})
if tool_name == "Skill" and clean_name in tool_input.get("skill", ""):
triggered = True
elif tool_name == "Read" and clean_name in tool_input.get("file_path", ""):
triggered = True
return triggered
elif event.get("type") == "result":
return triggered
finally:
# Clean up process on any exit path (return, exception, timeout)
if process.poll() is None:
process.kill()
process.wait()
return triggered
finally:
if command_file.exists():
command_file.unlink()
def run_eval(
eval_set: list[dict],
skill_name: str,
description: str,
num_workers: int,
timeout: int,
project_root: Path,
runs_per_query: int = 1,
trigger_threshold: float = 0.5,
model: str | None = None,
) -> dict:
"""Run the full eval set and return results."""
results = []
with ProcessPoolExecutor(max_workers=num_workers) as executor:
future_to_info = {}
for item in eval_set:
for run_idx in range(runs_per_query):
future = executor.submit(
run_single_query,
item["query"],
skill_name,
description,
timeout,
str(project_root),
model,
)
future_to_info[future] = (item, run_idx)
query_triggers: dict[str, list[bool]] = {}
query_items: dict[str, dict] = {}
for future in as_completed(future_to_info):
item, _ = future_to_info[future]
query = item["query"]
query_items[query] = item
if query not in query_triggers:
query_triggers[query] = []
try:
query_triggers[query].append(future.result())
except Exception as e:
print(f"Warning: query failed: {e}", file=sys.stderr)
query_triggers[query].append(False)
for query, triggers in query_triggers.items():
item = query_items[query]
trigger_rate = sum(triggers) / len(triggers)
should_trigger = item["should_trigger"]
if should_trigger:
did_pass = trigger_rate >= trigger_threshold
else:
did_pass = trigger_rate < trigger_threshold
results.append({
"query": query,
"should_trigger": should_trigger,
"trigger_rate": trigger_rate,
"triggers": sum(triggers),
"runs": len(triggers),
"pass": did_pass,
})
passed = sum(1 for r in results if r["pass"])
total = len(results)
return {
"skill_name": skill_name,
"description": description,
"results": results,
"summary": {
"total": total,
"passed": passed,
"failed": total - passed,
},
}
def main():
parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
parser.add_argument("--skill-path", required=True, help="Path to skill directory")
parser.add_argument("--description", default=None, help="Override description to test")
parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
parser.add_argument("--model", default=None, help="Model to use for claude -p (default: user's configured model)")
parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
args = parser.parse_args()
eval_set = json.loads(Path(args.eval_set).read_text())
skill_path = Path(args.skill_path)
if not (skill_path / "SKILL.md").exists():
print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
sys.exit(1)
name, original_description, content = parse_skill_md(skill_path)
description = args.description or original_description
project_root = find_project_root()
if args.verbose:
print(f"Evaluating: {description}", file=sys.stderr)
output = run_eval(
eval_set=eval_set,
skill_name=name,
description=description,
num_workers=args.num_workers,
timeout=args.timeout,
project_root=project_root,
runs_per_query=args.runs_per_query,
trigger_threshold=args.trigger_threshold,
model=args.model,
)
if args.verbose:
summary = output["summary"]
print(f"Results: {summary['passed']}/{summary['total']} passed", file=sys.stderr)
for r in output["results"]:
status = "PASS" if r["pass"] else "FAIL"
rate_str = f"{r['triggers']}/{r['runs']}"
print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}", file=sys.stderr)
print(json.dumps(output, indent=2))
if __name__ == "__main__":
main()

View File

@ -1,328 +0,0 @@
#!/usr/bin/env python3
"""Run the eval + improve loop until all pass or max iterations reached.
Combines run_eval.py and improve_description.py in a loop, tracking history
and returning the best description found. Supports train/test split to prevent
overfitting.
"""
import argparse
import json
import random
import sys
import tempfile
import time
import webbrowser
from pathlib import Path
from scripts.generate_report import generate_html
from scripts.improve_description import improve_description
from scripts.run_eval import find_project_root, run_eval
from scripts.utils import parse_skill_md
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
"""Split eval set into train and test sets, stratified by should_trigger."""
random.seed(seed)
# Separate by should_trigger
trigger = [e for e in eval_set if e["should_trigger"]]
no_trigger = [e for e in eval_set if not e["should_trigger"]]
# Shuffle each group
random.shuffle(trigger)
random.shuffle(no_trigger)
# Calculate split points
n_trigger_test = max(1, int(len(trigger) * holdout))
n_no_trigger_test = max(1, int(len(no_trigger) * holdout))
# Split
test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]
return train_set, test_set
def run_loop(
eval_set: list[dict],
skill_path: Path,
description_override: str | None,
num_workers: int,
timeout: int,
max_iterations: int,
runs_per_query: int,
trigger_threshold: float,
holdout: float,
model: str,
verbose: bool,
live_report_path: Path | None = None,
log_dir: Path | None = None,
) -> dict:
"""Run the eval + improvement loop."""
project_root = find_project_root()
name, original_description, content = parse_skill_md(skill_path)
current_description = description_override or original_description
# Split into train/test if holdout > 0
if holdout > 0:
train_set, test_set = split_eval_set(eval_set, holdout)
if verbose:
print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
else:
train_set = eval_set
test_set = []
history = []
exit_reason = "unknown"
for iteration in range(1, max_iterations + 1):
if verbose:
print(f"\n{'='*60}", file=sys.stderr)
print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
print(f"Description: {current_description}", file=sys.stderr)
print(f"{'='*60}", file=sys.stderr)
# Evaluate train + test together in one batch for parallelism
all_queries = train_set + test_set
t0 = time.time()
all_results = run_eval(
eval_set=all_queries,
skill_name=name,
description=current_description,
num_workers=num_workers,
timeout=timeout,
project_root=project_root,
runs_per_query=runs_per_query,
trigger_threshold=trigger_threshold,
model=model,
)
eval_elapsed = time.time() - t0
# Split results back into train/test by matching queries
train_queries_set = {q["query"] for q in train_set}
train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]
train_passed = sum(1 for r in train_result_list if r["pass"])
train_total = len(train_result_list)
train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
train_results = {"results": train_result_list, "summary": train_summary}
if test_set:
test_passed = sum(1 for r in test_result_list if r["pass"])
test_total = len(test_result_list)
test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
test_results = {"results": test_result_list, "summary": test_summary}
else:
test_results = None
test_summary = None
history.append({
"iteration": iteration,
"description": current_description,
"train_passed": train_summary["passed"],
"train_failed": train_summary["failed"],
"train_total": train_summary["total"],
"train_results": train_results["results"],
"test_passed": test_summary["passed"] if test_summary else None,
"test_failed": test_summary["failed"] if test_summary else None,
"test_total": test_summary["total"] if test_summary else None,
"test_results": test_results["results"] if test_results else None,
# For backward compat with report generator
"passed": train_summary["passed"],
"failed": train_summary["failed"],
"total": train_summary["total"],
"results": train_results["results"],
})
# Write live report if path provided
if live_report_path:
partial_output = {
"original_description": original_description,
"best_description": current_description,
"best_score": "in progress",
"iterations_run": len(history),
"holdout": holdout,
"train_size": len(train_set),
"test_size": len(test_set),
"history": history,
}
live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))
if verbose:
def print_eval_stats(label, results, elapsed):
pos = [r for r in results if r["should_trigger"]]
neg = [r for r in results if not r["should_trigger"]]
tp = sum(r["triggers"] for r in pos)
pos_runs = sum(r["runs"] for r in pos)
fn = pos_runs - tp
fp = sum(r["triggers"] for r in neg)
neg_runs = sum(r["runs"] for r in neg)
tn = neg_runs - fp
total = tp + tn + fp + fn
precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
accuracy = (tp + tn) / total if total > 0 else 0.0
print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
for r in results:
status = "PASS" if r["pass"] else "FAIL"
rate_str = f"{r['triggers']}/{r['runs']}"
print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)
print_eval_stats("Train", train_results["results"], eval_elapsed)
if test_summary:
print_eval_stats("Test ", test_results["results"], 0)
if train_summary["failed"] == 0:
exit_reason = f"all_passed (iteration {iteration})"
if verbose:
print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
break
if iteration == max_iterations:
exit_reason = f"max_iterations ({max_iterations})"
if verbose:
print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
break
# Improve the description based on train results
if verbose:
print(f"\nImproving description...", file=sys.stderr)
t0 = time.time()
# Strip test scores from history so improvement model can't see them
blinded_history = [
{k: v for k, v in h.items() if not k.startswith("test_")}
for h in history
]
new_description = improve_description(
skill_name=name,
skill_content=content,
current_description=current_description,
eval_results=train_results,
history=blinded_history,
model=model,
log_dir=log_dir,
iteration=iteration,
)
improve_elapsed = time.time() - t0
if verbose:
print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)
current_description = new_description
# Find the best iteration by TEST score (or train if no test set)
if test_set:
best = max(history, key=lambda h: h["test_passed"] or 0)
best_score = f"{best['test_passed']}/{best['test_total']}"
else:
best = max(history, key=lambda h: h["train_passed"])
best_score = f"{best['train_passed']}/{best['train_total']}"
if verbose:
print(f"\nExit reason: {exit_reason}", file=sys.stderr)
print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)
return {
"exit_reason": exit_reason,
"original_description": original_description,
"best_description": best["description"],
"best_score": best_score,
"best_train_score": f"{best['train_passed']}/{best['train_total']}",
"best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
"final_description": current_description,
"iterations_run": len(history),
"holdout": holdout,
"train_size": len(train_set),
"test_size": len(test_set),
"history": history,
}
def main():
parser = argparse.ArgumentParser(description="Run eval + improve loop")
parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
parser.add_argument("--skill-path", required=True, help="Path to skill directory")
parser.add_argument("--description", default=None, help="Override starting description")
parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
parser.add_argument("--model", required=True, help="Model for improvement")
parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
args = parser.parse_args()
eval_set = json.loads(Path(args.eval_set).read_text())
skill_path = Path(args.skill_path)
if not (skill_path / "SKILL.md").exists():
print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
sys.exit(1)
name, _, _ = parse_skill_md(skill_path)
# Set up live report path
if args.report != "none":
if args.report == "auto":
timestamp = time.strftime("%Y%m%d_%H%M%S")
live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
else:
live_report_path = Path(args.report)
# Open the report immediately so the user can watch
live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
webbrowser.open(str(live_report_path))
else:
live_report_path = None
# Determine output directory (create before run_loop so logs can be written)
if args.results_dir:
timestamp = time.strftime("%Y-%m-%d_%H%M%S")
results_dir = Path(args.results_dir) / timestamp
results_dir.mkdir(parents=True, exist_ok=True)
else:
results_dir = None
log_dir = results_dir / "logs" if results_dir else None
output = run_loop(
eval_set=eval_set,
skill_path=skill_path,
description_override=args.description,
num_workers=args.num_workers,
timeout=args.timeout,
max_iterations=args.max_iterations,
runs_per_query=args.runs_per_query,
trigger_threshold=args.trigger_threshold,
holdout=args.holdout,
model=args.model,
verbose=args.verbose,
live_report_path=live_report_path,
log_dir=log_dir,
)
# Save JSON output
json_output = json.dumps(output, indent=2)
print(json_output)
if results_dir:
(results_dir / "results.json").write_text(json_output)
# Write final HTML report (without auto-refresh)
if live_report_path:
live_report_path.write_text(generate_html(output, auto_refresh=False, skill_name=name))
print(f"\nReport: {live_report_path}", file=sys.stderr)
if results_dir and live_report_path:
(results_dir / "report.html").write_text(generate_html(output, auto_refresh=False, skill_name=name))
if results_dir:
print(f"Results saved to: {results_dir}", file=sys.stderr)
if __name__ == "__main__":
main()

View File

@ -1,47 +0,0 @@
"""Shared utilities for skill-creator scripts."""
from pathlib import Path
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
"""Parse a SKILL.md file, returning (name, description, full_content)."""
content = (skill_path / "SKILL.md").read_text()
lines = content.split("\n")
if lines[0].strip() != "---":
raise ValueError("SKILL.md missing frontmatter (no opening ---)")
end_idx = None
for i, line in enumerate(lines[1:], start=1):
if line.strip() == "---":
end_idx = i
break
if end_idx is None:
raise ValueError("SKILL.md missing frontmatter (no closing ---)")
name = ""
description = ""
frontmatter_lines = lines[1:end_idx]
i = 0
while i < len(frontmatter_lines):
line = frontmatter_lines[i]
if line.startswith("name:"):
name = line[len("name:"):].strip().strip('"').strip("'")
elif line.startswith("description:"):
value = line[len("description:"):].strip()
# Handle YAML multiline indicators (>, |, >-, |-)
if value in (">", "|", ">-", "|-"):
continuation_lines: list[str] = []
i += 1
while i < len(frontmatter_lines) and (frontmatter_lines[i].startswith(" ") or frontmatter_lines[i].startswith("\t")):
continuation_lines.append(frontmatter_lines[i].strip())
i += 1
description = " ".join(continuation_lines)
continue
else:
description = value.strip('"').strip("'")
i += 1
return name, description, content

View File

@ -1,285 +0,0 @@
---
name: zeroclaw
description: "Help users operate and interact with their ZeroClaw agent instance — through both the CLI (`zeroclaw` commands) and the REST/WebSocket gateway API. Use this skill whenever the user wants to: send messages to ZeroClaw, manage memory or cron jobs, check system status, configure channels or providers, hit the gateway API, troubleshoot their ZeroClaw setup, build from source, or do anything involving the `zeroclaw` binary or its HTTP endpoints. Trigger this even if the user just says things like 'check my agent status', 'schedule a reminder', 'store this in memory', 'list my cron jobs', 'send a message to my bot', 'set up Telegram', 'build zeroclaw', or 'my bot is broken' — these are all ZeroClaw operations."
---
# ZeroClaw Skill
You are helping a user operate their ZeroClaw agent instance. ZeroClaw is an autonomous agent runtime with a CLI and an HTTP/WebSocket gateway.
Your job is to understand what the user wants to accomplish and then **execute it** — run the command, make the API call, report the result. Do not just show commands for the user to copy-paste. Actually run them via the Bash tool and tell the user what happened. The only exception is destructive operations (clearing all memory, estop kill-all) where you should confirm first.
## Adaptive Expertise
Pay attention to how the user talks. Someone who says "can you hit the webhook endpoint with a POST" is telling you they know what they're doing — be concise, skip explanations, just execute. Someone who says "how do I make my bot remember things" needs more context about what's happening under the hood.
Signals of technical comfort: mentions specific endpoints, HTTP methods, JSON fields, talks about tokens/auth, uses CLI flags fluently, references config files directly.
Signals of less familiarity: asks "what does X do", uses casual language about the bot/agent, describes goals rather than mechanisms ("I want it to check something every morning").
Default to a middle ground — brief explanation of what you're about to do, then do it. Dial up or down from there based on cues.
## Discovery — Before You Act
Before running any ZeroClaw operation, make sure you know where things are:
1. **Find the binary.** Search in this order:
- `which zeroclaw` (PATH)
- The current project's build output: `./target/release/zeroclaw` or `./target/debug/zeroclaw` — this is the right choice when the user is working inside the ZeroClaw source tree and may have local changes
- Common install locations: `~/.cargo/bin/zeroclaw`, `~/Downloads/zeroclaw-bin/zeroclaw`
If no binary is found anywhere, offer to build from source (see "Building from Source" below). If the user is a developer working on ZeroClaw itself, they'll likely want the local build — watch for cues like them editing source files, mentioning PRs, or being in the project directory.
2. **Check if the gateway is running** (only needed for REST/WebSocket operations). A quick `curl -sf http://127.0.0.1:42617/health` tells you. If it's not running and the user wants REST access, let them know and offer to start it (`zeroclaw gateway` or `zeroclaw daemon`).
3. **Check auth status.** If the gateway requires pairing (`require_pairing = true` is the default), REST calls need a bearer token. Run `zeroclaw status` to see the current state, or check `~/.zeroclaw/config.toml` for a stored token under `[gateway]`.
Cache these findings for the conversation — don't re-discover every time.
## Important: REPL Limitation
`zeroclaw agent` (interactive REPL) requires interactive stdin, which doesn't work through the Bash tool. When the user wants to chat with their agent, use single-message mode instead:
```bash
zeroclaw agent -m "the message"
```
Each `-m` invocation is independent (no conversation history between calls). If the user needs multi-turn conversation, let them know they can run `zeroclaw agent` directly in their terminal, or use the WebSocket endpoint for programmatic streaming.
## First-Time Setup
If the user hasn't set up ZeroClaw yet (no `~/.zeroclaw/config.toml` exists), guide them through onboarding:
```bash
zeroclaw onboard # Quick mode — defaults to OpenRouter
zeroclaw onboard --provider anthropic # Use Anthropic directly
zeroclaw onboard # Guided wizard (default)
```
After onboarding, verify everything works:
```bash
zeroclaw status
zeroclaw doctor
```
If they already have a config but something is broken, `zeroclaw onboard --channels-only` repairs just the channel configuration without overwriting everything else.
## Building from Source
If the user wants to build ZeroClaw (or no binary is installed):
```bash
cargo build --release
```
This produces `target/release/zeroclaw`. For faster iteration during development, `cargo build` (debug mode) is quicker but produces a slower binary at `target/debug/zeroclaw`.
You can also run directly without a separate build step:
```bash
cargo run --release -- <subcommand> [args]
```
Before building, `cargo check` gives a quick compile validation without the full build.
## Choosing CLI vs REST
Both surfaces can do most things. Rules of thumb:
- **CLI is simpler** for one-off operations from the terminal. It handles auth internally and formats output nicely. Prefer CLI when the user is working locally.
- **REST is needed** when the user is building an integration, scripting from another language, or accessing a remote ZeroClaw instance. Also needed for streaming (WebSocket, SSE).
- If unclear, **default to CLI** — it's less setup.
## Core Operations
### Sending Messages
**CLI:** `zeroclaw agent -m "your message here"` — remember, always use `-m` mode, not bare `zeroclaw agent`.
**REST:**
```bash
curl -X POST http://127.0.0.1:42617/webhook \
-H "Authorization: Bearer <token>" \
-H "Content-Type: application/json" \
-d '{"message": "your message here"}'
```
Response: `{"response": "...", "model": "..."}`
**WebSocket** (for streaming): connect to `ws://127.0.0.1:42617/ws/chat?token=<token>`, send `{"type": "message", "content": "..."}`, receive `{"type": "done", "full_response": "..."}`.
### System Status
Run `zeroclaw status` to see provider, model, uptime, channels, memory backend. For deeper diagnostics: `zeroclaw doctor`.
**REST:** `GET /api/status` (same info as JSON), `GET /health` (no auth, quick ok/not-ok).
### Memory
The CLI can list, get, and clear memories but **cannot store** them directly. To store a memory:
- Via agent: `zeroclaw agent -m "remember that my favorite color is blue"`
- Via REST: `POST /api/memory` with `{"key": "...", "content": "...", "category": "core"}`
**CLI (read/delete):**
- `zeroclaw memory list` — list all entries
- `zeroclaw memory list --category core --limit 10` — filtered
- `zeroclaw memory get "key-name"` — get specific entry
- `zeroclaw memory stats` — usage statistics
- `zeroclaw memory clear --key "prefix" --yes` — delete entries (confirm with user first)
**REST (full CRUD):**
- `GET /api/memory` — list all (optional: `?query=search+text&category=core`)
- `POST /api/memory` — store: `{"key": "...", "content": "...", "category": "core"}`
- `DELETE /api/memory/{key}` — delete entry
Categories: `core`, `daily`, `conversation`, or any custom string.
### Cron / Scheduling
**CLI:**
- `zeroclaw cron list` — show all jobs
- `zeroclaw cron add '0 9 * * 1-5' 'Good morning' --tz America/New_York` — recurring
- `zeroclaw cron add-at '2026-03-11T10:00:00Z' 'Remind me'` — one-time at specific time
- `zeroclaw cron add-every 3600000 'Check health'` — interval in ms
- `zeroclaw cron once 30m 'Follow up'` — delay from now
- `zeroclaw cron pause <id>` / `zeroclaw cron resume <id>` / `zeroclaw cron remove <id>`
**REST:**
- `GET /api/cron` — list jobs
- `POST /api/cron` — add: `{"name": "...", "schedule": "0 9 * * *", "command": "..."}`
- `DELETE /api/cron/{id}` — remove job
### Tools
Tools are used automatically by the agent during conversations (shell, file ops, memory, browser, HTTP, web search, git, etc. — 30+ tools gated by security policy).
To see what's available: `GET /api/tools` (REST) lists all registered tools with descriptions and parameter schemas.
### Configuration
Edit `~/.zeroclaw/config.toml` directly, or re-run `zeroclaw onboard` to reconfigure.
**REST:**
- `GET /api/config` — get current config (secrets masked as `***MASKED***`)
- `PUT /api/config` — update config (send raw TOML as body, 1MB limit)
### Providers & Models
- `zeroclaw providers` — list all supported providers
- `zeroclaw models list` — cached model catalog
- `zeroclaw models refresh --all` — refresh from providers
- `zeroclaw models set anthropic/claude-sonnet-4-6` — set default model
Override per-message: `zeroclaw agent -p anthropic --model claude-sonnet-4-6 -m "hello"`
### Real-Time Events (SSE)
REST only — useful for building dashboards or monitoring:
```bash
curl -N -H "Authorization: Bearer <token>" http://127.0.0.1:42617/api/events
```
Streams JSON events: `llm_request`, `tool_call_start`, `tool_call`, `agent_start`, `agent_end`, `error`.
### Cost Tracking
`GET /api/cost` — returns session/daily/monthly costs, token counts, per-model breakdown.
### Emergency Stop
Confirm with the user before running any estop command — these are disruptive.
- `zeroclaw estop --level kill-all` — stop everything
- `zeroclaw estop --level network-kill` — block all network
- `zeroclaw estop --level tool-freeze --tool shell` — freeze specific tool
- `zeroclaw estop status` — check current estop state
- `zeroclaw estop resume --network` — resume
### Gateway Lifecycle
- `zeroclaw gateway` — start HTTP gateway (foreground)
- `zeroclaw gateway -p 8080 --host 127.0.0.1` — custom bind
- `zeroclaw daemon` — start gateway + channels + scheduler + heartbeat
- `zeroclaw service install/start/stop/status/uninstall` — OS service management
### Channels
ZeroClaw supports 21 messaging channels. To add one, you need to edit `~/.zeroclaw/config.toml`. For example, to set up Telegram:
```toml
[channels]
telegram = true
[channels_config.telegram]
bot_token = "your-bot-token-from-botfather"
allowed_users = [123456789]
```
Then restart the daemon. Check channel health with `zeroclaw channels doctor`.
For the full list of channels and their config fields, read `references/cli-reference.md` (Channels section).
### Pairing (Authentication Setup)
When `require_pairing = true` (default), REST clients need a bearer token:
```bash
curl -X POST http://127.0.0.1:42617/pair -H "X-Pairing-Code: <code>"
```
Response includes `{"token": "..."}` — save this for subsequent requests.
## Common Workflows
Here are multi-step sequences you're likely to need:
**"Is my agent healthy?"**
1. Run `zeroclaw status` — check provider, model, channels
2. Run `zeroclaw doctor` — check connectivity, diagnose issues
3. If gateway needed: `curl -sf http://127.0.0.1:42617/health`
**"Set up a new channel"**
1. Read the current config: `cat ~/.zeroclaw/config.toml`
2. Add the channel config (edit the TOML)
3. Restart: `zeroclaw service restart` (or restart daemon manually)
4. Verify: `zeroclaw channels doctor`
**"Switch to a different model"**
1. Check available: `zeroclaw models list`
2. Set it: `zeroclaw models set <provider/model>`
3. Verify: `zeroclaw status`
4. Test: `zeroclaw agent -m "hello, what model are you?"`
## Gateway Defaults
- **Port:** 42617
- **Host:** 127.0.0.1
- **Auth:** Pairing required (bearer token)
- **Rate limits:** 60 webhook requests/min, 10 pairing attempts/min
- **Body limit:** 64KB (1MB for config updates)
- **Timeout:** 30 seconds
- **Idempotency:** Optional `X-Idempotency-Key` header on `/webhook` (300s TTL)
- **Config location:** `~/.zeroclaw/config.toml`
## Reference Files
For the complete API specification with every endpoint, field, and edge case, read `references/rest-api.md`.
For the full CLI command tree with all flags and options, read `references/cli-reference.md`.
Only load these when you need precise details beyond what's in this file — for most operations, the quick references above are sufficient.
## Troubleshooting
**"zeroclaw: command not found"** — Binary not in PATH. Check `./target/release/zeroclaw`, `~/.cargo/bin/zeroclaw`, or build from source with `cargo build --release`.
**"Connection refused" on REST calls** — Gateway isn't running. Start it with `zeroclaw gateway` or `zeroclaw daemon`.
**"Unauthorized" (401/403)** — Bearer token is missing or invalid. Re-pair via `POST /pair` with the pairing code, or check `~/.zeroclaw/config.toml` for the stored token.
**"LLM request failed" (500)** — Provider issue. Run `zeroclaw doctor` to check connectivity. Common causes: expired API key, provider outage, rate limiting on the provider side.
**"Too many requests" (429)** — You're hitting ZeroClaw's rate limit. Back off — the response includes `retry_after` with the number of seconds to wait.
**Agent not using tools / acting limited** — Check autonomy settings in config.toml under `[autonomy]`. `level = "read_only"` disables most tools. Try `level = "supervised"` or `level = "full"`.
**Memory not persisting** — Check `[memory]` config. If `backend = "none"`, nothing is stored. Switch to `"sqlite"` or `"markdown"`. Also verify `auto_save = true`.
**Channel not responding** — Run `zeroclaw channels doctor` for the specific channel. Common issues: expired bot token, wrong allowed_users list, channel not enabled in `[channels]`.
Report errors to the user with context appropriate to their expertise level. For beginners, explain what went wrong and suggest the fix. For experts, just show the error and the fix.

View File

@ -1,23 +0,0 @@
{
"skill_name": "zeroclaw",
"evals": [
{
"id": 0,
"prompt": "how do i make my bot remember my name",
"expected_output": "Executes a zeroclaw command to store a memory, explains what happened in beginner-friendly language",
"files": []
},
{
"id": 1,
"prompt": "I want to schedule a daily health check on my ZeroClaw instance every morning at 9am ET",
"expected_output": "Executes zeroclaw cron add with correct cron expression and timezone flag",
"files": []
},
{
"id": 2,
"prompt": "Set up a Python script that monitors my ZeroClaw agent's activity via SSE and logs tool calls to a file",
"expected_output": "Writes a Python script that connects to /api/events SSE endpoint with auth, filters for tool_call events, and logs to a file",
"files": []
}
]
}

View File

@ -1,277 +0,0 @@
# ZeroClaw CLI Reference
Complete command reference for the `zeroclaw` binary.
## Table of Contents
1. [Agent](#agent)
2. [Onboarding](#onboarding)
3. [Status & Diagnostics](#status--diagnostics)
4. [Memory](#memory)
5. [Cron](#cron)
6. [Providers & Models](#providers--models)
7. [Gateway & Daemon](#gateway--daemon)
8. [Service Management](#service-management)
9. [Channels](#channels)
10. [Security & Emergency Stop](#security--emergency-stop)
11. [Hardware Peripherals](#hardware-peripherals)
12. [Skills](#skills)
13. [Shell Completions](#shell-completions)
---
## Agent
Interactive chat or single-message mode.
```bash
zeroclaw agent # Interactive REPL
zeroclaw agent -m "Summarize today's logs" # Single message
zeroclaw agent -p anthropic --model claude-sonnet-4-6 # Override provider/model
zeroclaw agent -t 0.3 # Set temperature
zeroclaw agent --peripheral nucleo-f401re:/dev/ttyACM0 # Attach hardware
```
**Key flags:**
- `-m <message>` — single message mode (no REPL)
- `-p <provider>` — override provider (openrouter, anthropic, openai, ollama)
- `--model <model>` — override model
- `-t <float>` — temperature (0.02.0)
- `--peripheral <name>:<port>` — attach hardware peripheral
The agent has access to 30+ tools gated by security policy: shell, file_read, file_write, file_edit, glob_search, content_search, memory_store, memory_recall, memory_forget, browser, http_request, web_fetch, web_search, cron, delegate, git, and more. Max tool iterations defaults to 10.
---
## Onboarding
First-time setup or reconfiguration.
```bash
zeroclaw onboard # Quick mode (default: openrouter)
zeroclaw onboard --provider anthropic # Quick mode with specific provider
zeroclaw onboard # Guided wizard (default)
zeroclaw onboard --memory sqlite # Set memory backend
zeroclaw onboard --force # Overwrite existing config
zeroclaw onboard --channels-only # Repair channels only
```
**Key flags:**
- `--provider <name>` — openrouter (default), anthropic, openai, ollama
- `--model <model>` — default model
- `--memory <backend>` — sqlite, markdown, lucid, none
- `--force` — overwrite existing config.toml
- `--channels-only` — only repair channel configuration
- `--reinit` — start fresh (backs up existing config)
Creates `~/.zeroclaw/config.toml` with `0600` permissions.
---
## Status & Diagnostics
```bash
zeroclaw status # System overview
zeroclaw doctor # Run all diagnostic checks
zeroclaw doctor models # Probe model connectivity
zeroclaw doctor traces # Query execution traces
```
---
## Memory
```bash
zeroclaw memory list # List all entries
zeroclaw memory list --category core --limit 10 # Filtered list
zeroclaw memory get "some-key" # Get specific entry
zeroclaw memory stats # Usage statistics
zeroclaw memory clear --key "prefix" --yes # Delete entries (requires --yes)
```
**Key flags:**
- `--category <name>` — filter by category (core, daily, conversation, custom)
- `--limit <n>` — limit results
- `--key <prefix>` — key prefix for clear operations
- `--yes` — skip confirmation (required for clear)
---
## Cron
```bash
zeroclaw cron list # List all jobs
zeroclaw cron add '0 9 * * 1-5' 'Good morning' --tz America/New_York # Recurring (cron expr)
zeroclaw cron add-at '2026-03-11T10:00:00Z' 'Remind me about meeting' # One-time at specific time
zeroclaw cron add-every 3600000 'Check server health' # Interval in milliseconds
zeroclaw cron once 30m 'Follow up on that task' # Delay from now
zeroclaw cron pause <id> # Pause job
zeroclaw cron resume <id> # Resume job
zeroclaw cron remove <id> # Delete job
```
**Subcommands:**
- `add <cron-expr> <command>` — standard cron expression (5-field)
- `add-at <iso-datetime> <command>` — fire once at exact time
- `add-every <ms> <command>` — repeating interval
- `once <duration> <command>` — delay from now (e.g., `30m`, `2h`, `1d`)
---
## Providers & Models
```bash
zeroclaw providers # List all 40+ supported providers
zeroclaw models list # Show cached model catalog
zeroclaw models refresh --all # Refresh catalogs from all providers
zeroclaw models set anthropic/claude-sonnet-4-6 # Set default model
zeroclaw models status # Current model info
```
Model routing in config.toml:
```toml
[[model_routes]]
hint = "reasoning"
provider = "openrouter"
model = "anthropic/claude-sonnet-4-6"
```
---
## Gateway & Daemon
```bash
zeroclaw gateway # Start HTTP gateway (foreground)
zeroclaw gateway -p 8080 --host 127.0.0.1 # Custom port/host
zeroclaw daemon # Gateway + channels + scheduler + heartbeat
zeroclaw daemon -p 8080 --host 0.0.0.0 # Custom bind
```
**Gateway defaults:**
- Port: 42617
- Host: 127.0.0.1
- Pairing required: true
- Public bind allowed: false
---
## Service Management
OS service lifecycle (systemd on Linux, launchd on macOS).
```bash
zeroclaw service install # Install as system service
zeroclaw service start # Start the service
zeroclaw service status # Check service status
zeroclaw service stop # Stop the service
zeroclaw service restart # Restart the service
zeroclaw service uninstall # Remove the service
```
**Logs:**
- macOS: `~/.zeroclaw/logs/daemon.stdout.log`
- Linux: `journalctl -u zeroclaw`
---
## Channels
Channels are configured in `config.toml` under `[channels]` and `[channels_config.*]`.
```bash
zeroclaw channels list # List configured channels
zeroclaw channels doctor # Check channel health
```
Supported channels (21 total): Telegram, Discord, Slack, WhatsApp (Meta), WATI, Linq (iMessage/RCS/SMS), Email (IMAP/SMTP), IRC, Matrix, Nostr, Signal, Nextcloud Talk, and more.
Channel config example (Telegram):
```toml
[channels]
telegram = true
[channels_config.telegram]
bot_token = "..."
allowed_users = [123456789]
```
---
## Security & Emergency Stop
```bash
zeroclaw estop --level kill-all # Stop everything
zeroclaw estop --level network-kill # Block all network access
zeroclaw estop --level domain-block --domain "*.example.com" # Block specific domains
zeroclaw estop --level tool-freeze --tool shell # Freeze specific tool
zeroclaw estop status # Check estop state
zeroclaw estop resume --network # Resume (may require OTP)
```
**Estop levels:**
- `kill-all` — nuclear option, stops all agent activity
- `network-kill` — blocks all outbound network
- `domain-block` — blocks specific domain patterns
- `tool-freeze` — freezes individual tools
Autonomy config in config.toml:
```toml
[autonomy]
level = "supervised" # read_only | supervised | full
workspace_only = true
allowed_commands = ["git", "cargo", "python"]
forbidden_paths = ["/etc", "/root", "~/.ssh"]
max_actions_per_hour = 20
max_cost_per_day_cents = 500
```
---
## Hardware Peripherals
```bash
zeroclaw hardware discover # Find USB devices
zeroclaw hardware introspect /dev/ttyACM0 # Probe device capabilities
zeroclaw peripheral list # List configured peripherals
zeroclaw peripheral add nucleo-f401re /dev/ttyACM0 # Add peripheral
zeroclaw peripheral flash-nucleo # Flash STM32 firmware
zeroclaw peripheral flash --port /dev/cu.usbmodem101 # Flash Arduino firmware
```
**Supported boards:** STM32 Nucleo-F401RE, Arduino Uno R4, Raspberry Pi GPIO, ESP32.
Attach to agent session: `zeroclaw agent --peripheral nucleo-f401re:/dev/ttyACM0`
---
## Skills
```bash
zeroclaw skills list # List installed skills
zeroclaw skills install <path-or-url> # Install a skill
zeroclaw skills audit # Audit installed skills
zeroclaw skills remove <name> # Remove a skill
```
---
## Shell Completions
```bash
zeroclaw completions zsh # Generate Zsh completions
zeroclaw completions bash # Generate Bash completions
zeroclaw completions fish # Generate Fish completions
```
---
## Config File
Default location: `~/.zeroclaw/config.toml`
Config resolution order (first match wins):
1. `ZEROCLAW_CONFIG_DIR` environment variable
2. `ZEROCLAW_WORKSPACE` environment variable
3. `~/.zeroclaw/active_workspace.toml` marker file
4. `~/.zeroclaw/config.toml` (default)

View File

@ -1,505 +0,0 @@
# ZeroClaw REST API Reference
Complete endpoint reference for the ZeroClaw gateway HTTP API.
## Table of Contents
1. [Authentication](#authentication)
2. [Public Endpoints](#public-endpoints)
3. [Webhook](#webhook)
4. [WebSocket Chat](#websocket-chat)
5. [Status & Health](#status--health)
6. [Memory](#memory)
7. [Cron](#cron)
8. [Tools](#tools)
9. [Configuration](#configuration)
10. [Integrations](#integrations)
11. [Cost](#cost)
12. [Events (SSE)](#events-sse)
13. [Channel Webhooks](#channel-webhooks)
14. [Rate Limiting](#rate-limiting)
15. [Error Responses](#error-responses)
---
## Authentication
Three authentication mechanisms:
### Bearer Token (Primary)
```
Authorization: Bearer <token>
```
Obtained via `POST /pair`. Required for all `/api/*` endpoints when `require_pairing = true` (default).
### Webhook Secret
```
X-Webhook-Secret: <raw_secret>
```
Optional additional auth for `/webhook`. Server SHA-256 hashes and compares using constant-time comparison.
### WebSocket Token
```
ws://host:port/ws/chat?token=<bearer_token>
```
WebSocket connections pass the token as a query parameter (browsers can't set custom headers on WS handshake).
---
## Public Endpoints
### GET /health
No authentication required.
**Response 200:**
```json
{
"status": "ok",
"paired": true,
"require_pairing": true,
"runtime": {}
}
```
### GET /metrics
Prometheus text exposition format.
**Response 200:**
```
Content-Type: text/plain; version=0.0.4; charset=utf-8
```
### POST /pair
Exchange a one-time pairing code for a bearer token.
**Rate Limit:** Configurable per-minute limit per IP (default: 10/min).
**Headers:**
- `X-Pairing-Code: <code>` (required)
**Response 200 (success):**
```json
{
"paired": true,
"persisted": true,
"token": "<bearer_token>",
"message": "Save this token — use it as Authorization: Bearer <token>"
}
```
**Response 200 (persistence failure):**
```json
{
"paired": true,
"persisted": false,
"token": "<bearer_token>",
"message": "Paired for this process, but failed to persist token to config.toml..."
}
```
**Response 403:**
```json
{"error": "Invalid pairing code"}
```
**Response 429:**
```json
{"error": "Too many pairing requests. Please retry later.", "retry_after": 60}
```
**Response 429 (lockout):**
```json
{"error": "Too many failed attempts. Try again in {lockout_secs}s.", "retry_after": 120}
```
---
## Webhook
### POST /webhook
Send a message to the agent and receive a response.
**Rate Limit:** Configurable per-minute limit per IP (default: 60/min).
**Headers:**
- `Authorization: Bearer <token>` (if pairing enabled)
- `Content-Type: application/json`
- `X-Webhook-Secret: <secret>` (optional)
- `X-Idempotency-Key: <uuid>` (optional)
**Request Body:**
```json
{"message": "your prompt here"}
```
**Response 200:**
```json
{"response": "<llm_response>", "model": "<model_name>"}
```
**Response 200 (duplicate — idempotency key match):**
```json
{"status": "duplicate", "idempotent": true, "message": "Request already processed for this idempotency key"}
```
**Response 401:**
```json
{"error": "Unauthorized — pair first via POST /pair, then send Authorization: Bearer <token>"}
```
**Response 429:**
```json
{"error": "Too many webhook requests. Please retry later.", "retry_after": 60}
```
**Response 500:**
```json
{"error": "LLM request failed"}
```
### Idempotency
- Header: `X-Idempotency-Key: <uuid>`
- TTL: configurable, default 300 seconds
- Max tracked keys: configurable, default 10,000
- Duplicate requests within TTL return `"status": "duplicate"` instead of re-processing
---
## WebSocket Chat
### GET /ws/chat?token=<bearer_token>
Streaming agent chat over WebSocket.
**Client → Server:**
```json
{"type": "message", "content": "Hello, what's the weather?"}
```
**Server → Client (complete response):**
```json
{"type": "done", "full_response": "The weather in San Francisco is sunny..."}
```
**Server → Client (error):**
```json
{"type": "error", "message": "Error message here"}
```
Ignore unknown message types. Invalid JSON triggers an error response.
---
## Status & Health
### GET /api/status
**Response 200:**
```json
{
"provider": "openrouter",
"model": "anthropic/claude-sonnet-4",
"temperature": 0.7,
"uptime_seconds": 3600,
"gateway_port": 42617,
"locale": "en",
"memory_backend": "sqlite",
"paired": true,
"channels": {
"telegram": false,
"discord": true,
"slack": false
},
"health": {}
}
```
### GET /api/health
Component health snapshot (requires auth).
```json
{"health": {}}
```
### GET or POST /api/doctor
Run system diagnostics.
```json
{
"results": [
{"name": "provider_connectivity", "severity": "ok", "message": "OpenRouter API reachable"}
],
"summary": {"ok": 5, "warnings": 1, "errors": 0}
}
```
---
## Memory
### GET /api/memory
List or search memory entries.
**Query Parameters:**
- `query` (string, optional) — search text; triggers search mode
- `category` (string, optional) — filter by category
**Response 200:**
```json
{
"entries": [
{
"key": "memory_key",
"content": "memory content",
"category": "core",
"timestamp": "2025-01-10T12:00:00Z"
}
]
}
```
### POST /api/memory
Store a memory entry.
**Request Body:**
```json
{
"key": "unique_key",
"content": "memory content",
"category": "core"
}
```
Category defaults to `"core"` if omitted. Other values: `daily`, `conversation`, or any custom string.
**Response 200:**
```json
{"status": "ok"}
```
### DELETE /api/memory/{key}
Delete a memory entry.
**Response 200:**
```json
{"status": "ok", "deleted": true}
```
---
## Cron
### GET /api/cron
List all scheduled jobs.
**Response 200:**
```json
{
"jobs": [
{
"id": "<uuid>",
"name": "daily-backup",
"command": "backup.sh",
"next_run": "2025-01-10T15:00:00Z",
"last_run": "2025-01-09T15:00:00Z",
"last_status": "success",
"enabled": true
}
]
}
```
### POST /api/cron
Add a new job.
**Request Body:**
```json
{
"name": "job-name",
"schedule": "0 9 * * *",
"command": "command to run"
}
```
**Response 200:**
```json
{
"status": "ok",
"job": {"id": "<uuid>", "name": "job-name", "command": "command to run", "enabled": true}
}
```
### DELETE /api/cron/{id}
Remove a job.
**Response 200:**
```json
{"status": "ok"}
```
---
## Tools
### GET /api/tools
List all registered tools with descriptions and parameter schemas.
**Response 200:**
```json
{
"tools": [
{"name": "shell", "description": "Execute shell commands", "parameters": {}},
{"name": "file_read", "description": "Read file contents", "parameters": {}}
]
}
```
---
## Configuration
### GET /api/config
Get current config. Secrets are masked as `***MASKED***`.
**Response 200:**
```json
{"format": "toml", "content": "<toml_string>"}
```
### PUT /api/config
Update config from TOML body. Body limit: 1 MB.
**Request Body:** Raw TOML text.
**Response 200:**
```json
{"status": "ok"}
```
**Response 400:**
```json
{"error": "Invalid TOML: <details>"}
```
or
```json
{"error": "Invalid config: <validation_error>"}
```
---
## Integrations
### GET /api/integrations
List all integrations and their status.
**Response 200:**
```json
{
"integrations": [
{"name": "openrouter", "description": "OpenRouter LLM provider", "category": "providers", "status": "ok"},
{"name": "telegram", "description": "Telegram messaging channel", "category": "channels", "status": "configured"}
]
}
```
---
## Cost
### GET /api/cost
Cost tracking summary.
**Response 200:**
```json
{
"cost": {
"session_cost_usd": 1.50,
"daily_cost_usd": 5.00,
"monthly_cost_usd": 150.00,
"total_tokens": 50000,
"request_count": 25,
"by_model": {"anthropic/claude-sonnet-4": 1.50}
}
}
```
---
## Events (SSE)
### GET /api/events
Server-Sent Events stream. Requires bearer token.
**Content-Type:** `text/event-stream`
**Event types:**
| Type | Fields | Description |
|------|--------|-------------|
| `llm_request` | provider, model, timestamp | LLM call started |
| `tool_call_start` | tool, timestamp | Tool execution started |
| `tool_call` | tool, duration_ms, success, timestamp | Tool execution completed |
| `agent_start` | provider, model, timestamp | Agent loop started |
| `agent_end` | provider, model, duration_ms, tokens_used, cost_usd, timestamp | Agent loop completed |
| `error` | component, message, timestamp | Error occurred |
**Example:**
```bash
curl -N -H "Authorization: Bearer <token>" http://127.0.0.1:42617/api/events
```
---
## Channel Webhooks
These are incoming webhook endpoints for specific messaging channels. They're set up automatically when channels are configured.
### WhatsApp (Meta Cloud API)
- `GET /whatsapp` — verification (echoes `hub.challenge`)
- `POST /whatsapp` — incoming messages (signature verified via `X-Hub-Signature-256`)
### WATI (WhatsApp Business)
- `GET /wati` — verification (echoes `challenge`)
- `POST /wati` — incoming messages
### Linq (iMessage/RCS/SMS)
- `POST /linq` — incoming messages (signature verified via `X-Webhook-Signature` + `X-Webhook-Timestamp`)
### Nextcloud Talk
- `POST /nextcloud-talk` — bot API webhook (signature verified via `X-Nextcloud-Talk-Signature`)
---
## Rate Limiting
Sliding window (60-second window), per client IP.
| Endpoint | Default Limit |
|----------|--------------|
| `POST /pair` | 10/min |
| `POST /webhook` | 60/min |
If `trust_forwarded_headers` is enabled, uses `X-Forwarded-For` for client IP.
Max tracked keys: configurable (default: 10,000).
---
## Error Responses
**Standard format:**
```json
{"error": "Human-readable error message"}
```
**With retry info:**
```json
{"error": "...", "retry_after": 60}
```
**Status codes:**
| Code | Meaning |
|------|---------|
| 200 | Success |
| 400 | Invalid JSON, missing fields, invalid TOML |
| 401 | Invalid/missing bearer token or webhook secret |
| 403 | Pairing verification failed |
| 404 | Endpoint or channel not configured |
| 408 | Request timeout (30s) |
| 429 | Rate limited (check `retry_after`) |
| 500 | LLM error, database error, internal failure |

View File

@ -20,9 +20,12 @@ reviews:
enabled: true
# Only review PRs targeting these branches
base_branches:
- master
- main
- dev
# Skip reviews for draft PRs or WIP
drafts: false
# Enable base branch analysis
base_branch_analysis: true
# Poem feature toggle (must be a boolean, not an object)
poem: false

View File

@ -64,8 +64,3 @@ LICENSE
*.profdata
coverage
lcov.info
# Application and script directories (not needed for Docker runtime)
apps/
python/
scripts/

View File

@ -1,44 +1,32 @@
# EditorConfig is awesome: https://EditorConfig.org
# EditorConfig — https://editorconfig.org
# Provides consistent formatting defaults across editors and platforms.
# top-most EditorConfig file
root = true
# All files
[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true
# Rust files - match rustfmt.toml
[*.rs]
trim_trailing_whitespace = true
indent_style = space
indent_size = 4
max_line_length = 100
# Markdown files
[*.md]
# Trailing whitespace is significant in Markdown (line breaks).
trim_trailing_whitespace = false
max_line_length = 80
# TOML files
[*.toml]
indent_size = 2
[*.go]
indent_style = tab
# YAML files
[*.{yml,yaml}]
indent_size = 2
# Python files
[*.py]
[*.toml]
indent_size = 2
[Dockerfile]
indent_size = 4
max_line_length = 100
# Shell scripts
[*.{sh,bash}]
indent_size = 2
# JSON files
[*.json]
[*.nix]
indent_style = space
indent_size = 2

View File

@ -59,7 +59,6 @@ PROVIDER=openrouter
# ZAI_API_KEY=...
# SYNTHETIC_API_KEY=...
# OPENCODE_API_KEY=...
# OPENCODE_GO_API_KEY=...
# VERCEL_API_KEY=...
# CLOUDFLARE_API_KEY=...
@ -118,7 +117,3 @@ PROVIDER=openrouter
# Optional: Brave Search (requires API key from https://brave.com/search/api)
# WEB_SEARCH_PROVIDER=brave
# BRAVE_API_KEY=your-brave-search-api-key
#
# Optional: SearXNG (self-hosted, requires instance URL)
# WEB_SEARCH_PROVIDER=searxng
# SEARXNG_INSTANCE_URL=https://searx.example.com

68
.gitattributes vendored
View File

@ -1,61 +1,33 @@
# Git attributes for ZeroClaw
# https://git-scm.com/docs/gitattributes
# Auto detect text files and perform LF normalization
# Normalize all text files
* text=auto
# Source code
*.rs text eol=lf linguist-language=Rust
*.toml text eol=lf linguist-language=TOML
*.py text eol=lf linguist-language=Python
*.js text eol=lf linguist-language=JavaScript
*.ts text eol=lf linguist-language=TypeScript
*.html text eol=lf linguist-language=HTML
*.css text eol=lf linguist-language=CSS
*.scss text eol=lf linguist-language=SCSS
*.json text eol=lf linguist-language=JSON
*.yaml text eol=lf linguist-language=YAML
*.yml text eol=lf linguist-language=YAML
*.md text eol=lf linguist-language=Markdown
*.sh text eol=lf linguist-language=Shell
*.bash text eol=lf linguist-language=Shell
*.ps1 text eol=crlf linguist-language=PowerShell
# Force LF for scripts and build-critical files
*.sh text eol=lf
Dockerfile* text eol=lf
*.rs text eol=lf
*.toml text eol=lf
*.yml text eol=lf
*.yaml text eol=lf
# Documentation
*.txt text eol=lf
LICENSE* text eol=lf
# CI
.github/**/* text eol=lf
# Configuration files
.editorconfig text eol=lf
.gitattributes text eol=lf
.gitignore text eol=lf
.dockerignore text eol=lf
# Rust-specific
Cargo.lock text eol=lf linguist-generated
Cargo.toml text eol=lf
# Declare files that will always have CRLF line endings on checkout
*.sln text eol=crlf
# Denote all files that are truly binary and should not be modified
# Images
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.svg text
*.wasm binary
*.woff binary
*.woff2 binary
*.ttf binary
*.eot binary
*.mp3 binary
*.mp4 binary
*.webm binary
# Archives
*.zip binary
*.tar binary
*.tgz binary
*.gz binary
*.bz2 binary
*.7z binary
*.db binary
# Compiled artifacts
*.so binary
*.dll binary
*.exe binary
*.a binary

50
.github/CODEOWNERS vendored
View File

@ -1,32 +1,32 @@
# Default owner for all files
* @theonlyhennygod @JordanTheJet @SimianAstronaut7
* @theonlyhennygod @JordanTheJet @chumyin
# Important functional modules
/src/agent/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/providers/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/channels/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/tools/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/gateway/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/runtime/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/memory/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/Cargo.toml @theonlyhennygod @JordanTheJet @SimianAstronaut7
/Cargo.lock @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/agent/** @theonlyhennygod @JordanTheJet @chumyin
/src/providers/** @theonlyhennygod @JordanTheJet @chumyin
/src/channels/** @theonlyhennygod @JordanTheJet @chumyin
/src/tools/** @theonlyhennygod @JordanTheJet @chumyin
/src/gateway/** @theonlyhennygod @JordanTheJet @chumyin
/src/runtime/** @theonlyhennygod @JordanTheJet @chumyin
/src/memory/** @theonlyhennygod @JordanTheJet @chumyin
/Cargo.toml @theonlyhennygod @JordanTheJet @chumyin
/Cargo.lock @theonlyhennygod @JordanTheJet @chumyin
# Security / tests / CI-CD ownership
/src/security/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/tests/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/.github/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/.github/workflows/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/.github/codeql/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/.github/dependabot.yml @theonlyhennygod @JordanTheJet @SimianAstronaut7
/SECURITY.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/docs/actions-source-policy.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/docs/ci-map.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/src/security/** @theonlyhennygod @JordanTheJet @chumyin
/tests/** @theonlyhennygod @JordanTheJet @chumyin
/.github/** @theonlyhennygod @JordanTheJet @chumyin
/.github/workflows/** @theonlyhennygod @JordanTheJet @chumyin
/.github/codeql/** @theonlyhennygod @JordanTheJet @chumyin
/.github/dependabot.yml @theonlyhennygod @JordanTheJet @chumyin
/SECURITY.md @theonlyhennygod @JordanTheJet @chumyin
/docs/actions-source-policy.md @theonlyhennygod @JordanTheJet @chumyin
/docs/ci-map.md @theonlyhennygod @JordanTheJet @chumyin
# Docs & governance
/docs/** @theonlyhennygod @JordanTheJet @SimianAstronaut7
/AGENTS.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/CLAUDE.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/CONTRIBUTING.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/docs/pr-workflow.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/docs/reviewer-playbook.md @theonlyhennygod @JordanTheJet @SimianAstronaut7
/docs/** @theonlyhennygod @JordanTheJet @chumyin
/AGENTS.md @theonlyhennygod @JordanTheJet @chumyin
/CLAUDE.md @theonlyhennygod @JordanTheJet @chumyin
/CONTRIBUTING.md @theonlyhennygod @JordanTheJet @chumyin
/docs/pr-workflow.md @theonlyhennygod @JordanTheJet @chumyin
/docs/reviewer-playbook.md @theonlyhennygod @JordanTheJet @chumyin

View File

@ -11,6 +11,15 @@ body:
Please provide a minimal reproducible case so maintainers can triage quickly.
Do not include personal/sensitive data; redact and anonymize all logs/payloads.
- type: input
id: summary
attributes:
label: Summary
description: One-line description of the problem.
placeholder: zeroclaw daemon exits immediately when ...
validations:
required: true
- type: dropdown
id: component
attributes:
@ -63,7 +72,7 @@ body:
label: Steps to reproduce
description: Please provide exact commands/config.
placeholder: |
1. zeroclaw onboard
1. zeroclaw onboard --interactive
2. zeroclaw daemon
3. Observe crash in logs
render: bash
@ -74,13 +83,13 @@ body:
id: impact
attributes:
label: Impact
description: Who is affected, how often, and practical consequences (optional but helps triage).
description: Who is affected, how often, and practical consequences.
placeholder: |
Affected users: ...
Frequency: always/intermittent
Consequence: ...
validations:
required: false
required: true
- type: textarea
id: logs
@ -103,10 +112,9 @@ body:
id: rust
attributes:
label: Rust version
description: Required for runtime/build bugs; optional for docs/config issues.
placeholder: rustc 1.xx.x
validations:
required: false
required: true
- type: input
id: os
@ -132,7 +140,9 @@ body:
attributes:
label: Pre-flight checks
options:
- label: I reproduced this on the latest master branch or latest release.
- label: I reproduced this on the latest main branch or latest release.
required: true
- label: I redacted secrets, tokens, and personal data from all submitted content.
- label: I redacted secrets/tokens from logs.
required: true
- label: I removed personal identifiers and replaced identity-specific data with neutral placeholders.
required: true

View File

@ -3,9 +3,15 @@ contact_links:
- name: Security vulnerability report
url: https://github.com/zeroclaw-labs/zeroclaw/security/policy
about: Please report security vulnerabilities privately via SECURITY.md policy.
- name: Private vulnerability report template
url: https://github.com/zeroclaw-labs/zeroclaw/blob/main/docs/security/private-vulnerability-report-template.md
about: Use this template when filing a private vulnerability report in Security Advisories.
- name: 私密漏洞报告模板(中文)
url: https://github.com/zeroclaw-labs/zeroclaw/blob/main/docs/security/private-vulnerability-report-template.zh-CN.md
about: 使用该中文模板通过 Security Advisories 进行私密漏洞提交。
- name: Contribution guide
url: https://github.com/zeroclaw-labs/zeroclaw/blob/master/CONTRIBUTING.md
url: https://github.com/zeroclaw-labs/zeroclaw/blob/main/CONTRIBUTING.md
about: Please read contribution and PR requirements before opening an issue.
- name: PR workflow & reviewer expectations
url: https://github.com/zeroclaw-labs/zeroclaw/blob/master/docs/pr-workflow.md
url: https://github.com/zeroclaw-labs/zeroclaw/blob/main/docs/pr-workflow.md
about: Read risk-based PR tracks, CI gates, and merge criteria before filing feature requests.

View File

@ -42,10 +42,10 @@ body:
id: non_goals
attributes:
label: Non-goals / out of scope
description: Clarify what should not be included in the first iteration (optional but helps scope discussion).
description: Clarify what should not be included in the first iteration.
placeholder: No UI changes, no cross-provider dynamic adaptation in v1.
validations:
required: false
required: true
- type: textarea
id: alternatives
@ -60,31 +60,31 @@ body:
id: acceptance
attributes:
label: Acceptance criteria
description: What outcomes would make this request complete? (optional — can be defined during triage)
description: What outcomes would make this request complete?
placeholder: |
- Config key is documented and validated
- Runtime path uses configured retry budget
- Regression tests cover fallback and invalid config
validations:
required: false
required: true
- type: textarea
id: architecture
attributes:
label: Architecture impact
description: Which subsystem(s) are affected? (optional — maintainers will assess during triage)
description: Which subsystem(s) are affected?
placeholder: providers/, channels/, memory/, runtime/, security/, docs/ ...
validations:
required: false
required: true
- type: textarea
id: risk
attributes:
label: Risk and rollback
description: Main risk + how to disable/revert quickly (optional — can be defined during planning).
description: Main risk + how to disable/revert quickly.
placeholder: Risk is ... rollback is ...
validations:
required: false
required: true
- type: dropdown
id: breaking

View File

@ -1,3 +1,11 @@
self-hosted-runner:
labels:
- Linux
- X64
- racknerd
- aws-india
- light
- cpu40
- codeql
- codeql-general
- blacksmith-2vcpu-ubuntu-2404

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

View File

@ -0,0 +1,70 @@
{
"version": 1,
"description": "Provider/model connectivity probe contract for scheduled CI checks.",
"consecutive_transient_failures_to_escalate": 2,
"providers": [
{
"name": "OpenAI",
"provider": "openai",
"required": true,
"secret_env": "OPENAI_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Primary reference provider; validates baseline OpenAI-compatible path."
},
{
"name": "Anthropic",
"provider": "anthropic",
"required": true,
"secret_env": "ANTHROPIC_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Checks non-OpenAI provider fetch path and account health."
},
{
"name": "Gemini",
"provider": "gemini",
"required": true,
"secret_env": "GEMINI_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Validates Google model discovery endpoint availability."
},
{
"name": "OpenRouter",
"provider": "openrouter",
"required": true,
"secret_env": "OPENROUTER_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Routes across many providers; signal for aggregator-side health."
},
{
"name": "Qwen",
"provider": "qwen",
"required": false,
"secret_env": "DASHSCOPE_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Regional provider check; optional for global deployments."
},
{
"name": "NVIDIA NIM",
"provider": "nvidia",
"required": false,
"secret_env": "NVIDIA_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Optional ecosystem endpoint check."
},
{
"name": "OpenAI Codex",
"provider": "openai-codex",
"required": false,
"secret_env": "OPENAI_API_KEY",
"timeout_sec": 90,
"retries": 2,
"notes": "Uses OpenAI-compatible models endpoint to verify Codex-profile discovery path."
}
]
}

77
.github/connectivity/providers.json vendored Normal file
View File

@ -0,0 +1,77 @@
{
"global_timeout_seconds": 8,
"providers": [
{
"id": "openrouter",
"url": "https://openrouter.ai/api/v1/models",
"method": "GET",
"critical": true
},
{
"id": "openai",
"url": "https://api.openai.com/v1/models",
"method": "GET",
"critical": true
},
{
"id": "anthropic",
"url": "https://api.anthropic.com/v1/messages",
"method": "POST",
"critical": true
},
{
"id": "groq",
"url": "https://api.groq.com/openai/v1/models",
"method": "GET",
"critical": false
},
{
"id": "deepseek",
"url": "https://api.deepseek.com/v1/models",
"method": "GET",
"critical": false
},
{
"id": "moonshot",
"url": "https://api.moonshot.ai/v1/models",
"method": "GET",
"critical": false
},
{
"id": "qwen",
"url": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models",
"method": "GET",
"critical": false
},
{
"id": "zai",
"url": "https://api.z.ai/api/paas/v4/models",
"method": "GET",
"critical": false
},
{
"id": "glm",
"url": "https://open.bigmodel.cn/api/paas/v4/models",
"method": "GET",
"critical": false
},
{
"id": "together",
"url": "https://api.together.xyz/v1/models",
"method": "GET",
"critical": false
},
{
"id": "fireworks",
"url": "https://api.fireworks.ai/inference/v1/models",
"method": "GET",
"critical": false
},
{
"id": "cohere",
"url": "https://api.cohere.com/v1/models",
"method": "GET",
"critical": false
}
]
}

View File

@ -5,7 +5,7 @@ updates:
directory: "/"
schedule:
interval: daily
target-branch: master
target-branch: main
open-pull-requests-limit: 3
labels:
- "dependencies"
@ -21,7 +21,7 @@ updates:
directory: "/"
schedule:
interval: daily
target-branch: master
target-branch: main
open-pull-requests-limit: 1
labels:
- "ci"
@ -38,7 +38,7 @@ updates:
directory: "/"
schedule:
interval: daily
target-branch: master
target-branch: main
open-pull-requests-limit: 1
labels:
- "ci"

301
.github/labeler.yml vendored
View File

@ -36,145 +36,6 @@
- any-glob-to-any-file:
- "src/channels/**"
"channel:bluesky":
- changed-files:
- any-glob-to-any-file:
- "src/channels/bluesky.rs"
"channel:clawdtalk":
- changed-files:
- any-glob-to-any-file:
- "src/channels/clawdtalk.rs"
"channel:cli":
- changed-files:
- any-glob-to-any-file:
- "src/channels/cli.rs"
"channel:dingtalk":
- changed-files:
- any-glob-to-any-file:
- "src/channels/dingtalk.rs"
"channel:discord":
- changed-files:
- any-glob-to-any-file:
- "src/channels/discord.rs"
- "src/channels/discord_history.rs"
"channel:email":
- changed-files:
- any-glob-to-any-file:
- "src/channels/email_channel.rs"
- "src/channels/gmail_push.rs"
"channel:imessage":
- changed-files:
- any-glob-to-any-file:
- "src/channels/imessage.rs"
"channel:irc":
- changed-files:
- any-glob-to-any-file:
- "src/channels/irc.rs"
"channel:lark":
- changed-files:
- any-glob-to-any-file:
- "src/channels/lark.rs"
"channel:linq":
- changed-files:
- any-glob-to-any-file:
- "src/channels/linq.rs"
"channel:matrix":
- changed-files:
- any-glob-to-any-file:
- "src/channels/matrix.rs"
"channel:mattermost":
- changed-files:
- any-glob-to-any-file:
- "src/channels/mattermost.rs"
"channel:mochat":
- changed-files:
- any-glob-to-any-file:
- "src/channels/mochat.rs"
"channel:mqtt":
- changed-files:
- any-glob-to-any-file:
- "src/channels/mqtt.rs"
"channel:nextcloud-talk":
- changed-files:
- any-glob-to-any-file:
- "src/channels/nextcloud_talk.rs"
"channel:nostr":
- changed-files:
- any-glob-to-any-file:
- "src/channels/nostr.rs"
"channel:notion":
- changed-files:
- any-glob-to-any-file:
- "src/channels/notion.rs"
"channel:qq":
- changed-files:
- any-glob-to-any-file:
- "src/channels/qq.rs"
"channel:reddit":
- changed-files:
- any-glob-to-any-file:
- "src/channels/reddit.rs"
"channel:signal":
- changed-files:
- any-glob-to-any-file:
- "src/channels/signal.rs"
"channel:slack":
- changed-files:
- any-glob-to-any-file:
- "src/channels/slack.rs"
"channel:telegram":
- changed-files:
- any-glob-to-any-file:
- "src/channels/telegram.rs"
"channel:twitter":
- changed-files:
- any-glob-to-any-file:
- "src/channels/twitter.rs"
"channel:wati":
- changed-files:
- any-glob-to-any-file:
- "src/channels/wati.rs"
"channel:webhook":
- changed-files:
- any-glob-to-any-file:
- "src/channels/webhook.rs"
"channel:wecom":
- changed-files:
- any-glob-to-any-file:
- "src/channels/wecom.rs"
"channel:whatsapp":
- changed-files:
- any-glob-to-any-file:
- "src/channels/whatsapp.rs"
- "src/channels/whatsapp_storage.rs"
- "src/channels/whatsapp_web.rs"
"gateway":
- changed-files:
- any-glob-to-any-file:
@ -240,73 +101,6 @@
- any-glob-to-any-file:
- "src/providers/**"
"provider:anthropic":
- changed-files:
- any-glob-to-any-file:
- "src/providers/anthropic.rs"
"provider:azure-openai":
- changed-files:
- any-glob-to-any-file:
- "src/providers/azure_openai.rs"
"provider:bedrock":
- changed-files:
- any-glob-to-any-file:
- "src/providers/bedrock.rs"
"provider:claude-code":
- changed-files:
- any-glob-to-any-file:
- "src/providers/claude_code.rs"
"provider:compatible":
- changed-files:
- any-glob-to-any-file:
- "src/providers/compatible.rs"
"provider:copilot":
- changed-files:
- any-glob-to-any-file:
- "src/providers/copilot.rs"
"provider:gemini":
- changed-files:
- any-glob-to-any-file:
- "src/providers/gemini.rs"
- "src/providers/gemini_cli.rs"
"provider:glm":
- changed-files:
- any-glob-to-any-file:
- "src/providers/glm.rs"
"provider:kilocli":
- changed-files:
- any-glob-to-any-file:
- "src/providers/kilocli.rs"
"provider:ollama":
- changed-files:
- any-glob-to-any-file:
- "src/providers/ollama.rs"
"provider:openai":
- changed-files:
- any-glob-to-any-file:
- "src/providers/openai.rs"
- "src/providers/openai_codex.rs"
"provider:openrouter":
- changed-files:
- any-glob-to-any-file:
- "src/providers/openrouter.rs"
"provider:telnyx":
- changed-files:
- any-glob-to-any-file:
- "src/providers/telnyx.rs"
"service":
- changed-files:
- any-glob-to-any-file:
@ -327,101 +121,6 @@
- any-glob-to-any-file:
- "src/tools/**"
"tool:browser":
- changed-files:
- any-glob-to-any-file:
- "src/tools/browser.rs"
- "src/tools/browser_delegate.rs"
- "src/tools/browser_open.rs"
- "src/tools/text_browser.rs"
- "src/tools/screenshot.rs"
"tool:composio":
- changed-files:
- any-glob-to-any-file:
- "src/tools/composio.rs"
"tool:cron":
- changed-files:
- any-glob-to-any-file:
- "src/tools/cron_add.rs"
- "src/tools/cron_list.rs"
- "src/tools/cron_remove.rs"
- "src/tools/cron_run.rs"
- "src/tools/cron_runs.rs"
- "src/tools/cron_update.rs"
"tool:file":
- changed-files:
- any-glob-to-any-file:
- "src/tools/file_edit.rs"
- "src/tools/file_read.rs"
- "src/tools/file_write.rs"
- "src/tools/glob_search.rs"
- "src/tools/content_search.rs"
"tool:google-workspace":
- changed-files:
- any-glob-to-any-file:
- "src/tools/google_workspace.rs"
"tool:mcp":
- changed-files:
- any-glob-to-any-file:
- "src/tools/mcp_client.rs"
- "src/tools/mcp_deferred.rs"
- "src/tools/mcp_protocol.rs"
- "src/tools/mcp_tool.rs"
- "src/tools/mcp_transport.rs"
"tool:memory":
- changed-files:
- any-glob-to-any-file:
- "src/tools/memory_forget.rs"
- "src/tools/memory_recall.rs"
- "src/tools/memory_store.rs"
"tool:microsoft365":
- changed-files:
- any-glob-to-any-file:
- "src/tools/microsoft365/**"
"tool:shell":
- changed-files:
- any-glob-to-any-file:
- "src/tools/shell.rs"
- "src/tools/node_tool.rs"
- "src/tools/cli_discovery.rs"
"tool:sop":
- changed-files:
- any-glob-to-any-file:
- "src/tools/sop_advance.rs"
- "src/tools/sop_approve.rs"
- "src/tools/sop_execute.rs"
- "src/tools/sop_list.rs"
- "src/tools/sop_status.rs"
"tool:web":
- changed-files:
- any-glob-to-any-file:
- "src/tools/web_fetch.rs"
- "src/tools/web_search_tool.rs"
- "src/tools/web_search_provider_routing.rs"
- "src/tools/http_request.rs"
"tool:security":
- changed-files:
- any-glob-to-any-file:
- "src/tools/security_ops.rs"
- "src/tools/verifiable_intent.rs"
"tool:cloud":
- changed-files:
- any-glob-to-any-file:
- "src/tools/cloud_ops.rs"
- "src/tools/cloud_patterns.rs"
"tunnel":
- changed-files:
- any-glob-to-any-file:

View File

@ -2,7 +2,7 @@
Describe this PR in 2-5 bullets:
- Base branch target (`master` for all contributions):
- Base branch target (`main` or `dev`; direct `main` PRs are allowed):
- Problem:
- Why it matters:
- What changed:

39
.github/release/canary-policy.json vendored Normal file
View File

@ -0,0 +1,39 @@
{
"schema_version": "zeroclaw.canary-policy.v1",
"release_channel": "stable",
"observation_window_minutes": 60,
"minimum_sample_size": 500,
"cohorts": [
{
"name": "canary-5pct",
"traffic_percent": 5,
"duration_minutes": 20
},
{
"name": "canary-20pct",
"traffic_percent": 20,
"duration_minutes": 20
},
{
"name": "canary-50pct",
"traffic_percent": 50,
"duration_minutes": 20
},
{
"name": "canary-100pct",
"traffic_percent": 100,
"duration_minutes": 60
}
],
"observability_signals": [
"error_rate",
"crash_rate",
"p95_latency_ms",
"sample_size"
],
"thresholds": {
"max_error_rate": 0.02,
"max_crash_rate": 0.01,
"max_p95_latency_ms": 1200
}
}

10
.github/release/docs-deploy-policy.json vendored Normal file
View File

@ -0,0 +1,10 @@
{
"schema_version": "zeroclaw.docs-deploy-policy.v1",
"production_branch": "main",
"allow_manual_production_dispatch": true,
"require_preview_evidence_on_manual_production": true,
"allow_manual_rollback_dispatch": true,
"rollback_ref_must_be_ancestor_of_production_branch": true,
"docs_preview_retention_days": 14,
"docs_guard_artifact_retention_days": 21
}

18
.github/release/ghcr-tag-policy.json vendored Normal file
View File

@ -0,0 +1,18 @@
{
"schema_version": "zeroclaw.ghcr-tag-policy.v1",
"release_tag_regex": "^v[0-9]+\\.[0-9]+\\.[0-9]+$",
"sha_tag_prefix": "sha-",
"sha_tag_length": 12,
"latest_tag": "latest",
"require_latest_on_release": true,
"immutable_tag_classes": [
"release",
"sha"
],
"rollback_priority": [
"sha",
"release"
],
"contract_artifact_retention_days": 21,
"scan_artifact_retention_days": 14
}

View File

@ -0,0 +1,17 @@
{
"schema_version": "zeroclaw.ghcr-vulnerability-policy.v1",
"required_tag_classes": [
"release",
"sha",
"latest"
],
"blocking_severities": [
"HIGH",
"CRITICAL"
],
"max_blocking_findings_per_tag": 0,
"require_blocking_count_parity": true,
"require_artifact_id_parity": true,
"scan_artifact_retention_days": 14,
"audit_artifact_retention_days": 21
}

View File

@ -0,0 +1,9 @@
{
"schema_version": "zeroclaw.nightly-owner-routing.v1",
"owners": {
"default": "@chumyin",
"whatsapp-web": "@chumyin",
"browser-native": "@chumyin",
"nightly-all-features": "@chumyin"
}
}

View File

@ -0,0 +1,33 @@
{
"schema_version": "zeroclaw.prerelease-stage-gates.v1",
"stage_order": ["alpha", "beta", "rc", "stable"],
"required_previous_stage": {
"beta": "alpha",
"rc": "beta",
"stable": "rc"
},
"required_checks": {
"alpha": [
"CI Required Gate",
"Security Audit"
],
"beta": [
"CI Required Gate",
"Security Audit",
"Feature Matrix Summary"
],
"rc": [
"CI Required Gate",
"Security Audit",
"Feature Matrix Summary",
"Nightly Summary & Routing"
],
"stable": [
"CI Required Gate",
"Security Audit",
"Feature Matrix Summary",
"Verify Artifact Set",
"Nightly Summary & Routing"
]
}
}

View File

@ -0,0 +1,30 @@
{
"schema_version": "zeroclaw.release-artifact-contract.v1",
"release_archive_patterns": [
"zeroclaw-x86_64-unknown-linux-gnu.tar.gz",
"zeroclaw-x86_64-unknown-linux-musl.tar.gz",
"zeroclaw-aarch64-unknown-linux-gnu.tar.gz",
"zeroclaw-aarch64-unknown-linux-musl.tar.gz",
"zeroclaw-armv7-unknown-linux-gnueabihf.tar.gz",
"zeroclaw-armv7-linux-androideabi.tar.gz",
"zeroclaw-aarch64-linux-android.tar.gz",
"zeroclaw-x86_64-unknown-freebsd.tar.gz",
"zeroclaw-x86_64-apple-darwin.tar.gz",
"zeroclaw-aarch64-apple-darwin.tar.gz",
"zeroclaw-x86_64-pc-windows-msvc.zip"
],
"required_manifest_files": [
"release-manifest.json",
"release-manifest.md",
"SHA256SUMS"
],
"required_sbom_files": [
"zeroclaw.cdx.json",
"zeroclaw.spdx.json"
],
"required_notice_files": [
"LICENSE-APACHE",
"LICENSE-MIT",
"NOTICE"
]
}

View File

@ -0,0 +1,33 @@
{
"schema_version": "zeroclaw.deny-governance.v1",
"advisories": [
{
"id": "RUSTSEC-2025-0141",
"owner": "repo-maintainers",
"reason": "Transitive via probe-rs in current release path; tracked for replacement when probe-rs updates.",
"ticket": "RMN-21",
"expires_on": "2026-12-31"
},
{
"id": "RUSTSEC-2024-0384",
"owner": "repo-maintainers",
"reason": "Upstream rust-nostr advisory mitigation is still in progress; monitor until released fix lands.",
"ticket": "RMN-21",
"expires_on": "2026-12-31"
},
{
"id": "RUSTSEC-2024-0388",
"owner": "repo-maintainers",
"reason": "Transitive via matrix-sdk indexeddb dependency chain in current matrix release line; track removal when upstream drops derivative.",
"ticket": "RMN-21",
"expires_on": "2026-12-31"
},
{
"id": "RUSTSEC-2024-0436",
"owner": "repo-maintainers",
"reason": "Transitive via wasmtime dependency stack; tracked until upstream removes or replaces paste.",
"ticket": "RMN-21",
"expires_on": "2026-12-31"
}
]
}

View File

@ -0,0 +1,56 @@
{
"schema_version": "zeroclaw.secrets-governance.v1",
"paths": [
{
"pattern": "src/security/leak_detector\\.rs",
"owner": "repo-maintainers",
"reason": "Fixture patterns are intentionally embedded for regression tests in leak detector logic.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
},
{
"pattern": "src/agent/loop_\\.rs",
"owner": "repo-maintainers",
"reason": "Contains escaped template snippets used for command orchestration and parser coverage.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
},
{
"pattern": "src/security/secrets\\.rs",
"owner": "repo-maintainers",
"reason": "Contains detector test vectors and redaction examples required for secret scanning tests.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
},
{
"pattern": "docs/(i18n/vi/|vi/)?zai-glm-setup\\.md",
"owner": "repo-maintainers",
"reason": "Documentation contains literal environment variable placeholders for onboarding commands.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
},
{
"pattern": "\\.github/workflows/pub-release\\.yml",
"owner": "repo-maintainers",
"reason": "Release workflow emits masked authorization header examples during registry smoke checks.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
}
],
"regexes": [
{
"pattern": "Authorization: Bearer \\$\\{[^}]+\\}",
"owner": "repo-maintainers",
"reason": "Intentional placeholder used in docs/workflow snippets for safe header examples.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
},
{
"pattern": "curl -sS -o /tmp/ghcr-release-manifest\\.json -w \"%\\{http_code\\}\"",
"owner": "repo-maintainers",
"reason": "Release smoke command string is non-secret telemetry and should not be flagged as credential leakage.",
"ticket": "RMN-13",
"expires_on": "2026-12-31"
}
]
}

View File

@ -0,0 +1,5 @@
{
"schema_version": "zeroclaw.unsafe-audit-governance.v1",
"ignore_paths": [],
"ignore_pattern_ids": []
}

View File

@ -10,8 +10,26 @@ Subdirectories are not valid locations for workflow entry files.
Repository convention:
1. Keep runnable workflow entry files at `.github/workflows/` root.
2. Keep cross-tooling/local CI scripts under `dev/` or `scripts/ci/` when used outside Actions.
2. Keep workflow-only helper scripts under `.github/workflows/scripts/`.
3. Keep cross-tooling/local CI scripts under `scripts/ci/` when they are used outside Actions.
Workflow behavior documentation in this directory:
- `.github/workflows/master-branch-flow.md`
- `.github/workflows/main-branch-flow.md`
Current workflow helper scripts:
- `.github/workflows/scripts/ci_license_file_owner_guard.js`
- `.github/workflows/scripts/lint_feedback.js`
- `.github/workflows/scripts/pr_auto_response_contributor_tier.js`
- `.github/workflows/scripts/pr_auto_response_labeled_routes.js`
- `.github/workflows/scripts/pr_check_status_nudge.js`
- `.github/workflows/scripts/pr_intake_checks.js`
- `.github/workflows/scripts/pr_labeler.js`
- `.github/workflows/scripts/test_benchmarks_pr_comment.js`
Release/CI policy assets introduced for advanced delivery lanes:
- `.github/release/nightly-owner-routing.json`
- `.github/release/canary-policy.json`
- `.github/release/prerelease-stage-gates.json`

View File

@ -1,175 +0,0 @@
name: Quality Gate
on:
pull_request:
branches: [master]
concurrency:
group: checks-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
jobs:
lint:
name: Lint
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
components: rustfmt, clippy
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
- name: Check formatting
run: cargo fmt --all -- --check
- name: Clippy
run: cargo clippy --all-targets -- -D warnings
test:
name: Test
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
- name: Install mold linker
run: |
sudo apt-get update -qq
sudo apt-get install -y mold
- name: Install cargo-nextest
run: curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin
- name: Run tests
run: cargo nextest run --locked
env:
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER: clang
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
build:
name: Build ${{ matrix.target }}
runs-on: ${{ matrix.os }}
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
include:
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
- os: macos-14
target: aarch64-apple-darwin
- os: windows-latest
target: x86_64-pc-windows-msvc
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
if: runner.os != 'Windows'
- name: Install mold linker
if: runner.os == 'Linux'
run: |
sudo apt-get update -qq
sudo apt-get install -y mold
- name: Ensure web/dist placeholder exists
shell: bash
run: mkdir -p web/dist && touch web/dist/.gitkeep
- name: Build release
shell: bash
run: cargo build --profile ci --locked --target ${{ matrix.target }}
env:
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER: clang
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
security:
name: Security Audit
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
- name: Install cargo-audit
run: cargo install cargo-audit --locked
- name: Install cargo-deny
run: cargo install cargo-deny --locked
- name: Audit dependencies
run: cargo audit
- name: Check licenses and sources
run: cargo deny check licenses sources
check-32bit:
name: "Check (32-bit)"
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: i686-unknown-linux-gnu
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
- name: Install 32-bit libs
run: sudo apt-get update && sudo apt-get install -y gcc-multilib
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
- name: Cargo check (32-bit, no default features)
run: cargo check --target i686-unknown-linux-gnu --no-default-features
# Composite status check — branch protection only needs to require this
# single job instead of tracking every matrix leg individually.
gate:
name: CI Required Gate
if: always()
needs: [lint, test, build, security, check-32bit]
runs-on: ubuntu-latest
steps:
- name: Check upstream job results
run: |
if [[ "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
echo "::error::One or more upstream jobs failed or were cancelled"
exit 1
fi
security-gate:
name: Security Required Gate
if: always()
needs: [security]
runs-on: ubuntu-latest
steps:
- name: Check security job result
run: |
if [[ "${{ needs.security.result }}" != "success" ]]; then
echo "::error::Security audit failed or was cancelled"
exit 1
fi

330
.github/workflows/ci-canary-gate.yml vendored Normal file
View File

@ -0,0 +1,330 @@
name: CI Canary Gate
on:
workflow_dispatch:
inputs:
mode:
description: "dry-run computes decision only; execute enables canary dispatch"
required: true
default: dry-run
type: choice
options:
- dry-run
- execute
candidate_tag:
description: "Candidate release tag (e.g. v0.1.8-rc.1 or v0.1.8)"
required: false
default: ""
type: string
candidate_sha:
description: "Optional explicit candidate SHA"
required: false
default: ""
type: string
error_rate:
description: "Observed canary error rate (0.0-1.0)"
required: true
default: "0.0"
type: string
crash_rate:
description: "Observed canary crash rate (0.0-1.0)"
required: true
default: "0.0"
type: string
p95_latency_ms:
description: "Observed canary p95 latency in milliseconds"
required: true
default: "0"
type: string
sample_size:
description: "Observed canary sample size"
required: true
default: "0"
type: string
emit_repository_dispatch:
description: "Emit canary decision repository_dispatch event"
required: true
default: false
type: boolean
trigger_rollback_on_abort:
description: "Automatically dispatch CI Rollback Guard when canary decision is abort"
required: true
default: true
type: boolean
rollback_branch:
description: "Rollback integration branch used by CI Rollback Guard dispatch"
required: true
default: dev
type: choice
options:
- dev
- main
rollback_target_ref:
description: "Optional explicit rollback target ref passed to CI Rollback Guard"
required: false
default: ""
type: string
fail_on_violation:
description: "Fail on policy violations"
required: true
default: true
type: boolean
schedule:
- cron: "45 7 * * 1" # Weekly Monday 07:45 UTC
concurrency:
group: canary-gate-${{ github.event.inputs.candidate_tag || github.ref || github.run_id }}
cancel-in-progress: false
permissions:
contents: read
actions: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
canary-plan:
name: Canary Plan
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
outputs:
mode: ${{ steps.inputs.outputs.mode }}
candidate_tag: ${{ steps.inputs.outputs.candidate_tag }}
candidate_sha: ${{ steps.inputs.outputs.candidate_sha }}
trigger_rollback_on_abort: ${{ steps.inputs.outputs.trigger_rollback_on_abort }}
rollback_branch: ${{ steps.inputs.outputs.rollback_branch }}
rollback_target_ref: ${{ steps.inputs.outputs.rollback_target_ref }}
decision: ${{ steps.extract.outputs.decision }}
ready_to_execute: ${{ steps.extract.outputs.ready_to_execute }}
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Resolve canary inputs
id: inputs
shell: bash
run: |
set -euo pipefail
mode="dry-run"
candidate_tag=""
candidate_sha=""
error_rate="0.0"
crash_rate="0.0"
p95_latency_ms="0"
sample_size="0"
trigger_rollback_on_abort="true"
rollback_branch="dev"
rollback_target_ref=""
# Scheduled audits may not have live canary telemetry; report violations without failing by default.
fail_on_violation="false"
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
mode="${{ github.event.inputs.mode || 'dry-run' }}"
candidate_tag="${{ github.event.inputs.candidate_tag || '' }}"
candidate_sha="${{ github.event.inputs.candidate_sha || '' }}"
error_rate="${{ github.event.inputs.error_rate || '0.0' }}"
crash_rate="${{ github.event.inputs.crash_rate || '0.0' }}"
p95_latency_ms="${{ github.event.inputs.p95_latency_ms || '0' }}"
sample_size="${{ github.event.inputs.sample_size || '0' }}"
trigger_rollback_on_abort="${{ github.event.inputs.trigger_rollback_on_abort || 'true' }}"
rollback_branch="${{ github.event.inputs.rollback_branch || 'dev' }}"
rollback_target_ref="${{ github.event.inputs.rollback_target_ref || '' }}"
fail_on_violation="${{ github.event.inputs.fail_on_violation || 'true' }}"
else
git fetch --tags --force origin
candidate_tag="$(git tag --list 'v*' --sort=-version:refname | head -n1)"
if [ -n "$candidate_tag" ]; then
candidate_sha="$(git rev-parse "${candidate_tag}^{commit}")"
fi
fi
{
echo "mode=${mode}"
echo "candidate_tag=${candidate_tag}"
echo "candidate_sha=${candidate_sha}"
echo "error_rate=${error_rate}"
echo "crash_rate=${crash_rate}"
echo "p95_latency_ms=${p95_latency_ms}"
echo "sample_size=${sample_size}"
echo "trigger_rollback_on_abort=${trigger_rollback_on_abort}"
echo "rollback_branch=${rollback_branch}"
echo "rollback_target_ref=${rollback_target_ref}"
echo "fail_on_violation=${fail_on_violation}"
} >> "$GITHUB_OUTPUT"
- name: Run canary guard
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
args=()
if [ "${{ steps.inputs.outputs.fail_on_violation }}" = "true" ]; then
args+=(--fail-on-violation)
fi
python3 scripts/ci/canary_guard.py \
--policy-file .github/release/canary-policy.json \
--candidate-tag "${{ steps.inputs.outputs.candidate_tag }}" \
--candidate-sha "${{ steps.inputs.outputs.candidate_sha }}" \
--mode "${{ steps.inputs.outputs.mode }}" \
--error-rate "${{ steps.inputs.outputs.error_rate }}" \
--crash-rate "${{ steps.inputs.outputs.crash_rate }}" \
--p95-latency-ms "${{ steps.inputs.outputs.p95_latency_ms }}" \
--sample-size "${{ steps.inputs.outputs.sample_size }}" \
--output-json artifacts/canary-guard.json \
--output-md artifacts/canary-guard.md \
"${args[@]}"
- name: Extract canary decision outputs
id: extract
shell: bash
run: |
set -euo pipefail
decision="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/canary-guard.json', encoding='utf-8'))
print(data.get('decision', 'hold'))
PY
)"
ready_to_execute="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/canary-guard.json', encoding='utf-8'))
print(str(bool(data.get('ready_to_execute', False))).lower())
PY
)"
echo "decision=${decision}" >> "$GITHUB_OUTPUT"
echo "ready_to_execute=${ready_to_execute}" >> "$GITHUB_OUTPUT"
- name: Emit canary audit event
if: always()
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/emit_audit_event.py \
--event-type canary_guard \
--input-json artifacts/canary-guard.json \
--output-json artifacts/audit-event-canary-guard.json \
--artifact-name canary-guard \
--retention-days 21
- name: Publish canary summary
if: always()
shell: bash
run: |
set -euo pipefail
cat artifacts/canary-guard.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload canary artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: canary-guard
path: |
artifacts/canary-guard.json
artifacts/canary-guard.md
artifacts/audit-event-canary-guard.json
if-no-files-found: error
retention-days: 21
canary-execute:
name: Canary Execute
needs: [canary-plan]
if: github.event_name == 'workflow_dispatch' && needs.canary-plan.outputs.mode == 'execute' && needs.canary-plan.outputs.ready_to_execute == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 10
permissions:
contents: write
actions: write
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Create canary marker tag
shell: bash
run: |
set -euo pipefail
marker_tag="canary-${{ needs.canary-plan.outputs.candidate_tag }}-${{ github.run_id }}"
git fetch --tags --force origin
git tag -a "$marker_tag" "${{ needs.canary-plan.outputs.candidate_sha }}" -m "Canary decision marker from run ${{ github.run_id }}"
git push origin "$marker_tag"
echo "Created marker tag: $marker_tag" >> "$GITHUB_STEP_SUMMARY"
- name: Emit canary repository dispatch
if: github.event.inputs.emit_repository_dispatch == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
await github.rest.repos.createDispatchEvent({
owner: context.repo.owner,
repo: context.repo.repo,
event_type: `canary_${{ needs.canary-plan.outputs.decision }}`,
client_payload: {
candidate_tag: "${{ needs.canary-plan.outputs.candidate_tag }}",
candidate_sha: "${{ needs.canary-plan.outputs.candidate_sha }}",
decision: "${{ needs.canary-plan.outputs.decision }}",
run_id: context.runId,
run_attempt: process.env.GITHUB_RUN_ATTEMPT,
source_sha: context.sha
}
});
- name: Trigger rollback guard workflow on abort
if: needs.canary-plan.outputs.decision == 'abort' && needs.canary-plan.outputs.trigger_rollback_on_abort == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const rollbackBranch = "${{ needs.canary-plan.outputs.rollback_branch }}" || "dev";
const rollbackTargetRef = `${{ needs.canary-plan.outputs.rollback_target_ref }}`.trim();
const workflowRef = process.env.GITHUB_REF_NAME || "dev";
const inputs = {
branch: rollbackBranch,
mode: "execute",
allow_non_ancestor: "false",
fail_on_violation: "true",
create_marker_tag: "true",
emit_repository_dispatch: "true",
};
if (rollbackTargetRef.length > 0) {
inputs.target_ref = rollbackTargetRef;
}
await github.rest.actions.createWorkflowDispatch({
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: "ci-rollback.yml",
ref: workflowRef,
inputs,
});
- name: Publish rollback trigger summary
if: needs.canary-plan.outputs.decision == 'abort'
shell: bash
run: |
set -euo pipefail
if [ "${{ needs.canary-plan.outputs.trigger_rollback_on_abort }}" = "true" ]; then
{
echo "### Canary Abort Rollback Trigger"
echo "- CI Rollback Guard dispatch: triggered"
echo "- Rollback branch: \`${{ needs.canary-plan.outputs.rollback_branch }}\`"
if [ -n "${{ needs.canary-plan.outputs.rollback_target_ref }}" ]; then
echo "- Rollback target ref: \`${{ needs.canary-plan.outputs.rollback_target_ref }}\`"
else
echo "- Rollback target ref: _auto (latest release tag strategy)_"
fi
} >> "$GITHUB_STEP_SUMMARY"
else
{
echo "### Canary Abort Rollback Trigger"
echo "- CI Rollback Guard dispatch: skipped (trigger_rollback_on_abort=false)"
} >> "$GITHUB_STEP_SUMMARY"
fi

160
.github/workflows/ci-change-audit.yml vendored Normal file
View File

@ -0,0 +1,160 @@
name: CI/CD Change Audit
on:
pull_request:
branches: [dev, main]
paths:
- ".github/workflows/**"
- ".github/release/**"
- ".github/codeql/**"
- "scripts/ci/**"
- ".github/dependabot.yml"
- "deny.toml"
- ".gitleaks.toml"
push:
branches: [dev, main]
paths:
- ".github/workflows/**"
- ".github/release/**"
- ".github/codeql/**"
- "scripts/ci/**"
- ".github/dependabot.yml"
- "deny.toml"
- ".gitleaks.toml"
workflow_dispatch:
inputs:
base_sha:
description: "Optional base SHA (default: HEAD~1)"
required: false
default: ""
type: string
fail_on_policy:
description: "Fail when audit policy violations are found"
required: true
default: true
type: boolean
concurrency:
group: ci-change-audit-${{ github.event.pull_request.number || github.sha || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
audit:
name: CI Change Audit
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Setup Python
shell: bash
run: |
set -euo pipefail
python3 --version
- name: Resolve base/head commits
id: refs
shell: bash
run: |
set -euo pipefail
head_sha="$(git rev-parse HEAD)"
if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]; then
# For pull_request events, checkout uses refs/pull/*/merge; HEAD^1 is the
# effective base commit for this synthesized merge and avoids stale base.sha.
if git rev-parse --verify HEAD^1 >/dev/null 2>&1; then
base_sha="$(git rev-parse HEAD^1)"
else
base_sha="${{ github.event.pull_request.base.sha }}"
fi
elif [ "${GITHUB_EVENT_NAME}" = "push" ]; then
base_sha="${{ github.event.before }}"
else
base_sha="${{ github.event.inputs.base_sha || '' }}"
if [ -z "$base_sha" ]; then
base_sha="$(git rev-parse HEAD~1)"
fi
fi
echo "base_sha=$base_sha" >> "$GITHUB_OUTPUT"
echo "head_sha=$head_sha" >> "$GITHUB_OUTPUT"
- name: Run CI helper script unit tests
shell: bash
run: |
set -euo pipefail
python3 -m unittest discover -s scripts/ci/tests -p 'test_*.py' -v
- name: Generate CI change audit
shell: bash
env:
BASE_SHA: ${{ steps.refs.outputs.base_sha }}
HEAD_SHA: ${{ steps.refs.outputs.head_sha }}
run: |
set -euo pipefail
mkdir -p artifacts
fail_on_policy="true"
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
fail_on_policy="${{ github.event.inputs.fail_on_policy || 'true' }}"
fi
cmd=(python3 scripts/ci/ci_change_audit.py
--base-sha "$BASE_SHA"
--head-sha "$HEAD_SHA"
--output-json artifacts/ci-change-audit.json
--output-md artifacts/ci-change-audit.md)
if [ "$fail_on_policy" = "true" ]; then
cmd+=(--fail-on-violations)
fi
"${cmd[@]}"
- name: Emit normalized audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/ci-change-audit.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type ci_change_audit \
--input-json artifacts/ci-change-audit.json \
--output-json artifacts/audit-event-ci-change-audit.json \
--artifact-name ci-change-audit-event \
--retention-days 14
fi
- name: Upload audit artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
if: always()
with:
name: ci-change-audit
path: artifacts/ci-change-audit.*
retention-days: 14
- name: Publish audit summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/ci-change-audit.md ]; then
cat artifacts/ci-change-audit.md >> "$GITHUB_STEP_SUMMARY"
else
echo "CI change audit report was not generated." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload audit event artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
if: always()
with:
name: ci-change-audit-event
path: artifacts/audit-event-ci-change-audit.json
if-no-files-found: ignore
retention-days: 14

View File

@ -0,0 +1,88 @@
---
name: Post-Release Validation
on:
release:
types: ["published"]
permissions:
contents: read
jobs:
validate:
name: Validate Published Release
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Download and verify release assets
shell: bash
env:
RELEASE_TAG: ${{ github.event.release.tag_name }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
echo "Validating release: ${RELEASE_TAG}"
# 1. Check release exists and is not draft
release_json="$(gh api \
"repos/${GITHUB_REPOSITORY}/releases/tags/${RELEASE_TAG}")"
is_draft="$(echo "$release_json" \
| python3 -c "import sys,json; print(json.load(sys.stdin)['draft'])")"
if [ "$is_draft" = "True" ]; then
echo "::warning::Release ${RELEASE_TAG} is still in draft."
fi
# 2. Check expected assets against artifact contract
asset_count="$(echo "$release_json" \
| python3 -c "import sys,json; print(len(json.load(sys.stdin)['assets']))")"
contract=".github/release/release-artifact-contract.json"
expected_count="$(python3 -c "
import json
c = json.load(open('$contract'))
total = sum(len(c[k]) for k in c if k != 'schema_version')
print(total)
")"
echo "Release has ${asset_count} assets (contract expects ${expected_count})"
if [ "$asset_count" -lt "$expected_count" ]; then
echo "::error::Expected >=${expected_count} release assets (from ${contract}), found ${asset_count}"
exit 1
fi
# 3. Download checksum file and one archive
gh release download "${RELEASE_TAG}" \
--pattern "SHA256SUMS" \
--dir /tmp/release-check
gh release download "${RELEASE_TAG}" \
--pattern "zeroclaw-x86_64-unknown-linux-gnu.tar.gz" \
--dir /tmp/release-check
# 4. Verify checksum
cd /tmp/release-check
if sha256sum --check --ignore-missing SHA256SUMS; then
echo "SHA256 checksum verification: passed"
else
echo "::error::SHA256 checksum verification failed"
exit 1
fi
# 5. Extract binary
tar xzf zeroclaw-x86_64-unknown-linux-gnu.tar.gz
- name: Smoke-test release binary
shell: bash
env:
RELEASE_TAG: ${{ github.event.release.tag_name }}
run: |
set -euo pipefail
cd /tmp/release-check
if ./zeroclaw --version | grep -Fq "${RELEASE_TAG#v}"; then
echo "Binary version check: passed (${RELEASE_TAG})"
else
actual="$(./zeroclaw --version)"
echo "::error::Binary --version mismatch: ${actual}"
exit 1
fi
echo "Post-release validation: all checks passed"

View File

@ -0,0 +1,112 @@
name: CI Provider Connectivity
on:
schedule:
- cron: "30 */6 * * *" # Every 6 hours
workflow_dispatch:
inputs:
fail_on_critical:
description: "Fail run when critical endpoints are unreachable"
required: true
default: false
type: boolean
pull_request:
branches: [dev, main]
paths:
- ".github/workflows/ci-provider-connectivity.yml"
- ".github/connectivity/providers.json"
- "scripts/ci/provider_connectivity_matrix.py"
push:
branches: [dev, main]
paths:
- ".github/workflows/ci-provider-connectivity.yml"
- ".github/connectivity/providers.json"
- "scripts/ci/provider_connectivity_matrix.py"
concurrency:
group: provider-connectivity-${{ github.event.pull_request.number || github.ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
probe:
name: Provider Connectivity Probe
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Run connectivity matrix probe
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
fail_on_critical="false"
case "${GITHUB_EVENT_NAME}" in
schedule)
fail_on_critical="true"
;;
workflow_dispatch)
fail_on_critical="${{ github.event.inputs.fail_on_critical || 'false' }}"
;;
esac
cmd=(python3 scripts/ci/provider_connectivity_matrix.py
--config .github/connectivity/providers.json
--output-json artifacts/provider-connectivity-matrix.json
--output-md artifacts/provider-connectivity-matrix.md)
if [ "$fail_on_critical" = "true" ]; then
cmd+=(--fail-on-critical)
fi
"${cmd[@]}"
- name: Emit normalized audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/provider-connectivity-matrix.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type provider_connectivity \
--input-json artifacts/provider-connectivity-matrix.json \
--output-json artifacts/audit-event-provider-connectivity.json \
--artifact-name provider-connectivity-audit-event \
--retention-days 14
fi
- name: Upload connectivity artifacts
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
if: always()
with:
name: provider-connectivity-matrix
path: artifacts/provider-connectivity-matrix.*
retention-days: 14
- name: Publish summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/provider-connectivity-matrix.md ]; then
cat artifacts/provider-connectivity-matrix.md >> "$GITHUB_STEP_SUMMARY"
else
echo "Provider connectivity report missing." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload audit event artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: provider-connectivity-audit-event
path: artifacts/audit-event-provider-connectivity.json
if-no-files-found: ignore
retention-days: 14

152
.github/workflows/ci-queue-hygiene.yml vendored Normal file
View File

@ -0,0 +1,152 @@
name: CI Queue Hygiene
on:
schedule:
- cron: "*/5 * * * *"
workflow_dispatch:
inputs:
apply:
description: "Cancel selected queued runs (false = dry-run report only)"
required: true
default: false
type: boolean
status:
description: "Queued-run status scope"
required: true
default: queued
type: choice
options:
- queued
- in_progress
- requested
- waiting
max_cancel:
description: "Maximum runs to cancel in one execution"
required: true
default: "120"
type: string
concurrency:
group: ci-queue-hygiene
cancel-in-progress: false
permissions:
actions: write
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
hygiene:
name: Queue Hygiene
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Run queue hygiene policy
id: hygiene
shell: bash
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
mkdir -p artifacts
status_scope="queued"
max_cancel="120"
apply_mode="true"
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
status_scope="${{ github.event.inputs.status || 'queued' }}"
max_cancel="${{ github.event.inputs.max_cancel || '120' }}"
apply_mode="${{ github.event.inputs.apply || 'false' }}"
fi
cmd=(python3 scripts/ci/queue_hygiene.py
--repo "${{ github.repository }}"
--status "${status_scope}"
--max-cancel "${max_cancel}"
--dedupe-workflow "CI Run"
--dedupe-workflow "Test E2E"
--dedupe-workflow "Docs Deploy"
--dedupe-workflow "PR Intake Checks"
--dedupe-workflow "PR Labeler"
--dedupe-workflow "PR Auto Responder"
--dedupe-workflow "Workflow Sanity"
--dedupe-workflow "PR Label Policy Check"
--priority-branch-prefix "release/"
--dedupe-include-non-pr
--non-pr-key branch
--output-json artifacts/queue-hygiene-report.json
--verbose)
if [ "${apply_mode}" = "true" ]; then
cmd+=(--apply)
fi
"${cmd[@]}"
{
echo "status_scope=${status_scope}"
echo "max_cancel=${max_cancel}"
echo "apply_mode=${apply_mode}"
} >> "$GITHUB_OUTPUT"
- name: Publish queue hygiene summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ ! -f artifacts/queue-hygiene-report.json ]; then
echo "Queue hygiene report not found." >> "$GITHUB_STEP_SUMMARY"
exit 0
fi
python3 - <<'PY'
from __future__ import annotations
import json
from pathlib import Path
report_path = Path("artifacts/queue-hygiene-report.json")
report = json.loads(report_path.read_text(encoding="utf-8"))
counts = report.get("counts", {})
results = report.get("results", {})
reasons = report.get("reason_counts", {})
lines = [
"### Queue Hygiene Report",
f"- Mode: `{report.get('mode', 'unknown')}`",
f"- Status scope: `{report.get('status_scope', 'queued')}`",
f"- Runs in scope: `{counts.get('runs_in_scope', 0)}`",
f"- Candidate runs before cap: `{counts.get('candidate_runs_before_cap', 0)}`",
f"- Candidate runs after cap: `{counts.get('candidate_runs_after_cap', 0)}`",
f"- Skipped by cap: `{counts.get('skipped_by_cap', 0)}`",
f"- Canceled: `{results.get('canceled', 0)}`",
f"- Cancel skipped (already terminal/conflict): `{results.get('skipped', 0)}`",
f"- Cancel failed: `{results.get('failed', 0)}`",
]
if reasons:
lines.append("")
lines.append("Reason counts:")
for reason, value in sorted(reasons.items()):
lines.append(f"- `{reason}`: `{value}`")
with Path("/tmp/queue-hygiene-summary.md").open("w", encoding="utf-8") as handle:
handle.write("\n".join(lines) + "\n")
PY
cat /tmp/queue-hygiene-summary.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload queue hygiene report
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: queue-hygiene-report
path: artifacts/queue-hygiene-report.json
if-no-files-found: ignore
retention-days: 14

View File

@ -0,0 +1,149 @@
name: CI Reproducible Build
on:
push:
branches: [dev, main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "scripts/ci/ensure_c_toolchain.sh"
- "scripts/ci/ensure_cargo_component.sh"
- "scripts/ci/ensure_cc.sh"
- "scripts/ci/reproducible_build_check.sh"
- "scripts/ci/self_heal_rust_toolchain.sh"
- ".github/workflows/ci-reproducible-build.yml"
pull_request:
branches: [dev, main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "scripts/ci/ensure_c_toolchain.sh"
- "scripts/ci/ensure_cargo_component.sh"
- "scripts/ci/ensure_cc.sh"
- "scripts/ci/reproducible_build_check.sh"
- "scripts/ci/self_heal_rust_toolchain.sh"
- ".github/workflows/ci-reproducible-build.yml"
schedule:
- cron: "45 5 * * 1" # Weekly Monday 05:45 UTC
workflow_dispatch:
inputs:
fail_on_drift:
description: "Fail workflow if deterministic hash drift is detected"
required: true
default: true
type: boolean
allow_build_id_drift:
description: "Treat GNU build-id-only drift as non-blocking"
required: true
default: true
type: boolean
concurrency:
group: repro-build-${{ github.event.pull_request.number || github.ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
reproducibility:
name: Reproducible Build Probe
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 75
env:
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- name: Setup Rust
uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure C toolchain for Rust builds
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- name: Run reproducible build check
shell: bash
run: |
set -euo pipefail
fail_on_drift="false"
allow_build_id_drift="true"
if [ "${GITHUB_EVENT_NAME}" = "schedule" ]; then
fail_on_drift="true"
elif [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
fail_on_drift="${{ github.event.inputs.fail_on_drift || 'true' }}"
allow_build_id_drift="${{ github.event.inputs.allow_build_id_drift || 'true' }}"
fi
FAIL_ON_DRIFT="$fail_on_drift" \
ALLOW_BUILD_ID_DRIFT="$allow_build_id_drift" \
OUTPUT_DIR="artifacts" \
./scripts/ci/reproducible_build_check.sh
- name: Emit normalized audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/reproducible-build.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type reproducible_build \
--input-json artifacts/reproducible-build.json \
--output-json artifacts/audit-event-reproducible-build.json \
--artifact-name reproducible-build-audit-event \
--retention-days 14
fi
- name: Upload reproducibility artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: reproducible-build
path: artifacts/reproducible-build*
retention-days: 14
- name: Upload audit event artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: reproducible-build-audit-event
path: artifacts/audit-event-reproducible-build.json
if-no-files-found: ignore
retention-days: 14
- name: Publish summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/reproducible-build.md ]; then
cat artifacts/reproducible-build.md >> "$GITHUB_STEP_SUMMARY"
else
echo "Reproducible build report missing." >> "$GITHUB_STEP_SUMMARY"
fi

258
.github/workflows/ci-rollback.yml vendored Normal file
View File

@ -0,0 +1,258 @@
name: CI Rollback Guard
on:
workflow_dispatch:
inputs:
branch:
description: "Integration branch this rollback targets"
required: true
default: dev
type: choice
options:
- dev
- main
mode:
description: "dry-run only plans; execute enables rollback marker/dispatch actions"
required: true
default: dry-run
type: choice
options:
- dry-run
- execute
target_ref:
description: "Optional explicit rollback target (tag/sha/ref). Empty = latest matching tag."
required: false
default: ""
type: string
allow_non_ancestor:
description: "Allow target not being ancestor of current head (warning-only)"
required: true
default: false
type: boolean
fail_on_violation:
description: "Fail workflow when guard violations are detected"
required: true
default: true
type: boolean
create_marker_tag:
description: "In execute mode, create and push rollback marker tag"
required: true
default: false
type: boolean
emit_repository_dispatch:
description: "In execute mode, emit repository_dispatch event `rollback_execute`"
required: true
default: false
type: boolean
schedule:
- cron: "15 7 * * 1" # Weekly Monday 07:15 UTC
concurrency:
group: ci-rollback-${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.branch || 'dev') || github.ref_name }}
cancel-in-progress: false
permissions:
contents: read
actions: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
rollback-plan:
name: Rollback Guard Plan
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
outputs:
branch: ${{ steps.plan.outputs.branch }}
mode: ${{ steps.plan.outputs.mode }}
target_sha: ${{ steps.plan.outputs.target_sha }}
target_ref: ${{ steps.plan.outputs.target_ref }}
ready_to_execute: ${{ steps.plan.outputs.ready_to_execute }}
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
ref: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.branch || 'dev') || github.ref_name }}
- name: Build rollback plan
id: plan
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
branch_input="${GITHUB_REF_NAME}"
mode_input="dry-run"
target_ref_input=""
allow_non_ancestor="false"
# Scheduled audits can surface historical rollback violations; report without blocking by default.
fail_on_violation="false"
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
branch_input="${{ github.event.inputs.branch || 'dev' }}"
mode_input="${{ github.event.inputs.mode || 'dry-run' }}"
target_ref_input="${{ github.event.inputs.target_ref || '' }}"
allow_non_ancestor="${{ github.event.inputs.allow_non_ancestor || 'false' }}"
fail_on_violation="${{ github.event.inputs.fail_on_violation || 'true' }}"
fi
cmd=(python3 scripts/ci/rollback_guard.py
--repo-root .
--branch "$branch_input"
--mode "$mode_input"
--strategy latest-release-tag
--tag-pattern "v*"
--output-json artifacts/rollback-plan.json
--output-md artifacts/rollback-plan.md)
if [ -n "$target_ref_input" ]; then
cmd+=(--target-ref "$target_ref_input")
fi
if [ "$allow_non_ancestor" = "true" ]; then
cmd+=(--allow-non-ancestor)
fi
if [ "$fail_on_violation" = "true" ]; then
cmd+=(--fail-on-violation)
fi
"${cmd[@]}"
target_sha="$(python3 - <<'PY'
import json
d = json.load(open("artifacts/rollback-plan.json", "r", encoding="utf-8"))
print(d.get("target_sha", ""))
PY
)"
target_ref="$(python3 - <<'PY'
import json
d = json.load(open("artifacts/rollback-plan.json", "r", encoding="utf-8"))
print(d.get("target_ref", ""))
PY
)"
ready_to_execute="$(python3 - <<'PY'
import json
d = json.load(open("artifacts/rollback-plan.json", "r", encoding="utf-8"))
print(str(d.get("ready_to_execute", False)).lower())
PY
)"
{
echo "branch=$branch_input"
echo "mode=$mode_input"
echo "target_sha=$target_sha"
echo "target_ref=$target_ref"
echo "ready_to_execute=$ready_to_execute"
} >> "$GITHUB_OUTPUT"
- name: Emit rollback audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/rollback-plan.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type rollback_guard \
--input-json artifacts/rollback-plan.json \
--output-json artifacts/audit-event-rollback-guard.json \
--artifact-name ci-rollback-plan \
--retention-days 21
fi
- name: Upload rollback artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ci-rollback-plan
path: |
artifacts/rollback-plan.*
artifacts/audit-event-rollback-guard.json
if-no-files-found: ignore
retention-days: 21
- name: Publish rollback summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/rollback-plan.md ]; then
cat artifacts/rollback-plan.md >> "$GITHUB_STEP_SUMMARY"
else
echo "Rollback plan markdown report missing." >> "$GITHUB_STEP_SUMMARY"
fi
rollback-execute:
name: Rollback Execute Actions
needs: [rollback-plan]
if: github.event_name == 'workflow_dispatch' && needs.rollback-plan.outputs.mode == 'execute' && needs.rollback-plan.outputs.ready_to_execute == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 15
permissions:
contents: write
actions: read
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
ref: ${{ needs.rollback-plan.outputs.branch }}
- name: Fetch tags
shell: bash
run: |
set -euo pipefail
git fetch --tags --force origin
- name: Create rollback marker tag
id: marker
if: github.event.inputs.create_marker_tag == 'true'
shell: bash
run: |
set -euo pipefail
target_sha="${{ needs.rollback-plan.outputs.target_sha }}"
if [ -z "$target_sha" ]; then
echo "Rollback guard did not resolve target_sha."
exit 1
fi
marker_tag="rollback-${{ needs.rollback-plan.outputs.branch }}-${{ github.run_id }}"
git tag -a "$marker_tag" "$target_sha" -m "Rollback marker from run ${{ github.run_id }}"
git push origin "$marker_tag"
echo "marker_tag=$marker_tag" >> "$GITHUB_OUTPUT"
- name: Emit rollback repository dispatch
if: github.event.inputs.emit_repository_dispatch == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
await github.rest.repos.createDispatchEvent({
owner: context.repo.owner,
repo: context.repo.repo,
event_type: "rollback_execute",
client_payload: {
branch: "${{ needs.rollback-plan.outputs.branch }}",
target_ref: "${{ needs.rollback-plan.outputs.target_ref }}",
target_sha: "${{ needs.rollback-plan.outputs.target_sha }}",
run_id: context.runId,
run_attempt: process.env.GITHUB_RUN_ATTEMPT,
source_sha: context.sha
}
});
- name: Publish execute summary
if: always()
shell: bash
run: |
set -euo pipefail
{
echo "### Rollback Execute Actions"
echo "- Branch: \`${{ needs.rollback-plan.outputs.branch }}\`"
echo "- Target ref: \`${{ needs.rollback-plan.outputs.target_ref }}\`"
echo "- Target sha: \`${{ needs.rollback-plan.outputs.target_sha }}\`"
if [ -n "${{ steps.marker.outputs.marker_tag || '' }}" ]; then
echo "- Marker tag: \`${{ steps.marker.outputs.marker_tag }}\`"
fi
} >> "$GITHUB_STEP_SUMMARY"

View File

@ -1,193 +1,466 @@
name: CI
name: CI Run
on:
push:
branches: [master]
pull_request:
branches: [master]
push:
branches: [dev, main]
pull_request:
branches: [dev, main]
merge_group:
branches: [dev, main]
concurrency:
group: ci-${{ github.event.pull_request.number || 'push-master' }}
cancel-in-progress: true
group: ci-run-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref_name || github.sha }}
cancel-in-progress: true
permissions:
contents: read
contents: read
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
lint:
name: Lint
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
components: rustfmt, clippy
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
changes:
name: Detect Change Scope
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
outputs:
docs_only: ${{ steps.scope.outputs.docs_only }}
docs_changed: ${{ steps.scope.outputs.docs_changed }}
rust_changed: ${{ steps.scope.outputs.rust_changed }}
workflow_changed: ${{ steps.scope.outputs.workflow_changed }}
ci_cd_changed: ${{ steps.scope.outputs.ci_cd_changed }}
docs_files: ${{ steps.scope.outputs.docs_files }}
base_sha: ${{ steps.scope.outputs.base_sha }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
- name: Detect docs-only changes
id: scope
shell: bash
env:
EVENT_NAME: ${{ github.event_name }}
BASE_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event_name == 'merge_group' && github.event.merge_group.base_sha || github.event.before }}
run: ./scripts/ci/detect_change_scope.sh
- name: Check formatting
run: cargo fmt --all -- --check
- name: Clippy
run: cargo clippy --all-targets -- -D warnings
lint-strict-delta:
name: Strict Delta Lint
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
components: clippy
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
- name: Run strict delta lint gate
run: bash scripts/ci/rust_strict_delta_gate.sh
lint:
name: Lint Gate (Format + Clippy + Strict Delta)
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 75
env:
BASE_SHA: ${{ github.event.pull_request.base.sha || github.event.before }}
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
components: rustfmt, clippy
- name: Ensure C toolchain for Rust builds
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-check
cache-bin: false
- name: Run rust quality gate
run: ./scripts/ci/rust_quality_gate.sh
- name: Run strict lint delta gate
env:
BASE_SHA: ${{ needs.changes.outputs.base_sha }}
run: ./scripts/ci/rust_strict_delta_gate.sh
test:
name: Test
runs-on: ubuntu-latest
timeout-minutes: 30
needs: [lint]
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
workspace-check:
name: Workspace Check
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 45
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-workspace-check
cache-bin: false
- name: Check workspace
run: cargo check --workspace --locked
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
package-check:
name: Package Check (${{ matrix.package }})
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 25
strategy:
fail-fast: false
matrix:
package: [zeroclaw-types, zeroclaw-core]
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-package-check
cache-bin: false
- name: Check package
run: cargo check -p ${{ matrix.package }} --locked
- name: Install mold linker
run: |
sudo apt-get update -qq
sudo apt-get install -y mold
- name: Install cargo-nextest
run: curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin
- name: Run tests
run: cargo nextest run --locked
test:
name: Test
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 120
env:
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER: clang
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure C toolchain for Rust builds
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-check
cache-bin: false
- name: Run tests with flake detection
shell: bash
env:
BLOCK_ON_FLAKE: ${{ vars.CI_BLOCK_ON_FLAKE_SUSPECTED || 'false' }}
run: |
set -euo pipefail
mkdir -p artifacts
build:
name: Build ${{ matrix.target }}
runs-on: ${{ matrix.os }}
timeout-minutes: 40
needs: [lint]
strategy:
fail-fast: false
matrix:
include:
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
- os: macos-14
target: aarch64-apple-darwin
- os: windows-latest
target: x86_64-pc-windows-msvc
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
if: runner.os != 'Windows'
toolchain_bin=""
if [ -n "${CARGO:-}" ]; then
toolchain_bin="$(dirname "${CARGO}")"
elif [ -n "${RUSTC:-}" ]; then
toolchain_bin="$(dirname "${RUSTC}")"
fi
- name: Install mold linker
if: runner.os == 'Linux'
run: |
sudo apt-get update -qq
sudo apt-get install -y mold
if [ -n "${toolchain_bin}" ] && [ -d "${toolchain_bin}" ]; then
case ":$PATH:" in
*":${toolchain_bin}:"*) ;;
*) export PATH="${toolchain_bin}:$PATH" ;;
esac
fi
- name: Ensure web/dist placeholder exists
shell: bash
run: mkdir -p web/dist && touch web/dist/.gitkeep
if cargo test --locked --verbose; then
echo '{"flake_suspected":false,"status":"success"}' > artifacts/flake-probe.json
exit 0
fi
- name: Build release
shell: bash
run: cargo build --profile ci --locked --target ${{ matrix.target }}
echo "::warning::First test run failed. Retrying for flake detection..."
if cargo test --locked --verbose; then
echo '{"flake_suspected":true,"status":"flake"}' > artifacts/flake-probe.json
echo "::warning::Flake suspected — test passed on retry"
if [ "${BLOCK_ON_FLAKE}" = "true" ]; then
echo "BLOCK_ON_FLAKE is set; failing on suspected flake."
exit 1
fi
exit 0
fi
echo '{"flake_suspected":false,"status":"failure"}' > artifacts/flake-probe.json
exit 1
- name: Publish flake probe summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/flake-probe.json ]; then
status=$(python3 -c "import json; print(json.load(open('artifacts/flake-probe.json'))['status'])")
flake=$(python3 -c "import json; print(json.load(open('artifacts/flake-probe.json'))['flake_suspected'])")
{
echo "### Test Flake Probe"
echo "- Status: \`${status}\`"
echo "- Flake suspected: \`${flake}\`"
} >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload flake probe artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: test-flake-probe
path: artifacts/flake-probe.*
if-no-files-found: ignore
retention-days: 14
build:
name: Build (Smoke)
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 90
env:
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER: clang
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
check-all-features:
name: Check (all features)
runs-on: ubuntu-latest
timeout-minutes: 20
needs: [lint]
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure C toolchain for Rust builds
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-build
cache-targets: true
cache-bin: false
- name: Build binary (smoke check)
env:
CARGO_BUILD_JOBS: 2
CI_SMOKE_BUILD_ATTEMPTS: 3
run: bash scripts/ci/smoke_build_retry.sh
- name: Check binary size
env:
BINARY_SIZE_HARD_LIMIT_MB: 28
BINARY_SIZE_ADVISORY_MB: 20
BINARY_SIZE_TARGET_MB: 5
run: bash scripts/ci/check_binary_size.sh target/release-fast/zeroclaw
- name: Install system dependencies
run: sudo apt-get update -qq && sudo apt-get install -y libudev-dev
docs-only:
name: Docs-Only Fast Path
needs: [changes]
if: needs.changes.outputs.docs_only == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
steps:
- name: Skip heavy jobs for docs-only change
run: echo "Docs-only change detected. Rust lint/test/build skipped."
- name: Ensure web/dist placeholder exists
run: mkdir -p web/dist && touch web/dist/.gitkeep
non-rust:
name: Non-Rust Fast Path
needs: [changes]
if: needs.changes.outputs.docs_only != 'true' && needs.changes.outputs.rust_changed != 'true'
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
steps:
- name: Skip Rust jobs for non-Rust change scope
run: echo "No Rust-impacting files changed. Rust lint/test/build skipped."
- name: Check all features
run: cargo check --features ci-all --locked
docs-quality:
name: Docs Quality
needs: [changes]
if: needs.changes.outputs.docs_changed == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Setup Node.js for markdown lint
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: "22"
docs-quality:
name: Docs Quality
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4
with:
node-version: 20
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
with:
python-version: "3.12"
- name: Markdown lint (changed lines only)
env:
BASE_SHA: ${{ needs.changes.outputs.base_sha }}
DOCS_FILES: ${{ needs.changes.outputs.docs_files }}
run: ./scripts/ci/docs_quality_gate.sh
- name: Run docs quality gate
run: bash scripts/ci/docs_quality_gate.sh
env:
BASE_SHA: ${{ github.event.pull_request.base.sha || github.event.before }}
- name: Collect added links
id: collect_links
shell: bash
env:
BASE_SHA: ${{ needs.changes.outputs.base_sha }}
DOCS_FILES: ${{ needs.changes.outputs.docs_files }}
run: |
set -euo pipefail
python3 ./scripts/ci/collect_changed_links.py \
--base "$BASE_SHA" \
--docs-files "$DOCS_FILES" \
--output .ci-added-links.txt
count=$(wc -l < .ci-added-links.txt | tr -d ' ')
echo "count=$count" >> "$GITHUB_OUTPUT"
if [ "$count" -gt 0 ]; then
echo "Added links queued for check:"
cat .ci-added-links.txt
else
echo "No added links found in changed docs lines."
fi
# Composite status check — branch protection requires this single job.
gate:
name: CI Required Gate
if: always()
needs: [lint, lint-strict-delta, test, build, docs-quality, check-all-features]
runs-on: ubuntu-latest
steps:
- name: Check upstream job results
env:
HAS_FAILURE: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
run: |
if [[ "$HAS_FAILURE" == "true" ]]; then
echo "::error::One or more upstream jobs failed or were cancelled"
exit 1
fi
- name: Link check (offline, added links only)
if: steps.collect_links.outputs.count != '0'
uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2
with:
fail: true
args: >-
--offline
--no-progress
--format detailed
.ci-added-links.txt
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Skip link check (no added links)
if: steps.collect_links.outputs.count == '0'
run: echo "No added links in changed docs lines. Link check skipped."
lint-feedback:
name: Lint Feedback
if: github.event_name == 'pull_request'
needs: [changes, lint, docs-quality]
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
permissions:
contents: read
pull-requests: write
issues: write
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Post actionable lint failure summary
if: always()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
env:
RUST_CHANGED: ${{ needs.changes.outputs.rust_changed }}
DOCS_CHANGED: ${{ needs.changes.outputs.docs_changed }}
LINT_RESULT: ${{ needs.lint.result }}
LINT_DELTA_RESULT: ${{ needs.lint.result }}
DOCS_RESULT: ${{ needs.docs-quality.result }}
with:
script: |
const script = require('./.github/workflows/scripts/lint_feedback.js');
await script({github, context, core});
license-file-owner-guard:
name: License File Owner Guard
needs: [changes]
if: github.event_name == 'pull_request'
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
permissions:
contents: read
pull-requests: read
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Enforce owner-only edits for root license files
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const script = require('./.github/workflows/scripts/ci_license_file_owner_guard.js');
await script({ github, context, core });
ci-required:
name: CI Required Gate
if: always()
needs: [changes, lint, workspace-check, package-check, test, build, docs-only, non-rust, docs-quality, lint-feedback, license-file-owner-guard]
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
steps:
- name: Enforce required status
shell: bash
run: |
set -euo pipefail
event_name="${{ github.event_name }}"
rust_changed="${{ needs.changes.outputs.rust_changed }}"
docs_changed="${{ needs.changes.outputs.docs_changed }}"
docs_result="${{ needs.docs-quality.result }}"
license_owner_result="${{ needs.license-file-owner-guard.result }}"
# --- Helper: enforce PR governance gates ---
check_pr_governance() {
if [ "$event_name" != "pull_request" ]; then return 0; fi
if [ "$license_owner_result" != "success" ]; then
echo "License file owner guard did not pass."
exit 1
fi
}
check_docs_quality() {
if [ "$docs_changed" = "true" ] && [ "$docs_result" != "success" ]; then
echo "Docs changed but docs-quality did not pass."
exit 1
fi
}
# --- Docs-only fast path ---
if [ "${{ needs.changes.outputs.docs_only }}" = "true" ]; then
check_pr_governance
check_docs_quality
echo "Docs-only fast path passed."
exit 0
fi
# --- Non-rust fast path ---
if [ "$rust_changed" != "true" ]; then
check_pr_governance
check_docs_quality
echo "Non-rust fast path passed."
exit 0
fi
# --- Rust change path ---
lint_result="${{ needs.lint.result }}"
workspace_check_result="${{ needs.workspace-check.result }}"
package_check_result="${{ needs.package-check.result }}"
test_result="${{ needs.test.result }}"
build_result="${{ needs.build.result }}"
echo "lint=${lint_result}"
echo "workspace-check=${workspace_check_result}"
echo "package-check=${package_check_result}"
echo "test=${test_result}"
echo "build=${build_result}"
echo "docs=${docs_result}"
echo "license_file_owner_guard=${license_owner_result}"
check_pr_governance
if [ "$lint_result" != "success" ] || [ "$workspace_check_result" != "success" ] || [ "$package_check_result" != "success" ] || [ "$test_result" != "success" ] || [ "$build_result" != "success" ]; then
echo "Required CI jobs did not pass: lint=${lint_result} workspace-check=${workspace_check_result} package-check=${package_check_result} test=${test_result} build=${build_result}"
exit 1
fi
check_docs_quality
echo "All required checks passed."

View File

@ -0,0 +1,150 @@
name: CI Supply Chain Provenance
on:
push:
branches: [dev, main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "scripts/ci/ensure_cc.sh"
- "scripts/ci/generate_provenance.py"
- ".github/workflows/ci-supply-chain-provenance.yml"
workflow_dispatch:
schedule:
- cron: "20 6 * * 1" # Weekly Monday 06:20 UTC
concurrency:
group: supply-chain-provenance-${{ github.ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
id-token: write
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
provenance:
name: Build + Provenance Bundle
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Setup Rust
uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- name: Activate toolchain binaries on PATH
shell: bash
run: |
set -euo pipefail
toolchain_bin="$(dirname "$(rustup which --toolchain 1.92.0 cargo)")"
echo "$toolchain_bin" >> "$GITHUB_PATH"
- name: Resolve host target
id: rust-meta
shell: bash
run: |
set -euo pipefail
host_target="$(rustup run 1.92.0 rustc -vV | sed -n 's/^host: //p')"
if [ -z "${host_target}" ]; then
echo "::error::Unable to resolve Rust host target."
exit 1
fi
echo "host_target=${host_target}" >> "$GITHUB_OUTPUT"
- name: Runner preflight (compiler + disk)
shell: bash
run: |
set -euo pipefail
./scripts/ci/ensure_cc.sh
echo "Runner: ${RUNNER_NAME:-unknown} (${RUNNER_OS:-unknown}/${RUNNER_ARCH:-unknown})"
free_kb="$(df -Pk . | awk 'NR==2 {print $4}')"
min_kb=$((10 * 1024 * 1024))
if [ "${free_kb}" -lt "${min_kb}" ]; then
echo "::error::Insufficient disk space on runner (<10 GiB free)."
df -h .
exit 1
fi
- name: Build release-fast artifact
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
host_target="${{ steps.rust-meta.outputs.host_target }}"
cargo build --profile release-fast --locked --target "$host_target"
cp "target/${host_target}/release-fast/zeroclaw" "artifacts/zeroclaw-${host_target}"
sha256sum "artifacts/zeroclaw-${host_target}" > "artifacts/zeroclaw-${host_target}.sha256"
- name: Generate provenance statement
shell: bash
run: |
set -euo pipefail
host_target="${{ steps.rust-meta.outputs.host_target }}"
python3 scripts/ci/generate_provenance.py \
--artifact "artifacts/zeroclaw-${host_target}" \
--subject-name "zeroclaw-${host_target}" \
--output "artifacts/provenance-${host_target}.intoto.json"
- name: Install cosign
uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
- name: Sign provenance bundle
shell: bash
run: |
set -euo pipefail
host_target="${{ steps.rust-meta.outputs.host_target }}"
statement="artifacts/provenance-${host_target}.intoto.json"
cosign sign-blob --yes \
--bundle="${statement}.sigstore.json" \
--output-signature="${statement}.sig" \
--output-certificate="${statement}.pem" \
"${statement}"
- name: Emit normalized audit event
shell: bash
run: |
set -euo pipefail
host_target="${{ steps.rust-meta.outputs.host_target }}"
python3 scripts/ci/emit_audit_event.py \
--event-type supply_chain_provenance \
--input-json "artifacts/provenance-${host_target}.intoto.json" \
--output-json "artifacts/audit-event-supply-chain-provenance.json" \
--artifact-name supply-chain-provenance \
--retention-days 30
- name: Upload provenance artifacts
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: supply-chain-provenance
path: artifacts/*
retention-days: 30
- name: Publish summary
shell: bash
run: |
set -euo pipefail
host_target="${{ steps.rust-meta.outputs.host_target }}"
{
echo "### Supply Chain Provenance"
echo "- Target: \`${host_target}\`"
echo "- Artifact: \`artifacts/zeroclaw-${host_target}\`"
echo "- Statement: \`artifacts/provenance-${host_target}.intoto.json\`"
echo "- Signature: \`artifacts/provenance-${host_target}.intoto.json.sig\`"
} >> "$GITHUB_STEP_SUMMARY"

View File

@ -1,82 +0,0 @@
name: Cross-Platform Build
on:
workflow_dispatch:
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
jobs:
web:
name: Build Web Dashboard
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: web/package-lock.json
- name: Build web dashboard
run: cd web && npm ci && npm run build
- uses: actions/upload-artifact@v4
with:
name: web-dist
path: web/dist/
retention-days: 1
build:
name: Build ${{ matrix.target }}
needs: [web]
runs-on: ${{ matrix.os }}
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
include:
- os: ubuntu-latest
target: aarch64-unknown-linux-gnu
cross_compiler: gcc-aarch64-linux-gnu
linker_env: CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER
linker: aarch64-linux-gnu-gcc
- os: ubuntu-latest
target: armv7-unknown-linux-gnueabihf
cross_compiler: gcc-arm-linux-gnueabihf
linker_env: CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER
linker: arm-linux-gnueabihf-gcc
- os: macos-15-intel
target: x86_64-apple-darwin
- os: windows-latest
target: x86_64-pc-windows-msvc
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
if: runner.os != 'Windows'
- uses: actions/download-artifact@v8
with:
name: web-dist
path: web/dist/
- name: Install cross compiler
if: matrix.cross_compiler
run: |
sudo apt-get update -qq
sudo apt-get install -y ${{ matrix.cross_compiler }}
- name: Build release
shell: bash
run: |
if [ -n "${{ matrix.linker_env || '' }}" ] && [ -n "${{ matrix.linker || '' }}" ]; then
export "${{ matrix.linker_env }}=${{ matrix.linker }}"
fi
cargo build --release --locked --features channel-matrix,channel-lark,memory-postgres --target ${{ matrix.target }}

56
.github/workflows/deploy-web.yml vendored Normal file
View File

@ -0,0 +1,56 @@
name: Deploy Web to GitHub Pages
on:
push:
branches: [main]
paths:
- 'web/**'
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
build:
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Setup Node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: '20'
- name: Install dependencies
working-directory: ./web
run: npm ci
- name: Build
working-directory: ./web
run: npm run build
- name: Setup Pages
uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # v5
- name: Upload artifact
uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4
with:
path: ./web/dist
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
needs: build
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4

301
.github/workflows/docs-deploy.yml vendored Normal file
View File

@ -0,0 +1,301 @@
name: Docs Deploy
on:
pull_request:
branches: [dev, main]
paths:
- "docs/**"
- "README*.md"
- ".github/workflows/docs-deploy.yml"
- "scripts/ci/docs_quality_gate.sh"
- "scripts/ci/collect_changed_links.py"
- ".github/release/docs-deploy-policy.json"
- "scripts/ci/docs_deploy_guard.py"
push:
branches: [dev, main]
paths:
- "docs/**"
- "README*.md"
- ".github/workflows/docs-deploy.yml"
- "scripts/ci/docs_quality_gate.sh"
- "scripts/ci/collect_changed_links.py"
- ".github/release/docs-deploy-policy.json"
- "scripts/ci/docs_deploy_guard.py"
workflow_dispatch:
inputs:
deploy_target:
description: "preview uploads artifact only; production deploys to Pages"
required: true
default: preview
type: choice
options:
- preview
- production
preview_evidence_run_url:
description: "Required for manual production deploys when policy enforces preview promotion evidence"
required: false
default: ""
rollback_ref:
description: "Optional rollback source ref (tag/sha/ref) for manual production dispatch"
required: false
default: ""
concurrency:
group: docs-deploy-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref_name || github.sha }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
docs-quality:
name: Docs Quality Gate
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
outputs:
docs_files: ${{ steps.scope.outputs.docs_files }}
base_sha: ${{ steps.scope.outputs.base_sha }}
deploy_target: ${{ steps.deploy_guard.outputs.deploy_target }}
deploy_mode: ${{ steps.deploy_guard.outputs.deploy_mode }}
source_ref: ${{ steps.deploy_guard.outputs.source_ref }}
production_branch_ref: ${{ steps.deploy_guard.outputs.production_branch_ref }}
ready_to_deploy: ${{ steps.deploy_guard.outputs.ready_to_deploy }}
docs_preview_retention_days: ${{ steps.deploy_guard.outputs.docs_preview_retention_days }}
docs_guard_artifact_retention_days: ${{ steps.deploy_guard.outputs.docs_guard_artifact_retention_days }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: "22"
- name: Resolve docs diff scope
id: scope
shell: bash
run: |
set -euo pipefail
base_sha=""
docs_files=""
if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]; then
base_sha="${{ github.event.pull_request.base.sha }}"
docs_files="$(git diff --name-only "$base_sha" HEAD | awk '/\.md$|\.mdx$|^README/ {print}')"
elif [ "${GITHUB_EVENT_NAME}" = "push" ]; then
base_sha="${{ github.event.before }}"
if [ -n "$base_sha" ] && [ "$base_sha" != "0000000000000000000000000000000000000000" ]; then
docs_files="$(git diff --name-only "$base_sha" HEAD | awk '/\.md$|\.mdx$|^README/ {print}')"
fi
else
docs_files="$(git ls-files 'docs/**/*.md' 'README*.md')"
fi
{
echo "base_sha=${base_sha}"
echo "docs_files<<EOF"
printf '%s\n' "$docs_files"
echo "EOF"
} >> "$GITHUB_OUTPUT"
- name: Validate docs deploy contract
id: deploy_guard
shell: bash
env:
INPUT_DEPLOY_TARGET: ${{ github.event.inputs.deploy_target || '' }}
INPUT_PREVIEW_EVIDENCE_RUN_URL: ${{ github.event.inputs.preview_evidence_run_url || '' }}
INPUT_ROLLBACK_REF: ${{ github.event.inputs.rollback_ref || '' }}
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/docs_deploy_guard.py \
--repo-root "$PWD" \
--event-name "${GITHUB_EVENT_NAME}" \
--git-ref "${GITHUB_REF}" \
--git-sha "${GITHUB_SHA}" \
--input-deploy-target "${INPUT_DEPLOY_TARGET}" \
--input-preview-evidence-run-url "${INPUT_PREVIEW_EVIDENCE_RUN_URL}" \
--input-rollback-ref "${INPUT_ROLLBACK_REF}" \
--policy-file .github/release/docs-deploy-policy.json \
--output-json artifacts/docs-deploy-guard.json \
--output-md artifacts/docs-deploy-guard.md \
--github-output-file "$GITHUB_OUTPUT" \
--fail-on-violation
- name: Emit docs deploy guard audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/docs-deploy-guard.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type docs_deploy_guard \
--input-json artifacts/docs-deploy-guard.json \
--output-json artifacts/audit-event-docs-deploy-guard.json \
--artifact-name docs-deploy-guard \
--retention-days 21
fi
- name: Publish docs deploy guard summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/docs-deploy-guard.md ]; then
cat artifacts/docs-deploy-guard.md >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload docs deploy guard artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: docs-deploy-guard
path: |
artifacts/docs-deploy-guard.json
artifacts/docs-deploy-guard.md
artifacts/audit-event-docs-deploy-guard.json
if-no-files-found: ignore
retention-days: ${{ steps.deploy_guard.outputs.docs_guard_artifact_retention_days || 21 }}
- name: Setup Node.js for markdown lint
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: "22"
- name: Markdown quality gate
env:
BASE_SHA: ${{ steps.scope.outputs.base_sha }}
DOCS_FILES: ${{ steps.scope.outputs.docs_files }}
run: ./scripts/ci/docs_quality_gate.sh
- name: Collect added links
id: links
if: github.event_name != 'workflow_dispatch'
shell: bash
env:
BASE_SHA: ${{ steps.scope.outputs.base_sha }}
DOCS_FILES: ${{ steps.scope.outputs.docs_files }}
run: |
set -euo pipefail
python3 ./scripts/ci/collect_changed_links.py \
--base "$BASE_SHA" \
--docs-files "$DOCS_FILES" \
--output .ci-added-links.txt
count=$(wc -l < .ci-added-links.txt | tr -d ' ')
echo "count=$count" >> "$GITHUB_OUTPUT"
- name: Link check (added links)
if: github.event_name != 'workflow_dispatch' && steps.links.outputs.count != '0'
uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2
with:
fail: true
args: >-
--offline
--no-progress
--format detailed
.ci-added-links.txt
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Skip link check (none added)
if: github.event_name != 'workflow_dispatch' && steps.links.outputs.count == '0'
run: echo "No added links detected in changed docs lines."
docs-preview:
name: Docs Preview Artifact
needs: [docs-quality]
if: github.event_name == 'pull_request' || (github.event_name == 'workflow_dispatch' && github.event.inputs.deploy_target == 'preview')
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Build preview bundle
shell: bash
run: |
set -euo pipefail
rm -rf site
mkdir -p site/docs
cp -R docs/. site/docs/
cp README.md site/README.md
cat > site/index.md <<'EOF'
# ZeroClaw Docs Preview
This preview bundle is produced by `.github/workflows/docs-deploy.yml`.
- [Repository README](./README.md)
- [Docs Home](./docs/README.md)
EOF
- name: Upload preview artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: docs-preview
path: site/**
if-no-files-found: error
retention-days: ${{ needs.docs-quality.outputs.docs_preview_retention_days || 14 }}
docs-deploy:
name: Deploy Docs to GitHub Pages
needs: [docs-quality]
if: needs.docs-quality.outputs.deploy_target == 'production' && needs.docs-quality.outputs.ready_to_deploy == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
permissions:
contents: read
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ needs.docs-quality.outputs.source_ref }}
- name: Build deploy bundle
shell: bash
run: |
set -euo pipefail
rm -rf site
mkdir -p site/docs
cp -R docs/. site/docs/
cp README.md site/README.md
cat > site/index.md <<'EOF'
# ZeroClaw Documentation
This site is deployed automatically from `main` by `.github/workflows/docs-deploy.yml`.
- [Repository README](./README.md)
- [Docs Home](./docs/README.md)
EOF
- name: Publish deploy source summary
shell: bash
run: |
{
echo "## Docs Deploy Source"
echo "- Deploy mode: \`${{ needs.docs-quality.outputs.deploy_mode }}\`"
echo "- Source ref: \`${{ needs.docs-quality.outputs.source_ref }}\`"
echo "- Production branch ref: \`${{ needs.docs-quality.outputs.production_branch_ref }}\`"
} >> "$GITHUB_STEP_SUMMARY"
- name: Setup Pages
uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # v5
- name: Upload Pages artifact
uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4
with:
path: site
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4

380
.github/workflows/feature-matrix.yml vendored Normal file
View File

@ -0,0 +1,380 @@
name: Feature Matrix
on:
push:
branches: [dev]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "scripts/ci/nightly_matrix_report.py"
- ".github/release/nightly-owner-routing.json"
- ".github/workflows/feature-matrix.yml"
pull_request:
branches: [dev, main]
types: [labeled]
merge_group:
branches: [dev, main]
schedule:
- cron: "30 4 * * 1" # Weekly Monday 04:30 UTC
- cron: "15 3 * * *" # Daily 03:15 UTC (nightly profile)
workflow_dispatch:
inputs:
profile:
description: "compile = merge-gate matrix, nightly = integration-oriented lane commands"
required: true
default: compile
type: choice
options:
- compile
- nightly
fail_on_failure:
description: "Fail summary job when any lane fails"
required: true
default: true
type: boolean
concurrency:
group: feature-matrix-${{ github.event.pull_request.number || github.ref || github.run_id }}-${{ github.event.inputs.profile || 'auto' }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
resolve-profile:
name: Resolve Matrix Profile
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
outputs:
profile: ${{ steps.resolve.outputs.profile }}
lane_job_prefix: ${{ steps.resolve.outputs.lane_job_prefix }}
summary_job_name: ${{ steps.resolve.outputs.summary_job_name }}
lane_retention_days: ${{ steps.resolve.outputs.lane_retention_days }}
lane_timeout_minutes: ${{ steps.resolve.outputs.lane_timeout_minutes }}
max_attempts: ${{ steps.resolve.outputs.max_attempts }}
summary_artifact_name: ${{ steps.resolve.outputs.summary_artifact_name }}
summary_json_name: ${{ steps.resolve.outputs.summary_json_name }}
summary_md_name: ${{ steps.resolve.outputs.summary_md_name }}
lane_artifact_prefix: ${{ steps.resolve.outputs.lane_artifact_prefix }}
fail_on_failure: ${{ steps.resolve.outputs.fail_on_failure }}
collect_history: ${{ steps.resolve.outputs.collect_history }}
steps:
- name: Resolve effective profile
id: resolve
shell: bash
run: |
set -euo pipefail
profile="compile"
fail_on_failure="true"
lane_job_prefix="Matrix Lane"
summary_job_name="Feature Matrix Summary"
lane_retention_days="21"
lane_timeout_minutes="55"
max_attempts="1"
summary_artifact_name="feature-matrix-summary"
summary_json_name="feature-matrix-summary.json"
summary_md_name="feature-matrix-summary.md"
lane_artifact_prefix="feature-matrix"
collect_history="false"
if [ "${GITHUB_EVENT_NAME}" = "schedule" ] && [ "${{ github.event.schedule }}" = "15 3 * * *" ]; then
profile="nightly"
elif [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
profile="${{ github.event.inputs.profile || 'compile' }}"
fail_on_failure="${{ github.event.inputs.fail_on_failure || 'true' }}"
fi
if [ "$profile" = "nightly" ]; then
lane_job_prefix="Nightly Lane"
summary_job_name="Nightly Summary & Routing"
lane_retention_days="30"
lane_timeout_minutes="70"
max_attempts="2"
summary_artifact_name="nightly-all-features-summary"
summary_json_name="nightly-summary.json"
summary_md_name="nightly-summary.md"
lane_artifact_prefix="nightly-lane"
collect_history="true"
fi
{
echo "profile=${profile}"
echo "lane_job_prefix=${lane_job_prefix}"
echo "summary_job_name=${summary_job_name}"
echo "lane_retention_days=${lane_retention_days}"
echo "lane_timeout_minutes=${lane_timeout_minutes}"
echo "max_attempts=${max_attempts}"
echo "summary_artifact_name=${summary_artifact_name}"
echo "summary_json_name=${summary_json_name}"
echo "summary_md_name=${summary_md_name}"
echo "lane_artifact_prefix=${lane_artifact_prefix}"
echo "fail_on_failure=${fail_on_failure}"
echo "collect_history=${collect_history}"
} >> "$GITHUB_OUTPUT"
feature-check:
name: ${{ needs.resolve-profile.outputs.lane_job_prefix }} (${{ matrix.name }})
needs: [resolve-profile]
if: >-
github.event_name != 'pull_request' ||
contains(github.event.pull_request.labels.*.name, 'ci:full') ||
contains(github.event.pull_request.labels.*.name, 'ci:feature-matrix')
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: ${{ fromJSON(needs.resolve-profile.outputs.lane_timeout_minutes) }}
strategy:
fail-fast: false
matrix:
include:
- name: default
compile_command: cargo check --locked
nightly_command: cargo test --locked --test agent_e2e --verbose
install_libudev: false
- name: whatsapp-web
compile_command: cargo check --locked --no-default-features --features whatsapp-web
nightly_command: cargo check --locked --no-default-features --features whatsapp-web --verbose
install_libudev: false
- name: browser-native
compile_command: cargo check --locked --no-default-features --features browser-native
nightly_command: cargo check --locked --no-default-features --features browser-native --verbose
install_libudev: false
- name: nightly-all-features
compile_command: cargo check --locked --all-features
nightly_command: cargo test --locked --all-features --test agent_e2e --verbose
install_libudev: true
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: feature-matrix-${{ matrix.name }}
- name: Ensure Linux deps for all-features lane
if: matrix.install_libudev
shell: bash
run: |
set -euo pipefail
if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists libudev; then
echo "libudev development headers already available; skipping apt install."
exit 0
fi
echo "Installing missing libudev build dependencies..."
for attempt in 1 2 3; do
if sudo apt-get update -qq -o DPkg::Lock::Timeout=300 && \
sudo apt-get install -y --no-install-recommends --no-upgrade -o DPkg::Lock::Timeout=300 libudev-dev pkg-config; then
echo "Dependency installation succeeded on attempt ${attempt}."
exit 0
fi
if [ "$attempt" -eq 3 ]; then
echo "Failed to install libudev-dev/pkg-config after ${attempt} attempts." >&2
exit 1
fi
echo "Dependency installation failed on attempt ${attempt}; retrying in 10s..."
sleep 10
done
- name: Run matrix lane command
id: lane
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
profile="${{ needs.resolve-profile.outputs.profile }}"
lane_command="${{ matrix.compile_command }}"
if [ "$profile" = "nightly" ]; then
lane_command="${{ matrix.nightly_command }}"
fi
max_attempts="${{ needs.resolve-profile.outputs.max_attempts }}"
attempt=1
status=1
started_at="$(date +%s)"
while [ "$attempt" -le "$max_attempts" ]; do
echo "Running lane command (attempt ${attempt}/${max_attempts}): ${lane_command}"
set +e
bash -lc "${lane_command}"
status=$?
set -e
if [ "$status" -eq 0 ]; then
break
fi
if [ "$attempt" -lt "$max_attempts" ]; then
sleep 5
fi
attempt="$((attempt + 1))"
done
finished_at="$(date +%s)"
duration="$((finished_at - started_at))"
lane_status="success"
if [ "$status" -ne 0 ]; then
lane_status="failure"
fi
cat > "artifacts/nightly-result-${{ matrix.name }}.json" <<EOF
{
"lane": "${{ matrix.name }}",
"mode": "${profile}",
"status": "${lane_status}",
"exit_code": ${status},
"duration_seconds": ${duration},
"command": "${lane_command}",
"attempts_used": ${attempt},
"max_attempts": ${max_attempts}
}
EOF
{
echo "### ${{ needs.resolve-profile.outputs.lane_job_prefix }}: ${{ matrix.name }}"
echo "- Profile: \`${profile}\`"
echo "- Command: \`${lane_command}\`"
echo "- Status: ${lane_status}"
echo "- Exit code: ${status}"
echo "- Duration (s): ${duration}"
echo "- Attempts: ${attempt}/${max_attempts}"
} >> "$GITHUB_STEP_SUMMARY"
echo "lane_status=${lane_status}" >> "$GITHUB_OUTPUT"
echo "lane_exit_code=${status}" >> "$GITHUB_OUTPUT"
- name: Upload lane report
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ${{ needs.resolve-profile.outputs.lane_artifact_prefix }}-${{ matrix.name }}
path: artifacts/nightly-result-${{ matrix.name }}.json
if-no-files-found: error
retention-days: ${{ fromJSON(needs.resolve-profile.outputs.lane_retention_days) }}
- name: Enforce lane success
if: steps.lane.outputs.lane_status != 'success'
shell: bash
run: |
set -euo pipefail
code="${{ steps.lane.outputs.lane_exit_code }}"
if [[ "$code" =~ ^[0-9]+$ ]]; then
# shellcheck disable=SC2242
exit "$code"
fi
echo "Invalid lane exit code: $code" >&2
exit 1
summary:
name: ${{ needs.resolve-profile.outputs.summary_job_name }}
needs: [resolve-profile, feature-check]
if: always()
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Download lane reports
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifacts
- name: Collect recent nightly history
if: needs.resolve-profile.outputs.collect_history == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const fs = require("fs");
const path = require("path");
const workflowId = "feature-matrix.yml";
const owner = context.repo.owner;
const repo = context.repo.repo;
const events = ["schedule", "workflow_dispatch"];
let runs = [];
for (const event of events) {
const resp = await github.rest.actions.listWorkflowRuns({
owner,
repo,
workflow_id: workflowId,
branch: "dev",
event,
per_page: 20,
});
runs = runs.concat(resp.data.workflow_runs || []);
}
const currentRunId = context.runId;
runs = runs
.filter((run) => run.id !== currentRunId && run.status === "completed")
.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
.slice(0, 3)
.map((run) => ({
run_id: run.id,
url: run.html_url,
event: run.event,
conclusion: run.conclusion || "unknown",
created_at: run.created_at,
head_sha: run.head_sha,
display_title: run.display_title || "",
}));
fs.mkdirSync("artifacts", { recursive: true });
fs.writeFileSync(
path.join("artifacts", "nightly-history.json"),
`${JSON.stringify(runs, null, 2)}\n`,
{ encoding: "utf8" }
);
- name: Aggregate matrix summary
shell: bash
run: |
set -euo pipefail
args=(
--input-dir artifacts
--owners-file .github/release/nightly-owner-routing.json
--output-json "artifacts/${{ needs.resolve-profile.outputs.summary_json_name }}"
--output-md "artifacts/${{ needs.resolve-profile.outputs.summary_md_name }}"
)
if [ "${{ needs.resolve-profile.outputs.collect_history }}" = "true" ] && [ -f artifacts/nightly-history.json ]; then
args+=(--history-file artifacts/nightly-history.json)
fi
if [ "${{ needs.resolve-profile.outputs.fail_on_failure }}" = "true" ]; then
args+=(--fail-on-failure)
fi
python3 scripts/ci/nightly_matrix_report.py "${args[@]}"
- name: Publish summary
shell: bash
run: |
set -euo pipefail
cat "artifacts/${{ needs.resolve-profile.outputs.summary_md_name }}" >> "$GITHUB_STEP_SUMMARY"
- name: Upload summary artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ${{ needs.resolve-profile.outputs.summary_artifact_name }}
path: |
artifacts/${{ needs.resolve-profile.outputs.summary_json_name }}
artifacts/${{ needs.resolve-profile.outputs.summary_md_name }}
artifacts/nightly-history.json
if-no-files-found: error
retention-days: ${{ fromJSON(needs.resolve-profile.outputs.lane_retention_days) }}

263
.github/workflows/main-branch-flow.md vendored Normal file
View File

@ -0,0 +1,263 @@
# Main Branch Delivery Flows
This document explains what runs when code is proposed to `dev`/`main`, merged to `main`, and released.
Use this with:
- [`docs/ci-map.md`](../../docs/ci-map.md)
- [`docs/pr-workflow.md`](../../docs/pr-workflow.md)
- [`docs/release-process.md`](../../docs/release-process.md)
## Event Summary
| Event | Main workflows |
| --- | --- |
| PR activity (`pull_request_target`) | `pr-intake-checks.yml`, `pr-labeler.yml`, `pr-auto-response.yml` |
| PR activity (`pull_request`) | `ci-run.yml`, `sec-audit.yml`, plus path-scoped workflows |
| Push to `dev`/`main` | `ci-run.yml`, `sec-audit.yml`, plus path-scoped workflows |
| Tag push (`v*`) | `pub-release.yml` publish mode, `pub-docker-img.yml` publish job |
| Scheduled/manual | `pub-release.yml` verification mode, `sec-codeql.yml`, `feature-matrix.yml`, `test-fuzz.yml`, `pr-check-stale.yml`, `pr-check-status.yml`, `ci-queue-hygiene.yml`, `sync-contributors.yml`, `test-benchmarks.yml`, `test-e2e.yml` |
## Runtime and Docker Matrix
Observed averages below are from recent completed runs (sampled from GitHub Actions on February 17, 2026). Values are directional, not SLA.
| Workflow | Typical trigger in main flow | Avg runtime | Docker build? | Docker run? | Docker push? |
| --- | --- | ---:| --- | --- | --- |
| `pr-intake-checks.yml` | PR open/update (`pull_request_target`) | 14.5s | No | No | No |
| `pr-labeler.yml` | PR open/update (`pull_request_target`) | 53.7s | No | No | No |
| `pr-auto-response.yml` | PR/issue automation | 24.3s | No | No | No |
| `ci-run.yml` | PR + push to `dev`/`main` | 74.7s | No | No | No |
| `sec-audit.yml` | PR + push to `dev`/`main` | 127.2s | No | No | No |
| `workflow-sanity.yml` | Workflow-file changes | 34.2s | No | No | No |
| `pr-label-policy-check.yml` | Label policy/automation changes | 14.7s | No | No | No |
| `pub-docker-img.yml` (`pull_request`) | Docker build-input PR changes | 240.4s | Yes | Yes | No |
| `pub-docker-img.yml` (`push`) | tag push `v*` | 139.9s | Yes | No | Yes |
| `pub-release.yml` | Tag push `v*` (publish) + manual/scheduled verification (no publish) | N/A in recent sample | No | No | No |
Notes:
1. `pub-docker-img.yml` is the only workflow in the main PR/push path that builds Docker images.
2. Container runtime verification (`docker run`) occurs in PR smoke only.
3. Container registry push occurs on tag pushes (`v*`) only.
4. `ci-run.yml` "Build (Smoke)" builds Rust binaries, not Docker images.
## Step-By-Step
### 1) PR from branch in this repository -> `dev`
1. Contributor opens or updates PR against `dev`.
2. `pull_request_target` automation runs (typical runtime):
- `pr-intake-checks.yml` posts intake warnings/errors.
- `pr-labeler.yml` sets size/risk/scope labels.
- `pr-auto-response.yml` runs first-interaction and label routes.
3. `pull_request` CI workflows start:
- `ci-run.yml`
- `feature-matrix.yml` (Rust/workflow path scope)
- `sec-audit.yml`
- `sec-codeql.yml` (if Rust/codeql paths changed)
- path-scoped workflows if matching files changed:
- `pub-docker-img.yml` (Docker build-input paths only)
- `docs-deploy.yml` (docs + README markdown paths; deploy contract guard enforces promotion + rollback ref policy)
- `workflow-sanity.yml` (workflow files only)
- `pr-label-policy-check.yml` (label-policy files only)
- `ci-change-audit.yml` (CI/security path changes)
- `ci-provider-connectivity.yml` (probe config/script/workflow changes)
- `ci-reproducible-build.yml` (Rust/build reproducibility paths)
4. In `ci-run.yml`, `changes` computes:
- `docs_only`
- `docs_changed`
- `rust_changed`
- `workflow_changed`
5. `build` runs for Rust-impacting changes.
6. On PRs, full lint/test/docs checks run when PR has label `ci:full`:
- `lint`
- `lint-strict-delta`
- `test`
- `flake-probe` (single-retry telemetry; optional block via `CI_BLOCK_ON_FLAKE_SUSPECTED`)
- `docs-quality`
7. If root license files (`LICENSE-APACHE`, `LICENSE-MIT`) changed, `license-file-owner-guard` allows only PR author `willsarg`.
8. `lint-feedback` posts actionable comment if lint/docs gates fail.
9. `CI Required Gate` aggregates results to final pass/fail.
10. Maintainer merges PR once checks and review policy are satisfied.
11. Merge emits a `push` event on `dev` (see scenario 4).
### 2) PR from fork -> `dev`
1. External contributor opens PR from `fork/<branch>` into `zeroclaw:dev`.
2. Immediately on `opened`:
- `pull_request_target` workflows start with base-repo context and base-repo token:
- `pr-intake-checks.yml`
- `pr-labeler.yml`
- `pr-auto-response.yml`
- `pull_request` workflows are queued for the fork head commit:
- `ci-run.yml`
- `sec-audit.yml`
- path-scoped workflows (`pub-docker-img.yml`, `workflow-sanity.yml`, `pr-label-policy-check.yml`) if changed files match.
3. Fork-specific permission behavior in `pull_request` workflows:
- token is restricted (read-focused), so jobs that try to write PR comments/status extras can be limited.
- secrets from the base repo are not exposed to fork PR `pull_request` jobs.
4. Approval gate possibility:
- if Actions settings require maintainer approval for fork workflows, the `pull_request` run stays in `action_required`/waiting state until approved.
5. Event fan-out after labeling:
- manual label changes emit `labeled`/`unlabeled` events.
- those events retrigger only label-driven `pull_request_target` automation (`pr-auto-response.yml`); `pr-labeler.yml` now runs only on PR lifecycle events (`opened`/`reopened`/`synchronize`/`ready_for_review`) to reduce churn.
6. When contributor pushes new commits to fork branch (`synchronize`):
- reruns: `pr-intake-checks.yml`, `pr-labeler.yml`, `ci-run.yml`, `sec-audit.yml`, and matching path-scoped PR workflows.
- does not rerun `pr-auto-response.yml` unless label/open events occur.
7. `ci-run.yml` execution details for fork PR:
- `changes` computes `docs_only`, `docs_changed`, `rust_changed`, `workflow_changed`.
- `build` runs for Rust-impacting changes.
- `lint`/`lint-strict-delta`/`test`/`docs-quality` run on PR when `ci:full` label exists.
- `CI Required Gate` emits final pass/fail for the PR head.
8. Fork PR merge blockers to check first when diagnosing stalls:
- run approval pending for fork workflows.
- `license-file-owner-guard` failing when root license files are modified by non-owner PR author.
- `CI Required Gate` failure caused by upstream jobs.
- repeated `pull_request_target` reruns from label churn causing noisy signals.
9. After merge, normal `push` workflows on `dev` execute (scenario 4).
### 3) PR to `main` (direct or from `dev`)
1. Contributor or maintainer opens PR with base `main`.
2. `ci-run.yml` and `sec-audit.yml` run on the PR, plus any path-scoped workflows.
3. Maintainer merges PR once checks and review policy pass.
4. Merge emits a `push` event on `main`.
### 4) Push/Merge Queue to `dev` or `main` (including after merge)
1. Commit reaches `dev` or `main` (usually from a merged PR), or merge queue creates a `merge_group` validation commit.
2. `ci-run.yml` runs on `push` and `merge_group`.
3. `feature-matrix.yml` runs on `push` to `dev` for Rust/workflow paths and on `merge_group`.
4. `sec-audit.yml` runs on `push` and `merge_group`.
5. `sec-codeql.yml` runs on `push`/`merge_group` when Rust/codeql paths change (path-scoped on push).
6. `ci-supply-chain-provenance.yml` runs on push when Rust/build provenance paths change.
7. Path-filtered workflows run only if touched files match their filters.
8. In `ci-run.yml`, push/merge-group behavior differs from PR behavior:
- Rust path: `lint`, `lint-strict-delta`, `test`, `build` are expected.
- Docs/non-rust paths: fast-path behavior applies.
9. `CI Required Gate` computes overall push/merge-group result.
## Docker Publish Logic
Workflow: `.github/workflows/pub-docker-img.yml`
### PR behavior
1. Triggered on `pull_request` to `dev` or `main` when Docker build-input paths change.
2. Runs `PR Docker Smoke` job:
- Builds local smoke image with Buildx builder.
- Verifies container with `docker run ... --version`.
3. Typical runtime in recent sample: ~240.4s.
4. No registry push happens on PR events.
### Push behavior
1. `publish` job runs on tag pushes `v*` only.
2. Workflow trigger includes semantic version tag pushes (`v*`) only.
3. Login to `ghcr.io` uses `${{ github.actor }}` and `${{ secrets.GITHUB_TOKEN }}`.
4. Tag computation includes semantic tag from pushed git tag (`vX.Y.Z`) + SHA tag (`sha-<12>`) + `latest`.
5. Multi-platform publish is used for tag pushes (`linux/amd64,linux/arm64`).
6. `scripts/ci/ghcr_publish_contract_guard.py` validates anonymous pullability and digest parity across `vX.Y.Z`, `sha-<12>`, and `latest`, then emits rollback candidate mapping evidence.
7. Trivy scans are emitted for version, SHA, and latest references.
8. `scripts/ci/ghcr_vulnerability_gate.py` validates Trivy JSON outputs against `.github/release/ghcr-vulnerability-policy.json` and emits audit-event evidence.
9. Typical runtime in recent sample: ~139.9s.
10. Result: pushed image tags under `ghcr.io/<owner>/<repo>` with publish-contract + vulnerability-gate + scan artifacts.
Important: Docker publish now requires a `v*` tag push; regular `dev`/`main` branch pushes do not publish images.
## Release Logic
Workflow: `.github/workflows/pub-release.yml`
1. Trigger modes:
- Tag push `v*` -> publish mode.
- Manual dispatch -> verification-only or publish mode (input-driven).
- Weekly schedule -> verification-only mode.
2. `prepare` resolves release context (`release_ref`, `release_tag`, publish/draft mode) and runs `scripts/ci/release_trigger_guard.py`.
- publish mode enforces actor authorization, stable annotated tag policy, `origin/main` ancestry, and `release_tag` == `Cargo.toml` version at the tag commit.
- trigger provenance is emitted as `release-trigger-guard` artifacts.
3. `build-release` builds matrix artifacts across Linux/macOS/Windows targets.
4. `verify-artifacts` runs `scripts/ci/release_artifact_guard.py` against `.github/release/release-artifact-contract.json` in verify-stage mode (archive contract required; manifest/SBOM/notice checks intentionally skipped) and uploads `release-artifact-guard-verify` evidence.
5. In publish mode, workflow generates SBOM (`CycloneDX` + `SPDX`), `SHA256SUMS`, and a checksum provenance statement (`zeroclaw.sha256sums.intoto.json`) plus audit-event envelope.
6. In publish mode, after manifest generation, workflow reruns `release_artifact_guard.py` in full-contract mode and emits `release-artifact-guard.publish.json` plus `audit-event-release-artifact-guard-publish.json`.
7. In publish mode, workflow keyless-signs release artifacts and composes a supply-chain release-notes preface via `release_notes_with_supply_chain_refs.py`.
8. In publish mode, workflow verifies GHCR release-tag availability.
9. In publish mode, workflow creates/updates the GitHub Release for the resolved tag and commit-ish, combining generated supply-chain preface with GitHub auto-generated commit notes.
Pre-release path:
1. Pre-release tags (`vX.Y.Z-alpha.N`, `vX.Y.Z-beta.N`, `vX.Y.Z-rc.N`) trigger `.github/workflows/pub-prerelease.yml`.
2. `scripts/ci/prerelease_guard.py` enforces stage progression, `origin/main` ancestry, and Cargo version/tag alignment.
3. In publish mode, prerelease assets are attached to a GitHub prerelease for the stage tag.
Canary policy lane:
1. `.github/workflows/ci-canary-gate.yml` runs weekly or manually.
2. `scripts/ci/canary_guard.py` evaluates metrics against `.github/release/canary-policy.json`.
3. Decision output is explicit (`promote`, `hold`, `abort`) with auditable artifacts and optional dispatch signal.
## Merge/Policy Notes
1. Workflow-file changes (`.github/workflows/**`) are validated through `pr-intake-checks.yml`, `ci-change-audit.yml`, and `CI Required Gate` without a dedicated owner-approval gate.
2. PR lint/test strictness is intentionally controlled by `ci:full` label.
4. `sec-audit.yml` runs on PR/push/merge queue (`merge_group`), plus scheduled weekly.
5. `ci-change-audit.yml` enforces pinned `uses:` references for CI/security workflow changes.
6. `sec-audit.yml` includes deny policy hygiene checks (`deny_policy_guard.py`) before cargo-deny.
7. `sec-audit.yml` includes gitleaks allowlist governance checks (`secrets_governance_guard.py`) against `.github/security/gitleaks-allowlist-governance.json`.
8. `ci-reproducible-build.yml` and `ci-supply-chain-provenance.yml` provide scheduled supply-chain assurance signals outside release-only windows.
9. Some workflows are operational and non-merge-path (`pr-check-stale`, `pr-check-status`, `sync-contributors`, etc.).
10. Workflow-specific JavaScript helpers are organized under `.github/workflows/scripts/`.
11. `ci-run.yml` includes cache partitioning (`prefix-key`) across lint/test/build/flake-probe lanes to reduce cache contention.
12. `ci-rollback.yml` provides a guarded rollback planning lane (scheduled dry-run + manual execute controls) with audit artifacts.
13. `ci-queue-hygiene.yml` periodically deduplicates superseded queued runs for lightweight PR automation workflows to reduce queue pressure.
## Mermaid Diagrams
### PR to Dev
```mermaid
flowchart TD
A["PR opened or updated -> dev"] --> B["pull_request_target lane"]
B --> B1["pr-intake-checks.yml"]
B --> B2["pr-labeler.yml"]
B --> B3["pr-auto-response.yml"]
A --> C["pull_request CI lane"]
C --> C1["ci-run.yml"]
C --> C2["sec-audit.yml"]
C --> C3["pub-docker-img.yml (if Docker paths changed)"]
C --> C4["workflow-sanity.yml (if workflow files changed)"]
C --> C5["pr-label-policy-check.yml (if policy files changed)"]
C1 --> D["CI Required Gate"]
D --> E{"Checks + review policy pass?"}
E -->|No| F["PR stays open"]
E -->|Yes| G["Merge PR"]
G --> H["push event on dev"]
```
### Main Delivery and Release
```mermaid
flowchart TD
D0["Commit reaches dev"] --> B0["ci-run.yml"]
D0 --> C0["sec-audit.yml"]
PRM["PR to main"] --> QM["ci-run.yml + sec-audit.yml (+ path-scoped)"]
QM --> M["Merge to main"]
M --> A["Commit reaches main"]
A --> B["ci-run.yml"]
A --> C["sec-audit.yml"]
A --> D["path-scoped workflows (if matched)"]
T["Tag push v*"] --> R["pub-release.yml"]
W["Manual/Scheduled release verify"] --> R
T --> DP["pub-docker-img.yml publish job"]
R --> R1["Artifacts + SBOM + checksums + signatures + GitHub Release"]
W --> R2["Verification build only (no GitHub Release publish)"]
DP --> P1["Push ghcr image tags (version + sha + latest)"]
```
## Quick Troubleshooting
1. Unexpected skipped jobs: inspect `scripts/ci/detect_change_scope.sh` outputs.
2. CI/CD-change PR blocked: verify `@chumyin` approved review is present.
3. Fork PR appears stalled: check whether Actions run approval is pending.
4. Docker not published: confirm a `v*` tag was pushed to the intended commit.

View File

@ -1,130 +0,0 @@
# Master Branch Delivery Flows
This document explains what runs when code is proposed to `master` and released.
Use this with:
- [`docs/ci-map.md`](../../docs/contributing/ci-map.md)
- [`docs/pr-workflow.md`](../../docs/contributing/pr-workflow.md)
- [`docs/release-process.md`](../../docs/contributing/release-process.md)
## Branching Model
ZeroClaw uses a single default branch: `master`. All contributor PRs target `master` directly. There is no `dev` or promotion branch.
Current maintainers with PR approval authority: `theonlyhennygod`, `JordanTheJet`, and `SimianAstronaut7`.
## Active Workflows
| File | Trigger | Purpose |
| --- | --- | --- |
| `checks-on-pr.yml` | `pull_request``master` | Lint + test + build + security audit on every PR |
| `cross-platform-build-manual.yml` | `workflow_dispatch` | Full platform build matrix (manual) |
| `release-beta-on-push.yml` | `push``master` | Beta release on every master commit |
| `release-stable-manual.yml` | `workflow_dispatch` | Stable release (manual, version-gated) |
## Event Summary
| Event | Workflows triggered |
| --- | --- |
| PR opened or updated against `master` | `checks-on-pr.yml` |
| Push to `master` (including after merge) | `release-beta-on-push.yml` |
| Manual dispatch | `cross-platform-build-manual.yml`, `release-stable-manual.yml` |
## Step-By-Step
### 1) PR → `master`
1. Contributor opens or updates a PR against `master`.
2. `checks-on-pr.yml` starts:
- `lint` job: runs `cargo fmt --check` and `cargo clippy -D warnings`.
- `test` job: runs `cargo nextest run --locked` on `ubuntu-latest` with Rust 1.92.0 and mold linker.
- `build` job (matrix): compiles release binary on `x86_64-unknown-linux-gnu` and `aarch64-apple-darwin`.
- `security` job: runs `cargo audit` and `cargo deny check licenses sources`.
- Concurrency group cancels in-progress runs for the same PR on new pushes.
3. All jobs must pass before merge.
4. Maintainer (`theonlyhennygod`, `JordanTheJet`, or `SimianAstronaut7`) merges PR once checks and review policy are satisfied.
5. Merge emits a `push` event on `master` (see section 2).
### 2) Push to `master` (including after merge)
1. Commit reaches `master`.
2. `release-beta-on-push.yml` (Release Beta) starts:
- `version` job: computes beta tag as `v{cargo_version}-beta.{run_number}`.
- `build` job (matrix, 4 targets): `x86_64-linux`, `aarch64-linux`, `aarch64-darwin`, `x86_64-windows`.
- `publish` job: generates `SHA256SUMS`, creates a GitHub pre-release with all artifacts. Artifact retention: 7 days.
- `docker` job: builds multi-platform image (`linux/amd64,linux/arm64`) and pushes to `ghcr.io` with `:beta` and the versioned beta tag.
3. This runs on every push to `master` without filtering. Every merged PR produces a beta pre-release.
### 3) Stable Release (manual)
1. Maintainer runs `release-stable-manual.yml` via `workflow_dispatch` with a version input (e.g. `0.2.0`).
2. `validate` job checks:
- Input matches semver `X.Y.Z` format.
- `Cargo.toml` version matches input exactly.
- Tag `vX.Y.Z` does not already exist on the remote.
3. `build` job (matrix, same 4 targets as beta): compiles release binary.
4. `publish` job: generates `SHA256SUMS`, creates a stable GitHub Release (not pre-release). Artifact retention: 14 days.
5. `docker` job: pushes to `ghcr.io` with `:latest` and `:vX.Y.Z`.
### 4) Full Platform Build (manual)
1. Maintainer runs `cross-platform-build-manual.yml` via `workflow_dispatch`.
2. `build` job (matrix, 3 targets): `aarch64-linux-gnu`, `x86_64-darwin` (macOS 15 Intel), `x86_64-windows-msvc`.
3. Build-only, no tests, no publish. Used to verify cross-compilation on platforms not covered by `checks-on-pr.yml`.
## Build Targets by Workflow
| Target | `checks-on-pr.yml` | `cross-platform-build-manual.yml` | `release-beta-on-push.yml` | `release-stable-manual.yml` |
| --- | :---: | :---: | :---: | :---: |
| `x86_64-unknown-linux-gnu` | ✓ | | ✓ | ✓ |
| `aarch64-unknown-linux-gnu` | | ✓ | ✓ | ✓ |
| `aarch64-apple-darwin` | ✓ | | ✓ | ✓ |
| `x86_64-apple-darwin` | | ✓ | | |
| `x86_64-pc-windows-msvc` | ✓ | ✓ | ✓ | ✓ |
## Mermaid Diagrams
### PR to Master
```mermaid
flowchart TD
A["PR opened or updated → master"] --> B["checks-on-pr.yml"]
B --> B0["lint: fmt + clippy"]
B --> B1["test: cargo nextest (ubuntu-latest)"]
B --> B2["build: x86_64-linux + aarch64-darwin"]
B --> B3["security: audit + deny"]
B0 & B1 & B2 & B3 --> C{"Checks pass?"}
C -->|No| D["PR stays open"]
C -->|Yes| E["Maintainer merges"]
E --> F["push event on master"]
```
### Beta Release (on every master push)
```mermaid
flowchart TD
A["Push to master"] --> B["release-beta-on-push.yml"]
B --> B1["version: compute v{x.y.z}-beta.{N}"]
B1 --> B2["build: 4 targets"]
B2 --> B3["publish: GitHub pre-release + SHA256SUMS"]
B2 --> B4["docker: push ghcr.io :beta + versioned tag"]
```
### Stable Release (manual)
```mermaid
flowchart TD
A["workflow_dispatch: version=X.Y.Z"] --> B["release-stable-manual.yml"]
B --> B1["validate: semver + Cargo.toml + tag uniqueness"]
B1 --> B2["build: 4 targets"]
B2 --> B3["publish: GitHub stable release + SHA256SUMS"]
B2 --> B4["docker: push ghcr.io :latest + :vX.Y.Z"]
```
## Quick Troubleshooting
1. **Quality gate failing on PR**: check `lint` job for formatting/clippy issues; check `test` job for test failures; check `build` job for compile errors; check `security` job for audit/deny failures.
2. **Beta release not appearing**: confirm the push landed on `master` (not another branch); check `release-beta-on-push.yml` run status.
3. **Stable release failing at validate**: ensure `Cargo.toml` version matches the input version and the tag does not already exist.
4. **Full matrix build needed**: run `cross-platform-build-manual.yml` manually from the Actions tab.

View File

@ -0,0 +1,192 @@
name: Nightly All-Features
on:
schedule:
- cron: "15 3 * * *" # Daily 03:15 UTC
workflow_dispatch:
inputs:
fail_on_failure:
description: "Fail workflow when any nightly lane fails"
required: true
default: true
type: boolean
concurrency:
group: nightly-all-features-${{ github.ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
nightly-lanes:
name: Nightly Lane (${{ matrix.name }})
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 70
strategy:
fail-fast: false
matrix:
include:
- name: default
command: cargo test --locked --test agent_e2e --verbose
install_libudev: false
- name: whatsapp-web
command: cargo check --locked --no-default-features --features whatsapp-web --verbose
install_libudev: false
- name: browser-native
command: cargo check --locked --no-default-features --features browser-native --verbose
install_libudev: false
- name: nightly-all-features
command: cargo test --locked --all-features --test agent_e2e --verbose
install_libudev: true
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: nightly-all-features-${{ matrix.name }}
- name: Ensure Linux deps for all-features lane
if: matrix.install_libudev
shell: bash
run: |
set -euo pipefail
if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists libudev; then
echo "libudev development headers already available; skipping apt install."
exit 0
fi
echo "Installing missing libudev build dependencies..."
for attempt in 1 2 3; do
if sudo apt-get update -qq -o DPkg::Lock::Timeout=300 && \
sudo apt-get install -y --no-install-recommends --no-upgrade -o DPkg::Lock::Timeout=300 libudev-dev pkg-config; then
echo "Dependency installation succeeded on attempt ${attempt}."
exit 0
fi
if [ "$attempt" -eq 3 ]; then
echo "Failed to install libudev-dev/pkg-config after ${attempt} attempts." >&2
exit 1
fi
echo "Dependency installation failed on attempt ${attempt}; retrying in 10s..."
sleep 10
done
- name: Run nightly lane command
id: lane
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
started_at="$(date +%s)"
set +e
bash -lc "${{ matrix.command }}"
status=$?
set -e
finished_at="$(date +%s)"
duration="$((finished_at - started_at))"
lane_status="success"
if [ "$status" -ne 0 ]; then
lane_status="failure"
fi
cat > "artifacts/nightly-result-${{ matrix.name }}.json" <<EOF
{
"lane": "${{ matrix.name }}",
"status": "${lane_status}",
"exit_code": ${status},
"duration_seconds": ${duration},
"command": "${{ matrix.command }}"
}
EOF
{
echo "### Nightly Lane: ${{ matrix.name }}"
echo "- Command: \`${{ matrix.command }}\`"
echo "- Status: ${lane_status}"
echo "- Exit code: ${status}"
echo "- Duration (s): ${duration}"
} >> "$GITHUB_STEP_SUMMARY"
echo "lane_status=${lane_status}" >> "$GITHUB_OUTPUT"
echo "lane_exit_code=${status}" >> "$GITHUB_OUTPUT"
- name: Upload nightly lane artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: nightly-lane-${{ matrix.name }}
path: artifacts/nightly-result-${{ matrix.name }}.json
if-no-files-found: error
retention-days: 30
nightly-summary:
name: Nightly Summary & Routing
needs: [nightly-lanes]
if: always()
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Download nightly artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifacts
- name: Aggregate nightly report
shell: bash
env:
FAIL_ON_FAILURE_INPUT: ${{ github.event.inputs.fail_on_failure || 'true' }}
run: |
set -euo pipefail
fail_on_failure="true"
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
fail_on_failure="${FAIL_ON_FAILURE_INPUT}"
fi
args=()
if [ "$fail_on_failure" = "true" ]; then
args+=(--fail-on-failure)
fi
python3 scripts/ci/nightly_matrix_report.py \
--input-dir artifacts \
--owners-file .github/release/nightly-owner-routing.json \
--output-json artifacts/nightly-summary.json \
--output-md artifacts/nightly-summary.md \
"${args[@]}"
- name: Publish nightly summary
shell: bash
run: |
set -euo pipefail
cat artifacts/nightly-summary.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload nightly summary artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: nightly-all-features-summary
path: |
artifacts/nightly-summary.json
artifacts/nightly-summary.md
if-no-files-found: error
retention-days: 30

64
.github/workflows/pages-deploy.yml vendored Normal file
View File

@ -0,0 +1,64 @@
name: Deploy GitHub Pages
on:
push:
branches:
- main
paths:
- site/**
- docs/**
- README.md
- .github/workflows/pages-deploy.yml
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: github-pages
cancel-in-progress: true
jobs:
build:
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: site/package-lock.json
- name: Install Dependencies
working-directory: site
run: npm ci
- name: Build Site
working-directory: site
run: npm run build
- name: Configure Pages
uses: actions/configure-pages@v5
- name: Upload Artifact
uses: actions/upload-pages-artifact@v3
with:
path: gh-pages
deploy:
needs: build
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

94
.github/workflows/pr-auto-response.yml vendored Normal file
View File

@ -0,0 +1,94 @@
name: PR Auto Responder
on:
issues:
types: [opened, reopened, labeled, unlabeled]
pull_request_target:
branches: [dev, main]
types: [opened, labeled, unlabeled]
concurrency:
# Keep cancellation within the same lifecycle action to avoid `labeled`
# events canceling an in-flight `opened` run for the same issue/PR.
group: pr-auto-response-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }}-${{ github.event.action || 'unknown' }}
cancel-in-progress: true
permissions: {}
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
LABEL_POLICY_PATH: .github/label-policy.json
jobs:
contributor-tier-issues:
# Only run for opened/reopened events to avoid duplicate runs with labeled-routes job
if: >-
(github.event_name == 'issues' &&
(github.event.action == 'opened' || github.event.action == 'reopened'))
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
permissions:
contents: read
issues: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Apply contributor tier label for issue author
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
env:
LABEL_POLICY_PATH: .github/label-policy.json
with:
script: |
const script = require('./.github/workflows/scripts/pr_auto_response_contributor_tier.js');
await script({ github, context, core });
first-interaction:
if: github.event.action == 'opened'
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
permissions:
issues: write
pull-requests: write
steps:
- name: Greet first-time contributors
uses: actions/first-interaction@a1db7729b356323c7988c20ed6f0d33fe31297be # v1
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
issue_message: |
Thanks for opening this issue.
Before maintainers triage it, please confirm:
- Repro steps are complete and run on latest `main`
- Environment details are included (OS, Rust version, ZeroClaw version)
- Sensitive values are redacted
This helps us keep issue throughput high and response latency low.
pr_message: |
Thanks for contributing to ZeroClaw.
For faster review, please ensure:
- PR template sections are fully completed
- `cargo fmt --all -- --check`, `cargo clippy --all-targets -- -D warnings`, and `cargo test` are included
- If automation/agents were used heavily, add brief workflow notes
- Scope is focused (prefer one concern per PR)
See `CONTRIBUTING.md` and `docs/pr-workflow.md` for full collaboration rules.
labeled-routes:
if: github.event.action == 'labeled'
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
permissions:
contents: read
issues: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Handle label-driven responses
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const script = require('./.github/workflows/scripts/pr_auto_response_labeled_routes.js');
await script({ github, context, core });

50
.github/workflows/pr-check-stale.yml vendored Normal file
View File

@ -0,0 +1,50 @@
name: PR Check Stale
on:
schedule:
- cron: "20 2 * * *"
workflow_dispatch:
permissions: {}
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
stale:
permissions:
issues: write
pull-requests: write
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 10
steps:
- name: Mark stale issues and pull requests
uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
days-before-issue-stale: 21
days-before-issue-close: 7
days-before-pr-stale: 14
days-before-pr-close: 7
stale-issue-label: stale
stale-pr-label: stale
exempt-issue-labels: security,pinned,no-stale,no-pr-hygiene,maintainer
exempt-pr-labels: no-stale,no-pr-hygiene,maintainer
remove-stale-when-updated: true
exempt-all-assignees: true
operations-per-run: 300
stale-issue-message: |
This issue was automatically marked as stale due to inactivity.
Please provide an update, reproduction details, or current status to keep it open.
close-issue-message: |
Closing this issue due to inactivity.
If the problem still exists on the latest `main`, please open a new issue with fresh repro steps.
close-issue-reason: not_planned
stale-pr-message: |
This PR was automatically marked as stale due to inactivity.
Please rebase/update and post the latest validation results.
close-pr-message: |
Closing this PR due to inactivity.
Maintainers can reopen once the branch is updated and validation is provided.

37
.github/workflows/pr-check-status.yml vendored Normal file
View File

@ -0,0 +1,37 @@
name: PR Check Status
on:
schedule:
- cron: "15 8 * * *" # Once daily at 8:15am UTC
workflow_dispatch:
permissions: {}
concurrency:
group: pr-check-status
cancel-in-progress: true
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
nudge-stale-prs:
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 10
permissions:
contents: read
pull-requests: write
issues: write
env:
STALE_HOURS: "48"
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Nudge PRs that need rebase or CI refresh
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const script = require('./.github/workflows/scripts/pr_check_status_nudge.js');
await script({ github, context, core });

37
.github/workflows/pr-intake-checks.yml vendored Normal file
View File

@ -0,0 +1,37 @@
name: PR Intake Checks
on:
pull_request_target:
branches: [dev, main]
types: [opened, reopened, synchronize, ready_for_review]
concurrency:
group: pr-intake-checks-${{ github.event.pull_request.number || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
pull-requests: write
issues: write
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
intake:
name: Intake Checks
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 10
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Run safe PR intake checks
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const script = require('./.github/workflows/scripts/pr_intake_checks.js');
await script({ github, context, core });

View File

@ -0,0 +1,81 @@
name: PR Label Policy Check
on:
pull_request:
paths:
- ".github/label-policy.json"
- ".github/workflows/pr-labeler.yml"
- ".github/workflows/pr-auto-response.yml"
push:
branches: [dev, main]
paths:
- ".github/label-policy.json"
- ".github/workflows/pr-labeler.yml"
- ".github/workflows/pr-auto-response.yml"
concurrency:
group: pr-label-policy-check-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
contributor-tier-consistency:
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 10
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Verify shared label policy and workflow wiring
shell: bash
run: |
set -euo pipefail
python3 - <<'PY'
import json
import re
from pathlib import Path
policy_path = Path('.github/label-policy.json')
policy = json.loads(policy_path.read_text(encoding='utf-8'))
color = str(policy.get('contributor_tier_color', '')).upper()
rules = policy.get('contributor_tiers', [])
if not re.fullmatch(r'[0-9A-F]{6}', color):
raise SystemExit('invalid contributor_tier_color in .github/label-policy.json')
if not rules:
raise SystemExit('contributor_tiers must not be empty in .github/label-policy.json')
labels = set()
prev_min = None
for entry in rules:
label = str(entry.get('label', '')).strip().lower()
min_merged = int(entry.get('min_merged_prs', 0))
if not label.endswith('contributor'):
raise SystemExit(f'invalid contributor tier label: {label}')
if label in labels:
raise SystemExit(f'duplicate contributor tier label: {label}')
if prev_min is not None and min_merged > prev_min:
raise SystemExit('contributor_tiers must be sorted descending by min_merged_prs')
labels.add(label)
prev_min = min_merged
workflow_paths = [
Path('.github/workflows/pr-labeler.yml'),
Path('.github/workflows/pr-auto-response.yml'),
]
for workflow in workflow_paths:
text = workflow.read_text(encoding='utf-8')
if '.github/label-policy.json' not in text:
raise SystemExit(f'{workflow} must load .github/label-policy.json')
if re.search(r'contributorTierColor\s*=\s*"[0-9A-Fa-f]{6}"', text):
raise SystemExit(f'{workflow} contains hardcoded contributorTierColor')
print('label policy file is valid and workflow consumers are wired to shared policy')
PY

56
.github/workflows/pr-labeler.yml vendored Normal file
View File

@ -0,0 +1,56 @@
name: PR Labeler
on:
pull_request_target:
branches: [dev, main]
types: [opened, reopened, synchronize, ready_for_review]
workflow_dispatch:
inputs:
mode:
description: "Run mode for managed-label governance"
required: true
default: "audit"
type: choice
options:
- audit
- repair
concurrency:
group: pr-labeler-${{ github.event.pull_request.number || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
pull-requests: write
issues: write
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
LABEL_POLICY_PATH: .github/label-policy.json
jobs:
label:
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Apply path labels
if: github.event_name == 'pull_request_target'
uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1
continue-on-error: true
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
sync-labels: true
- name: Apply size/risk/module labels
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
continue-on-error: true
env:
LABEL_POLICY_PATH: .github/label-policy.json
with:
script: |
const script = require('./.github/workflows/scripts/pr_labeler.js');
await script({ github, context, core });

View File

@ -1,19 +0,0 @@
name: PR Path Labeler
on:
pull_request_target:
types: [opened, synchronize, reopened]
permissions:
contents: read
pull-requests: write
jobs:
label:
name: Apply path labels
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5
with:
sync-labels: true

View File

@ -1,181 +0,0 @@
name: Pub AUR Package
on:
workflow_call:
inputs:
release_tag:
description: "Existing release tag (vX.Y.Z)"
required: true
type: string
dry_run:
description: "Generate PKGBUILD only (no push)"
required: false
default: false
type: boolean
secrets:
AUR_SSH_KEY:
required: false
workflow_dispatch:
inputs:
release_tag:
description: "Existing release tag (vX.Y.Z)"
required: true
type: string
dry_run:
description: "Generate PKGBUILD only (no push)"
required: false
default: true
type: boolean
concurrency:
group: aur-publish-${{ github.run_id }}
cancel-in-progress: false
permissions:
contents: read
jobs:
publish-aur:
name: Update AUR Package
runs-on: ubuntu-latest
env:
RELEASE_TAG: ${{ inputs.release_tag }}
DRY_RUN: ${{ inputs.dry_run }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Validate and compute metadata
id: meta
shell: bash
run: |
set -euo pipefail
if [[ ! "$RELEASE_TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "::error::release_tag must be vX.Y.Z format."
exit 1
fi
version="${RELEASE_TAG#v}"
tarball_url="https://github.com/${GITHUB_REPOSITORY}/archive/refs/tags/${RELEASE_TAG}.tar.gz"
tarball_sha="$(curl -fsSL "$tarball_url" | sha256sum | awk '{print $1}')"
if [[ -z "$tarball_sha" ]]; then
echo "::error::Could not compute SHA256 for source tarball."
exit 1
fi
{
echo "version=$version"
echo "tarball_url=$tarball_url"
echo "tarball_sha=$tarball_sha"
} >> "$GITHUB_OUTPUT"
{
echo "### AUR Package Metadata"
echo "- version: \`${version}\`"
echo "- tarball_url: \`${tarball_url}\`"
echo "- tarball_sha: \`${tarball_sha}\`"
} >> "$GITHUB_STEP_SUMMARY"
- name: Generate PKGBUILD
id: pkgbuild
shell: bash
env:
VERSION: ${{ steps.meta.outputs.version }}
TARBALL_SHA: ${{ steps.meta.outputs.tarball_sha }}
run: |
set -euo pipefail
pkgbuild_file="$(mktemp)"
sed -e "s/^pkgver=.*/pkgver=${VERSION}/" \
-e "s/^sha256sums=.*/sha256sums=('${TARBALL_SHA}')/" \
dist/aur/PKGBUILD > "$pkgbuild_file"
echo "pkgbuild_file=$pkgbuild_file" >> "$GITHUB_OUTPUT"
echo "### Generated PKGBUILD" >> "$GITHUB_STEP_SUMMARY"
echo '```bash' >> "$GITHUB_STEP_SUMMARY"
cat "$pkgbuild_file" >> "$GITHUB_STEP_SUMMARY"
echo '```' >> "$GITHUB_STEP_SUMMARY"
- name: Generate .SRCINFO
id: srcinfo
shell: bash
env:
VERSION: ${{ steps.meta.outputs.version }}
TARBALL_SHA: ${{ steps.meta.outputs.tarball_sha }}
run: |
set -euo pipefail
srcinfo_file="$(mktemp)"
sed -e "s/pkgver = .*/pkgver = ${VERSION}/" \
-e "s/sha256sums = .*/sha256sums = ${TARBALL_SHA}/" \
-e "s|zeroclaw-[0-9.]*.tar.gz|zeroclaw-${VERSION}.tar.gz|g" \
-e "s|/v[0-9.]*\.tar\.gz|/v${VERSION}.tar.gz|g" \
dist/aur/.SRCINFO > "$srcinfo_file"
echo "srcinfo_file=$srcinfo_file" >> "$GITHUB_OUTPUT"
- name: Push to AUR
if: inputs.dry_run == false
shell: bash
env:
AUR_SSH_KEY: ${{ secrets.AUR_SSH_KEY }}
PKGBUILD_FILE: ${{ steps.pkgbuild.outputs.pkgbuild_file }}
SRCINFO_FILE: ${{ steps.srcinfo.outputs.srcinfo_file }}
VERSION: ${{ steps.meta.outputs.version }}
run: |
set -euo pipefail
if [[ -z "${AUR_SSH_KEY}" ]]; then
echo "::error::Secret AUR_SSH_KEY is required for non-dry-run."
exit 1
fi
# Set up SSH key — normalize line endings and ensure trailing newline
mkdir -p ~/.ssh
chmod 700 ~/.ssh
printf '%s\n' "$AUR_SSH_KEY" | tr -d '\r' > ~/.ssh/aur
chmod 600 ~/.ssh/aur
cat > ~/.ssh/config <<'SSH_CONFIG'
Host aur.archlinux.org
IdentityFile ~/.ssh/aur
User aur
StrictHostKeyChecking accept-new
SSH_CONFIG
chmod 600 ~/.ssh/config
# Verify key is valid and print fingerprint for debugging
echo "::group::SSH key diagnostics"
ssh-keygen -l -f ~/.ssh/aur || { echo "::error::AUR_SSH_KEY is not a valid SSH private key"; exit 1; }
echo "::endgroup::"
# Test SSH connectivity before attempting clone
ssh -T -o BatchMode=yes -o ConnectTimeout=10 aur@aur.archlinux.org 2>&1 || true
tmp_dir="$(mktemp -d)"
git clone ssh://aur@aur.archlinux.org/zeroclaw.git "$tmp_dir/aur"
cp "$PKGBUILD_FILE" "$tmp_dir/aur/PKGBUILD"
cp "$SRCINFO_FILE" "$tmp_dir/aur/.SRCINFO"
cd "$tmp_dir/aur"
git config user.name "zeroclaw-bot"
git config user.email "bot@zeroclaw.dev"
git add PKGBUILD .SRCINFO
git commit -m "zeroclaw ${VERSION}"
git push origin HEAD
echo "AUR package updated to ${VERSION}"
- name: Summary
shell: bash
run: |
if [[ "$DRY_RUN" == "true" ]]; then
echo "Dry run complete: PKGBUILD generated, no push performed."
else
echo "Publish complete: AUR package pushed."
fi

423
.github/workflows/pub-docker-img.yml vendored Normal file
View File

@ -0,0 +1,423 @@
name: Pub Docker Img
on:
push:
tags: ["v*"]
pull_request:
branches: [dev, main]
paths:
- "Dockerfile"
- ".dockerignore"
- "docker-compose.yml"
- "rust-toolchain.toml"
- "dev/config.template.toml"
- ".github/workflows/pub-docker-img.yml"
- ".github/release/ghcr-tag-policy.json"
- ".github/release/ghcr-vulnerability-policy.json"
- "scripts/ci/ghcr_publish_contract_guard.py"
- "scripts/ci/ghcr_vulnerability_gate.py"
workflow_dispatch:
inputs:
release_tag:
description: "Existing release tag to publish (e.g. v0.2.0). Leave empty for smoke-only run."
required: false
type: string
concurrency:
group: docker-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
pr-smoke:
name: PR Docker Smoke
if: (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) || (github.event_name == 'workflow_dispatch' && inputs.release_tag == '')
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 25
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Resolve Docker API version
shell: bash
run: |
set -euo pipefail
server_api="$(docker version --format '{{.Server.APIVersion}}')"
min_api="$(docker version --format '{{.Server.MinAPIVersion}}' 2>/dev/null || true)"
if [[ -z "${server_api}" || "${server_api}" == "<no value>" ]]; then
echo "::error::Unable to detect Docker server API version."
docker version || true
exit 1
fi
echo "DOCKER_API_VERSION=${server_api}" >> "$GITHUB_ENV"
echo "Using Docker API version ${server_api} (server min: ${min_api:-unknown})"
- name: Setup Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- name: Extract metadata (tags, labels)
if: github.event_name == 'pull_request'
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=pr
- name: Build smoke image
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
with:
context: .
push: false
load: true
provenance: false
sbom: false
tags: zeroclaw-pr-smoke:latest
labels: ${{ steps.meta.outputs.labels || '' }}
platforms: linux/amd64
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Verify image
run: docker run --rm zeroclaw-pr-smoke:latest --version
publish:
name: Build and Push Docker Image
if: github.repository == 'zeroclaw-labs/zeroclaw' && ((github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) || (github.event_name == 'workflow_dispatch' && inputs.release_tag != ''))
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 90
permissions:
contents: read
packages: write
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', inputs.release_tag) || github.ref }}
- name: Resolve Docker API version
shell: bash
run: |
set -euo pipefail
server_api="$(docker version --format '{{.Server.APIVersion}}')"
min_api="$(docker version --format '{{.Server.MinAPIVersion}}' 2>/dev/null || true)"
if [[ -z "${server_api}" || "${server_api}" == "<no value>" ]]; then
echo "::error::Unable to detect Docker server API version."
docker version || true
exit 1
fi
echo "DOCKER_API_VERSION=${server_api}" >> "$GITHUB_ENV"
echo "Using Docker API version ${server_api} (server min: ${min_api:-unknown})"
- name: Setup Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- name: Log in to Container Registry
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Compute tags
id: meta
shell: bash
run: |
set -euo pipefail
IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}"
if [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then
if [[ "${GITHUB_REF}" != refs/tags/v* ]]; then
echo "::error::Docker publish is restricted to v* tag pushes."
exit 1
fi
RELEASE_TAG="${GITHUB_REF#refs/tags/}"
elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
RELEASE_TAG="${{ inputs.release_tag }}"
if [[ -z "${RELEASE_TAG}" ]]; then
echo "::error::workflow_dispatch publish requires inputs.release_tag"
exit 1
fi
if [[ ! "${RELEASE_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+([.-][0-9A-Za-z.-]+)?$ ]]; then
echo "::error::release_tag must be vX.Y.Z or vX.Y.Z-suffix (received: ${RELEASE_TAG})"
exit 1
fi
if ! git rev-parse --verify "refs/tags/${RELEASE_TAG}" >/dev/null 2>&1; then
echo "::error::release tag not found in checkout: ${RELEASE_TAG}"
exit 1
fi
else
echo "::error::Unsupported event for publish: ${GITHUB_EVENT_NAME}"
exit 1
fi
RELEASE_SHA="$(git rev-parse HEAD)"
SHA_SUFFIX="sha-${RELEASE_SHA::12}"
SHA_TAG="${IMAGE}:${SHA_SUFFIX}"
LATEST_SUFFIX="latest"
LATEST_TAG="${IMAGE}:${LATEST_SUFFIX}"
VERSION_TAG="${IMAGE}:${RELEASE_TAG}"
TAGS="${VERSION_TAG},${SHA_TAG},${LATEST_TAG}"
{
echo "tags=${TAGS}"
echo "release_tag=${RELEASE_TAG}"
echo "release_sha=${RELEASE_SHA}"
echo "sha_tag=${SHA_SUFFIX}"
echo "latest_tag=${LATEST_SUFFIX}"
} >> "$GITHUB_OUTPUT"
- name: Build and push Docker image
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
with:
context: .
push: true
build-args: |
ZEROCLAW_CARGO_ALL_FEATURES=true
tags: ${{ steps.meta.outputs.tags }}
platforms: linux/amd64,linux/arm64
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Set GHCR package visibility to public
shell: bash
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
owner="${GITHUB_REPOSITORY_OWNER,,}"
repo="${GITHUB_REPOSITORY#*/}"
# Package path can vary depending on repository/package linkage.
candidates=(
"$repo"
"${owner}%2F${repo}"
)
for scope in orgs users; do
for pkg in "${candidates[@]}"; do
code="$(curl -sS -o /tmp/ghcr-visibility.json -w "%{http_code}" \
-X PATCH \
-H "Authorization: Bearer ${GH_TOKEN}" \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"https://api.github.com/${scope}/${owner}/packages/container/${pkg}/visibility" \
-d '{"visibility":"public"}' || true)"
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
echo "GHCR package visibility is public (${scope}/${owner}/${pkg})."
exit 0
fi
echo "Visibility attempt ${scope}/${owner}/${pkg} returned HTTP ${code}."
done
done
echo "::warning::Unable to update GHCR visibility via API in this run; proceeding to GHCR publish contract verification."
- name: Validate GHCR publish contract
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/ghcr_publish_contract_guard.py \
--repository "${GITHUB_REPOSITORY,,}" \
--release-tag "${{ steps.meta.outputs.release_tag }}" \
--sha "${{ steps.meta.outputs.release_sha }}" \
--policy-file .github/release/ghcr-tag-policy.json \
--output-json artifacts/ghcr-publish-contract.json \
--output-md artifacts/ghcr-publish-contract.md \
--fail-on-violation
- name: Emit GHCR publish contract audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/ghcr-publish-contract.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type ghcr_publish_contract \
--input-json artifacts/ghcr-publish-contract.json \
--output-json artifacts/audit-event-ghcr-publish-contract.json \
--artifact-name ghcr-publish-contract \
--retention-days 21
fi
- name: Publish GHCR contract summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/ghcr-publish-contract.md ]; then
cat artifacts/ghcr-publish-contract.md >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload GHCR publish contract artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ghcr-publish-contract
path: |
artifacts/ghcr-publish-contract.json
artifacts/ghcr-publish-contract.md
artifacts/audit-event-ghcr-publish-contract.json
if-no-files-found: ignore
retention-days: 21
- name: Scan published image for vulnerabilities (Trivy)
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
TAG_NAME="${{ steps.meta.outputs.release_tag }}"
SHA_TAG="${{ steps.meta.outputs.sha_tag }}"
LATEST_TAG="${{ steps.meta.outputs.latest_tag }}"
IMAGE_BASE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}"
VERSION_REF="${IMAGE_BASE}:${TAG_NAME}"
SHA_REF="${IMAGE_BASE}:${SHA_TAG}"
LATEST_REF="${IMAGE_BASE}:${LATEST_TAG}"
SARIF_OUT="artifacts/trivy-${TAG_NAME}.sarif"
TABLE_OUT="artifacts/trivy-${TAG_NAME}.txt"
JSON_OUT="artifacts/trivy-${TAG_NAME}.json"
SHA_TABLE_OUT="artifacts/trivy-${SHA_TAG}.txt"
SHA_JSON_OUT="artifacts/trivy-${SHA_TAG}.json"
LATEST_TABLE_OUT="artifacts/trivy-${LATEST_TAG}.txt"
LATEST_JSON_OUT="artifacts/trivy-${LATEST_TAG}.json"
scan_trivy() {
local image_ref="$1"
local output_prefix="$2"
docker run --rm \
-v "$PWD/artifacts:/work" \
aquasec/trivy:0.58.2 image \
--quiet \
--ignore-unfixed \
--severity HIGH,CRITICAL \
--format json \
--output "/work/${output_prefix}.json" \
"${image_ref}"
docker run --rm \
-v "$PWD/artifacts:/work" \
aquasec/trivy:0.58.2 image \
--quiet \
--ignore-unfixed \
--severity HIGH,CRITICAL \
--format table \
--output "/work/${output_prefix}.txt" \
"${image_ref}"
}
docker run --rm \
-v "$PWD/artifacts:/work" \
aquasec/trivy:0.58.2 image \
--quiet \
--ignore-unfixed \
--severity HIGH,CRITICAL \
--format sarif \
--output "/work/trivy-${TAG_NAME}.sarif" \
"${VERSION_REF}"
scan_trivy "${VERSION_REF}" "trivy-${TAG_NAME}"
scan_trivy "${SHA_REF}" "trivy-${SHA_TAG}"
scan_trivy "${LATEST_REF}" "trivy-${LATEST_TAG}"
echo "Generated Trivy reports:"
ls -1 "$SARIF_OUT" "$TABLE_OUT" "$JSON_OUT" "$SHA_TABLE_OUT" "$SHA_JSON_OUT" "$LATEST_TABLE_OUT" "$LATEST_JSON_OUT"
- name: Validate GHCR vulnerability gate
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/ghcr_vulnerability_gate.py \
--release-tag "${{ steps.meta.outputs.release_tag }}" \
--sha-tag "${{ steps.meta.outputs.sha_tag }}" \
--latest-tag "${{ steps.meta.outputs.latest_tag }}" \
--release-report-json "artifacts/trivy-${{ steps.meta.outputs.release_tag }}.json" \
--sha-report-json "artifacts/trivy-${{ steps.meta.outputs.sha_tag }}.json" \
--latest-report-json "artifacts/trivy-${{ steps.meta.outputs.latest_tag }}.json" \
--policy-file .github/release/ghcr-vulnerability-policy.json \
--output-json artifacts/ghcr-vulnerability-gate.json \
--output-md artifacts/ghcr-vulnerability-gate.md \
--fail-on-violation
- name: Emit GHCR vulnerability gate audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/ghcr-vulnerability-gate.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type ghcr_vulnerability_gate \
--input-json artifacts/ghcr-vulnerability-gate.json \
--output-json artifacts/audit-event-ghcr-vulnerability-gate.json \
--artifact-name ghcr-vulnerability-gate \
--retention-days 21
fi
- name: Publish GHCR vulnerability summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/ghcr-vulnerability-gate.md ]; then
cat artifacts/ghcr-vulnerability-gate.md >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload GHCR vulnerability gate artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ghcr-vulnerability-gate
path: |
artifacts/ghcr-vulnerability-gate.json
artifacts/ghcr-vulnerability-gate.md
artifacts/audit-event-ghcr-vulnerability-gate.json
if-no-files-found: ignore
retention-days: 21
- name: Detect Trivy SARIF report
id: trivy-sarif
if: always()
shell: bash
run: |
set -euo pipefail
sarif_path="artifacts/trivy-${{ steps.meta.outputs.release_tag }}.sarif"
if [ -f "${sarif_path}" ]; then
echo "exists=true" >> "$GITHUB_OUTPUT"
else
echo "exists=false" >> "$GITHUB_OUTPUT"
echo "::notice::Trivy SARIF report not found at ${sarif_path}; skipping SARIF upload."
fi
- name: Upload Trivy SARIF
if: always() && steps.trivy-sarif.outputs.exists == 'true'
uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4
with:
sarif_file: artifacts/trivy-${{ steps.meta.outputs.release_tag }}.sarif
category: ghcr-trivy
- name: Upload Trivy report artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ghcr-trivy-report
path: |
artifacts/trivy-${{ steps.meta.outputs.release_tag }}.sarif
artifacts/trivy-${{ steps.meta.outputs.release_tag }}.txt
artifacts/trivy-${{ steps.meta.outputs.release_tag }}.json
artifacts/trivy-sha-*.txt
artifacts/trivy-sha-*.json
artifacts/trivy-latest.txt
artifacts/trivy-latest.json
if-no-files-found: ignore
retention-days: 14

View File

@ -1,228 +0,0 @@
name: Pub Homebrew Core
on:
workflow_call:
inputs:
release_tag:
description: "Existing release tag to publish (vX.Y.Z)"
required: true
type: string
dry_run:
description: "Patch formula only (no push/PR)"
required: false
default: false
type: boolean
secrets:
HOMEBREW_UPSTREAM_PR_TOKEN:
required: false
HOMEBREW_CORE_BOT_TOKEN:
required: false
workflow_dispatch:
inputs:
release_tag:
description: "Existing release tag to publish (vX.Y.Z)"
required: true
type: string
dry_run:
description: "Patch formula only (no push/PR)"
required: false
default: true
type: boolean
concurrency:
group: homebrew-core-${{ github.run_id }}
cancel-in-progress: false
permissions:
contents: read
jobs:
publish-homebrew-core:
name: Publish Homebrew Core PR
runs-on: ubuntu-latest
env:
UPSTREAM_REPO: Homebrew/homebrew-core
FORMULA_PATH: Formula/z/zeroclaw.rb
RELEASE_TAG: ${{ inputs.release_tag }}
DRY_RUN: ${{ inputs.dry_run }}
BOT_FORK_REPO: ${{ vars.HOMEBREW_CORE_BOT_FORK_REPO }}
BOT_EMAIL: ${{ vars.HOMEBREW_CORE_BOT_EMAIL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Validate release tag and version alignment
id: release_meta
shell: bash
run: |
set -euo pipefail
semver_pattern='^v[0-9]+\.[0-9]+\.[0-9]+([.-][0-9A-Za-z.-]+)?$'
if [[ ! "$RELEASE_TAG" =~ $semver_pattern ]]; then
echo "::error::release_tag must match semver-like format (vX.Y.Z[-suffix])."
exit 1
fi
if ! git rev-parse "refs/tags/${RELEASE_TAG}" >/dev/null 2>&1; then
git fetch --tags origin
fi
tag_version="${RELEASE_TAG#v}"
cargo_version="$(git show "${RELEASE_TAG}:Cargo.toml" \
| sed -n 's/^version = "\([^"]*\)"/\1/p' | head -n1)"
if [[ -z "$cargo_version" ]]; then
echo "::error::Unable to read Cargo.toml version from tag ${RELEASE_TAG}."
exit 1
fi
if [[ "$cargo_version" != "$tag_version" ]]; then
echo "::error::Tag ${RELEASE_TAG} does not match Cargo.toml version (${cargo_version})."
exit 1
fi
tarball_url="https://github.com/${GITHUB_REPOSITORY}/archive/refs/tags/${RELEASE_TAG}.tar.gz"
tarball_sha="$(curl -fsSL "$tarball_url" | sha256sum | awk '{print $1}')"
{
echo "tag_version=$tag_version"
echo "tarball_url=$tarball_url"
echo "tarball_sha=$tarball_sha"
} >> "$GITHUB_OUTPUT"
{
echo "### Release Metadata"
echo "- release_tag: \`${RELEASE_TAG}\`"
echo "- cargo_version: \`${cargo_version}\`"
echo "- tarball_sha256: \`${tarball_sha}\`"
echo "- dry_run: ${DRY_RUN}"
} >> "$GITHUB_STEP_SUMMARY"
- name: Patch Homebrew formula
id: patch_formula
shell: bash
env:
HOMEBREW_CORE_BOT_TOKEN: ${{ secrets.HOMEBREW_UPSTREAM_PR_TOKEN || secrets.HOMEBREW_CORE_BOT_TOKEN }}
GH_TOKEN: ${{ secrets.HOMEBREW_UPSTREAM_PR_TOKEN || secrets.HOMEBREW_CORE_BOT_TOKEN }}
run: |
set -euo pipefail
tmp_repo="$(mktemp -d)"
echo "tmp_repo=$tmp_repo" >> "$GITHUB_OUTPUT"
if [[ "$DRY_RUN" == "true" ]]; then
git clone --depth=1 "https://github.com/${UPSTREAM_REPO}.git" "$tmp_repo/homebrew-core"
else
if [[ -z "${BOT_FORK_REPO}" ]]; then
echo "::error::Repository variable HOMEBREW_CORE_BOT_FORK_REPO is required when dry_run=false."
exit 1
fi
if [[ -z "${HOMEBREW_CORE_BOT_TOKEN}" ]]; then
echo "::error::Repository secret HOMEBREW_CORE_BOT_TOKEN is required when dry_run=false."
exit 1
fi
if [[ "$BOT_FORK_REPO" != */* ]]; then
echo "::error::HOMEBREW_CORE_BOT_FORK_REPO must be in owner/repo format."
exit 1
fi
if ! gh api "repos/${BOT_FORK_REPO}" >/dev/null 2>&1; then
echo "::error::HOMEBREW_CORE_BOT_TOKEN cannot access ${BOT_FORK_REPO}."
exit 1
fi
gh repo clone "${BOT_FORK_REPO}" "$tmp_repo/homebrew-core" -- --depth=1
fi
repo_dir="$tmp_repo/homebrew-core"
formula_file="$repo_dir/$FORMULA_PATH"
if [[ ! -f "$formula_file" ]]; then
echo "::error::Formula file not found: $FORMULA_PATH"
exit 1
fi
if [[ "$DRY_RUN" == "false" ]]; then
if git -C "$repo_dir" remote get-url upstream >/dev/null 2>&1; then
git -C "$repo_dir" remote set-url upstream "https://github.com/${UPSTREAM_REPO}.git"
else
git -C "$repo_dir" remote add upstream "https://github.com/${UPSTREAM_REPO}.git"
fi
if git -C "$repo_dir" ls-remote --exit-code --heads upstream main >/dev/null 2>&1; then
upstream_ref="main"
else
upstream_ref="master"
fi
git -C "$repo_dir" fetch --depth=1 upstream "$upstream_ref"
branch_name="zeroclaw-${RELEASE_TAG}-${GITHUB_RUN_ID}"
git -C "$repo_dir" checkout -B "$branch_name" "upstream/$upstream_ref"
echo "branch_name=$branch_name" >> "$GITHUB_OUTPUT"
fi
tarball_url="$(grep 'tarball_url=' "$GITHUB_OUTPUT" | head -1 | cut -d= -f2-)"
tarball_sha="$(grep 'tarball_sha=' "$GITHUB_OUTPUT" | head -1 | cut -d= -f2-)"
perl -0pi -e "s|^ url \".*\"| url \"${tarball_url}\"|m" "$formula_file"
perl -0pi -e "s|^ sha256 \".*\"| sha256 \"${tarball_sha}\"|m" "$formula_file"
perl -0pi -e "s|^ license \".*\"| license \"Apache-2.0 OR MIT\"|m" "$formula_file"
# Ensure Node.js build dependency is declared so that build.rs can
# run `npm ci && npm run build` to produce the web frontend assets.
if ! grep -q 'depends_on "node" => :build' "$formula_file"; then
perl -0pi -e 's|( depends_on "rust" => :build\n)|\1 depends_on "node" => :build\n|m' "$formula_file"
fi
git -C "$repo_dir" diff -- "$FORMULA_PATH" > "$tmp_repo/formula.diff"
if [[ ! -s "$tmp_repo/formula.diff" ]]; then
echo "::error::No formula changes generated. Nothing to publish."
exit 1
fi
{
echo "### Formula Diff"
echo '```diff'
cat "$tmp_repo/formula.diff"
echo '```'
} >> "$GITHUB_STEP_SUMMARY"
- name: Push branch and open Homebrew PR
if: inputs.dry_run == false
shell: bash
env:
GH_TOKEN: ${{ secrets.HOMEBREW_UPSTREAM_PR_TOKEN || secrets.HOMEBREW_CORE_BOT_TOKEN }}
TMP_REPO: ${{ steps.patch_formula.outputs.tmp_repo }}
BRANCH_NAME: ${{ steps.patch_formula.outputs.branch_name }}
TAG_VERSION: ${{ steps.release_meta.outputs.tag_version }}
TARBALL_URL: ${{ steps.release_meta.outputs.tarball_url }}
TARBALL_SHA: ${{ steps.release_meta.outputs.tarball_sha }}
run: |
set -euo pipefail
repo_dir="${TMP_REPO}/homebrew-core"
fork_owner="${BOT_FORK_REPO%%/*}"
bot_email="${BOT_EMAIL:-${fork_owner}@users.noreply.github.com}"
git -C "$repo_dir" config user.name "$fork_owner"
git -C "$repo_dir" config user.email "$bot_email"
git -C "$repo_dir" add "$FORMULA_PATH"
git -C "$repo_dir" commit -m "zeroclaw ${TAG_VERSION}"
gh auth setup-git
git -C "$repo_dir" push --set-upstream origin "$BRANCH_NAME"
pr_body="Automated formula bump from ZeroClaw release workflow.
- Release tag: ${RELEASE_TAG}
- Source tarball: ${TARBALL_URL}
- Source sha256: ${TARBALL_SHA}"
gh pr create \
--repo "$UPSTREAM_REPO" \
--base main \
--head "${fork_owner}:${BRANCH_NAME}" \
--title "zeroclaw ${TAG_VERSION}" \
--body "$pr_body"
- name: Summary
shell: bash
run: |
if [[ "$DRY_RUN" == "true" ]]; then
echo "Dry run complete: formula diff generated, no push/PR performed."
else
echo "Publish complete: branch pushed and PR opened from bot fork."
fi

261
.github/workflows/pub-prerelease.yml vendored Normal file
View File

@ -0,0 +1,261 @@
name: Pub Pre-release
on:
push:
tags:
- "v*-alpha.*"
- "v*-beta.*"
- "v*-rc.*"
workflow_dispatch:
inputs:
tag:
description: "Existing pre-release tag (e.g. v0.1.8-rc.1)"
required: true
default: ""
type: string
mode:
description: "dry-run validates/builds only; publish creates prerelease"
required: true
default: dry-run
type: choice
options:
- dry-run
- publish
draft:
description: "Create prerelease as draft"
required: true
default: true
type: boolean
concurrency:
group: prerelease-${{ github.ref || github.run_id }}
cancel-in-progress: false
permissions:
contents: write
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
prerelease-guard:
name: Pre-release Guard
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
outputs:
release_tag: ${{ steps.vars.outputs.release_tag }}
mode: ${{ steps.vars.outputs.mode }}
draft: ${{ steps.vars.outputs.draft }}
ready_to_publish: ${{ steps.extract.outputs.ready_to_publish }}
stage: ${{ steps.extract.outputs.stage }}
transition_outcome: ${{ steps.extract.outputs.transition_outcome }}
latest_stage: ${{ steps.extract.outputs.latest_stage }}
latest_stage_tag: ${{ steps.extract.outputs.latest_stage_tag }}
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Resolve prerelease inputs
id: vars
shell: bash
run: |
set -euo pipefail
if [ "${GITHUB_EVENT_NAME}" = "push" ]; then
release_tag="${GITHUB_REF_NAME}"
mode="publish"
draft="false"
else
release_tag="${{ inputs.tag }}"
mode="${{ inputs.mode }}"
draft="${{ inputs.draft }}"
fi
{
echo "release_tag=${release_tag}"
echo "mode=${mode}"
echo "draft=${draft}"
} >> "$GITHUB_OUTPUT"
- name: Validate prerelease stage gate
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/prerelease_guard.py \
--repo-root . \
--tag "${{ steps.vars.outputs.release_tag }}" \
--stage-config-file .github/release/prerelease-stage-gates.json \
--mode "${{ steps.vars.outputs.mode }}" \
--output-json artifacts/prerelease-guard.json \
--output-md artifacts/prerelease-guard.md \
--fail-on-violation
- name: Extract prerelease outputs
id: extract
shell: bash
run: |
set -euo pipefail
ready_to_publish="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/prerelease-guard.json', encoding='utf-8'))
print(str(bool(data.get('ready_to_publish', False))).lower())
PY
)"
stage="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/prerelease-guard.json', encoding='utf-8'))
print(data.get('stage', 'unknown'))
PY
)"
transition_outcome="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/prerelease-guard.json', encoding='utf-8'))
transition = data.get('transition') or {}
print(transition.get('outcome', 'unknown'))
PY
)"
latest_stage="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/prerelease-guard.json', encoding='utf-8'))
history = data.get('stage_history') or {}
print(history.get('latest_stage', 'unknown'))
PY
)"
latest_stage_tag="$(python3 - <<'PY'
import json
data = json.load(open('artifacts/prerelease-guard.json', encoding='utf-8'))
history = data.get('stage_history') or {}
print(history.get('latest_tag', 'unknown'))
PY
)"
{
echo "ready_to_publish=${ready_to_publish}"
echo "stage=${stage}"
echo "transition_outcome=${transition_outcome}"
echo "latest_stage=${latest_stage}"
echo "latest_stage_tag=${latest_stage_tag}"
} >> "$GITHUB_OUTPUT"
- name: Emit prerelease audit event
if: always()
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/emit_audit_event.py \
--event-type prerelease_guard \
--input-json artifacts/prerelease-guard.json \
--output-json artifacts/audit-event-prerelease-guard.json \
--artifact-name prerelease-guard \
--retention-days 21
- name: Publish prerelease summary
if: always()
shell: bash
run: |
set -euo pipefail
cat artifacts/prerelease-guard.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload prerelease guard artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: prerelease-guard
path: |
artifacts/prerelease-guard.json
artifacts/prerelease-guard.md
artifacts/audit-event-prerelease-guard.json
if-no-files-found: error
retention-days: 21
build-prerelease:
name: Build Pre-release Artifact
needs: [prerelease-guard]
# Keep GNU Linux prerelease artifacts on Ubuntu 22.04 so runtime GLIBC
# symbols remain compatible with Debian 12 / Ubuntu 22.04 hosts.
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 45
steps:
- name: Checkout tag
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ needs.prerelease-guard.outputs.release_tag }}
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: prerelease-${{ needs.prerelease-guard.outputs.release_tag }}
cache-targets: true
- name: Build release-fast binary
shell: bash
run: |
set -euo pipefail
cargo build --profile release-fast --locked --target x86_64-unknown-linux-gnu
- name: Package prerelease artifact
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
cp target/x86_64-unknown-linux-gnu/release-fast/zeroclaw artifacts/zeroclaw
tar czf artifacts/zeroclaw-x86_64-unknown-linux-gnu.tar.gz -C artifacts zeroclaw
rm artifacts/zeroclaw
- name: Generate manifest + checksums
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/release_manifest.py \
--artifacts-dir artifacts \
--release-tag "${{ needs.prerelease-guard.outputs.release_tag }}" \
--output-json artifacts/prerelease-manifest.json \
--output-md artifacts/prerelease-manifest.md \
--checksums-path artifacts/SHA256SUMS \
--fail-empty
- name: Publish prerelease build summary
shell: bash
run: |
set -euo pipefail
cat artifacts/prerelease-manifest.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload prerelease build artifacts
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: prerelease-artifacts
path: artifacts/*
if-no-files-found: error
retention-days: 14
publish-prerelease:
name: Publish GitHub Pre-release
needs: [prerelease-guard, build-prerelease]
if: needs.prerelease-guard.outputs.ready_to_publish == 'true'
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 15
steps:
- name: Download prerelease artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
name: prerelease-artifacts
path: artifacts
- name: Create or update GitHub pre-release
uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2
with:
tag_name: ${{ needs.prerelease-guard.outputs.release_tag }}
prerelease: true
draft: ${{ needs.prerelease-guard.outputs.draft == 'true' }}
generate_release_notes: true
files: |
artifacts/**/*
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

722
.github/workflows/pub-release.yml vendored Normal file
View File

@ -0,0 +1,722 @@
name: Pub Release
on:
push:
tags: ["v*"]
workflow_dispatch:
inputs:
release_ref:
description: "Git ref (branch, tag, or SHA) to build"
required: false
default: "main"
type: string
publish_release:
description: "Publish a GitHub release (false = verification build only)"
required: false
default: false
type: boolean
release_tag:
description: "Existing release tag (required when publish_release=true), e.g. v0.1.1"
required: false
default: ""
type: string
draft:
description: "Create release as draft (manual publish only)"
required: false
default: true
type: boolean
schedule:
# Weekly release-readiness verification on default branch (no publish)
- cron: "17 8 * * 1"
concurrency:
group: release-${{ github.ref || github.run_id }}
cancel-in-progress: false
permissions:
contents: write
packages: read
id-token: write # Required for cosign keyless signing via OIDC
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
prepare:
name: Prepare Release Context
if: github.event_name != 'push' || !contains(github.ref_name, '-')
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
outputs:
release_ref: ${{ steps.vars.outputs.release_ref }}
release_tag: ${{ steps.vars.outputs.release_tag }}
publish_release: ${{ steps.vars.outputs.publish_release }}
draft_release: ${{ steps.vars.outputs.draft_release }}
steps:
- name: Resolve release inputs
id: vars
shell: bash
run: |
set -euo pipefail
event_name="${GITHUB_EVENT_NAME}"
publish_release="false"
draft_release="false"
if [[ "$event_name" == "push" ]]; then
release_ref="${GITHUB_REF_NAME}"
release_tag="${GITHUB_REF_NAME}"
publish_release="true"
elif [[ "$event_name" == "workflow_dispatch" ]]; then
release_ref="${{ inputs.release_ref }}"
publish_release="${{ inputs.publish_release }}"
draft_release="${{ inputs.draft }}"
if [[ "$publish_release" == "true" ]]; then
release_tag="${{ inputs.release_tag }}"
if [[ -z "$release_tag" ]]; then
echo "::error::release_tag is required when publish_release=true"
exit 1
fi
release_ref="$release_tag"
else
release_tag="verify-${GITHUB_SHA::12}"
fi
else
# schedule
release_ref="main"
release_tag="verify-${GITHUB_SHA::12}"
fi
{
echo "release_ref=${release_ref}"
echo "release_tag=${release_tag}"
echo "publish_release=${publish_release}"
echo "draft_release=${draft_release}"
} >> "$GITHUB_OUTPUT"
{
echo "### Release Context"
echo "- event: ${event_name}"
echo "- release_ref: ${release_ref}"
echo "- release_tag: ${release_tag}"
echo "- publish_release: ${publish_release}"
echo "- draft_release: ${draft_release}"
} >> "$GITHUB_STEP_SUMMARY"
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Install gh CLI
shell: bash
run: |
set -euo pipefail
if command -v gh &>/dev/null; then
echo "gh already available: $(gh --version | head -1)"
exit 0
fi
echo "Installing gh CLI..."
curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
| sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
| sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
for i in {1..60}; do
if sudo fuser /var/lib/apt/lists/lock >/dev/null 2>&1 \
|| sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 \
|| sudo fuser /var/lib/dpkg/lock >/dev/null 2>&1; then
echo "apt/dpkg locked; waiting ($i/60)..."
sleep 5
else
break
fi
done
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 update -qq
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 install -y gh
env:
GH_TOKEN: ${{ github.token }}
- name: Validate release trigger and authorization guard
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/release_trigger_guard.py \
--repo-root . \
--repository "${GITHUB_REPOSITORY}" \
--event-name "${GITHUB_EVENT_NAME}" \
--actor "${GITHUB_ACTOR}" \
--release-ref "${{ steps.vars.outputs.release_ref }}" \
--release-tag "${{ steps.vars.outputs.release_tag }}" \
--publish-release "${{ steps.vars.outputs.publish_release }}" \
--authorized-actors "${{ vars.RELEASE_AUTHORIZED_ACTORS || 'willsarg,theonlyhennygod,chumyin' }}" \
--authorized-tagger-emails "${{ vars.RELEASE_AUTHORIZED_TAGGER_EMAILS || '' }}" \
--require-annotated-tag true \
--output-json artifacts/release-trigger-guard.json \
--output-md artifacts/release-trigger-guard.md \
--fail-on-violation
env:
GH_TOKEN: ${{ github.token }}
- name: Emit release trigger audit event
if: always()
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/emit_audit_event.py \
--event-type release_trigger_guard \
--input-json artifacts/release-trigger-guard.json \
--output-json artifacts/audit-event-release-trigger-guard.json \
--artifact-name release-trigger-guard \
--retention-days 30
- name: Publish release trigger guard summary
if: always()
shell: bash
run: |
set -euo pipefail
cat artifacts/release-trigger-guard.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload release trigger guard artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: release-trigger-guard
path: |
artifacts/release-trigger-guard.json
artifacts/release-trigger-guard.md
artifacts/audit-event-release-trigger-guard.json
if-no-files-found: error
retention-days: 30
build-release:
name: Build ${{ matrix.target }}
needs: [prepare]
runs-on: ${{ matrix.os }}
timeout-minutes: 40
env:
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}-${{ matrix.target }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}-${{ matrix.target }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/target
strategy:
fail-fast: false
matrix:
include:
# Keep GNU Linux release artifacts on Ubuntu 22.04 to preserve
# a broadly compatible GLIBC baseline for user distributions.
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: x86_64-unknown-linux-gnu
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: x86_64-unknown-linux-musl
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
use_cross: true
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: aarch64-unknown-linux-gnu
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: gcc-aarch64-linux-gnu
linker_env: CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER
linker: aarch64-linux-gnu-gcc
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: aarch64-unknown-linux-musl
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
use_cross: true
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: armv7-unknown-linux-gnueabihf
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: gcc-arm-linux-gnueabihf
linker_env: CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER
linker: arm-linux-gnueabihf-gcc
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: armv7-linux-androideabi
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
android_ndk: true
android_api: 21
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: aarch64-linux-android
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
android_ndk: true
android_api: 21
- os: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
target: x86_64-unknown-freebsd
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
use_cross: true
- os: macos-15-intel
target: x86_64-apple-darwin
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
- os: macos-14
target: aarch64-apple-darwin
artifact: zeroclaw
archive_ext: tar.gz
cross_compiler: ""
linker_env: ""
linker: ""
- os: windows-latest
target: x86_64-pc-windows-msvc
artifact: zeroclaw.exe
archive_ext: zip
cross_compiler: ""
linker_env: ""
linker: ""
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ needs.prepare.outputs.release_ref }}
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
if: runner.os != 'Windows'
- name: Install cross for cross-built targets
if: matrix.use_cross
shell: bash
run: |
set -euo pipefail
echo "${CARGO_HOME:-$HOME/.cargo}/bin" >> "$GITHUB_PATH"
cargo install cross --locked --version 0.2.5
command -v cross
cross --version
- name: Install cross-compilation toolchain (Linux)
if: runner.os == 'Linux' && matrix.cross_compiler != ''
run: |
set -euo pipefail
for i in {1..60}; do
if sudo fuser /var/lib/apt/lists/lock >/dev/null 2>&1 \
|| sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 \
|| sudo fuser /var/lib/dpkg/lock >/dev/null 2>&1; then
echo "apt/dpkg locked; waiting ($i/60)..."
sleep 5
else
break
fi
done
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 update -qq
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 install -y "${{ matrix.cross_compiler }}"
# Install matching libc dev headers for cross targets
# (required by ring/aws-lc-sys C compilation)
case "${{ matrix.target }}" in
armv7-unknown-linux-gnueabihf)
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 install -y libc6-dev-armhf-cross ;;
aarch64-unknown-linux-gnu)
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 install -y libc6-dev-arm64-cross ;;
esac
- name: Setup Android NDK
if: matrix.android_ndk
shell: bash
run: |
set -euo pipefail
NDK_VERSION="r26d"
NDK_ZIP="android-ndk-${NDK_VERSION}-linux.zip"
NDK_URL="https://dl.google.com/android/repository/${NDK_ZIP}"
NDK_ROOT="${RUNNER_TEMP}/android-ndk"
NDK_HOME="${NDK_ROOT}/android-ndk-${NDK_VERSION}"
for i in {1..60}; do
if sudo fuser /var/lib/apt/lists/lock >/dev/null 2>&1 \
|| sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 \
|| sudo fuser /var/lib/dpkg/lock >/dev/null 2>&1; then
echo "apt/dpkg locked; waiting ($i/60)..."
sleep 5
else
break
fi
done
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 update -qq
sudo apt-get -o DPkg::Lock::Timeout=600 -o Acquire::Retries=3 install -y unzip
mkdir -p "${NDK_ROOT}"
curl -fsSL "${NDK_URL}" -o "${RUNNER_TEMP}/${NDK_ZIP}"
unzip -q "${RUNNER_TEMP}/${NDK_ZIP}" -d "${NDK_ROOT}"
echo "ANDROID_NDK_HOME=${NDK_HOME}" >> "$GITHUB_ENV"
echo "${NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin" >> "$GITHUB_PATH"
- name: Configure Android toolchain
if: matrix.android_ndk
shell: bash
run: |
echo "Setting up Android NDK toolchain for ${{ matrix.target }}"
NDK_HOME="${ANDROID_NDK_HOME:-}"
if [[ -z "$NDK_HOME" ]]; then
echo "::error::ANDROID_NDK_HOME was not configured."
exit 1
fi
TOOLCHAIN="$NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin"
# Add to path for linker resolution
echo "$TOOLCHAIN" >> "$GITHUB_PATH"
# Set linker environment variables
if [[ "${{ matrix.target }}" == "armv7-linux-androideabi" ]]; then
ARMV7_CC="${TOOLCHAIN}/armv7a-linux-androideabi${{ matrix.android_api }}-clang"
ARMV7_CXX="${TOOLCHAIN}/armv7a-linux-androideabi${{ matrix.android_api }}-clang++"
# Some crates still probe legacy compiler names (arm-linux-androideabi-clang).
ln -sf "$ARMV7_CC" "${TOOLCHAIN}/arm-linux-androideabi-clang"
ln -sf "$ARMV7_CXX" "${TOOLCHAIN}/arm-linux-androideabi-clang++"
{
echo "CARGO_TARGET_ARMV7_LINUX_ANDROIDEABI_LINKER=${ARMV7_CC}"
echo "CC_armv7_linux_androideabi=${ARMV7_CC}"
echo "CXX_armv7_linux_androideabi=${ARMV7_CXX}"
echo "AR_armv7_linux_androideabi=${TOOLCHAIN}/llvm-ar"
} >> "$GITHUB_ENV"
elif [[ "${{ matrix.target }}" == "aarch64-linux-android" ]]; then
AARCH64_CC="${TOOLCHAIN}/aarch64-linux-android${{ matrix.android_api }}-clang"
AARCH64_CXX="${TOOLCHAIN}/aarch64-linux-android${{ matrix.android_api }}-clang++"
{
echo "CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=${AARCH64_CC}"
echo "CC_aarch64_linux_android=${AARCH64_CC}"
echo "CXX_aarch64_linux_android=${AARCH64_CXX}"
echo "AR_aarch64_linux_android=${TOOLCHAIN}/llvm-ar"
} >> "$GITHUB_ENV"
fi
- name: Build release
shell: bash
env:
LINKER_ENV: ${{ matrix.linker_env }}
LINKER: ${{ matrix.linker }}
USE_CROSS: ${{ matrix.use_cross }}
run: |
if [ -n "$LINKER_ENV" ] && [ -n "$LINKER" ]; then
echo "Using linker override: $LINKER_ENV=$LINKER"
export "$LINKER_ENV=$LINKER"
fi
if [ "$USE_CROSS" = "true" ]; then
echo "Using cross for MUSL target"
cross build --profile release-fast --locked --target ${{ matrix.target }}
else
cargo build --profile release-fast --locked --target ${{ matrix.target }}
fi
- name: Check binary size (Unix)
if: runner.os != 'Windows'
env:
BINARY_SIZE_HARD_LIMIT_MB: 28
BINARY_SIZE_ADVISORY_MB: 20
BINARY_SIZE_TARGET_MB: 5
run: bash scripts/ci/check_binary_size.sh "target/${{ matrix.target }}/release-fast/${{ matrix.artifact }}" "${{ matrix.target }}"
- name: Package (Unix)
if: runner.os != 'Windows'
run: |
cd target/${{ matrix.target }}/release-fast
tar czf ../../../zeroclaw-${{ matrix.target }}.${{ matrix.archive_ext }} ${{ matrix.artifact }}
- name: Package (Windows)
if: runner.os == 'Windows'
run: |
cd target/${{ matrix.target }}/release-fast
7z a ../../../zeroclaw-${{ matrix.target }}.${{ matrix.archive_ext }} ${{ matrix.artifact }}
- name: Upload artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
with:
name: zeroclaw-${{ matrix.target }}
path: zeroclaw-${{ matrix.target }}.${{ matrix.archive_ext }}
retention-days: 7
verify-artifacts:
name: Verify Artifact Set
needs: [prepare, build-release]
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ needs.prepare.outputs.release_ref }}
- name: Download all artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifacts
- name: Validate release archive contract (verify stage)
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/release_artifact_guard.py \
--artifacts-dir artifacts \
--contract-file .github/release/release-artifact-contract.json \
--output-json artifacts/release-artifact-guard.verify.json \
--output-md artifacts/release-artifact-guard.verify.md \
--allow-extra-archives \
--skip-manifest-files \
--skip-sbom-files \
--skip-notice-files \
--fail-on-violation
- name: Emit verify-stage artifact guard audit event
if: always()
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/emit_audit_event.py \
--event-type release_artifact_guard_verify \
--input-json artifacts/release-artifact-guard.verify.json \
--output-json artifacts/audit-event-release-artifact-guard-verify.json \
--artifact-name release-artifact-guard-verify \
--retention-days 21
- name: Publish verify-stage artifact guard summary
if: always()
shell: bash
run: |
set -euo pipefail
cat artifacts/release-artifact-guard.verify.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload verify-stage artifact guard reports
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: release-artifact-guard-verify
path: |
artifacts/release-artifact-guard.verify.json
artifacts/release-artifact-guard.verify.md
artifacts/audit-event-release-artifact-guard-verify.json
if-no-files-found: error
retention-days: 21
publish:
name: Publish Release
if: needs.prepare.outputs.publish_release == 'true'
needs: [prepare, verify-artifacts]
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 45
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ needs.prepare.outputs.release_ref }}
- name: Download all artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifacts
- name: Install syft
shell: bash
run: |
set -euo pipefail
mkdir -p "${RUNNER_TEMP}/bin"
./scripts/ci/install_syft.sh "${RUNNER_TEMP}/bin"
echo "${RUNNER_TEMP}/bin" >> "$GITHUB_PATH"
- name: Generate SBOM (CycloneDX)
run: |
syft dir:. --source-name zeroclaw -o cyclonedx-json=artifacts/zeroclaw.cdx.json -o spdx-json=artifacts/zeroclaw.spdx.json
{
echo "### SBOM Generated"
echo "- CycloneDX: zeroclaw.cdx.json"
echo "- SPDX: zeroclaw.spdx.json"
} >> "$GITHUB_STEP_SUMMARY"
- name: Attach license and notice files
run: |
cp LICENSE-APACHE artifacts/LICENSE-APACHE
cp LICENSE-MIT artifacts/LICENSE-MIT
cp NOTICE artifacts/NOTICE
- name: Generate release manifest + checksums
shell: bash
env:
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
run: |
set -euo pipefail
python3 scripts/ci/release_manifest.py \
--artifacts-dir artifacts \
--release-tag "${RELEASE_TAG}" \
--output-json artifacts/release-manifest.json \
--output-md artifacts/release-manifest.md \
--checksums-path artifacts/SHA256SUMS \
--fail-empty
- name: Generate SHA256SUMS provenance statement
shell: bash
env:
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
run: |
set -euo pipefail
python3 scripts/ci/generate_provenance.py \
--artifact artifacts/SHA256SUMS \
--subject-name "zeroclaw-${RELEASE_TAG}-sha256sums" \
--output artifacts/zeroclaw.sha256sums.intoto.json
- name: Emit SHA256SUMS provenance audit event
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/emit_audit_event.py \
--event-type release_sha256sums_provenance \
--input-json artifacts/zeroclaw.sha256sums.intoto.json \
--output-json artifacts/audit-event-release-sha256sums-provenance.json \
--artifact-name release-sha256sums-provenance \
--retention-days 30
- name: Validate release artifact contract (publish stage)
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/release_artifact_guard.py \
--artifacts-dir artifacts \
--contract-file .github/release/release-artifact-contract.json \
--output-json artifacts/release-artifact-guard.publish.json \
--output-md artifacts/release-artifact-guard.publish.md \
--allow-extra-archives \
--allow-extra-manifest-files \
--allow-extra-sbom-files \
--allow-extra-notice-files \
--fail-on-violation
- name: Emit publish-stage artifact guard audit event
if: always()
shell: bash
run: |
set -euo pipefail
python3 scripts/ci/emit_audit_event.py \
--event-type release_artifact_guard_publish \
--input-json artifacts/release-artifact-guard.publish.json \
--output-json artifacts/audit-event-release-artifact-guard-publish.json \
--artifact-name release-artifact-guard-publish \
--retention-days 30
- name: Publish artifact guard summary
shell: bash
run: |
set -euo pipefail
cat artifacts/release-artifact-guard.publish.md >> "$GITHUB_STEP_SUMMARY"
- name: Publish release manifest summary
shell: bash
run: |
set -euo pipefail
cat artifacts/release-manifest.md >> "$GITHUB_STEP_SUMMARY"
- name: Install cosign
uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
- name: Sign artifacts with cosign (keyless)
shell: bash
run: |
set -euo pipefail
while IFS= read -r -d '' file; do
cosign sign-blob --yes \
--bundle="${file}.sigstore.json" \
--output-signature="${file}.sig" \
--output-certificate="${file}.pem" \
"$file"
done < <(find artifacts -type f ! -name '*.sig' ! -name '*.pem' ! -name '*.sigstore.json' -print0)
- name: Compose release-notes supply-chain references
shell: bash
env:
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
run: |
set -euo pipefail
python3 scripts/ci/release_notes_with_supply_chain_refs.py \
--artifacts-dir artifacts \
--repository "${GITHUB_REPOSITORY}" \
--release-tag "${RELEASE_TAG}" \
--output-json artifacts/release-notes-supply-chain.json \
--output-md artifacts/release-notes-supply-chain.md \
--fail-on-missing
- name: Publish release-notes supply-chain summary
shell: bash
run: |
set -euo pipefail
cat artifacts/release-notes-supply-chain.md >> "$GITHUB_STEP_SUMMARY"
- name: Verify GHCR release tag availability
shell: bash
env:
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
run: |
set -euo pipefail
repo="${GITHUB_REPOSITORY,,}"
manifest_url="https://ghcr.io/v2/${repo}/manifests/${RELEASE_TAG}"
accept_header="application/vnd.oci.image.index.v1+json, application/vnd.docker.distribution.manifest.v2+json"
max_attempts=75
sleep_seconds=20
for attempt in $(seq 1 "$max_attempts"); do
token_resp="$(curl -sS "https://ghcr.io/token?scope=repository:${repo}:pull" || true)"
token="$(echo "$token_resp" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')"
if [ -z "$token" ]; then
code="000"
else
code="$(curl -sS -o /tmp/ghcr-release-manifest.json -w "%{http_code}" \
-H "Authorization: Bearer ${token}" \
-H "Accept: ${accept_header}" \
"${manifest_url}" || true)"
fi
if [ "$code" = "200" ]; then
echo "GHCR release tag is available: ${repo}:${RELEASE_TAG}"
exit 0
fi
if [ "$attempt" -lt "$max_attempts" ]; then
echo "Waiting for GHCR tag ${repo}:${RELEASE_TAG} (attempt ${attempt}/${max_attempts}, HTTP ${code})..."
sleep "$sleep_seconds"
fi
done
echo "::error::GHCR tag ${repo}:${RELEASE_TAG} was not available before release publish timeout."
cat /tmp/ghcr-release-manifest.json || true
exit 1
- name: Create GitHub Release
uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2
with:
tag_name: ${{ needs.prepare.outputs.release_tag }}
draft: ${{ needs.prepare.outputs.draft_release == 'true' }}
body_path: artifacts/release-notes-supply-chain.md
generate_release_notes: true
files: |
artifacts/**/*
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@ -1,165 +0,0 @@
name: Pub Scoop Manifest
on:
workflow_call:
inputs:
release_tag:
description: "Existing release tag (vX.Y.Z)"
required: true
type: string
dry_run:
description: "Generate manifest only (no push)"
required: false
default: false
type: boolean
secrets:
SCOOP_BUCKET_TOKEN:
required: false
workflow_dispatch:
inputs:
release_tag:
description: "Existing release tag (vX.Y.Z)"
required: true
type: string
dry_run:
description: "Generate manifest only (no push)"
required: false
default: true
type: boolean
concurrency:
group: scoop-publish-${{ github.run_id }}
cancel-in-progress: false
permissions:
contents: read
jobs:
publish-scoop:
name: Update Scoop Manifest
runs-on: ubuntu-latest
env:
RELEASE_TAG: ${{ inputs.release_tag }}
DRY_RUN: ${{ inputs.dry_run }}
SCOOP_BUCKET_REPO: ${{ vars.SCOOP_BUCKET_REPO }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Validate and compute metadata
id: meta
shell: bash
run: |
set -euo pipefail
if [[ ! "$RELEASE_TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "::error::release_tag must be vX.Y.Z format."
exit 1
fi
version="${RELEASE_TAG#v}"
zip_url="https://github.com/${GITHUB_REPOSITORY}/releases/download/${RELEASE_TAG}/zeroclaw-x86_64-pc-windows-msvc.zip"
sums_url="https://github.com/${GITHUB_REPOSITORY}/releases/download/${RELEASE_TAG}/SHA256SUMS"
sha256="$(curl -fsSL "$sums_url" | grep 'zeroclaw-x86_64-pc-windows-msvc.zip' | awk '{print $1}')"
if [[ -z "$sha256" ]]; then
echo "::error::Could not find Windows binary hash in SHA256SUMS for ${RELEASE_TAG}."
exit 1
fi
{
echo "version=$version"
echo "zip_url=$zip_url"
echo "sha256=$sha256"
} >> "$GITHUB_OUTPUT"
{
echo "### Scoop Manifest Metadata"
echo "- version: \`${version}\`"
echo "- zip_url: \`${zip_url}\`"
echo "- sha256: \`${sha256}\`"
} >> "$GITHUB_STEP_SUMMARY"
- name: Generate manifest
id: manifest
shell: bash
env:
VERSION: ${{ steps.meta.outputs.version }}
ZIP_URL: ${{ steps.meta.outputs.zip_url }}
SHA256: ${{ steps.meta.outputs.sha256 }}
run: |
set -euo pipefail
manifest_file="$(mktemp)"
cat > "$manifest_file" <<MANIFEST
{
"version": "${VERSION}",
"description": "Zero overhead. Zero compromise. 100% Rust. The fastest, smallest AI assistant.",
"homepage": "https://github.com/zeroclaw-labs/zeroclaw",
"license": "MIT|Apache-2.0",
"architecture": {
"64bit": {
"url": "${ZIP_URL}",
"hash": "${SHA256}",
"bin": "zeroclaw.exe"
}
},
"checkver": {
"github": "https://github.com/zeroclaw-labs/zeroclaw"
},
"autoupdate": {
"architecture": {
"64bit": {
"url": "https://github.com/zeroclaw-labs/zeroclaw/releases/download/v\$version/zeroclaw-x86_64-pc-windows-msvc.zip"
}
},
"hash": {
"url": "https://github.com/zeroclaw-labs/zeroclaw/releases/download/v\$version/SHA256SUMS",
"regex": "([a-f0-9]{64})\\\\s+zeroclaw-x86_64-pc-windows-msvc\\\\.zip"
}
}
}
MANIFEST
jq '.' "$manifest_file" > "${manifest_file}.formatted"
mv "${manifest_file}.formatted" "$manifest_file"
echo "manifest_file=$manifest_file" >> "$GITHUB_OUTPUT"
echo "### Generated Manifest" >> "$GITHUB_STEP_SUMMARY"
echo '```json' >> "$GITHUB_STEP_SUMMARY"
cat "$manifest_file" >> "$GITHUB_STEP_SUMMARY"
echo '```' >> "$GITHUB_STEP_SUMMARY"
- name: Push to Scoop bucket
if: inputs.dry_run == false
shell: bash
env:
GH_TOKEN: ${{ secrets.SCOOP_BUCKET_TOKEN }}
MANIFEST_FILE: ${{ steps.manifest.outputs.manifest_file }}
VERSION: ${{ steps.meta.outputs.version }}
run: |
set -euo pipefail
if [[ -z "${SCOOP_BUCKET_REPO}" ]]; then
echo "::error::Repository variable SCOOP_BUCKET_REPO is required (e.g. zeroclaw-labs/scoop-zeroclaw)."
exit 1
fi
tmp_dir="$(mktemp -d)"
gh repo clone "${SCOOP_BUCKET_REPO}" "$tmp_dir/bucket" -- --depth=1
mkdir -p "$tmp_dir/bucket/bucket"
cp "$MANIFEST_FILE" "$tmp_dir/bucket/bucket/zeroclaw.json"
cd "$tmp_dir/bucket"
git config user.name "zeroclaw-bot"
git config user.email "bot@zeroclaw.dev"
git add bucket/zeroclaw.json
git commit -m "zeroclaw ${VERSION}"
gh auth setup-git
git push origin HEAD
echo "Scoop manifest updated to ${VERSION}"

View File

@ -1,160 +0,0 @@
name: Auto-sync crates.io
on:
push:
branches: [master]
paths:
- "Cargo.toml"
concurrency:
group: publish-crates-auto
cancel-in-progress: false
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
jobs:
detect-version-change:
name: Detect Version Bump
if: github.repository == 'zeroclaw-labs/zeroclaw'
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.check.outputs.changed }}
version: ${{ steps.check.outputs.version }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check if version changed
id: check
shell: bash
run: |
set -euo pipefail
current=$(sed -n 's/^version = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
previous=$(git show HEAD~1:Cargo.toml 2>/dev/null | sed -n 's/^version = "\([^"]*\)"/\1/p' | head -1 || echo "")
echo "Current version: ${current}"
echo "Previous version: ${previous}"
# Skip if stable release workflow will handle this version
# (indicated by an existing or imminent stable tag)
if git ls-remote --exit-code --tags origin "refs/tags/v${current}" >/dev/null 2>&1; then
echo "Stable tag v${current} exists — stable release workflow handles crates.io"
echo "changed=false" >> "$GITHUB_OUTPUT"
exit 0
fi
if [[ "$current" != "$previous" && -n "$current" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
echo "version=${current}" >> "$GITHUB_OUTPUT"
echo "Version bumped from ${previous} to ${current} — will publish"
else
echo "changed=false" >> "$GITHUB_OUTPUT"
echo "Version unchanged (${current}) — skipping publish"
fi
check-registry:
name: Check if Already Published
needs: [detect-version-change]
if: needs.detect-version-change.outputs.changed == 'true'
runs-on: ubuntu-latest
outputs:
should_publish: ${{ steps.check.outputs.should_publish }}
steps:
- name: Check crates.io for existing version
id: check
shell: bash
env:
VERSION: ${{ needs.detect-version-change.outputs.version }}
run: |
set -euo pipefail
status=$(curl -s -o /dev/null -w "%{http_code}" \
"https://crates.io/api/v1/crates/zeroclawlabs/${VERSION}")
if [[ "$status" == "200" ]]; then
echo "Version ${VERSION} already exists on crates.io — skipping"
echo "should_publish=false" >> "$GITHUB_OUTPUT"
else
echo "Version ${VERSION} not yet published — proceeding"
echo "should_publish=true" >> "$GITHUB_OUTPUT"
fi
publish:
name: Publish to crates.io
needs: [detect-version-change, check-registry]
if: needs.check-registry.outputs.should_publish == 'true'
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: web/package-lock.json
- name: Build web dashboard
run: cd web && npm ci && npm run build
- name: Clean web build artifacts
run: rm -rf web/node_modules web/src web/package.json web/package-lock.json web/tsconfig*.json web/vite.config.ts web/index.html
- name: Publish aardvark-sys to crates.io
shell: bash
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: |
OUTPUT=$(cargo publish --locked --allow-dirty --no-verify -p aardvark-sys 2>&1) && exit 0
echo "$OUTPUT"
if echo "$OUTPUT" | grep -q 'already exists'; then
echo "::notice::aardvark-sys already on crates.io — skipping"
exit 0
fi
exit 1
- name: Wait for aardvark-sys to index
run: sleep 15
- name: Publish to crates.io
shell: bash
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
VERSION: ${{ needs.detect-version-change.outputs.version }}
run: |
# Publish to crates.io; treat "already exists" as success
# (manual publish or stable workflow may have already published)
OUTPUT=$(cargo publish --locked --allow-dirty --no-verify 2>&1) && exit 0
echo "$OUTPUT"
if echo "$OUTPUT" | grep -q 'already exists'; then
echo "::notice::zeroclawlabs@${VERSION} already on crates.io — skipping"
exit 0
fi
exit 1
- name: Verify published
shell: bash
env:
VERSION: ${{ needs.detect-version-change.outputs.version }}
run: |
echo "Waiting for crates.io to index..."
sleep 15
status=$(curl -s -o /dev/null -w "%{http_code}" \
"https://crates.io/api/v1/crates/zeroclawlabs/${VERSION}")
if [[ "$status" == "200" ]]; then
echo "zeroclawlabs v${VERSION} is live on crates.io"
echo "Install: cargo install zeroclawlabs"
else
echo "::warning::Version may still be indexing — check https://crates.io/crates/zeroclawlabs"
fi

View File

@ -1,108 +0,0 @@
name: Publish to crates.io
on:
workflow_dispatch:
inputs:
version:
description: "Version to publish (e.g. 0.2.0) — must match Cargo.toml"
required: true
type: string
dry_run:
description: "Dry run (validate without publishing)"
required: false
type: boolean
default: false
concurrency:
group: publish-crates
cancel-in-progress: false
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
jobs:
validate:
name: Validate
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Check version matches Cargo.toml
shell: bash
env:
INPUT_VERSION: ${{ inputs.version }}
run: |
set -euo pipefail
cargo_version=$(sed -n 's/^version = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
if [[ "$cargo_version" != "$INPUT_VERSION" ]]; then
echo "::error::Cargo.toml version (${cargo_version}) does not match input (${INPUT_VERSION})"
exit 1
fi
publish:
name: Publish to crates.io
needs: [validate]
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: web/package-lock.json
- name: Build web dashboard
run: cd web && npm ci && npm run build
- name: Clean web build artifacts
run: rm -rf web/node_modules web/src web/package.json web/package-lock.json web/tsconfig*.json web/vite.config.ts web/index.html
- name: Publish aardvark-sys to crates.io
if: "!inputs.dry_run"
shell: bash
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: |
OUTPUT=$(cargo publish --locked --allow-dirty --no-verify -p aardvark-sys 2>&1) && exit 0
echo "$OUTPUT"
if echo "$OUTPUT" | grep -q 'already exists'; then
echo "::notice::aardvark-sys already on crates.io — skipping"
exit 0
fi
exit 1
- name: Wait for aardvark-sys to index
if: "!inputs.dry_run"
run: sleep 15
- name: Publish (dry run)
if: inputs.dry_run
run: cargo publish --dry-run --locked --allow-dirty --no-verify
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
- name: Publish to crates.io
if: "!inputs.dry_run"
shell: bash
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
VERSION: ${{ inputs.version }}
run: |
# Publish to crates.io; treat "already exists" as success
OUTPUT=$(cargo publish --locked --allow-dirty --no-verify 2>&1) && exit 0
echo "$OUTPUT"
if echo "$OUTPUT" | grep -q 'already exists'; then
echo "::notice::zeroclawlabs@${VERSION} already on crates.io — skipping"
exit 0
fi
exit 1

View File

@ -1,458 +0,0 @@
name: Release Beta
on:
push:
branches: [master]
concurrency:
group: release-beta
cancel-in-progress: true
permissions:
contents: write
packages: write
env:
CARGO_TERM_COLOR: always
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
RELEASE_CARGO_FEATURES: channel-matrix,channel-lark,memory-postgres
jobs:
version:
name: Resolve Version
if: github.repository == 'zeroclaw-labs/zeroclaw'
runs-on: ubuntu-latest
outputs:
version: ${{ steps.ver.outputs.version }}
tag: ${{ steps.ver.outputs.tag }}
skip: ${{ steps.ver.outputs.skip }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 2
- name: Compute beta version
id: ver
shell: bash
run: |
set -euo pipefail
base_version=$(sed -n 's/^version = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
# Skip beta if this is a version bump commit (stable release handles it)
commit_msg=$(git log -1 --pretty=format:"%s")
if [[ "$commit_msg" =~ ^chore:\ bump\ version ]]; then
echo "Version bump commit detected — skipping beta release"
echo "skip=true" >> "$GITHUB_OUTPUT"
exit 0
fi
# Skip beta if a stable tag already exists for this version
if git ls-remote --exit-code --tags origin "refs/tags/v${base_version}" >/dev/null 2>&1; then
echo "Stable tag v${base_version} exists — skipping beta release"
echo "skip=true" >> "$GITHUB_OUTPUT"
exit 0
fi
beta_tag="v${base_version}-beta.${GITHUB_RUN_NUMBER}"
echo "version=${base_version}" >> "$GITHUB_OUTPUT"
echo "tag=${beta_tag}" >> "$GITHUB_OUTPUT"
echo "skip=false" >> "$GITHUB_OUTPUT"
echo "Beta release: ${beta_tag}"
release-notes:
name: Generate Release Notes
needs: [version]
if: github.repository == 'zeroclaw-labs/zeroclaw' && needs.version.outputs.skip != 'true'
runs-on: ubuntu-latest
outputs:
notes: ${{ steps.notes.outputs.body }}
features: ${{ steps.notes.outputs.features }}
contributors: ${{ steps.notes.outputs.contributors }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Build release notes
id: notes
shell: bash
run: |
set -euo pipefail
# Use a wider range — find the previous stable tag to capture all
# contributors across the full release cycle, not just one beta bump
PREV_TAG=$(git tag --sort=-creatordate \
| grep -vE '\-beta\.' \
| head -1 || echo "")
if [ -z "$PREV_TAG" ]; then
RANGE="HEAD"
else
RANGE="${PREV_TAG}..HEAD"
fi
# Extract features only (feat commits) — skip bug fixes for clean notes
FEATURES=$(git log "$RANGE" --pretty=format:"%s" --no-merges \
| grep -iE '^feat(\(|:)' \
| sed 's/^feat(\([^)]*\)): /\1: /' \
| sed 's/^feat: //' \
| sed 's/ (#[0-9]*)$//' \
| sort -uf \
| while IFS= read -r line; do echo "- ${line}"; done || true)
if [ -z "$FEATURES" ]; then
FEATURES="- Incremental improvements and polish"
fi
# Collect ALL unique contributors: git authors + Co-Authored-By
GIT_AUTHORS=$(git log "$RANGE" --pretty=format:"%an" --no-merges | sort -uf || true)
CO_AUTHORS=$(git log "$RANGE" --pretty=format:"%b" --no-merges \
| grep -ioE 'Co-Authored-By: *[^<]+' \
| sed 's/Co-Authored-By: *//i' \
| sed 's/ *$//' \
| sort -uf || true)
# Merge, deduplicate, and filter out bots
ALL_CONTRIBUTORS=$(printf "%s\n%s" "$GIT_AUTHORS" "$CO_AUTHORS" \
| sort -uf \
| grep -v '^$' \
| grep -viE '\[bot\]$|^dependabot|^github-actions|^copilot|^ZeroClaw Bot|^ZeroClaw Runner|^ZeroClaw Agent|^blacksmith' \
| while IFS= read -r name; do echo "- ${name}"; done || true)
# Build release body
BODY=$(cat <<NOTES_EOF
## What's New
${FEATURES}
## Contributors
${ALL_CONTRIBUTORS}
---
*Full changelog: ${PREV_TAG}...HEAD*
NOTES_EOF
)
# Output multiline values
{
echo "body<<BODY_EOF"
echo "$BODY"
echo "BODY_EOF"
} >> "$GITHUB_OUTPUT"
{
echo "features<<FEAT_EOF"
echo "$FEATURES"
echo "FEAT_EOF"
} >> "$GITHUB_OUTPUT"
{
echo "contributors<<CONTRIB_EOF"
echo "$ALL_CONTRIBUTORS"
echo "CONTRIB_EOF"
} >> "$GITHUB_OUTPUT"
web:
name: Build Web Dashboard
needs: [version]
if: github.repository == 'zeroclaw-labs/zeroclaw' && needs.version.outputs.skip != 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: web/package-lock.json
- name: Build web dashboard
run: cd web && npm ci && npm run build
- uses: actions/upload-artifact@v4
with:
name: web-dist
path: web/dist/
retention-days: 1
build:
name: Build ${{ matrix.target }}
needs: [version, web]
runs-on: ${{ matrix.os }}
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
include:
# Use ubuntu-22.04 for Linux builds to link against glibc 2.35,
# ensuring compatibility with Ubuntu 22.04+ (#3573).
- os: ubuntu-22.04
target: x86_64-unknown-linux-gnu
artifact: zeroclaw
ext: tar.gz
- os: ubuntu-22.04
target: aarch64-unknown-linux-gnu
artifact: zeroclaw
ext: tar.gz
cross_compiler: gcc-aarch64-linux-gnu
linker_env: CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER
linker: aarch64-linux-gnu-gcc
- os: ubuntu-22.04
target: armv7-unknown-linux-gnueabihf
artifact: zeroclaw
ext: tar.gz
cross_compiler: gcc-arm-linux-gnueabihf
linker_env: CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER
linker: arm-linux-gnueabihf-gcc
- os: macos-14
target: aarch64-apple-darwin
artifact: zeroclaw
ext: tar.gz
- os: ubuntu-latest
target: aarch64-linux-android
artifact: zeroclaw
ext: tar.gz
ndk: true
- os: windows-latest
target: x86_64-pc-windows-msvc
artifact: zeroclaw.exe
ext: zip
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
if: runner.os != 'Windows'
with:
prefix-key: ${{ matrix.os }}-${{ matrix.target }}
- uses: actions/download-artifact@v4
with:
name: web-dist
path: web/dist/
- name: Install cross compiler
if: matrix.cross_compiler
run: |
sudo apt-get update -qq
sudo apt-get install -y ${{ matrix.cross_compiler }}
- name: Setup Android NDK
if: matrix.ndk
run: echo "$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin" >> "$GITHUB_PATH"
- name: Build release
shell: bash
run: |
if [ -n "${{ matrix.linker_env || '' }}" ] && [ -n "${{ matrix.linker || '' }}" ]; then
export "${{ matrix.linker_env }}=${{ matrix.linker }}"
fi
cargo build --release --locked --features "${{ env.RELEASE_CARGO_FEATURES }}" --target ${{ matrix.target }}
- name: Package (Unix)
if: runner.os != 'Windows'
run: |
cd target/${{ matrix.target }}/release
tar czf ../../../zeroclaw-${{ matrix.target }}.${{ matrix.ext }} ${{ matrix.artifact }}
- name: Package (Windows)
if: runner.os == 'Windows'
run: |
cd target/${{ matrix.target }}/release
7z a ../../../zeroclaw-${{ matrix.target }}.${{ matrix.ext }} ${{ matrix.artifact }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: zeroclaw-${{ matrix.target }}
path: zeroclaw-${{ matrix.target }}.${{ matrix.ext }}
retention-days: 7
build-desktop:
name: Build Desktop App (macOS Universal)
needs: [version]
if: needs.version.outputs.skip != 'true'
runs-on: macos-14
timeout-minutes: 40
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: aarch64-apple-darwin,x86_64-apple-darwin
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
with:
prefix-key: macos-tauri
- uses: actions/setup-node@v4
with:
node-version: 22
- name: Install Tauri CLI
run: cargo install tauri-cli --locked
- name: Sync Tauri version with Cargo.toml
shell: bash
run: |
VERSION=$(sed -n 's/^version = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
cd apps/tauri
if command -v jq >/dev/null 2>&1; then
jq --arg v "$VERSION" '.version = $v' tauri.conf.json > tmp.json && mv tmp.json tauri.conf.json
else
sed -i '' "s/\"version\": \"[^\"]*\"/\"version\": \"$VERSION\"/" tauri.conf.json
fi
echo "Tauri version set to: $VERSION"
- name: Build Tauri app (universal binary)
working-directory: apps/tauri
run: cargo tauri build --target universal-apple-darwin
- name: Prepare desktop release assets
run: |
mkdir -p desktop-assets
find target -name '*.dmg' -exec cp {} desktop-assets/ZeroClaw.dmg \; 2>/dev/null || true
find target -name '*.app.tar.gz' -exec cp {} desktop-assets/ZeroClaw-macos.app.tar.gz \; 2>/dev/null || true
find target -name '*.app.tar.gz.sig' -exec cp {} desktop-assets/ZeroClaw-macos.app.tar.gz.sig \; 2>/dev/null || true
echo "--- Desktop assets ---"
ls -lh desktop-assets/
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: desktop-macos
path: desktop-assets/*
retention-days: 7
publish:
name: Publish Beta Release
needs: [version, release-notes, build, build-desktop]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
pattern: zeroclaw-*
path: artifacts
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: desktop-macos
path: artifacts/desktop-macos
- name: Generate checksums
run: |
cd artifacts
find . -type f \( -name '*.tar.gz' -o -name '*.zip' -o -name '*.dmg' \) -exec sha256sum {} + | sed 's| \./[^/]*/| |' > SHA256SUMS
cat SHA256SUMS
- name: Collect release assets
run: |
mkdir -p release-assets
find artifacts -type f \( -name '*.tar.gz' -o -name '*.zip' -o -name '*.dmg' -o -name 'SHA256SUMS' \) -exec cp {} release-assets/ \;
cp install.sh release-assets/
echo "--- Assets ---"
ls -lh release-assets/
- name: Write release notes
env:
NOTES: ${{ needs.release-notes.outputs.notes }}
run: printf '%s\n' "$NOTES" > release-notes.md
- name: Create GitHub Release
env:
GH_TOKEN: ${{ secrets.RELEASE_TOKEN }}
TAG: ${{ needs.version.outputs.tag }}
run: |
gh release create "$TAG" release-assets/* \
--repo "${{ github.repository }}" \
--title "$TAG" \
--notes-file release-notes.md \
--prerelease
redeploy-website:
name: Trigger Website Redeploy
needs: [publish]
runs-on: ubuntu-latest
steps:
- name: Trigger website redeploy
env:
PAT: ${{ secrets.WEBSITE_REPO_PAT }}
run: |
curl -fsSL -X POST \
-H "Authorization: token $PAT" \
-H "Accept: application/vnd.github+json" \
https://api.github.com/repos/zeroclaw-labs/zeroclaw-website/dispatches \
-d '{"event_type":"new-release","client_payload":{"install_script_url":"https://raw.githubusercontent.com/zeroclaw-labs/zeroclaw/master/install.sh"}}'
docker:
name: Push Docker Image
needs: [version, build]
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: zeroclaw-x86_64-unknown-linux-gnu
path: artifacts/
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: zeroclaw-aarch64-unknown-linux-gnu
path: artifacts/
- name: Prepare Docker context with pre-built binaries
run: |
mkdir -p docker-ctx/bin/amd64 docker-ctx/bin/arm64
tar xzf artifacts/zeroclaw-x86_64-unknown-linux-gnu.tar.gz -C docker-ctx/bin/amd64
tar xzf artifacts/zeroclaw-aarch64-unknown-linux-gnu.tar.gz -C docker-ctx/bin/arm64
mkdir -p docker-ctx/zeroclaw-data/.zeroclaw docker-ctx/zeroclaw-data/workspace
printf '%s\n' \
'workspace_dir = "/zeroclaw-data/workspace"' \
'config_path = "/zeroclaw-data/.zeroclaw/config.toml"' \
'api_key = ""' \
'default_provider = "openrouter"' \
'default_model = "anthropic/claude-sonnet-4-20250514"' \
'default_temperature = 0.7' \
'' \
'[gateway]' \
'port = 42617' \
'host = "[::]"' \
'allow_public_bind = true' \
> docker-ctx/zeroclaw-data/.zeroclaw/config.toml
cp Dockerfile.ci docker-ctx/Dockerfile
cp Dockerfile.debian.ci docker-ctx/Dockerfile.debian
- uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
with:
context: docker-ctx
push: true
tags: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.version.outputs.tag }}
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:beta
platforms: linux/amd64,linux/arm64
- name: Build and push Debian compatibility image
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
with:
context: docker-ctx
file: docker-ctx/Dockerfile.debian
push: true
tags: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.version.outputs.tag }}-debian
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:beta-debian
platforms: linux/amd64,linux/arm64
# Tweet removed — only stable releases should tweet (see tweet-release.yml).

102
.github/workflows/release-build.yml vendored Normal file
View File

@ -0,0 +1,102 @@
name: Production Release Build
on:
push:
branches: ["main"]
tags: ["v*"]
workflow_dispatch:
concurrency:
group: production-release-build-${{ github.ref || github.run_id }}
cancel-in-progress: false
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
build-and-test:
name: Build and Test (Linux x86_64)
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 120
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- name: Setup Rust
uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
components: rustfmt, clippy
- name: Ensure C toolchain for Rust builds
shell: bash
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- name: Ensure rustfmt and clippy components
shell: bash
run: rustup component add rustfmt clippy --toolchain 1.92.0
- name: Activate toolchain binaries on PATH
shell: bash
run: |
set -euo pipefail
toolchain_bin="$(dirname "$(rustup which --toolchain 1.92.0 cargo)")"
echo "$toolchain_bin" >> "$GITHUB_PATH"
- name: Cache Cargo registry and target
uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: production-release-build
shared-key: ${{ runner.os }}-${{ hashFiles('Cargo.lock') }}
cache-targets: true
cache-bin: false
- name: Rust quality gates
shell: bash
run: |
set -euo pipefail
./scripts/ci/rust_quality_gate.sh
cargo test --locked --lib --bins --verbose
- name: Build production binary (canonical)
shell: bash
run: cargo build --release --locked
- name: Prepare artifact bundle
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
cp target/release/zeroclaw artifacts/zeroclaw
sha256sum artifacts/zeroclaw > artifacts/zeroclaw.sha256
- name: Upload production artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: zeroclaw-linux-amd64
path: |
artifacts/zeroclaw
artifacts/zeroclaw.sha256
if-no-files-found: error
retention-days: 21

View File

@ -1,570 +0,0 @@
name: Release Stable
on:
push:
tags:
- "v[0-9]+.[0-9]+.[0-9]+" # stable tags only (no -beta suffix)
workflow_dispatch:
inputs:
version:
description: "Stable version to release (e.g. 0.2.0)"
required: true
type: string
concurrency:
group: promote-release
cancel-in-progress: false
permissions:
contents: write
packages: write
env:
CARGO_TERM_COLOR: always
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
RELEASE_CARGO_FEATURES: channel-matrix,channel-lark,memory-postgres
jobs:
validate:
name: Validate Version
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.check.outputs.tag }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Validate semver and Cargo.toml match
id: check
shell: bash
env:
INPUT_VERSION: ${{ inputs.version || '' }}
REF_NAME: ${{ github.ref_name }}
EVENT_NAME: ${{ github.event_name }}
run: |
set -euo pipefail
cargo_version=$(sed -n 's/^version = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
# Resolve version from tag push or manual input
if [[ "$EVENT_NAME" == "push" ]]; then
# Tag push: extract version from tag name (v0.5.9 -> 0.5.9)
input_version="${REF_NAME#v}"
else
input_version="$INPUT_VERSION"
fi
if [[ ! "$input_version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "::error::Version must be semver (X.Y.Z). Got: ${input_version}"
exit 1
fi
if [[ "$cargo_version" != "$input_version" ]]; then
echo "::error::Cargo.toml version (${cargo_version}) does not match input (${input_version}). Bump Cargo.toml first."
exit 1
fi
tag="v${input_version}"
# Only check tag existence for manual dispatch (tag push means it already exists)
if [[ "$EVENT_NAME" != "push" ]]; then
if git ls-remote --exit-code --tags origin "refs/tags/${tag}" >/dev/null 2>&1; then
echo "::error::Tag ${tag} already exists."
exit 1
fi
fi
echo "tag=${tag}" >> "$GITHUB_OUTPUT"
web:
name: Build Web Dashboard
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: web/package-lock.json
- name: Build web dashboard
run: cd web && npm ci && npm run build
- uses: actions/upload-artifact@v4
with:
name: web-dist
path: web/dist/
retention-days: 1
release-notes:
name: Generate Release Notes
runs-on: ubuntu-latest
outputs:
notes: ${{ steps.notes.outputs.body }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Build release notes
id: notes
shell: bash
env:
INPUT_VERSION: ${{ inputs.version }}
run: |
set -euo pipefail
# Find the previous stable tag (exclude beta tags)
PREV_TAG=$(git tag --sort=-creatordate | grep -vE '\-beta\.' | grep -v "^v${INPUT_VERSION}$" | head -1 || echo "")
if [ -z "$PREV_TAG" ]; then
RANGE="HEAD"
else
RANGE="${PREV_TAG}..HEAD"
fi
# Extract features only — skip bug fixes for clean release notes
FEATURES=$(git log "$RANGE" --pretty=format:"%s" --no-merges \
| grep -iE '^feat(\(|:)' \
| sed 's/^feat(\([^)]*\)): /\1: /' \
| sed 's/^feat: //' \
| sed 's/ (#[0-9]*)$//' \
| sort -uf \
| while IFS= read -r line; do echo "- ${line}"; done || true)
if [ -z "$FEATURES" ]; then
FEATURES="- Incremental improvements and polish"
fi
# Collect ALL unique contributors: git authors + Co-Authored-By
GIT_AUTHORS=$(git log "$RANGE" --pretty=format:"%an" --no-merges | sort -uf || true)
CO_AUTHORS=$(git log "$RANGE" --pretty=format:"%b" --no-merges \
| grep -ioE 'Co-Authored-By: *[^<]+' \
| sed 's/Co-Authored-By: *//i' \
| sed 's/ *$//' \
| sort -uf || true)
# Merge, deduplicate, and filter out bots
ALL_CONTRIBUTORS=$(printf "%s\n%s" "$GIT_AUTHORS" "$CO_AUTHORS" \
| sort -uf \
| grep -v '^$' \
| grep -viE '\[bot\]$|^dependabot|^github-actions|^copilot|^ZeroClaw Bot|^ZeroClaw Runner|^ZeroClaw Agent|^blacksmith' \
| while IFS= read -r name; do echo "- ${name}"; done || true)
BODY=$(cat <<NOTES_EOF
## What's New
${FEATURES}
## Contributors
${ALL_CONTRIBUTORS}
---
*Full changelog: ${PREV_TAG}...v${INPUT_VERSION}*
NOTES_EOF
)
{
echo "body<<BODY_EOF"
echo "$BODY"
echo "BODY_EOF"
} >> "$GITHUB_OUTPUT"
build:
name: Build ${{ matrix.target }}
needs: [validate, web]
runs-on: ${{ matrix.os }}
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
include:
# Use ubuntu-22.04 for Linux builds to link against glibc 2.35,
# ensuring compatibility with Ubuntu 22.04+ (#3573).
- os: ubuntu-22.04
target: x86_64-unknown-linux-gnu
artifact: zeroclaw
ext: tar.gz
- os: ubuntu-22.04
target: aarch64-unknown-linux-gnu
artifact: zeroclaw
ext: tar.gz
cross_compiler: gcc-aarch64-linux-gnu
linker_env: CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER
linker: aarch64-linux-gnu-gcc
- os: ubuntu-22.04
target: armv7-unknown-linux-gnueabihf
artifact: zeroclaw
ext: tar.gz
cross_compiler: gcc-arm-linux-gnueabihf
linker_env: CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER
linker: arm-linux-gnueabihf-gcc
skip_prometheus: true
- os: ubuntu-22.04
target: arm-unknown-linux-gnueabihf
artifact: zeroclaw
ext: tar.gz
cross_compiler: gcc-arm-linux-gnueabihf
linker_env: CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER
linker: arm-linux-gnueabihf-gcc
skip_prometheus: true
- os: macos-14
target: aarch64-apple-darwin
artifact: zeroclaw
ext: tar.gz
- os: ubuntu-latest
target: aarch64-linux-android
artifact: zeroclaw
ext: tar.gz
ndk: true
- os: windows-latest
target: x86_64-pc-windows-msvc
artifact: zeroclaw.exe
ext: zip
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
if: runner.os != 'Windows'
with:
prefix-key: ${{ matrix.os }}-${{ matrix.target }}
- uses: actions/download-artifact@v4
with:
name: web-dist
path: web/dist/
- name: Install cross compiler
if: matrix.cross_compiler
run: |
sudo apt-get update -qq
sudo apt-get install -y ${{ matrix.cross_compiler }}
- name: Setup Android NDK
if: matrix.ndk
run: echo "$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin" >> "$GITHUB_PATH"
- name: Build release
shell: bash
run: |
if [ -n "${{ matrix.linker_env || '' }}" ] && [ -n "${{ matrix.linker || '' }}" ]; then
export "${{ matrix.linker_env }}=${{ matrix.linker }}"
fi
if [ "${{ matrix.skip_prometheus || 'false' }}" = "true" ]; then
cargo build --release --locked --no-default-features --features "${{ env.RELEASE_CARGO_FEATURES }},channel-nostr,skill-creation" --target ${{ matrix.target }}
else
cargo build --release --locked --features "${{ env.RELEASE_CARGO_FEATURES }}" --target ${{ matrix.target }}
fi
- name: Package (Unix)
if: runner.os != 'Windows'
run: |
cd target/${{ matrix.target }}/release
tar czf ../../../zeroclaw-${{ matrix.target }}.${{ matrix.ext }} ${{ matrix.artifact }}
- name: Package (Windows)
if: runner.os == 'Windows'
run: |
cd target/${{ matrix.target }}/release
7z a ../../../zeroclaw-${{ matrix.target }}.${{ matrix.ext }} ${{ matrix.artifact }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: zeroclaw-${{ matrix.target }}
path: zeroclaw-${{ matrix.target }}.${{ matrix.ext }}
retention-days: 14
build-desktop:
name: Build Desktop App (macOS Universal)
needs: [validate]
runs-on: macos-14
timeout-minutes: 40
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
targets: aarch64-apple-darwin,x86_64-apple-darwin
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
with:
prefix-key: macos-tauri
- uses: actions/setup-node@v4
with:
node-version: 22
- name: Install Tauri CLI
run: cargo install tauri-cli --locked
- name: Sync Tauri version with Cargo.toml
shell: bash
run: |
VERSION=$(sed -n 's/^version = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
cd apps/tauri
if command -v jq >/dev/null 2>&1; then
jq --arg v "$VERSION" '.version = $v' tauri.conf.json > tmp.json && mv tmp.json tauri.conf.json
else
sed -i '' "s/\"version\": \"[^\"]*\"/\"version\": \"$VERSION\"/" tauri.conf.json
fi
echo "Tauri version set to: $VERSION"
- name: Build Tauri app (universal binary)
working-directory: apps/tauri
run: cargo tauri build --target universal-apple-darwin
- name: Prepare desktop release assets
run: |
mkdir -p desktop-assets
find target -name '*.dmg' -exec cp {} desktop-assets/ZeroClaw.dmg \; 2>/dev/null || true
find target -name '*.app.tar.gz' -exec cp {} desktop-assets/ZeroClaw-macos.app.tar.gz \; 2>/dev/null || true
find target -name '*.app.tar.gz.sig' -exec cp {} desktop-assets/ZeroClaw-macos.app.tar.gz.sig \; 2>/dev/null || true
echo "--- Desktop assets ---"
ls -lh desktop-assets/
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: desktop-macos
path: desktop-assets/*
retention-days: 14
publish:
name: Publish Stable Release
needs: [validate, release-notes, build, build-desktop]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
pattern: zeroclaw-*
path: artifacts
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: desktop-macos
path: artifacts/desktop-macos
- name: Generate checksums
run: |
cd artifacts
find . -type f \( -name '*.tar.gz' -o -name '*.zip' -o -name '*.dmg' \) -exec sha256sum {} + | sed 's| \./[^/]*/| |' > SHA256SUMS
cat SHA256SUMS
- name: Collect release assets
run: |
mkdir -p release-assets
find artifacts -type f \( -name '*.tar.gz' -o -name '*.zip' -o -name '*.dmg' -o -name 'SHA256SUMS' \) -exec cp {} release-assets/ \;
cp install.sh release-assets/
echo "--- Assets ---"
ls -lh release-assets/
- name: Write release notes
env:
NOTES: ${{ needs.release-notes.outputs.notes }}
run: printf '%s\n' "$NOTES" > release-notes.md
- name: Create tag if manual dispatch
if: github.event_name == 'workflow_dispatch'
env:
TAG: ${{ needs.validate.outputs.tag }}
run: |
git tag -a "$TAG" -m "zeroclaw $TAG"
git push origin "$TAG"
- name: Create GitHub Release
env:
GH_TOKEN: ${{ secrets.RELEASE_TOKEN }}
TAG: ${{ needs.validate.outputs.tag }}
run: |
gh release create "$TAG" release-assets/* \
--repo "${{ github.repository }}" \
--title "$TAG" \
--notes-file release-notes.md \
--latest
crates-io:
name: Publish to crates.io
needs: [validate, publish]
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: 1.92.0
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-node@v4
with:
node-version: 22
cache: npm
cache-dependency-path: web/package-lock.json
- name: Build web dashboard
run: cd web && npm ci && npm run build
- name: Clean web build artifacts
run: rm -rf web/node_modules web/src web/package.json web/package-lock.json web/tsconfig*.json web/vite.config.ts web/index.html
- name: Publish aardvark-sys to crates.io
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: |
OUTPUT=$(cargo publish --locked --allow-dirty --no-verify -p aardvark-sys 2>&1) && exit 0
echo "$OUTPUT"
if echo "$OUTPUT" | grep -q 'already exists'; then
echo "::notice::aardvark-sys already on crates.io — skipping"
exit 0
fi
exit 1
- name: Wait for aardvark-sys to index
run: sleep 15
- name: Publish to crates.io
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
VERSION: ${{ inputs.version }}
run: |
# Publish to crates.io; treat "already exists" as success
# (auto-publish workflow may have already published this version)
CRATE_NAME=$(sed -n 's/^name = "\([^"]*\)"/\1/p' Cargo.toml | head -1)
OUTPUT=$(cargo publish --locked --allow-dirty --no-verify 2>&1) && exit 0
echo "$OUTPUT"
if echo "$OUTPUT" | grep -q 'already exists'; then
echo "::notice::${CRATE_NAME}@${VERSION} already on crates.io — skipping"
exit 0
fi
exit 1
redeploy-website:
name: Trigger Website Redeploy
needs: [publish]
runs-on: ubuntu-latest
steps:
- name: Trigger website redeploy
env:
PAT: ${{ secrets.WEBSITE_REPO_PAT }}
run: |
curl -fsSL -X POST \
-H "Authorization: token $PAT" \
-H "Accept: application/vnd.github+json" \
https://api.github.com/repos/zeroclaw-labs/zeroclaw-website/dispatches \
-d '{"event_type":"new-release","client_payload":{"install_script_url":"https://raw.githubusercontent.com/zeroclaw-labs/zeroclaw/master/install.sh"}}'
docker:
name: Push Docker Image
needs: [validate, build]
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: zeroclaw-x86_64-unknown-linux-gnu
path: artifacts/
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: zeroclaw-aarch64-unknown-linux-gnu
path: artifacts/
- name: Prepare Docker context with pre-built binaries
run: |
mkdir -p docker-ctx/bin/amd64 docker-ctx/bin/arm64
tar xzf artifacts/zeroclaw-x86_64-unknown-linux-gnu.tar.gz -C docker-ctx/bin/amd64
tar xzf artifacts/zeroclaw-aarch64-unknown-linux-gnu.tar.gz -C docker-ctx/bin/arm64
mkdir -p docker-ctx/zeroclaw-data/.zeroclaw docker-ctx/zeroclaw-data/workspace
printf '%s\n' \
'workspace_dir = "/zeroclaw-data/workspace"' \
'config_path = "/zeroclaw-data/.zeroclaw/config.toml"' \
'api_key = ""' \
'default_provider = "openrouter"' \
'default_model = "anthropic/claude-sonnet-4-20250514"' \
'default_temperature = 0.7' \
'' \
'[gateway]' \
'port = 42617' \
'host = "[::]"' \
'allow_public_bind = true' \
> docker-ctx/zeroclaw-data/.zeroclaw/config.toml
cp Dockerfile.ci docker-ctx/Dockerfile
cp Dockerfile.debian.ci docker-ctx/Dockerfile.debian
- uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
with:
context: docker-ctx
push: true
tags: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.validate.outputs.tag }}
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
platforms: linux/amd64,linux/arm64
- name: Build and push Debian compatibility image
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
with:
context: docker-ctx
file: docker-ctx/Dockerfile.debian
push: true
tags: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.validate.outputs.tag }}-debian
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:debian
platforms: linux/amd64,linux/arm64
# ── Post-publish: package manager auto-sync ─────────────────────────
scoop:
name: Update Scoop Manifest
needs: [validate, publish]
if: ${{ !cancelled() && needs.publish.result == 'success' }}
uses: ./.github/workflows/pub-scoop.yml
with:
release_tag: ${{ needs.validate.outputs.tag }}
dry_run: false
secrets: inherit
aur:
name: Update AUR Package
needs: [validate, publish]
if: ${{ !cancelled() && needs.publish.result == 'success' }}
uses: ./.github/workflows/pub-aur.yml
with:
release_tag: ${{ needs.validate.outputs.tag }}
dry_run: false
secrets: inherit
homebrew:
name: Update Homebrew Core
needs: [validate, publish]
if: ${{ !cancelled() && needs.publish.result == 'success' }}
uses: ./.github/workflows/pub-homebrew-core.yml
with:
release_tag: ${{ needs.validate.outputs.tag }}
dry_run: false
secrets: inherit
# ── Post-publish: tweet after release + website are live ──────────────
# Docker push can be slow; don't let it block the tweet.
tweet:
name: Tweet Release
needs: [validate, publish, redeploy-website]
if: ${{ !cancelled() && needs.publish.result == 'success' }}
uses: ./.github/workflows/tweet-release.yml
with:
release_tag: ${{ needs.validate.outputs.tag }}
release_url: https://github.com/zeroclaw-labs/zeroclaw/releases/tag/${{ needs.validate.outputs.tag }}
secrets: inherit

View File

@ -0,0 +1,61 @@
// Enforce at least one human approval on pull requests.
// Used by .github/workflows/ci-run.yml via actions/github-script.
module.exports = async ({ github, context, core }) => {
const owner = context.repo.owner;
const repo = context.repo.repo;
const prNumber = context.payload.pull_request?.number;
if (!prNumber) {
core.setFailed("Missing pull_request context.");
return;
}
const botAllowlist = new Set(
(process.env.HUMAN_REVIEW_BOT_LOGINS || "github-actions[bot],dependabot[bot],coderabbitai[bot]")
.split(",")
.map((value) => value.trim().toLowerCase())
.filter(Boolean),
);
const isBotAccount = (login, accountType) => {
if (!login) return false;
if ((accountType || "").toLowerCase() === "bot") return true;
if (login.endsWith("[bot]")) return true;
return botAllowlist.has(login);
};
const reviews = await github.paginate(github.rest.pulls.listReviews, {
owner,
repo,
pull_number: prNumber,
per_page: 100,
});
const latestReviewByUser = new Map();
const decisiveStates = new Set(["APPROVED", "CHANGES_REQUESTED", "DISMISSED"]);
for (const review of reviews) {
const login = review.user?.login?.toLowerCase();
if (!login) continue;
if (!decisiveStates.has(review.state)) continue;
latestReviewByUser.set(login, {
state: review.state,
type: review.user?.type || "",
});
}
const humanApprovers = [];
for (const [login, review] of latestReviewByUser.entries()) {
if (review.state !== "APPROVED") continue;
if (isBotAccount(login, review.type)) continue;
humanApprovers.push(login);
}
if (humanApprovers.length === 0) {
core.setFailed(
"No human approving review found. At least one non-bot approval is required before merge.",
);
return;
}
core.info(`Human approval check passed. Approver(s): ${humanApprovers.join(", ")}`);
};

View File

@ -0,0 +1,54 @@
// Enforce ownership rules for root license files in PRs.
module.exports = async ({ github, context, core }) => {
const owner = context.repo.owner;
const repo = context.repo.repo;
const prNumber = context.payload.pull_request?.number;
const prAuthor = context.payload.pull_request?.user?.login?.toLowerCase() || "";
if (!prNumber) {
core.setFailed("Missing pull_request context.");
return;
}
const ownerAllowlist = ["willsarg"];
if (ownerAllowlist.length === 0) {
core.setFailed("License owner allowlist is empty.");
return;
}
const protectedFiles = new Set(["LICENSE-APACHE", "LICENSE-MIT"]);
const files = await github.paginate(github.rest.pulls.listFiles, {
owner,
repo,
pull_number: prNumber,
per_page: 100,
});
const changedProtectedFiles = files
.map((file) => file.filename)
.filter((name) => protectedFiles.has(name));
if (changedProtectedFiles.length === 0) {
core.info("No protected root license files changed in this PR.");
return;
}
core.info(`Protected license files changed:\n- ${changedProtectedFiles.join("\n- ")}`);
core.info(`Allowed license file editors: ${ownerAllowlist.join(", ")}`);
if (!prAuthor) {
core.setFailed("Unable to resolve PR author login.");
return;
}
if (!ownerAllowlist.includes(prAuthor)) {
core.setFailed(
`Root license files (${changedProtectedFiles.join(", ")}) can only be changed by ${ownerAllowlist.join(", ")}. PR author is @${prAuthor}.`,
);
return;
}
core.info(`License file edit authorized for PR author: @${prAuthor}`);
};

View File

@ -0,0 +1,90 @@
// Post actionable lint failure summary as a PR comment.
// Used by the lint-feedback CI job via actions/github-script.
//
// Required environment variables:
// RUST_CHANGED — "true" if Rust files changed
// DOCS_CHANGED — "true" if docs files changed
// LINT_RESULT — result of the lint job
// LINT_DELTA_RESULT — result of the strict delta lint job
// DOCS_RESULT — result of the docs-quality job
module.exports = async ({ github, context, core }) => {
const owner = context.repo.owner;
const repo = context.repo.repo;
const issueNumber = context.payload.pull_request?.number;
if (!issueNumber) return;
const marker = "<!-- ci-lint-feedback -->";
const rustChanged = process.env.RUST_CHANGED === "true";
const docsChanged = process.env.DOCS_CHANGED === "true";
const lintResult = process.env.LINT_RESULT || "skipped";
const lintDeltaResult = process.env.LINT_DELTA_RESULT || "skipped";
const docsResult = process.env.DOCS_RESULT || "skipped";
const failures = [];
if (rustChanged && !["success", "skipped"].includes(lintResult)) {
failures.push("`Lint Gate (Format + Clippy)` failed.");
}
if (rustChanged && !["success", "skipped"].includes(lintDeltaResult)) {
failures.push("`Lint Gate (Strict Delta)` failed.");
}
if (docsChanged && !["success", "skipped"].includes(docsResult)) {
failures.push("`Docs Quality` failed.");
}
const comments = await github.paginate(github.rest.issues.listComments, {
owner,
repo,
issue_number: issueNumber,
per_page: 100,
});
const existing = comments.find((comment) => (comment.body || "").includes(marker));
if (failures.length === 0) {
if (existing) {
await github.rest.issues.deleteComment({
owner,
repo,
comment_id: existing.id,
});
}
core.info("No lint/docs gate failures. No feedback comment required.");
return;
}
const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;
const body = [
marker,
"### CI lint feedback",
"",
"This PR failed one or more fast lint/documentation gates:",
"",
...failures.map((item) => `- ${item}`),
"",
"Open the failing logs in this run:",
`- ${runUrl}`,
"",
"Local fix commands:",
"- `./scripts/ci/rust_quality_gate.sh`",
"- `./scripts/ci/rust_strict_delta_gate.sh`",
"- `./scripts/ci/docs_quality_gate.sh`",
"",
"After fixes, push a new commit and CI will re-run automatically.",
].join("\n");
if (existing) {
await github.rest.issues.updateComment({
owner,
repo,
comment_id: existing.id,
body,
});
} else {
await github.rest.issues.createComment({
owner,
repo,
issue_number: issueNumber,
body,
});
}
};

View File

@ -0,0 +1,132 @@
// Extracted from pr-auto-response.yml step: Apply contributor tier label for issue author
module.exports = async ({ github, context, core }) => {
const owner = context.repo.owner;
const repo = context.repo.repo;
const issue = context.payload.issue;
const pullRequest = context.payload.pull_request;
const target = issue ?? pullRequest;
async function loadContributorTierPolicy() {
const policyPath = process.env.LABEL_POLICY_PATH || ".github/label-policy.json";
const fallback = {
contributorTierColor: "2ED9FF",
contributorTierRules: [
{ label: "distinguished contributor", minMergedPRs: 50 },
{ label: "principal contributor", minMergedPRs: 20 },
{ label: "experienced contributor", minMergedPRs: 10 },
{ label: "trusted contributor", minMergedPRs: 5 },
],
};
try {
const { data } = await github.rest.repos.getContent({
owner,
repo,
path: policyPath,
ref: context.payload.repository?.default_branch || "main",
});
const json = JSON.parse(Buffer.from(data.content, "base64").toString("utf8"));
const contributorTierRules = (json.contributor_tiers || []).map((entry) => ({
label: String(entry.label || "").trim(),
minMergedPRs: Number(entry.min_merged_prs || 0),
}));
const contributorTierColor = String(json.contributor_tier_color || "").toUpperCase();
if (!contributorTierColor || contributorTierRules.length === 0) {
return fallback;
}
return { contributorTierColor, contributorTierRules };
} catch (error) {
core.warning(`failed to load ${policyPath}, using fallback policy: ${error.message}`);
return fallback;
}
}
const { contributorTierColor, contributorTierRules } = await loadContributorTierPolicy();
const contributorTierLabels = contributorTierRules.map((rule) => rule.label);
const managedContributorLabels = new Set(contributorTierLabels);
const action = context.payload.action;
const changedLabel = context.payload.label?.name;
if (!target) return;
if ((action === "labeled" || action === "unlabeled") && !managedContributorLabels.has(changedLabel)) {
return;
}
const author = target.user;
if (!author || author.type === "Bot") return;
function contributorTierDescription(rule) {
return `Contributor with ${rule.minMergedPRs}+ merged PRs.`;
}
async function ensureContributorTierLabels() {
for (const rule of contributorTierRules) {
const label = rule.label;
const expectedDescription = contributorTierDescription(rule);
try {
const { data: existing } = await github.rest.issues.getLabel({ owner, repo, name: label });
const currentColor = (existing.color || "").toUpperCase();
const currentDescription = (existing.description || "").trim();
if (currentColor !== contributorTierColor || currentDescription !== expectedDescription) {
await github.rest.issues.updateLabel({
owner,
repo,
name: label,
new_name: label,
color: contributorTierColor,
description: expectedDescription,
});
}
} catch (error) {
if (error.status !== 404) throw error;
await github.rest.issues.createLabel({
owner,
repo,
name: label,
color: contributorTierColor,
description: expectedDescription,
});
}
}
}
function selectContributorTier(mergedCount) {
const matchedTier = contributorTierRules.find((rule) => mergedCount >= rule.minMergedPRs);
return matchedTier ? matchedTier.label : null;
}
let contributorTierLabel = null;
try {
const { data: mergedSearch } = await github.rest.search.issuesAndPullRequests({
q: `repo:${owner}/${repo} is:pr is:merged author:${author.login}`,
per_page: 1,
});
const mergedCount = mergedSearch.total_count || 0;
contributorTierLabel = selectContributorTier(mergedCount);
} catch (error) {
core.warning(`failed to evaluate contributor tier status: ${error.message}`);
return;
}
await ensureContributorTierLabels();
const { data: currentLabels } = await github.rest.issues.listLabelsOnIssue({
owner,
repo,
issue_number: target.number,
});
const keepLabels = currentLabels
.map((label) => label.name)
.filter((label) => !contributorTierLabels.includes(label));
if (contributorTierLabel) {
keepLabels.push(contributorTierLabel);
}
await github.rest.issues.setLabels({
owner,
repo,
issue_number: target.number,
labels: [...new Set(keepLabels)],
});
};

View File

@ -0,0 +1,94 @@
// Extracted from pr-auto-response.yml step: Handle label-driven responses
module.exports = async ({ github, context, core }) => {
const label = context.payload.label?.name;
if (!label) return;
const issue = context.payload.issue;
const pullRequest = context.payload.pull_request;
const target = issue ?? pullRequest;
if (!target) return;
const isIssue = Boolean(issue);
const issueNumber = target.number;
const owner = context.repo.owner;
const repo = context.repo.repo;
const rules = [
{
label: "r:support",
close: true,
closeIssuesOnly: true,
closeReason: "not_planned",
message:
"This looks like a usage/support request. Please use README + docs first, then open a focused bug with repro details if behavior is incorrect.",
},
{
label: "r:needs-repro",
close: false,
message:
"Thanks for the report. Please add deterministic repro steps, exact environment, and redacted logs so maintainers can triage quickly.",
},
{
label: "invalid",
close: true,
closeIssuesOnly: true,
closeReason: "not_planned",
message:
"Closing as invalid based on current information. If this is still relevant, open a new issue with updated evidence and reproducible steps.",
},
{
label: "duplicate",
close: true,
closeIssuesOnly: true,
closeReason: "not_planned",
message:
"Closing as duplicate. Please continue discussion in the canonical linked issue/PR.",
},
];
const rule = rules.find((entry) => entry.label === label);
if (!rule) return;
const marker = `<!-- auto-response:${rule.label} -->`;
const comments = await github.paginate(github.rest.issues.listComments, {
owner,
repo,
issue_number: issueNumber,
per_page: 100,
});
const alreadyCommented = comments.some((comment) =>
(comment.body || "").includes(marker)
);
if (!alreadyCommented) {
await github.rest.issues.createComment({
owner,
repo,
issue_number: issueNumber,
body: `${rule.message}\n\n${marker}`,
});
}
if (!rule.close) return;
if (rule.closeIssuesOnly && !isIssue) return;
if (target.state === "closed") return;
if (isIssue) {
await github.rest.issues.update({
owner,
repo,
issue_number: issueNumber,
state: "closed",
state_reason: rule.closeReason || "not_planned",
});
} else {
await github.rest.issues.update({
owner,
repo,
issue_number: issueNumber,
state: "closed",
});
}
};

View File

@ -0,0 +1,161 @@
// Extracted from pr-check-status.yml step: Nudge PRs that need rebase or CI refresh
module.exports = async ({ github, context, core }) => {
const staleHours = Number(process.env.STALE_HOURS || "48");
const ignoreLabels = new Set(["no-stale", "stale", "maintainer", "no-pr-hygiene"]);
const marker = "<!-- pr-hygiene-nudge -->";
const owner = context.repo.owner;
const repo = context.repo.repo;
const openPrs = await github.paginate(github.rest.pulls.list, {
owner,
repo,
state: "open",
per_page: 100,
});
const activePrs = openPrs.filter((pr) => {
if (pr.draft) {
return false;
}
const labels = new Set((pr.labels || []).map((label) => label.name));
return ![...ignoreLabels].some((label) => labels.has(label));
});
core.info(`Scanning ${activePrs.length} open PR(s) for hygiene nudges.`);
let nudged = 0;
let skipped = 0;
for (const pr of activePrs) {
const { data: headCommit } = await github.rest.repos.getCommit({
owner,
repo,
ref: pr.head.sha,
});
const headCommitAt =
headCommit.commit?.committer?.date || headCommit.commit?.author?.date;
if (!headCommitAt) {
skipped += 1;
core.info(`#${pr.number}: missing head commit timestamp, skipping.`);
continue;
}
const ageHours = (Date.now() - new Date(headCommitAt).getTime()) / 3600000;
if (ageHours < staleHours) {
skipped += 1;
continue;
}
const { data: prDetail } = await github.rest.pulls.get({
owner,
repo,
pull_number: pr.number,
});
const isBehindBase = prDetail.mergeable_state === "behind";
const { data: checkRunsData } = await github.rest.checks.listForRef({
owner,
repo,
ref: pr.head.sha,
per_page: 100,
});
const ciGateRuns = (checkRunsData.check_runs || [])
.filter((run) => run.name === "CI Required Gate")
.sort((a, b) => {
const aTime = new Date(a.started_at || a.completed_at || a.created_at).getTime();
const bTime = new Date(b.started_at || b.completed_at || b.created_at).getTime();
return bTime - aTime;
});
let ciState = "missing";
if (ciGateRuns.length > 0) {
const latest = ciGateRuns[0];
if (latest.status !== "completed") {
ciState = "in_progress";
} else if (["success", "neutral", "skipped"].includes(latest.conclusion || "")) {
ciState = "success";
} else {
ciState = String(latest.conclusion || "failure");
}
}
const ciMissing = ciState === "missing";
const ciFailing = !["success", "in_progress", "missing"].includes(ciState);
if (!isBehindBase && !ciMissing && !ciFailing) {
skipped += 1;
continue;
}
const reasons = [];
if (isBehindBase) {
reasons.push("- Branch is behind `main` (please rebase or merge the latest base branch).");
}
if (ciMissing) {
reasons.push("- No `CI Required Gate` run was found for the current head commit.");
}
if (ciFailing) {
reasons.push(`- Latest \`CI Required Gate\` result is \`${ciState}\`.`);
}
const shortSha = pr.head.sha.slice(0, 12);
const body = [
marker,
`Hi @${pr.user.login}, friendly automation nudge from PR hygiene.`,
"",
`This PR has had no new commits for **${Math.floor(ageHours)}h** and still needs an update before merge:`,
"",
...reasons,
"",
"### Recommended next steps",
"1. Rebase your branch on `main`.",
"2. Push the updated branch and re-run checks (or use **Re-run failed jobs**).",
"3. Post fresh validation output in this PR thread.",
"",
"Maintainers: apply `no-stale` to opt out for accepted-but-blocked work.",
`Head SHA: \`${shortSha}\``,
].join("\n");
const { data: comments } = await github.rest.issues.listComments({
owner,
repo,
issue_number: pr.number,
per_page: 100,
});
const existing = comments.find(
(comment) => comment.user?.type === "Bot" && comment.body?.includes(marker),
);
if (existing) {
if (existing.body === body) {
skipped += 1;
continue;
}
await github.rest.issues.updateComment({
owner,
repo,
comment_id: existing.id,
body,
});
} else {
await github.rest.issues.createComment({
owner,
repo,
issue_number: pr.number,
body,
});
}
nudged += 1;
core.info(`#${pr.number}: hygiene nudge posted/updated.`);
}
core.info(`Done. Nudged=${nudged}, skipped=${skipped}`);
};

View File

@ -0,0 +1,189 @@
// Run safe intake checks for PR events and maintain a single sticky comment.
// Used by .github/workflows/pr-intake-checks.yml via actions/github-script.
module.exports = async ({ github, context, core }) => {
const owner = context.repo.owner;
const repo = context.repo.repo;
const pr = context.payload.pull_request;
if (!pr) return;
const marker = "<!-- pr-intake-checks -->";
const legacyMarker = "<!-- pr-intake-sanity -->";
const requiredSections = [
"## Summary",
"## Validation Evidence (required)",
"## Security Impact (required)",
"## Privacy and Data Hygiene (required)",
"## Rollback Plan (required)",
];
const body = pr.body || "";
const missingSections = requiredSections.filter((section) => !body.includes(section));
const missingFields = [];
const requiredFieldChecks = [
["summary problem", /- Problem:\s*\S+/m],
["summary why it matters", /- Why it matters:\s*\S+/m],
["summary what changed", /- What changed:\s*\S+/m],
["validation commands", /Commands and result summary:\s*[\s\S]*```/m],
["security risk/mitigation", /- New permissions\/capabilities\?\s*\(`Yes\/No`\):\s*\S+/m],
["privacy status", /- Data-hygiene status\s*\(`pass\|needs-follow-up`\):\s*\S+/m],
["rollback plan", /- Fast rollback command\/path:\s*\S+/m],
];
for (const [name, pattern] of requiredFieldChecks) {
if (!pattern.test(body)) {
missingFields.push(name);
}
}
const files = await github.paginate(github.rest.pulls.listFiles, {
owner,
repo,
pull_number: pr.number,
per_page: 100,
});
const formatWarnings = [];
const dangerousProblems = [];
for (const file of files) {
const patch = file.patch || "";
if (!patch) continue;
const lines = patch.split("\n");
for (let idx = 0; idx < lines.length; idx += 1) {
const line = lines[idx];
if (!line.startsWith("+") || line.startsWith("+++")) continue;
const added = line.slice(1);
const lineNo = idx + 1;
if (/\t/.test(added)) {
formatWarnings.push(`${file.filename}:patch#${lineNo} contains tab characters`);
}
if (/[ \t]+$/.test(added)) {
formatWarnings.push(`${file.filename}:patch#${lineNo} contains trailing whitespace`);
}
if (/^(<<<<<<<|=======|>>>>>>>)/.test(added)) {
dangerousProblems.push(`${file.filename}:patch#${lineNo} contains merge conflict markers`);
}
}
}
const workflowFilesChanged = files
.map((file) => file.filename)
.filter((name) => name.startsWith(".github/workflows/"));
const advisoryFindings = [];
const blockingFindings = [];
if (missingSections.length > 0) {
advisoryFindings.push(`Missing required PR template sections: ${missingSections.join(", ")}`);
}
if (missingFields.length > 0) {
advisoryFindings.push(`Incomplete required PR template fields: ${missingFields.join(", ")}`);
}
if (formatWarnings.length > 0) {
advisoryFindings.push(`Formatting issues in added lines (${formatWarnings.length})`);
}
if (dangerousProblems.length > 0) {
blockingFindings.push(`Dangerous patch markers found (${dangerousProblems.length})`);
}
const comments = await github.paginate(github.rest.issues.listComments, {
owner,
repo,
issue_number: pr.number,
per_page: 100,
});
const existing = comments.find((comment) => {
const body = comment.body || "";
return body.includes(marker) || body.includes(legacyMarker);
});
if (advisoryFindings.length === 0 && blockingFindings.length === 0) {
if (existing) {
await github.rest.issues.deleteComment({
owner,
repo,
comment_id: existing.id,
});
}
core.info("PR intake sanity checks passed.");
return;
}
const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;
const advisoryDetails = [];
if (formatWarnings.length > 0) {
advisoryDetails.push(...formatWarnings.slice(0, 20).map((entry) => `- ${entry}`));
if (formatWarnings.length > 20) {
advisoryDetails.push(`- ...and ${formatWarnings.length - 20} more issue(s)`);
}
}
const blockingDetails = [];
if (dangerousProblems.length > 0) {
blockingDetails.push(...dangerousProblems.slice(0, 20).map((entry) => `- ${entry}`));
if (dangerousProblems.length > 20) {
blockingDetails.push(`- ...and ${dangerousProblems.length - 20} more issue(s)`);
}
}
const isBlocking = blockingFindings.length > 0;
const workflowChangeNote = workflowFilesChanged.length > 0
? [
"",
"Workflow files changed in this PR:",
...workflowFilesChanged.map((name) => `- \`${name}\``),
].join("\n")
: "";
const commentBody = [
marker,
isBlocking
? "### PR intake checks failed (blocking)"
: "### PR intake checks found warnings (non-blocking)",
"",
isBlocking
? "Fast safe checks found blocking safety issues:"
: "Fast safe checks found advisory issues. CI lint/test/build gates still enforce merge quality.",
...(blockingFindings.length > 0 ? blockingFindings.map((entry) => `- ${entry}`) : []),
...(advisoryFindings.length > 0 ? advisoryFindings.map((entry) => `- ${entry}`) : []),
"",
"Action items:",
"1. Complete required PR template sections/fields.",
"2. Remove tabs, trailing whitespace, and merge conflict markers from added lines.",
"4. Re-run local checks before pushing:",
" - `./scripts/ci/rust_quality_gate.sh`",
" - `./scripts/ci/rust_strict_delta_gate.sh`",
" - `./scripts/ci/docs_quality_gate.sh`",
"",
"",
`Run logs: ${runUrl}`,
"",
"Detected blocking line issues (sample):",
...(blockingDetails.length > 0 ? blockingDetails : ["- none"]),
"",
"Detected advisory line issues (sample):",
...(advisoryDetails.length > 0 ? advisoryDetails : ["- none"]),
workflowChangeNote,
].join("\n");
if (existing) {
await github.rest.issues.updateComment({
owner,
repo,
comment_id: existing.id,
body: commentBody,
});
} else {
await github.rest.issues.createComment({
owner,
repo,
issue_number: pr.number,
body: commentBody,
});
}
if (isBlocking) {
core.setFailed("PR intake sanity checks found blocking issues. See sticky comment for details.");
return;
}
core.info("PR intake sanity checks found advisory issues only.");
};

805
.github/workflows/scripts/pr_labeler.js vendored Normal file
View File

@ -0,0 +1,805 @@
// Apply managed PR labels (size/risk/path/module/contributor tiers).
// Extracted from pr-labeler workflow inline github-script for maintainability.
module.exports = async ({ github, context, core }) => {
const pr = context.payload.pull_request;
const owner = context.repo.owner;
const repo = context.repo.repo;
const action = context.payload.action;
const changedLabel = context.payload.label?.name;
const sizeLabels = ["size: XS", "size: S", "size: M", "size: L", "size: XL"];
const computedRiskLabels = ["risk: low", "risk: medium", "risk: high"];
const manualRiskOverrideLabel = "risk: manual";
const managedEnforcedLabels = new Set([
...sizeLabels,
manualRiskOverrideLabel,
...computedRiskLabels,
]);
if ((action === "labeled" || action === "unlabeled") && !managedEnforcedLabels.has(changedLabel)) {
core.info(`skip non-size/risk label event: ${changedLabel || "unknown"}`);
return;
}
async function loadContributorTierPolicy() {
const policyPath = process.env.LABEL_POLICY_PATH || ".github/label-policy.json";
const fallback = {
contributorTierColor: "2ED9FF",
contributorTierRules: [
{ label: "distinguished contributor", minMergedPRs: 50 },
{ label: "principal contributor", minMergedPRs: 20 },
{ label: "experienced contributor", minMergedPRs: 10 },
{ label: "trusted contributor", minMergedPRs: 5 },
],
};
try {
const { data } = await github.rest.repos.getContent({
owner,
repo,
path: policyPath,
ref: context.payload.repository?.default_branch || "main",
});
const json = JSON.parse(Buffer.from(data.content, "base64").toString("utf8"));
const contributorTierRules = (json.contributor_tiers || []).map((entry) => ({
label: String(entry.label || "").trim(),
minMergedPRs: Number(entry.min_merged_prs || 0),
}));
const contributorTierColor = String(json.contributor_tier_color || "").toUpperCase();
if (!contributorTierColor || contributorTierRules.length === 0) {
return fallback;
}
return { contributorTierColor, contributorTierRules };
} catch (error) {
core.warning(`failed to load ${policyPath}, using fallback policy: ${error.message}`);
return fallback;
}
}
const { contributorTierColor, contributorTierRules } = await loadContributorTierPolicy();
const contributorTierLabels = contributorTierRules.map((rule) => rule.label);
const managedPathLabels = [
"docs",
"dependencies",
"ci",
"core",
"agent",
"channel",
"config",
"cron",
"daemon",
"doctor",
"gateway",
"health",
"heartbeat",
"integration",
"memory",
"observability",
"onboard",
"provider",
"runtime",
"security",
"service",
"skillforge",
"skills",
"tool",
"tunnel",
"tests",
"scripts",
"dev",
];
const managedPathLabelSet = new Set(managedPathLabels);
const moduleNamespaceRules = [
{ root: "src/agent/", prefix: "agent", coreEntries: new Set(["mod.rs"]) },
{ root: "src/channels/", prefix: "channel", coreEntries: new Set(["mod.rs", "traits.rs"]) },
{ root: "src/config/", prefix: "config", coreEntries: new Set(["mod.rs", "schema.rs"]) },
{ root: "src/cron/", prefix: "cron", coreEntries: new Set(["mod.rs"]) },
{ root: "src/daemon/", prefix: "daemon", coreEntries: new Set(["mod.rs"]) },
{ root: "src/doctor/", prefix: "doctor", coreEntries: new Set(["mod.rs"]) },
{ root: "src/gateway/", prefix: "gateway", coreEntries: new Set(["mod.rs"]) },
{ root: "src/health/", prefix: "health", coreEntries: new Set(["mod.rs"]) },
{ root: "src/heartbeat/", prefix: "heartbeat", coreEntries: new Set(["mod.rs"]) },
{ root: "src/integrations/", prefix: "integration", coreEntries: new Set(["mod.rs", "registry.rs"]) },
{ root: "src/memory/", prefix: "memory", coreEntries: new Set(["mod.rs", "traits.rs"]) },
{ root: "src/observability/", prefix: "observability", coreEntries: new Set(["mod.rs", "traits.rs"]) },
{ root: "src/onboard/", prefix: "onboard", coreEntries: new Set(["mod.rs"]) },
{ root: "src/providers/", prefix: "provider", coreEntries: new Set(["mod.rs", "traits.rs"]) },
{ root: "src/runtime/", prefix: "runtime", coreEntries: new Set(["mod.rs", "traits.rs"]) },
{ root: "src/security/", prefix: "security", coreEntries: new Set(["mod.rs"]) },
{ root: "src/service/", prefix: "service", coreEntries: new Set(["mod.rs"]) },
{ root: "src/skillforge/", prefix: "skillforge", coreEntries: new Set(["mod.rs"]) },
{ root: "src/skills/", prefix: "skills", coreEntries: new Set(["mod.rs"]) },
{ root: "src/tools/", prefix: "tool", coreEntries: new Set(["mod.rs", "traits.rs"]) },
{ root: "src/tunnel/", prefix: "tunnel", coreEntries: new Set(["mod.rs"]) },
];
const managedModulePrefixes = [...new Set(moduleNamespaceRules.map((rule) => `${rule.prefix}:`))];
const orderedOtherLabelStyles = [
{ label: "health", color: "8EC9B8" },
{ label: "tool", color: "7FC4B6" },
{ label: "agent", color: "86C4A2" },
{ label: "memory", color: "8FCB99" },
{ label: "channel", color: "7EB6F2" },
{ label: "service", color: "95C7B6" },
{ label: "integration", color: "8DC9AE" },
{ label: "tunnel", color: "9FC8B3" },
{ label: "config", color: "AABCD0" },
{ label: "observability", color: "84C9D0" },
{ label: "docs", color: "8FBBE0" },
{ label: "dev", color: "B9C1CC" },
{ label: "tests", color: "9DC8C7" },
{ label: "skills", color: "BFC89B" },
{ label: "skillforge", color: "C9C39B" },
{ label: "provider", color: "958DF0" },
{ label: "runtime", color: "A3ADD8" },
{ label: "heartbeat", color: "C0C88D" },
{ label: "daemon", color: "C8C498" },
{ label: "doctor", color: "C1CF9D" },
{ label: "onboard", color: "D2BF86" },
{ label: "cron", color: "D2B490" },
{ label: "ci", color: "AEB4CE" },
{ label: "dependencies", color: "9FB1DE" },
{ label: "gateway", color: "B5A8E5" },
{ label: "security", color: "E58D85" },
{ label: "core", color: "C8A99B" },
{ label: "scripts", color: "C9B49F" },
];
const otherLabelDisplayOrder = orderedOtherLabelStyles.map((entry) => entry.label);
const modulePrefixSet = new Set(moduleNamespaceRules.map((rule) => rule.prefix));
const modulePrefixPriority = otherLabelDisplayOrder.filter((label) => modulePrefixSet.has(label));
const pathLabelPriority = [...otherLabelDisplayOrder];
const riskDisplayOrder = ["risk: high", "risk: medium", "risk: low", "risk: manual"];
const sizeDisplayOrder = ["size: XS", "size: S", "size: M", "size: L", "size: XL"];
const contributorDisplayOrder = [
"distinguished contributor",
"principal contributor",
"experienced contributor",
"trusted contributor",
];
const modulePrefixPriorityIndex = new Map(
modulePrefixPriority.map((prefix, index) => [prefix, index])
);
const pathLabelPriorityIndex = new Map(
pathLabelPriority.map((label, index) => [label, index])
);
const riskPriorityIndex = new Map(
riskDisplayOrder.map((label, index) => [label, index])
);
const sizePriorityIndex = new Map(
sizeDisplayOrder.map((label, index) => [label, index])
);
const contributorPriorityIndex = new Map(
contributorDisplayOrder.map((label, index) => [label, index])
);
const otherLabelColors = Object.fromEntries(
orderedOtherLabelStyles.map((entry) => [entry.label, entry.color])
);
const staticLabelColors = {
"size: XS": "E7CDD3",
"size: S": "E1BEC7",
"size: M": "DBB0BB",
"size: L": "D4A2AF",
"size: XL": "CE94A4",
"risk: low": "97D3A6",
"risk: medium": "E4C47B",
"risk: high": "E98E88",
"risk: manual": "B7A4E0",
...otherLabelColors,
};
const staticLabelDescriptions = {
"size: XS": "Auto size: <=80 non-doc changed lines.",
"size: S": "Auto size: 81-250 non-doc changed lines.",
"size: M": "Auto size: 251-500 non-doc changed lines.",
"size: L": "Auto size: 501-1000 non-doc changed lines.",
"size: XL": "Auto size: >1000 non-doc changed lines.",
"risk: low": "Auto risk: docs/chore-only paths.",
"risk: medium": "Auto risk: src/** or dependency/config changes.",
"risk: high": "Auto risk: security/runtime/gateway/tools/workflows.",
"risk: manual": "Maintainer override: keep selected risk label.",
docs: "Auto scope: docs/markdown/template files changed.",
dependencies: "Auto scope: dependency manifest/lock/policy changed.",
ci: "Auto scope: CI/workflow/hook files changed.",
core: "Auto scope: root src/*.rs files changed.",
agent: "Auto scope: src/agent/** changed.",
channel: "Auto scope: src/channels/** changed.",
config: "Auto scope: src/config/** changed.",
cron: "Auto scope: src/cron/** changed.",
daemon: "Auto scope: src/daemon/** changed.",
doctor: "Auto scope: src/doctor/** changed.",
gateway: "Auto scope: src/gateway/** changed.",
health: "Auto scope: src/health/** changed.",
heartbeat: "Auto scope: src/heartbeat/** changed.",
integration: "Auto scope: src/integrations/** changed.",
memory: "Auto scope: src/memory/** changed.",
observability: "Auto scope: src/observability/** changed.",
onboard: "Auto scope: src/onboard/** changed.",
provider: "Auto scope: src/providers/** changed.",
runtime: "Auto scope: src/runtime/** changed.",
security: "Auto scope: src/security/** changed.",
service: "Auto scope: src/service/** changed.",
skillforge: "Auto scope: src/skillforge/** changed.",
skills: "Auto scope: src/skills/** changed.",
tool: "Auto scope: src/tools/** changed.",
tunnel: "Auto scope: src/tunnel/** changed.",
tests: "Auto scope: tests/** changed.",
scripts: "Auto scope: scripts/** changed.",
dev: "Auto scope: dev/** changed.",
};
for (const label of contributorTierLabels) {
staticLabelColors[label] = contributorTierColor;
const rule = contributorTierRules.find((entry) => entry.label === label);
if (rule) {
staticLabelDescriptions[label] = `Contributor with ${rule.minMergedPRs}+ merged PRs.`;
}
}
const modulePrefixColors = Object.fromEntries(
modulePrefixPriority.map((prefix) => [
`${prefix}:`,
otherLabelColors[prefix] || "BFDADC",
])
);
const providerKeywordHints = [
"deepseek",
"moonshot",
"kimi",
"qwen",
"mistral",
"doubao",
"baichuan",
"yi",
"siliconflow",
"vertex",
"azure",
"perplexity",
"venice",
"vercel",
"cloudflare",
"synthetic",
"opencode",
"zai",
"glm",
"minimax",
"bedrock",
"qianfan",
"groq",
"together",
"fireworks",
"novita",
"cohere",
"openai",
"openrouter",
"anthropic",
"gemini",
"ollama",
];
const channelKeywordHints = [
"telegram",
"discord",
"slack",
"whatsapp",
"matrix",
"irc",
"imessage",
"email",
"cli",
];
function isDocsLike(path) {
return (
path.startsWith("docs/") ||
path.endsWith(".md") ||
path.endsWith(".mdx") ||
path === "LICENSE" ||
path === ".markdownlint-cli2.yaml" ||
path === ".github/pull_request_template.md" ||
path.startsWith(".github/ISSUE_TEMPLATE/")
);
}
function normalizeLabelSegment(segment) {
return (segment || "")
.toLowerCase()
.replace(/\.rs$/g, "")
.replace(/[^a-z0-9_-]+/g, "-")
.replace(/^[-_]+|[-_]+$/g, "")
.slice(0, 40);
}
function containsKeyword(text, keyword) {
const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
const pattern = new RegExp(`(^|[^a-z0-9_])${escaped}([^a-z0-9_]|$)`, "i");
return pattern.test(text);
}
function formatModuleLabel(prefix, segment) {
return `${prefix}: ${segment}`;
}
function parseModuleLabel(label) {
if (typeof label !== "string") return null;
const match = label.match(/^([^:]+):\s*(.+)$/);
if (!match) return null;
const prefix = match[1].trim().toLowerCase();
const segment = (match[2] || "").trim().toLowerCase();
if (!prefix || !segment) return null;
return { prefix, segment };
}
function sortByPriority(labels, priorityIndex) {
return [...new Set(labels)].sort((left, right) => {
const leftPriority = priorityIndex.has(left) ? priorityIndex.get(left) : Number.MAX_SAFE_INTEGER;
const rightPriority = priorityIndex.has(right)
? priorityIndex.get(right)
: Number.MAX_SAFE_INTEGER;
if (leftPriority !== rightPriority) return leftPriority - rightPriority;
return left.localeCompare(right);
});
}
function sortModuleLabels(labels) {
return [...new Set(labels)].sort((left, right) => {
const leftParsed = parseModuleLabel(left);
const rightParsed = parseModuleLabel(right);
if (!leftParsed || !rightParsed) return left.localeCompare(right);
const leftPrefixPriority = modulePrefixPriorityIndex.has(leftParsed.prefix)
? modulePrefixPriorityIndex.get(leftParsed.prefix)
: Number.MAX_SAFE_INTEGER;
const rightPrefixPriority = modulePrefixPriorityIndex.has(rightParsed.prefix)
? modulePrefixPriorityIndex.get(rightParsed.prefix)
: Number.MAX_SAFE_INTEGER;
if (leftPrefixPriority !== rightPrefixPriority) {
return leftPrefixPriority - rightPrefixPriority;
}
if (leftParsed.prefix !== rightParsed.prefix) {
return leftParsed.prefix.localeCompare(rightParsed.prefix);
}
const leftIsCore = leftParsed.segment === "core";
const rightIsCore = rightParsed.segment === "core";
if (leftIsCore !== rightIsCore) return leftIsCore ? 1 : -1;
return leftParsed.segment.localeCompare(rightParsed.segment);
});
}
function refineModuleLabels(rawLabels) {
const refined = new Set(rawLabels);
const segmentsByPrefix = new Map();
for (const label of rawLabels) {
const parsed = parseModuleLabel(label);
if (!parsed) continue;
if (!segmentsByPrefix.has(parsed.prefix)) {
segmentsByPrefix.set(parsed.prefix, new Set());
}
segmentsByPrefix.get(parsed.prefix).add(parsed.segment);
}
for (const [prefix, segments] of segmentsByPrefix) {
const hasSpecificSegment = [...segments].some((segment) => segment !== "core");
if (hasSpecificSegment) {
refined.delete(formatModuleLabel(prefix, "core"));
}
}
return refined;
}
function compactModuleLabels(labels) {
const groupedSegments = new Map();
const compactedModuleLabels = new Set();
const forcePathPrefixes = new Set();
for (const label of labels) {
const parsed = parseModuleLabel(label);
if (!parsed) {
compactedModuleLabels.add(label);
continue;
}
if (!groupedSegments.has(parsed.prefix)) {
groupedSegments.set(parsed.prefix, new Set());
}
groupedSegments.get(parsed.prefix).add(parsed.segment);
}
for (const [prefix, segments] of groupedSegments) {
const uniqueSegments = [...new Set([...segments].filter(Boolean))];
if (uniqueSegments.length === 0) continue;
if (uniqueSegments.length === 1) {
compactedModuleLabels.add(formatModuleLabel(prefix, uniqueSegments[0]));
} else {
forcePathPrefixes.add(prefix);
}
}
return {
moduleLabels: compactedModuleLabels,
forcePathPrefixes,
};
}
function colorForLabel(label) {
if (staticLabelColors[label]) return staticLabelColors[label];
const matchedPrefix = Object.keys(modulePrefixColors).find((prefix) => label.startsWith(prefix));
if (matchedPrefix) return modulePrefixColors[matchedPrefix];
return "BFDADC";
}
function descriptionForLabel(label) {
if (staticLabelDescriptions[label]) return staticLabelDescriptions[label];
const parsed = parseModuleLabel(label);
if (parsed) {
if (parsed.segment === "core") {
return `Auto module: ${parsed.prefix} core files changed.`;
}
return `Auto module: ${parsed.prefix}/${parsed.segment} changed.`;
}
return "Auto-managed label.";
}
async function ensureLabel(name, existing = null) {
const expectedColor = colorForLabel(name);
const expectedDescription = descriptionForLabel(name);
try {
const current = existing || (await github.rest.issues.getLabel({ owner, repo, name })).data;
const currentColor = (current.color || "").toUpperCase();
const currentDescription = (current.description || "").trim();
if (currentColor !== expectedColor || currentDescription !== expectedDescription) {
await github.rest.issues.updateLabel({
owner,
repo,
name,
new_name: name,
color: expectedColor,
description: expectedDescription,
});
}
} catch (error) {
if (error.status !== 404) throw error;
await github.rest.issues.createLabel({
owner,
repo,
name,
color: expectedColor,
description: expectedDescription,
});
}
}
function isManagedLabel(label) {
if (label === manualRiskOverrideLabel) return true;
if (sizeLabels.includes(label) || computedRiskLabels.includes(label)) return true;
if (managedPathLabelSet.has(label)) return true;
if (contributorTierLabels.includes(label)) return true;
if (managedModulePrefixes.some((prefix) => label.startsWith(prefix))) return true;
return false;
}
async function ensureManagedRepoLabelsMetadata() {
const repoLabels = await github.paginate(github.rest.issues.listLabelsForRepo, {
owner,
repo,
per_page: 100,
});
for (const existingLabel of repoLabels) {
const labelName = existingLabel.name || "";
if (!isManagedLabel(labelName)) continue;
await ensureLabel(labelName, existingLabel);
}
}
function selectContributorTier(mergedCount) {
const matchedTier = contributorTierRules.find((rule) => mergedCount >= rule.minMergedPRs);
return matchedTier ? matchedTier.label : null;
}
if (context.eventName === "workflow_dispatch") {
const mode = (context.payload.inputs?.mode || "audit").toLowerCase();
const shouldRepair = mode === "repair";
const repoLabels = await github.paginate(github.rest.issues.listLabelsForRepo, {
owner,
repo,
per_page: 100,
});
let managedScanned = 0;
const drifts = [];
for (const existingLabel of repoLabels) {
const labelName = existingLabel.name || "";
if (!isManagedLabel(labelName)) continue;
managedScanned += 1;
const expectedColor = colorForLabel(labelName);
const expectedDescription = descriptionForLabel(labelName);
const currentColor = (existingLabel.color || "").toUpperCase();
const currentDescription = (existingLabel.description || "").trim();
if (currentColor !== expectedColor || currentDescription !== expectedDescription) {
drifts.push({
name: labelName,
currentColor,
expectedColor,
currentDescription,
expectedDescription,
});
if (shouldRepair) {
await ensureLabel(labelName, existingLabel);
}
}
}
core.summary
.addHeading("Managed Label Governance", 2)
.addRaw(`Mode: ${shouldRepair ? "repair" : "audit"}`)
.addEOL()
.addRaw(`Managed labels scanned: ${managedScanned}`)
.addEOL()
.addRaw(`Drifts found: ${drifts.length}`)
.addEOL();
if (drifts.length > 0) {
const sample = drifts.slice(0, 30).map((entry) => [
entry.name,
`${entry.currentColor} -> ${entry.expectedColor}`,
`${entry.currentDescription || "(blank)"} -> ${entry.expectedDescription}`,
]);
core.summary.addTable([
[{ data: "Label", header: true }, { data: "Color", header: true }, { data: "Description", header: true }],
...sample,
]);
if (drifts.length > sample.length) {
core.summary
.addRaw(`Additional drifts not shown: ${drifts.length - sample.length}`)
.addEOL();
}
}
await core.summary.write();
if (!shouldRepair && drifts.length > 0) {
core.info(`Managed-label metadata drifts detected: ${drifts.length}. Re-run with mode=repair to auto-fix.`);
} else if (shouldRepair) {
core.info(`Managed-label metadata repair applied to ${drifts.length} labels.`);
} else {
core.info("No managed-label metadata drift detected.");
}
return;
}
const files = await github.paginate(github.rest.pulls.listFiles, {
owner,
repo,
pull_number: pr.number,
per_page: 100,
});
const detectedModuleLabels = new Set();
for (const file of files) {
const path = (file.filename || "").toLowerCase();
for (const rule of moduleNamespaceRules) {
if (!path.startsWith(rule.root)) continue;
const relative = path.slice(rule.root.length);
if (!relative) continue;
const first = relative.split("/")[0];
const firstStem = first.endsWith(".rs") ? first.slice(0, -3) : first;
let segment = firstStem;
if (rule.coreEntries.has(first) || rule.coreEntries.has(firstStem)) {
segment = "core";
}
segment = normalizeLabelSegment(segment);
if (!segment) continue;
detectedModuleLabels.add(formatModuleLabel(rule.prefix, segment));
}
}
const providerRelevantFiles = files.filter((file) => {
const path = file.filename || "";
return (
path.startsWith("src/providers/") ||
path.startsWith("src/integrations/") ||
path.startsWith("src/onboard/") ||
path.startsWith("src/config/")
);
});
if (providerRelevantFiles.length > 0) {
const searchableText = [
pr.title || "",
pr.body || "",
...providerRelevantFiles.map((file) => file.filename || ""),
...providerRelevantFiles.map((file) => file.patch || ""),
]
.join("\n")
.toLowerCase();
for (const keyword of providerKeywordHints) {
if (containsKeyword(searchableText, keyword)) {
detectedModuleLabels.add(formatModuleLabel("provider", keyword));
}
}
}
const channelRelevantFiles = files.filter((file) => {
const path = file.filename || "";
return (
path.startsWith("src/channels/") ||
path.startsWith("src/onboard/") ||
path.startsWith("src/config/")
);
});
if (channelRelevantFiles.length > 0) {
const searchableText = [
pr.title || "",
pr.body || "",
...channelRelevantFiles.map((file) => file.filename || ""),
...channelRelevantFiles.map((file) => file.patch || ""),
]
.join("\n")
.toLowerCase();
for (const keyword of channelKeywordHints) {
if (containsKeyword(searchableText, keyword)) {
detectedModuleLabels.add(formatModuleLabel("channel", keyword));
}
}
}
const refinedModuleLabels = refineModuleLabels(detectedModuleLabels);
const compactedModuleState = compactModuleLabels(refinedModuleLabels);
const selectedModuleLabels = compactedModuleState.moduleLabels;
const forcePathPrefixes = compactedModuleState.forcePathPrefixes;
const modulePrefixesWithLabels = new Set(
[...selectedModuleLabels]
.map((label) => parseModuleLabel(label)?.prefix)
.filter(Boolean)
);
const { data: currentLabels } = await github.rest.issues.listLabelsOnIssue({
owner,
repo,
issue_number: pr.number,
});
const currentLabelNames = currentLabels.map((label) => label.name);
const currentPathLabels = currentLabelNames.filter((label) => managedPathLabelSet.has(label));
const candidatePathLabels = new Set([...currentPathLabels, ...forcePathPrefixes]);
const dedupedPathLabels = [...candidatePathLabels].filter((label) => {
if (label === "core") return true;
if (forcePathPrefixes.has(label)) return true;
return !modulePrefixesWithLabels.has(label);
});
const excludedLockfiles = new Set(["Cargo.lock"]);
const changedLines = files.reduce((total, file) => {
const path = file.filename || "";
if (isDocsLike(path) || excludedLockfiles.has(path)) {
return total;
}
return total + (file.additions || 0) + (file.deletions || 0);
}, 0);
let sizeLabel = "size: XL";
if (changedLines <= 80) sizeLabel = "size: XS";
else if (changedLines <= 250) sizeLabel = "size: S";
else if (changedLines <= 500) sizeLabel = "size: M";
else if (changedLines <= 1000) sizeLabel = "size: L";
const hasHighRiskPath = files.some((file) => {
const path = file.filename || "";
return (
path.startsWith("src/security/") ||
path.startsWith("src/runtime/") ||
path.startsWith("src/gateway/") ||
path.startsWith("src/tools/") ||
path.startsWith(".github/workflows/")
);
});
const hasMediumRiskPath = files.some((file) => {
const path = file.filename || "";
return (
path.startsWith("src/") ||
path === "Cargo.toml" ||
path === "Cargo.lock" ||
path === "deny.toml" ||
path.startsWith(".githooks/")
);
});
let riskLabel = "risk: low";
if (hasHighRiskPath) {
riskLabel = "risk: high";
} else if (hasMediumRiskPath) {
riskLabel = "risk: medium";
}
await ensureManagedRepoLabelsMetadata();
const labelsToEnsure = new Set([
...sizeLabels,
...computedRiskLabels,
manualRiskOverrideLabel,
...managedPathLabels,
...contributorTierLabels,
...selectedModuleLabels,
]);
for (const label of labelsToEnsure) {
await ensureLabel(label);
}
let contributorTierLabel = null;
const authorLogin = pr.user?.login;
if (authorLogin && pr.user?.type !== "Bot") {
try {
const { data: mergedSearch } = await github.rest.search.issuesAndPullRequests({
q: `repo:${owner}/${repo} is:pr is:merged author:${authorLogin}`,
per_page: 1,
});
const mergedCount = mergedSearch.total_count || 0;
contributorTierLabel = selectContributorTier(mergedCount);
} catch (error) {
core.warning(`failed to compute contributor tier label: ${error.message}`);
}
}
const hasManualRiskOverride = currentLabelNames.includes(manualRiskOverrideLabel);
const keepNonManagedLabels = currentLabelNames.filter((label) => {
if (label === manualRiskOverrideLabel) return true;
if (contributorTierLabels.includes(label)) return false;
if (sizeLabels.includes(label) || computedRiskLabels.includes(label)) return false;
if (managedPathLabelSet.has(label)) return false;
if (managedModulePrefixes.some((prefix) => label.startsWith(prefix))) return false;
return true;
});
const manualRiskSelection =
currentLabelNames.find((label) => computedRiskLabels.includes(label)) || riskLabel;
const moduleLabelList = sortModuleLabels([...selectedModuleLabels]);
const contributorLabelList = contributorTierLabel ? [contributorTierLabel] : [];
const selectedRiskLabels = hasManualRiskOverride
? sortByPriority([manualRiskSelection, manualRiskOverrideLabel], riskPriorityIndex)
: sortByPriority([riskLabel], riskPriorityIndex);
const selectedSizeLabels = sortByPriority([sizeLabel], sizePriorityIndex);
const sortedContributorLabels = sortByPriority(contributorLabelList, contributorPriorityIndex);
const sortedPathLabels = sortByPriority(dedupedPathLabels, pathLabelPriorityIndex);
const sortedKeepNonManagedLabels = [...new Set(keepNonManagedLabels)].sort((left, right) =>
left.localeCompare(right)
);
const nextLabels = [
...new Set([
...selectedRiskLabels,
...selectedSizeLabels,
...sortedContributorLabels,
...moduleLabelList,
...sortedPathLabels,
...sortedKeepNonManagedLabels,
]),
];
await github.rest.issues.setLabels({
owner,
repo,
issue_number: pr.number,
labels: nextLabels,
});
};

View File

@ -0,0 +1,57 @@
// Extracted from test-benchmarks.yml step: Post benchmark summary on PR
module.exports = async ({ github, context, core }) => {
const fs = require('fs');
const output = fs.readFileSync('benchmark_output.txt', 'utf8');
// Extract Criterion result lines
const lines = output.split('\n').filter(l =>
l.includes('time:') || l.includes('change:') || l.includes('Performance')
);
if (lines.length === 0) {
core.info('No benchmark results to post.');
return;
}
const body = [
'## 📊 Benchmark Results',
'',
'```',
lines.join('\n'),
'```',
'',
'<details><summary>Full output</summary>',
'',
'```',
output.substring(0, 60000),
'```',
'</details>',
].join('\n');
// Find and update or create comment
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.payload.pull_request.number,
});
const marker = '## 📊 Benchmark Results';
const existing = comments.find(c => c.body && c.body.startsWith(marker));
if (existing) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existing.id,
body,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.payload.pull_request.number,
body,
});
}
};

682
.github/workflows/sec-audit.yml vendored Normal file
View File

@ -0,0 +1,682 @@
name: Sec Audit
on:
push:
branches: [dev, main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "deny.toml"
- ".gitleaks.toml"
- ".github/security/gitleaks-allowlist-governance.json"
- ".github/security/deny-ignore-governance.json"
- ".github/security/unsafe-audit-governance.json"
- "scripts/ci/install_gitleaks.sh"
- "scripts/ci/install_syft.sh"
- "scripts/ci/ensure_c_toolchain.sh"
- "scripts/ci/ensure_cargo_component.sh"
- "scripts/ci/self_heal_rust_toolchain.sh"
- "scripts/ci/deny_policy_guard.py"
- "scripts/ci/secrets_governance_guard.py"
- "scripts/ci/unsafe_debt_audit.py"
- "scripts/ci/unsafe_policy_guard.py"
- "scripts/ci/config/unsafe_debt_policy.toml"
- "scripts/ci/emit_audit_event.py"
- "scripts/ci/security_regression_tests.sh"
- "scripts/ci/ensure_cc.sh"
- ".github/workflows/sec-audit.yml"
pull_request:
branches: [dev, main]
# Do not gate pull_request by paths: main branch protection requires
# "Security Required Gate" to always report a status on PRs.
merge_group:
branches: [dev, main]
schedule:
- cron: "0 6 * * 1" # Weekly on Monday 6am UTC
workflow_dispatch:
inputs:
full_secret_scan:
description: "Scan full git history for secrets"
required: true
default: false
type: boolean
fail_on_secret_leak:
description: "Fail workflow if secret leaks are detected"
required: true
default: true
type: boolean
fail_on_governance_violation:
description: "Fail workflow if secrets governance policy violations are detected"
required: true
default: true
type: boolean
concurrency:
group: security-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
permissions:
contents: read
security-events: write
actions: read
checks: write
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
audit:
name: Security Audit
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 45
env:
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure C toolchain for Rust builds
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998 # v2.0.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
deny:
name: License & Supply Chain
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 20
env:
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- name: Enforce deny policy hygiene
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/deny_policy_guard.py \
--deny-file deny.toml \
--governance-file .github/security/deny-ignore-governance.json \
--output-json artifacts/deny-policy-guard.json \
--output-md artifacts/deny-policy-guard.md \
--fail-on-violation
- name: Install cargo-deny
shell: bash
run: |
set -euo pipefail
version="0.19.0"
arch="$(uname -m)"
case "${arch}" in
x86_64|amd64)
target="x86_64-unknown-linux-musl"
expected_sha256="0e8c2aa59128612c90d9e09c02204e912f29a5b8d9a64671b94608cbe09e064f"
;;
aarch64|arm64)
target="aarch64-unknown-linux-musl"
expected_sha256="2b3567a60b7491c159d1cef8b7d8479d1ad2a31e29ef49462634ad4552fcc77d"
;;
*)
echo "Unsupported runner architecture for cargo-deny: ${arch}" >&2
exit 1
;;
esac
install_dir="${RUNNER_TEMP}/cargo-deny-${version}"
archive="${RUNNER_TEMP}/cargo-deny-${version}-${target}.tar.gz"
mkdir -p "${install_dir}"
curl --proto '=https' --tlsv1.2 --fail --location --silent --show-error \
--output "${archive}" \
"https://github.com/EmbarkStudios/cargo-deny/releases/download/${version}/cargo-deny-${version}-${target}.tar.gz"
actual_sha256="$(sha256sum "${archive}" | awk '{print $1}')"
if [ "${actual_sha256}" != "${expected_sha256}" ]; then
echo "Checksum mismatch for cargo-deny ${version} (${target})" >&2
echo "Expected: ${expected_sha256}" >&2
echo "Actual: ${actual_sha256}" >&2
exit 1
fi
tar -xzf "${archive}" -C "${install_dir}" --strip-components=1
echo "${install_dir}" >> "${GITHUB_PATH}"
"${install_dir}/cargo-deny" --version
- name: Run cargo-deny checks
shell: bash
run: cargo-deny check advisories licenses sources
- name: Emit deny audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/deny-policy-guard.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type deny_policy_guard \
--input-json artifacts/deny-policy-guard.json \
--output-json artifacts/audit-event-deny-policy-guard.json \
--artifact-name deny-policy-audit-event \
--retention-days 14
fi
- name: Upload deny policy artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: deny-policy-guard
path: artifacts/deny-policy-guard.*
if-no-files-found: ignore
retention-days: 14
- name: Upload deny policy audit event
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: deny-policy-audit-event
path: artifacts/audit-event-deny-policy-guard.json
if-no-files-found: ignore
retention-days: 14
security-regressions:
name: Security Regression Tests
runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404]
timeout-minutes: 30
env:
CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo
RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup
CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Ensure C toolchain
shell: bash
run: bash ./scripts/ci/ensure_c_toolchain.sh
- name: Self-heal Rust toolchain cache
shell: bash
run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- name: Ensure C toolchain for Rust builds
run: ./scripts/ci/ensure_cc.sh
- name: Ensure cargo component
shell: bash
env:
ENSURE_CARGO_COMPONENT_STRICT: "true"
run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: sec-audit-security-regressions
cache-bin: false
- name: Run security regression suite
shell: bash
run: ./scripts/ci/security_regression_tests.sh
secrets:
name: Secrets Governance (Gitleaks)
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Enforce gitleaks allowlist governance
shell: bash
env:
FAIL_ON_GOVERNANCE_INPUT: ${{ github.event.inputs.fail_on_governance_violation || 'true' }}
run: |
set -euo pipefail
mkdir -p artifacts
fail_on_governance="true"
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
fail_on_governance="${FAIL_ON_GOVERNANCE_INPUT}"
fi
cmd=(python3 scripts/ci/secrets_governance_guard.py
--gitleaks-file .gitleaks.toml
--governance-file .github/security/gitleaks-allowlist-governance.json
--output-json artifacts/secrets-governance-guard.json
--output-md artifacts/secrets-governance-guard.md)
if [ "$fail_on_governance" = "true" ]; then
cmd+=(--fail-on-violation)
fi
"${cmd[@]}"
- name: Publish secrets governance summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/secrets-governance-guard.md ]; then
cat artifacts/secrets-governance-guard.md >> "$GITHUB_STEP_SUMMARY"
else
echo "Secrets governance report missing." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Emit secrets governance audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/secrets-governance-guard.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type secrets_governance_guard \
--input-json artifacts/secrets-governance-guard.json \
--output-json artifacts/audit-event-secrets-governance-guard.json \
--artifact-name secrets-governance-audit-event \
--retention-days 14
fi
- name: Upload secrets governance artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: secrets-governance-guard
path: artifacts/secrets-governance-guard.*
if-no-files-found: ignore
retention-days: 14
- name: Upload secrets governance audit event
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: secrets-governance-audit-event
path: artifacts/audit-event-secrets-governance-guard.json
if-no-files-found: ignore
retention-days: 14
- name: Install gitleaks
shell: bash
run: |
set -euo pipefail
mkdir -p "${RUNNER_TEMP}/bin"
./scripts/ci/install_gitleaks.sh "${RUNNER_TEMP}/bin"
echo "${RUNNER_TEMP}/bin" >> "$GITHUB_PATH"
- name: Run gitleaks scan
shell: bash
env:
FULL_SECRET_SCAN_INPUT: ${{ github.event.inputs.full_secret_scan || 'false' }}
FAIL_ON_SECRET_LEAK_INPUT: ${{ github.event.inputs.fail_on_secret_leak || 'true' }}
run: |
set -euo pipefail
mkdir -p artifacts
log_opts=""
scan_scope="full-history"
fail_on_leak="true"
if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]; then
log_opts="${{ github.event.pull_request.base.sha }}..${GITHUB_SHA}"
scan_scope="diff-range"
elif [ "${GITHUB_EVENT_NAME}" = "push" ]; then
base_sha="${{ github.event.before }}"
if [ -n "$base_sha" ] && [ "$base_sha" != "0000000000000000000000000000000000000000" ]; then
log_opts="${base_sha}..${GITHUB_SHA}"
scan_scope="diff-range"
fi
elif [ "${GITHUB_EVENT_NAME}" = "merge_group" ]; then
base_sha="${{ github.event.merge_group.base_sha }}"
if [ -n "$base_sha" ]; then
log_opts="${base_sha}..${GITHUB_SHA}"
scan_scope="diff-range"
fi
elif [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
if [ "${FULL_SECRET_SCAN_INPUT}" != "true" ]; then
if [ -n "${{ github.sha }}" ]; then
log_opts="${{ github.sha }}~1..${{ github.sha }}"
scan_scope="latest-commit"
fi
fi
fail_on_leak="${FAIL_ON_SECRET_LEAK_INPUT}"
fi
cmd=(gitleaks git
--config .gitleaks.toml
--redact
--report-format sarif
--report-path artifacts/gitleaks.sarif
--verbose)
if [ -n "$log_opts" ]; then
cmd+=(--log-opts="$log_opts")
fi
set +e
"${cmd[@]}"
status=$?
set -e
echo "### Gitleaks scan" >> "$GITHUB_STEP_SUMMARY"
echo "- Scope: ${scan_scope}" >> "$GITHUB_STEP_SUMMARY"
if [ -n "$log_opts" ]; then
echo "- Log range: \`${log_opts}\`" >> "$GITHUB_STEP_SUMMARY"
fi
echo "- Exit code: ${status}" >> "$GITHUB_STEP_SUMMARY"
cat > artifacts/gitleaks-summary.json <<EOF
{
"schema_version": "zeroclaw.audit.v1",
"event_type": "gitleaks_scan",
"event_name": "${GITHUB_EVENT_NAME}",
"scope": "${scan_scope}",
"log_opts": "${log_opts}",
"result_code": "${status}",
"fail_on_leak": "${fail_on_leak}"
}
EOF
if [ "$status" -ne 0 ] && [ "$fail_on_leak" = "true" ]; then
exit "$status"
fi
- name: Upload gitleaks SARIF
if: always()
uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4
with:
sarif_file: artifacts/gitleaks.sarif
category: gitleaks
- name: Upload gitleaks artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: gitleaks-report
path: artifacts/gitleaks.sarif
if-no-files-found: ignore
retention-days: 14
- name: Emit gitleaks audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/gitleaks-summary.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type gitleaks_scan \
--input-json artifacts/gitleaks-summary.json \
--output-json artifacts/audit-event-gitleaks-scan.json \
--artifact-name gitleaks-audit-event \
--retention-days 14
fi
- name: Upload gitleaks audit event
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: gitleaks-audit-event
path: artifacts/audit-event-gitleaks-scan.json
if-no-files-found: ignore
retention-days: 14
sbom:
name: SBOM Snapshot
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Install syft
shell: bash
run: |
set -euo pipefail
mkdir -p "${RUNNER_TEMP}/bin"
./scripts/ci/install_syft.sh "${RUNNER_TEMP}/bin"
echo "${RUNNER_TEMP}/bin" >> "$GITHUB_PATH"
- name: Generate CycloneDX + SPDX SBOM
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
syft dir:. --source-name zeroclaw \
-o cyclonedx-json=artifacts/zeroclaw.cdx.json \
-o spdx-json=artifacts/zeroclaw.spdx.json
{
echo "### SBOM snapshot"
echo "- CycloneDX: artifacts/zeroclaw.cdx.json"
echo "- SPDX: artifacts/zeroclaw.spdx.json"
} >> "$GITHUB_STEP_SUMMARY"
- name: Upload SBOM artifacts
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: sbom-snapshot
path: artifacts/zeroclaw.*.json
retention-days: 14
- name: Emit SBOM audit event
if: always()
shell: bash
run: |
set -euo pipefail
cat > artifacts/sbom-summary.json <<EOF
{
"schema_version": "zeroclaw.audit.v1",
"event_type": "sbom_snapshot",
"cyclonedx_path": "artifacts/zeroclaw.cdx.json",
"spdx_path": "artifacts/zeroclaw.spdx.json"
}
EOF
python3 scripts/ci/emit_audit_event.py \
--event-type sbom_snapshot \
--input-json artifacts/sbom-summary.json \
--output-json artifacts/audit-event-sbom-snapshot.json \
--artifact-name sbom-audit-event \
--retention-days 14
- name: Upload SBOM audit event
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: sbom-audit-event
path: artifacts/audit-event-sbom-snapshot.json
if-no-files-found: ignore
retention-days: 14
unsafe-debt:
name: Unsafe Debt Audit
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Setup Python 3.11
shell: bash
run: |
set -euo pipefail
python3 --version
- name: Enforce unsafe policy governance
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/unsafe_policy_guard.py \
--policy-file scripts/ci/config/unsafe_debt_policy.toml \
--governance-file .github/security/unsafe-audit-governance.json \
--output-json artifacts/unsafe-policy-guard.json \
--output-md artifacts/unsafe-policy-guard.md \
--fail-on-violation
- name: Publish unsafe governance summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/unsafe-policy-guard.md ]; then
cat artifacts/unsafe-policy-guard.md >> "$GITHUB_STEP_SUMMARY"
else
echo "Unsafe policy governance report missing." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Run unsafe debt audit
shell: bash
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/unsafe_debt_audit.py \
--repo-root . \
--policy-file scripts/ci/config/unsafe_debt_policy.toml \
--output-json artifacts/unsafe-debt-audit.json \
--fail-on-findings \
--fail-on-excluded-crate-roots
- name: Publish unsafe debt summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/unsafe-debt-audit.json ]; then
python3 - <<'PY' >> "$GITHUB_STEP_SUMMARY"
import json
from pathlib import Path
report = json.loads(Path("artifacts/unsafe-debt-audit.json").read_text(encoding="utf-8"))
summary = report.get("summary", {})
source = report.get("source", {})
by_pattern = summary.get("by_pattern", {})
print("### Unsafe debt audit")
print(f"- Total findings: `{summary.get('total_findings', 0)}`")
print(f"- Files scanned: `{source.get('files_scanned', 0)}`")
print(f"- Crate roots scanned: `{source.get('crate_roots_scanned', 0)}`")
print(f"- Crate roots excluded: `{source.get('crate_roots_excluded', 0)}`")
if by_pattern:
print("- Findings by pattern:")
for pattern_id, count in sorted(by_pattern.items()):
print(f" - `{pattern_id}`: `{count}`")
else:
print("- Findings by pattern: none")
PY
else
echo "Unsafe debt audit JSON report missing." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Emit unsafe policy governance audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/unsafe-policy-guard.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type unsafe_policy_guard \
--input-json artifacts/unsafe-policy-guard.json \
--output-json artifacts/audit-event-unsafe-policy-guard.json \
--artifact-name unsafe-policy-audit-event \
--retention-days 14
fi
- name: Emit unsafe debt audit event
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/unsafe-debt-audit.json ]; then
python3 scripts/ci/emit_audit_event.py \
--event-type unsafe_debt_audit \
--input-json artifacts/unsafe-debt-audit.json \
--output-json artifacts/audit-event-unsafe-debt-audit.json \
--artifact-name unsafe-debt-audit-event \
--retention-days 14
fi
- name: Upload unsafe policy guard artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: unsafe-policy-guard
path: artifacts/unsafe-policy-guard.*
if-no-files-found: ignore
retention-days: 14
- name: Upload unsafe debt audit artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: unsafe-debt-audit
path: artifacts/unsafe-debt-audit.json
if-no-files-found: ignore
retention-days: 14
- name: Upload unsafe policy audit event
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: unsafe-policy-audit-event
path: artifacts/audit-event-unsafe-policy-guard.json
if-no-files-found: ignore
retention-days: 14
- name: Upload unsafe debt audit event
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: unsafe-debt-audit-event
path: artifacts/audit-event-unsafe-debt-audit.json
if-no-files-found: ignore
retention-days: 14
security-required:
name: Security Required Gate
if: always() && (github.event_name == 'pull_request' || github.event_name == 'push' || github.event_name == 'merge_group')
needs: [audit, deny, security-regressions, secrets, sbom, unsafe-debt]
runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40]
steps:
- name: Enforce security gate
shell: bash
run: |
set -euo pipefail
results=(
"audit=${{ needs.audit.result }}"
"deny=${{ needs.deny.result }}"
"security-regressions=${{ needs.security-regressions.result }}"
"secrets=${{ needs.secrets.result }}"
"sbom=${{ needs.sbom.result }}"
"unsafe-debt=${{ needs['unsafe-debt'].result }}"
)
for item in "${results[@]}"; do
echo "$item"
done
for item in "${results[@]}"; do
result="${item#*=}"
if [ "$result" != "success" ]; then
echo "Security gate failed: $item"
exit 1
fi
done

Some files were not shown because too many files have changed in this diff Show More