diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index 6b33ecc19..2a4bd9b15 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -9,3 +9,4 @@ self-hosted-runner: - codeql - codeql-general - blacksmith-2vcpu-ubuntu-2404 + - blacksmith-8vcpu-ubuntu-2404 diff --git a/.github/workflows/ci-change-audit.yml b/.github/workflows/ci-change-audit.yml index 8fbf33e4a..f8f2a2bc5 100644 --- a/.github/workflows/ci-change-audit.yml +++ b/.github/workflows/ci-change-audit.yml @@ -1,16 +1,8 @@ name: CI/CD Change Audit +# Moved off PR path per CI/CD optimization PRD. +# Audit trail runs on push-to-main/dev only. on: - pull_request: - branches: [dev, main] - paths: - - ".github/workflows/**" - - ".github/release/**" - - ".github/codeql/**" - - "scripts/ci/**" - - ".github/dependabot.yml" - - "deny.toml" - - ".gitleaks.toml" push: branches: [dev, main] paths: @@ -35,7 +27,7 @@ on: type: boolean concurrency: - group: ci-change-audit-${{ github.event.pull_request.number || github.sha || github.run_id }} + group: ci-change-audit-${{ github.sha || github.run_id }} cancel-in-progress: true permissions: diff --git a/.github/workflows/ci-reproducible-build.yml b/.github/workflows/ci-reproducible-build.yml index d6612c5c0..dff7fbd41 100644 --- a/.github/workflows/ci-reproducible-build.yml +++ b/.github/workflows/ci-reproducible-build.yml @@ -1,5 +1,7 @@ name: CI Reproducible Build +# Moved off PR path per CI/CD optimization PRD. +# Reproducibility is a release concern; runs on push-to-main/dev + weekly schedule. 
on: push: branches: [dev, main] @@ -14,19 +16,6 @@ on: - "scripts/ci/reproducible_build_check.sh" - "scripts/ci/self_heal_rust_toolchain.sh" - ".github/workflows/ci-reproducible-build.yml" - pull_request: - branches: [dev, main] - paths: - - "Cargo.toml" - - "Cargo.lock" - - "src/**" - - "crates/**" - - "scripts/ci/ensure_c_toolchain.sh" - - "scripts/ci/ensure_cargo_component.sh" - - "scripts/ci/ensure_cc.sh" - - "scripts/ci/reproducible_build_check.sh" - - "scripts/ci/self_heal_rust_toolchain.sh" - - ".github/workflows/ci-reproducible-build.yml" schedule: - cron: "45 5 * * 1" # Weekly Monday 05:45 UTC workflow_dispatch: @@ -43,7 +32,7 @@ on: type: boolean concurrency: - group: repro-build-${{ github.event.pull_request.number || github.ref || github.run_id }} + group: repro-build-${{ github.ref || github.run_id }} cancel-in-progress: true permissions: @@ -58,8 +47,8 @@ env: jobs: reproducibility: name: Reproducible Build Probe - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 75 + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 35 env: CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup @@ -89,6 +78,13 @@ jobs: ENSURE_CARGO_COMPONENT_STRICT: "true" run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 + - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 + with: + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust + cache-targets: true + cache-bin: false + - name: Run reproducible build check shell: bash run: | diff --git a/.github/workflows/ci-run.yml b/.github/workflows/ci-run.yml index 8d28b0667..2eec62004 100644 --- a/.github/workflows/ci-run.yml +++ b/.github/workflows/ci-run.yml @@ -46,12 +46,15 @@ jobs: BASE_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha 
|| github.event_name == 'merge_group' && github.event.merge_group.base_sha || github.event.before }} run: ./scripts/ci/detect_change_scope.sh - lint: - name: Lint Gate (Format + Clippy + Strict Delta) + # --- Consolidated Rust quality gate --- + # Merges: lint, workspace-check, package-check into one job on a beefy runner. + # With shared cache, sequential steps on 8 vCPU is faster than 6 parallel 2 vCPU jobs. + quality-gate: + name: Quality Gate (Fmt + Clippy + Workspace + Package Checks) needs: [changes] if: needs.changes.outputs.rust_changed == 'true' - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 75 + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 25 env: CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup @@ -77,8 +80,12 @@ jobs: run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 with: - prefix-key: ci-run-check + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust + cache-targets: true cache-bin: false + + # Step 1: Format + Clippy (was: lint job) - name: Run rust quality gate run: ./scripts/ci/rust_quality_gate.sh - name: Run strict lint delta gate @@ -86,58 +93,24 @@ jobs: BASE_SHA: ${{ needs.changes.outputs.base_sha }} run: ./scripts/ci/rust_strict_delta_gate.sh - workspace-check: - name: Workspace Check - needs: [changes] - if: needs.changes.outputs.rust_changed == 'true' - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 45 - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Self-heal Rust toolchain cache - shell: bash - run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0 - - uses: 
dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable - with: - toolchain: 1.92.0 - - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 - with: - prefix-key: ci-run-workspace-check - cache-bin: false + # Step 2: Workspace check (was: workspace-check job) - name: Check workspace run: cargo check --workspace --locked - package-check: - name: Package Check (${{ matrix.package }}) - needs: [changes] - if: needs.changes.outputs.rust_changed == 'true' - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 25 - strategy: - fail-fast: false - matrix: - package: [zeroclaw-types, zeroclaw-core] - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Self-heal Rust toolchain cache - shell: bash - run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0 - - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable - with: - toolchain: 1.92.0 - - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 - with: - prefix-key: ci-run-package-check - cache-bin: false - - name: Check package - run: cargo check -p ${{ matrix.package }} --locked + # Step 3: Package checks (was: package-check matrix job) + - name: Check package zeroclaw-types + run: cargo check -p zeroclaw-types --locked + - name: Check package zeroclaw-core + run: cargo check -p zeroclaw-core --locked - test: - name: Test + # --- Consolidated test + build --- + # Merges: test, build into one job. Incremental from shared cache. 
+ test-and-build: + name: Test + Build needs: [changes] if: needs.changes.outputs.rust_changed == 'true' - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 120 + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 30 env: CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup @@ -160,8 +133,12 @@ jobs: run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 with: - prefix-key: ci-run-check + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust + cache-targets: true cache-bin: false + + # Step 1: Tests with flake detection (was: test job) - name: Run tests with flake detection shell: bash env: @@ -225,41 +202,10 @@ jobs: if-no-files-found: ignore retention-days: 14 - build: - name: Build (Smoke) - needs: [changes] - if: needs.changes.outputs.rust_changed == 'true' - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 90 - env: - CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo - RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup - CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Ensure C toolchain - shell: bash - run: bash ./scripts/ci/ensure_c_toolchain.sh - - name: Self-heal Rust toolchain cache - shell: bash - run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0 - - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable - with: - toolchain: 1.92.0 - - name: Ensure C toolchain for Rust builds - 
run: ./scripts/ci/ensure_cc.sh - - name: Ensure cargo component - shell: bash - run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 - - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 - with: - prefix-key: ci-run-build - cache-targets: true - cache-bin: false + # Step 2: Release build + binary size check (was: build job) - name: Build binary (smoke check) env: - CARGO_BUILD_JOBS: 2 + CARGO_BUILD_JOBS: 8 CI_SMOKE_BUILD_ATTEMPTS: 3 run: bash scripts/ci/smoke_build_retry.sh - name: Check binary size @@ -349,7 +295,7 @@ jobs: lint-feedback: name: Lint Feedback if: github.event_name == 'pull_request' - needs: [changes, lint, docs-quality] + needs: [changes, quality-gate, docs-quality] runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] permissions: contents: read @@ -364,8 +310,8 @@ jobs: env: RUST_CHANGED: ${{ needs.changes.outputs.rust_changed }} DOCS_CHANGED: ${{ needs.changes.outputs.docs_changed }} - LINT_RESULT: ${{ needs.lint.result }} - LINT_DELTA_RESULT: ${{ needs.lint.result }} + LINT_RESULT: ${{ needs.quality-gate.result }} + LINT_DELTA_RESULT: ${{ needs.quality-gate.result }} DOCS_RESULT: ${{ needs.docs-quality.result }} with: script: | @@ -390,10 +336,11 @@ jobs: script: | const script = require('./.github/workflows/scripts/ci_license_file_owner_guard.js'); await script({ github, context, core }); + ci-required: name: CI Required Gate if: always() - needs: [changes, lint, workspace-check, package-check, test, build, docs-only, non-rust, docs-quality, lint-feedback, license-file-owner-guard] + needs: [changes, quality-gate, test-and-build, docs-only, non-rust, docs-quality, lint-feedback, license-file-owner-guard] runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] steps: - name: Enforce required status @@ -440,24 +387,18 @@ jobs: fi # --- Rust change path --- - lint_result="${{ needs.lint.result }}" - workspace_check_result="${{ needs.workspace-check.result }}" - package_check_result="${{ 
needs.package-check.result }}" - test_result="${{ needs.test.result }}" - build_result="${{ needs.build.result }}" + quality_gate_result="${{ needs.quality-gate.result }}" + test_and_build_result="${{ needs.test-and-build.result }}" - echo "lint=${lint_result}" - echo "workspace-check=${workspace_check_result}" - echo "package-check=${package_check_result}" - echo "test=${test_result}" - echo "build=${build_result}" + echo "quality-gate=${quality_gate_result}" + echo "test-and-build=${test_and_build_result}" echo "docs=${docs_result}" echo "license_file_owner_guard=${license_owner_result}" check_pr_governance - if [ "$lint_result" != "success" ] || [ "$workspace_check_result" != "success" ] || [ "$package_check_result" != "success" ] || [ "$test_result" != "success" ] || [ "$build_result" != "success" ]; then - echo "Required CI jobs did not pass: lint=${lint_result} workspace-check=${workspace_check_result} package-check=${package_check_result} test=${test_result} build=${build_result}" + if [ "$quality_gate_result" != "success" ] || [ "$test_and_build_result" != "success" ]; then + echo "Required CI jobs did not pass: quality-gate=${quality_gate_result} test-and-build=${test_and_build_result}" exit 1 fi diff --git a/.github/workflows/ci-supply-chain-provenance.yml b/.github/workflows/ci-supply-chain-provenance.yml index 84803befe..be9083765 100644 --- a/.github/workflows/ci-supply-chain-provenance.yml +++ b/.github/workflows/ci-supply-chain-provenance.yml @@ -32,8 +32,8 @@ env: jobs: provenance: name: Build + Provenance Bundle - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 60 + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 30 steps: - name: Checkout uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 diff --git a/.github/workflows/feature-matrix.yml b/.github/workflows/feature-matrix.yml index 9d61f84f0..00a93fdc3 100644 --- a/.github/workflows/feature-matrix.yml +++ 
b/.github/workflows/feature-matrix.yml @@ -1,5 +1,7 @@ name: Feature Matrix +# Non-default feature lanes moved to nightly/weekly only per CI/CD optimization PRD. +# PR path only runs default lane via ci:full/ci:feature-matrix labels. on: push: branches: [dev] @@ -51,7 +53,7 @@ env: jobs: resolve-profile: name: Resolve Matrix Profile - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] + runs-on: ubuntu-latest outputs: profile: ${{ steps.resolve.outputs.profile }} lane_job_prefix: ${{ steps.resolve.outputs.lane_job_prefix }} @@ -127,28 +129,34 @@ jobs: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full') || contains(github.event.pull_request.labels.*.name, 'ci:feature-matrix') - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] + runs-on: blacksmith-8vcpu-ubuntu-2404 timeout-minutes: ${{ fromJSON(needs.resolve-profile.outputs.lane_timeout_minutes) }} strategy: fail-fast: false matrix: include: + # Default lane: always runs (PR compile + nightly) - name: default compile_command: cargo check --locked nightly_command: cargo test --locked --test agent_e2e --verbose install_libudev: false + nightly_only: false + # Non-default lanes: nightly/weekly/dispatch only (skipped on PR compile profile) - name: whatsapp-web compile_command: cargo check --locked --no-default-features --features whatsapp-web nightly_command: cargo check --locked --no-default-features --features whatsapp-web --verbose install_libudev: false + nightly_only: true - name: browser-native compile_command: cargo check --locked --no-default-features --features browser-native nightly_command: cargo check --locked --no-default-features --features browser-native --verbose install_libudev: false + nightly_only: true - name: nightly-all-features compile_command: cargo check --locked --all-features nightly_command: cargo test --locked --all-features --test agent_e2e --verbose install_libudev: true + nightly_only: true 
steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 @@ -162,11 +170,14 @@ jobs: run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 + if: "!matrix.nightly_only || needs.resolve-profile.outputs.profile == 'nightly'" with: - prefix-key: feature-matrix-${{ matrix.name }} + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust + cache-targets: true - name: Ensure Linux deps for all-features lane - if: matrix.install_libudev + if: matrix.install_libudev && (!matrix.nightly_only || needs.resolve-profile.outputs.profile == 'nightly') shell: bash run: | set -euo pipefail @@ -191,7 +202,12 @@ jobs: sleep 10 done + - name: Skip non-default lane on compile profile + if: matrix.nightly_only && needs.resolve-profile.outputs.profile != 'nightly' + run: echo "Skipping non-default lane '${{ matrix.name }}' on compile profile." + - name: Run matrix lane command + if: "!matrix.nightly_only || needs.resolve-profile.outputs.profile == 'nightly'" id: lane shell: bash run: | @@ -258,7 +274,7 @@ jobs: echo "lane_exit_code=${status}" >> "$GITHUB_OUTPUT" - name: Upload lane report - if: always() + if: always() && steps.lane.outcome != 'skipped' uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ needs.resolve-profile.outputs.lane_artifact_prefix }}-${{ matrix.name }} @@ -267,7 +283,7 @@ jobs: retention-days: ${{ fromJSON(needs.resolve-profile.outputs.lane_retention_days) }} - name: Enforce lane success - if: steps.lane.outputs.lane_status != 'success' + if: steps.lane.outcome == 'success' && steps.lane.outputs.lane_status != 'success' shell: bash run: | set -euo pipefail @@ -283,7 +299,7 @@ jobs: name: ${{ needs.resolve-profile.outputs.summary_job_name }} needs: [resolve-profile, feature-check] if: always() - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] + runs-on: ubuntu-latest steps: - uses: 
actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 diff --git a/.github/workflows/scripts/lint_feedback.js b/.github/workflows/scripts/lint_feedback.js index 8b90161a8..472a4ed04 100644 --- a/.github/workflows/scripts/lint_feedback.js +++ b/.github/workflows/scripts/lint_feedback.js @@ -4,8 +4,8 @@ // Required environment variables: // RUST_CHANGED — "true" if Rust files changed // DOCS_CHANGED — "true" if docs files changed -// LINT_RESULT — result of the lint job -// LINT_DELTA_RESULT — result of the strict delta lint job +// LINT_RESULT — result of the quality-gate job (fmt + clippy) +// LINT_DELTA_RESULT — result of the quality-gate job (strict delta) // DOCS_RESULT — result of the docs-quality job module.exports = async ({ github, context, core }) => { @@ -23,10 +23,10 @@ module.exports = async ({ github, context, core }) => { const failures = []; if (rustChanged && !["success", "skipped"].includes(lintResult)) { - failures.push("`Lint Gate (Format + Clippy)` failed."); + failures.push("`Quality Gate (Format + Clippy)` failed."); } if (rustChanged && !["success", "skipped"].includes(lintDeltaResult)) { - failures.push("`Lint Gate (Strict Delta)` failed."); + failures.push("`Quality Gate (Strict Delta)` failed."); } if (docsChanged && !["success", "skipped"].includes(docsResult)) { failures.push("`Docs Quality` failed."); diff --git a/.github/workflows/sec-audit.yml b/.github/workflows/sec-audit.yml index 260e8a5dd..5af1ceade 100644 --- a/.github/workflows/sec-audit.yml +++ b/.github/workflows/sec-audit.yml @@ -70,10 +70,35 @@ env: CARGO_TERM_COLOR: always jobs: - audit: - name: Security Audit - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 45 + # --- Change detection for fast-path on non-Rust PRs --- + changes: + name: Detect Change Scope + runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] + outputs: + rust_changed: ${{ steps.scope.outputs.rust_changed }} + steps: + - uses: 
actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 + - name: Detect Rust changes + id: scope + shell: bash + env: + EVENT_NAME: ${{ github.event_name }} + BASE_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event_name == 'merge_group' && github.event.merge_group.base_sha || github.event.before }} + run: ./scripts/ci/detect_change_scope.sh + + # --- Consolidated Rust security: audit + deny + security regressions --- + # Merges 3 separate Rust-compiling jobs into 1 sequential job on 8 vCPU. + rust-security: + name: Rust Security (Audit + Deny + Regressions) + needs: [changes] + if: >- + needs.changes.outputs.rust_changed == 'true' || + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 25 env: CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup @@ -101,34 +126,19 @@ jobs: ENSURE_CARGO_COMPONENT_STRICT: "true" run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 + - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 + with: + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust + cache-targets: true + cache-bin: false + + # --- Step 1: cargo-audit (was: audit job) --- - uses: rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998 # v2.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} - deny: - name: License & Supply Chain - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 20 - env: - CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo - RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup - CARGO_TARGET_DIR: ${{ 
github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - - name: Ensure C toolchain - shell: bash - run: bash ./scripts/ci/ensure_c_toolchain.sh - - - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable - with: - toolchain: 1.92.0 - - name: Ensure cargo component - shell: bash - env: - ENSURE_CARGO_COMPONENT_STRICT: "true" - run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 - + # --- Step 2: cargo-deny (was: deny job) --- - name: Enforce deny policy hygiene shell: bash run: | @@ -214,41 +224,24 @@ jobs: if-no-files-found: ignore retention-days: 14 - security-regressions: - name: Security Regression Tests - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 30 - env: - CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo - RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/rustup - CARGO_TARGET_DIR: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/target - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Ensure C toolchain - shell: bash - run: bash ./scripts/ci/ensure_c_toolchain.sh - - - name: Self-heal Rust toolchain cache - shell: bash - run: ./scripts/ci/self_heal_rust_toolchain.sh 1.92.0 - - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable - with: - toolchain: 1.92.0 - - name: Ensure C toolchain for Rust builds - run: ./scripts/ci/ensure_cc.sh - - name: Ensure cargo component - shell: bash - env: - ENSURE_CARGO_COMPONENT_STRICT: "true" - run: bash ./scripts/ci/ensure_cargo_component.sh 1.92.0 - - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 - with: - prefix-key: sec-audit-security-regressions 
- cache-bin: false + # --- Step 3: Security regression tests (was: security-regressions job) --- - name: Run security regression suite shell: bash run: ./scripts/ci/security_regression_tests.sh + # --- Fast-path for non-Rust PRs (no compilation needed) --- + rust-security-skipped: + name: Rust Security (Skipped — Non-Rust PR) + needs: [changes] + if: >- + needs.changes.outputs.rust_changed != 'true' && + github.event_name != 'schedule' && + github.event_name != 'workflow_dispatch' + runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] + steps: + - name: Skip Rust security for non-Rust PR + run: echo "Non-Rust PR; Rust security checks skipped." + secrets: name: Secrets Governance (Gitleaks) runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] @@ -444,13 +437,15 @@ jobs: if-no-files-found: ignore retention-days: 14 - sbom: - name: SBOM Snapshot + # --- Compliance: SBOM + unsafe debt (no Rust compilation needed) --- + compliance: + name: Compliance (SBOM + Unsafe Debt) runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] timeout-minutes: 20 steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + # --- SBOM (was: sbom job) --- - name: Install syft shell: bash run: | @@ -509,19 +504,7 @@ jobs: if-no-files-found: ignore retention-days: 14 - unsafe-debt: - name: Unsafe Debt Audit - runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] - timeout-minutes: 20 - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - - name: Setup Python 3.11 - shell: bash - run: | - set -euo pipefail - python3 --version - + # --- Unsafe debt (was: unsafe-debt job) --- - name: Enforce unsafe policy governance shell: bash run: | @@ -655,28 +638,40 @@ jobs: security-required: name: Security Required Gate if: always() && (github.event_name == 'pull_request' || github.event_name == 'push' || github.event_name == 'merge_group') - needs: [audit, deny, security-regressions, secrets, sbom, unsafe-debt] + needs: [changes, 
rust-security, rust-security-skipped, secrets, compliance] runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] steps: - name: Enforce security gate shell: bash run: | set -euo pipefail - results=( - "audit=${{ needs.audit.result }}" - "deny=${{ needs.deny.result }}" - "security-regressions=${{ needs.security-regressions.result }}" - "secrets=${{ needs.secrets.result }}" - "sbom=${{ needs.sbom.result }}" - "unsafe-debt=${{ needs['unsafe-debt'].result }}" - ) - for item in "${results[@]}"; do - echo "$item" - done - for item in "${results[@]}"; do - result="${item#*=}" - if [ "$result" != "success" ]; then - echo "Security gate failed: $item" + rust_changed="${{ needs.changes.outputs.rust_changed }}" + + # Rust security: must pass if Rust changed, or must be skipped + if [ "$rust_changed" = "true" ]; then + rust_sec="${{ needs.rust-security.result }}" + if [ "$rust_sec" != "success" ]; then + echo "Security gate failed: rust-security=${rust_sec}" exit 1 fi - done + else + rust_skip="${{ needs.rust-security-skipped.result }}" + if [ "$rust_skip" != "success" ]; then + echo "Security gate failed: rust-security-skipped=${rust_skip}" + exit 1 + fi + fi + + # Non-Rust security: always required + secrets_result="${{ needs.secrets.result }}" + compliance_result="${{ needs.compliance.result }}" + + echo "secrets=${secrets_result}" + echo "compliance=${compliance_result}" + + if [ "$secrets_result" != "success" ] || [ "$compliance_result" != "success" ]; then + echo "Security gate failed: secrets=${secrets_result} compliance=${compliance_result}" + exit 1 + fi + + echo "All security checks passed." diff --git a/.github/workflows/sec-codeql.yml b/.github/workflows/sec-codeql.yml index 01bec0567..7b319576a 100644 --- a/.github/workflows/sec-codeql.yml +++ b/.github/workflows/sec-codeql.yml @@ -1,5 +1,7 @@ name: Sec CodeQL +# Moved off PR path per CI/CD optimization PRD. +# Runs on push-to-main/dev + weekly schedule to catch vulnerabilities within 1 merge. 
on: push: branches: [dev, main] @@ -14,27 +16,12 @@ on: - "scripts/ci/self_heal_rust_toolchain.sh" - "scripts/ci/ensure_cc.sh" - ".github/workflows/sec-codeql.yml" - pull_request: - branches: [dev, main] - paths: - - "Cargo.toml" - - "Cargo.lock" - - "src/**" - - "crates/**" - - "scripts/ci/ensure_c_toolchain.sh" - - "scripts/ci/ensure_cargo_component.sh" - - ".github/codeql/**" - - "scripts/ci/self_heal_rust_toolchain.sh" - - "scripts/ci/ensure_cc.sh" - - ".github/workflows/sec-codeql.yml" - merge_group: - branches: [dev, main] schedule: - cron: "0 6 * * 1" # Weekly Monday 6am UTC workflow_dispatch: concurrency: - group: codeql-${{ github.event.pull_request.number || github.ref || github.run_id }} + group: codeql-${{ github.ref || github.run_id }} cancel-in-progress: true permissions: @@ -47,34 +34,11 @@ env: GIT_CONFIG_KEY_0: core.hooksPath GIT_CONFIG_VALUE_0: /dev/null - jobs: - select-runner: - name: Select CodeQL Runner Lane - runs-on: [self-hosted, Linux, X64, aws-india, light, cpu40] - outputs: - labels: ${{ steps.lane.outputs.labels }} - lane: ${{ steps.lane.outputs.lane }} - steps: - - name: Resolve branch lane - id: lane - shell: bash - run: | - set -euo pipefail - branch="${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}" - if [[ "$branch" == release/* ]]; then - echo 'labels=["self-hosted","Linux","X64","hetzner","codeql"]' >> "$GITHUB_OUTPUT" - echo 'lane=release' >> "$GITHUB_OUTPUT" - else - echo 'labels=["self-hosted","Linux","X64","hetzner","codeql","codeql-general"]' >> "$GITHUB_OUTPUT" - echo 'lane=general' >> "$GITHUB_OUTPUT" - fi - codeql: name: CodeQL Analysis - needs: [select-runner] - runs-on: ${{ fromJSON(needs.select-runner.outputs.labels) }} - timeout-minutes: 120 + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 45 env: CARGO_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }}/cargo RUSTUP_HOME: ${{ github.workspace }}/.ci-rust/${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job 
}}/rustup @@ -113,7 +77,8 @@ jobs: - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 with: - prefix-key: sec-codeql-build + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust cache-targets: true cache-bin: false @@ -125,13 +90,12 @@ jobs: with: category: "/language:rust" - - name: Summarize lane + - name: Summarize runner if: always() shell: bash run: | { - echo "### CodeQL Runner Lane" + echo "### CodeQL Runner" echo "- Branch: \`${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}\`" - echo "- Lane: \`${{ needs.select-runner.outputs.lane }}\`" - echo "- Labels: \`${{ needs.select-runner.outputs.labels }}\`" + echo "- Runner: \`blacksmith-8vcpu-ubuntu-2404\`" } >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 38fca152f..fe331b055 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -26,13 +26,12 @@ env: GIT_CONFIG_KEY_0: core.hooksPath GIT_CONFIG_VALUE_0: /dev/null CARGO_TERM_COLOR: always - ENSURE_RUST_COMPONENTS: "" jobs: integration-tests: name: Integration / E2E Tests - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] - timeout-minutes: 30 + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 20 steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable @@ -46,6 +45,11 @@ jobs: - name: Ensure C toolchain for Rust builds run: ./scripts/ci/ensure_cc.sh - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3 + with: + prefix-key: zeroclaw-ci-v1 + shared-key: ${{ runner.os }}-rust + cache-targets: true + cache-bin: false - name: Runner preflight (compiler + disk) shell: bash run: | diff --git a/.github/workflows/test-fuzz.yml b/.github/workflows/test-fuzz.yml index c3cd73697..a7fff0ad8 100644 --- a/.github/workflows/test-fuzz.yml +++ b/.github/workflows/test-fuzz.yml @@ -27,7 +27,7 @@ env: jobs: fuzz: 
name: Fuzz (${{ matrix.target }}) - runs-on: [self-hosted, Linux, X64, aws-india, blacksmith-2vcpu-ubuntu-2404] + runs-on: blacksmith-8vcpu-ubuntu-2404 timeout-minutes: 60 strategy: fail-fast: false diff --git a/docs/ci-map.md b/docs/ci-map.md index 4b85e53d9..054bfc22e 100644 --- a/docs/ci-map.md +++ b/docs/ci-map.md @@ -12,9 +12,9 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u - `.github/workflows/ci-run.yml` (`CI`) - Purpose: Rust validation (`cargo fmt --all -- --check`, `cargo clippy --locked --all-targets -- -D clippy::correctness`, strict delta lint gate on changed Rust lines, `test`, release build smoke) + docs quality checks when docs change (`markdownlint` blocks only issues on changed lines; link check scans only links added on changed lines) - - Additional behavior: for Rust-impacting PRs and pushes, `CI Required Gate` requires `lint` + `test` + `build` (no PR build-only bypass) - - Additional behavior: `lint`, `test`, and `build` run in parallel (all depend only on `changes` job) to minimize critical path duration - - Additional behavior: rust-cache is shared between `lint` and `test` via unified `prefix-key` (`ci-run-check`) to reduce redundant compilation; `build` uses a separate key for release-fast profile + - Additional behavior: for Rust-impacting PRs and pushes, `CI Required Gate` requires `quality-gate` + `test-and-build` (no PR build-only bypass) + - Additional behavior: `quality-gate` and `test-and-build` run in parallel (all depend only on `changes` job) to minimize critical path duration + - Additional behavior: rust-cache is shared across all Rust CI jobs via unified `prefix-key` (`zeroclaw-ci-v1`) and `shared-key` (`${{ runner.os }}-rust`) to reduce redundant compilation - Additional behavior: flake detection is integrated into the `test` job via single-retry probe; emits `test-flake-probe` artifact when flake is suspected; optional blocking can be enabled with repository variable 
`CI_BLOCK_ON_FLAKE_SUSPECTED=true` - Additional behavior: PRs that change CI/CD-governed paths require an explicit approving review from `@chumyin` (`.github/workflows/**`, `.github/codeql/**`, `.github/connectivity/**`, `.github/release/**`, `.github/security/**`, `.github/actionlint.yaml`, `.github/dependabot.yml`, `scripts/ci/**`, and CI governance docs) - Additional behavior: PRs that change root license files (`LICENSE-APACHE`, `LICENSE-MIT`) must be authored by `willsarg` @@ -104,7 +104,10 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u - `Nightly All-Features`: daily schedule and manual dispatch - `Release`: tag push (`v*`), weekly schedule (verification-only), manual dispatch (verification or publish) - `Production Release Build`: push to `main`, push tags matching `v*`, manual dispatch -- `Security Audit`: push to `dev` and `main`, PRs to `dev` and `main`, weekly schedule +- `Security Audit`: push to `dev` and `main`, PRs to `dev` and `main`, weekly schedule; non-Rust PRs fast-path skip Rust compilation jobs +- `CodeQL Analysis`: push to `dev` and `main` on Rust/CodeQL-impacting paths, weekly schedule, manual dispatch (removed from PR path) +- `CI Change Audit`: push to `dev` and `main` on CI/security paths, manual dispatch (removed from PR path) +- `Reproducible Build`: push to `dev` and `main` on Rust/CI paths, weekly schedule, manual dispatch (removed from PR path) - `Sec Vorpal Reviewdog`: manual dispatch only - `Workflow Sanity`: PR/push when `.github/workflows/**`, `.github/*.yml`, or `.github/*.yaml` change - `Dependabot`: all update PRs target `main` (not `dev`) @@ -128,12 +131,12 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u 7. PR intake failures: inspect `.github/workflows/pr-intake-checks.yml` sticky comment and run logs. 
If intake policy changed recently, trigger a fresh `pull_request_target` event (for example close/reopen PR) because `Re-run jobs` can reuse the original workflow snapshot. 8. Label policy parity failures: inspect `.github/workflows/pr-label-policy-check.yml`. 9. Docs failures in CI: inspect `docs-quality` job logs in `.github/workflows/ci-run.yml`. -10. Strict delta lint failures in CI: inspect `lint-strict-delta` job logs and compare with `BASE_SHA` diff scope. +10. Strict delta lint failures in CI: inspect `quality-gate` job strict-delta step logs and compare with `BASE_SHA` diff scope. ## Maintenance Rules - Keep merge-blocking checks deterministic and reproducible (`--locked` where applicable). -- Keep merge-queue compatibility explicit by supporting `merge_group` on required workflows (`ci-run`, `sec-audit`, and `sec-codeql`). +- Keep merge-queue compatibility explicit by supporting `merge_group` on required workflows (`ci-run` and `sec-audit`). - Keep PR intake backfills event-driven: when intake logic changes, prefer triggering a fresh PR event over rerunning old runs so checks evaluate against the latest workflow/script snapshot. - Keep `deny.toml` advisory ignore entries in object form with explicit reasons (enforced by `deny_policy_guard.py`). - Keep deny ignore governance metadata current in `.github/security/deny-ignore-governance.json` (owner/reason/expiry/ticket enforced by `deny_policy_guard.py`). diff --git a/docs/operations/required-check-mapping.md b/docs/operations/required-check-mapping.md index ccf6b6245..e494f03ad 100644 --- a/docs/operations/required-check-mapping.md +++ b/docs/operations/required-check-mapping.md @@ -9,19 +9,30 @@ This document maps merge-critical workflows to expected check names. 
| `CI Required Gate` | `.github/workflows/ci-run.yml` | core Rust/doc merge gate | | `Security Required Gate` | `.github/workflows/sec-audit.yml` | aggregated security merge gate | +### CI Run consolidated job names (referenced by CI Required Gate) + +- `Quality Gate (Fmt + Clippy + Workspace + Package Checks)` — replaces `Lint Gate`, `Workspace Check`, `Package Check` +- `Test + Build` — replaces `Test`, `Build (Smoke)` + +### Security audit consolidated job names (referenced by Security Required Gate) + +- `Rust Security (Audit + Deny + Regressions)` — replaces `Security Audit`, `License & Supply Chain`, `Security Regression Tests` +- `Secrets Governance (Gitleaks)` — unchanged +- `Compliance (SBOM + Unsafe Debt)` — replaces `SBOM Snapshot`, `Unsafe Debt Audit` + Supplemental monitors (non-blocking unless added to branch protection contexts): -- `CI Change Audit` (`.github/workflows/ci-change-audit.yml`) -- `CodeQL Analysis` (`.github/workflows/sec-codeql.yml`) +- `CI Change Audit` (`.github/workflows/ci-change-audit.yml`) — push to `dev`/`main` + manual dispatch only (removed from PR path) +- `CodeQL Analysis` (`.github/workflows/sec-codeql.yml`) — push to `dev`/`main` + weekly schedule + manual dispatch only (removed from PR path) - `Workflow Sanity` (`.github/workflows/workflow-sanity.yml`) - `Feature Matrix Summary` (`.github/workflows/feature-matrix.yml`) Feature matrix lane check names (informational, non-required): -- `Matrix Lane (default)` -- `Matrix Lane (whatsapp-web)` -- `Matrix Lane (browser-native)` -- `Matrix Lane (nightly-all-features)` +- `Matrix Lane (default)` — runs on all profiles +- `Matrix Lane (whatsapp-web)` — nightly/weekly only +- `Matrix Lane (browser-native)` — nightly/weekly only +- `Matrix Lane (nightly-all-features)` — nightly/weekly only ## Release / Pre-release diff --git a/docs/operations/self-hosted-runner-remediation.md b/docs/operations/self-hosted-runner-remediation.md index 25c959195..e37633a4b 100644 --- a/docs/operations/self-hosted-runner-remediation.md +++ 
b/docs/operations/self-hosted-runner-remediation.md @@ -85,7 +85,7 @@ Safety behavior: ## 3.1) Build Smoke Exit `143` Triage -When `CI Run / Build (Smoke)` fails with `Process completed with exit code 143`: +When `CI Run / Test + Build` fails with `Process completed with exit code 143`: 1. Treat it as external termination (SIGTERM), not a compile error. 2. Confirm the build step ended with `Terminated` and no Rust compiler diagnostic was emitted.