Merge remote-tracking branch 'origin/main'

This commit is contained in:
VirtualHotBar 2026-02-28 12:06:01 +08:00
commit fa8a98113e
42 changed files with 2676 additions and 1561 deletions

View File

@ -1,4 +1,6 @@
self-hosted-runner:
labels:
- blacksmith-2vcpu-ubuntu-2404
- Linux
- X64
- racknerd
- aws-india

View File

@ -1,64 +0,0 @@
name: CI Build (Fast)
# Optional fast release build that runs alongside the normal Build (Smoke) job.
# This workflow is informational and does not gate merges.
on:
push:
branches: [dev, main]
pull_request:
branches: [dev, main]
concurrency:
group: ci-fast-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
CARGO_TERM_COLOR: always
jobs:
changes:
name: Detect Change Scope
runs-on: [self-hosted, aws-india]
outputs:
rust_changed: ${{ steps.scope.outputs.rust_changed }}
docs_only: ${{ steps.scope.outputs.docs_only }}
workflow_changed: ${{ steps.scope.outputs.workflow_changed }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- name: Detect docs-only changes
id: scope
shell: bash
env:
EVENT_NAME: ${{ github.event_name }}
BASE_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
run: ./scripts/ci/detect_change_scope.sh
build-fast:
name: Build (Fast)
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true' || needs.changes.outputs.workflow_changed == 'true'
runs-on: [self-hosted, aws-india]
timeout-minutes: 25
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
with:
prefix-key: fast-build
cache-targets: true
- name: Build release binary
run: cargo build --release --locked --verbose

View File

@ -1,74 +0,0 @@
name: Connectivity Probes (Legacy Wrapper)
on:
workflow_dispatch:
inputs:
enforcement_mode:
description: "enforce = fail when critical endpoints are unreachable; report-only = never fail run"
type: choice
required: false
default: enforce
options:
- enforce
- report-only
concurrency:
group: connectivity-probes-${{ github.ref_name }}
cancel-in-progress: true
permissions:
contents: read
env:
GIT_CONFIG_COUNT: "1"
GIT_CONFIG_KEY_0: core.hooksPath
GIT_CONFIG_VALUE_0: /dev/null
jobs:
probes:
name: Provider Connectivity Probes
runs-on: [self-hosted, aws-india]
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Legacy wrapper note
shell: bash
run: |
set -euo pipefail
{
echo "### Connectivity Probes (Legacy Wrapper)"
echo "- Preferred workflow: \`CI Provider Connectivity\`"
echo "- This run uses the shared endpoint-config probe engine."
} >> "$GITHUB_STEP_SUMMARY"
- name: Run provider connectivity matrix
shell: bash
env:
ENFORCEMENT_MODE: ${{ github.event.inputs.enforcement_mode || 'enforce' }}
run: |
set -euo pipefail
fail_on_critical="true"
if [ "${ENFORCEMENT_MODE}" = "report-only" ]; then
fail_on_critical="false"
fi
cmd=(python3 scripts/ci/provider_connectivity_matrix.py
--config .github/connectivity/providers.json
--output-json connectivity-report.json
--output-md connectivity-summary.md)
if [ "$fail_on_critical" = "true" ]; then
cmd+=(--fail-on-critical)
fi
"${cmd[@]}"
- name: Upload connectivity artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: connectivity-probes-${{ github.run_id }}
if-no-files-found: error
path: |
connectivity-report.json
connectivity-summary.md

View File

@ -59,9 +59,9 @@ jobs:
with:
toolchain: 1.92.0
components: rustfmt, clippy
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-lint
prefix-key: ci-run-check
- name: Run rust quality gate
run: ./scripts/ci/rust_quality_gate.sh
- name: Run strict lint delta gate
@ -71,20 +71,64 @@ jobs:
test:
name: Test
needs: [changes, lint]
if: needs.changes.outputs.rust_changed == 'true' && needs.lint.result == 'success'
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true'
runs-on: [self-hosted, aws-india]
timeout-minutes: 30
timeout-minutes: 35
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-test
- name: Run tests
run: cargo test --locked --verbose
prefix-key: ci-run-check
- name: Run tests with flake detection
shell: bash
env:
BLOCK_ON_FLAKE: ${{ vars.CI_BLOCK_ON_FLAKE_SUSPECTED || 'false' }}
run: |
set -euo pipefail
mkdir -p artifacts
if cargo test --locked --verbose; then
echo '{"flake_suspected":false,"status":"success"}' > artifacts/flake-probe.json
exit 0
fi
echo "::warning::First test run failed. Retrying for flake detection..."
if cargo test --locked --verbose; then
echo '{"flake_suspected":true,"status":"flake"}' > artifacts/flake-probe.json
echo "::warning::Flake suspected — test passed on retry"
if [ "${BLOCK_ON_FLAKE}" = "true" ]; then
echo "BLOCK_ON_FLAKE is set; failing on suspected flake."
exit 1
fi
exit 0
fi
echo '{"flake_suspected":false,"status":"failure"}' > artifacts/flake-probe.json
exit 1
- name: Publish flake probe summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/flake-probe.json ]; then
status=$(python3 -c "import json; print(json.load(open('artifacts/flake-probe.json'))['status'])")
flake=$(python3 -c "import json; print(json.load(open('artifacts/flake-probe.json'))['flake_suspected'])")
echo "### Test Flake Probe" >> "$GITHUB_STEP_SUMMARY"
echo "- Status: \`${status}\`" >> "$GITHUB_STEP_SUMMARY"
echo "- Flake suspected: \`${flake}\`" >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload flake probe artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: test-flake-probe
path: artifacts/flake-probe.*
if-no-files-found: ignore
retention-days: 14
build:
name: Build (Smoke)
@ -98,7 +142,7 @@ jobs:
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: ci-run-build
cache-targets: true
@ -107,53 +151,6 @@ jobs:
- name: Check binary size
run: bash scripts/ci/check_binary_size.sh target/release-fast/zeroclaw
flake-probe:
name: Test Flake Retry Probe
needs: [changes, lint, test]
if: always() && needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full'))
runs-on: [self-hosted, aws-india]
timeout-minutes: 25
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
with:
prefix-key: ci-run-flake-probe
- name: Probe flaky failure via single retry
shell: bash
env:
INITIAL_TEST_RESULT: ${{ needs.test.result }}
BLOCK_ON_FLAKE: ${{ vars.CI_BLOCK_ON_FLAKE_SUSPECTED || 'false' }}
run: |
set -euo pipefail
mkdir -p artifacts
python3 scripts/ci/flake_retry_probe.py \
--initial-result "${INITIAL_TEST_RESULT}" \
--retry-command "cargo test --locked --verbose" \
--output-json artifacts/flake-probe.json \
--output-md artifacts/flake-probe.md \
--block-on-flake "${BLOCK_ON_FLAKE}"
- name: Publish flake probe summary
if: always()
shell: bash
run: |
set -euo pipefail
if [ -f artifacts/flake-probe.md ]; then
cat artifacts/flake-probe.md >> "$GITHUB_STEP_SUMMARY"
else
echo "Flake probe report missing." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Upload flake probe artifact
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: test-flake-probe
path: artifacts/flake-probe.*
if-no-files-found: ignore
retention-days: 14
docs-only:
name: Docs-Only Fast Path
needs: [changes]
@ -318,7 +315,7 @@ jobs:
ci-required:
name: CI Required Gate
if: always()
needs: [changes, lint, test, build, flake-probe, docs-only, non-rust, docs-quality, lint-feedback, workflow-owner-approval, human-review-approval, license-file-owner-guard]
needs: [changes, lint, test, build, docs-only, non-rust, docs-quality, lint-feedback, workflow-owner-approval, human-review-approval, license-file-owner-guard]
runs-on: [self-hosted, aws-india]
steps:
- name: Enforce required status
@ -335,112 +332,66 @@ jobs:
human_review_result="${{ needs.human-review-approval.result }}"
license_owner_result="${{ needs.license-file-owner-guard.result }}"
if [ "${{ needs.changes.outputs.docs_only }}" = "true" ]; then
echo "workflow_owner_approval=${workflow_owner_result}"
echo "human_review_approval=${human_review_result}"
echo "license_file_owner_guard=${license_owner_result}"
if [ "$event_name" = "pull_request" ] && [ "$workflow_changed" = "true" ] && [ "$workflow_owner_result" != "success" ]; then
# --- Helper: enforce PR governance gates ---
check_pr_governance() {
if [ "$event_name" != "pull_request" ]; then return 0; fi
if [ "$workflow_changed" = "true" ] && [ "$workflow_owner_result" != "success" ]; then
echo "Workflow files changed but workflow owner approval gate did not pass."
exit 1
fi
if [ "$event_name" = "pull_request" ] && [ "$human_review_result" != "success" ]; then
if [ "$human_review_result" != "success" ]; then
echo "Human review approval guard did not pass."
exit 1
fi
if [ "$event_name" = "pull_request" ] && [ "$license_owner_result" != "success" ]; then
if [ "$license_owner_result" != "success" ]; then
echo "License file owner guard did not pass."
exit 1
fi
}
check_docs_quality() {
if [ "$docs_changed" = "true" ] && [ "$docs_result" != "success" ]; then
echo "Docs-only change detected, but docs-quality did not pass."
echo "Docs changed but docs-quality did not pass."
exit 1
fi
}
# --- Docs-only fast path ---
if [ "${{ needs.changes.outputs.docs_only }}" = "true" ]; then
check_pr_governance
check_docs_quality
echo "Docs-only fast path passed."
exit 0
fi
# --- Non-rust fast path ---
if [ "$rust_changed" != "true" ]; then
echo "rust_changed=false (non-rust fast path)"
echo "workflow_owner_approval=${workflow_owner_result}"
echo "human_review_approval=${human_review_result}"
echo "license_file_owner_guard=${license_owner_result}"
if [ "$event_name" = "pull_request" ] && [ "$workflow_changed" = "true" ] && [ "$workflow_owner_result" != "success" ]; then
echo "Workflow files changed but workflow owner approval gate did not pass."
exit 1
fi
if [ "$event_name" = "pull_request" ] && [ "$human_review_result" != "success" ]; then
echo "Human review approval guard did not pass."
exit 1
fi
if [ "$event_name" = "pull_request" ] && [ "$license_owner_result" != "success" ]; then
echo "License file owner guard did not pass."
exit 1
fi
if [ "$docs_changed" = "true" ] && [ "$docs_result" != "success" ]; then
echo "Non-rust change touched docs, but docs-quality did not pass."
exit 1
fi
check_pr_governance
check_docs_quality
echo "Non-rust fast path passed."
exit 0
fi
# --- Rust change path ---
lint_result="${{ needs.lint.result }}"
lint_strict_delta_result="${{ needs.lint.result }}"
test_result="${{ needs.test.result }}"
build_result="${{ needs.build.result }}"
flake_result="${{ needs.flake-probe.result }}"
echo "lint=${lint_result}"
echo "lint_strict_delta=${lint_strict_delta_result}"
echo "test=${test_result}"
echo "build=${build_result}"
echo "flake_probe=${flake_result}"
echo "docs=${docs_result}"
echo "workflow_owner_approval=${workflow_owner_result}"
echo "human_review_approval=${human_review_result}"
echo "license_file_owner_guard=${license_owner_result}"
if [ "$event_name" = "pull_request" ] && [ "$workflow_changed" = "true" ] && [ "$workflow_owner_result" != "success" ]; then
echo "Workflow files changed but workflow owner approval gate did not pass."
check_pr_governance
if [ "$lint_result" != "success" ] || [ "$test_result" != "success" ] || [ "$build_result" != "success" ]; then
echo "Required CI jobs did not pass: lint=${lint_result} test=${test_result} build=${build_result}"
exit 1
fi
if [ "$event_name" = "pull_request" ] && [ "$human_review_result" != "success" ]; then
echo "Human review approval guard did not pass."
exit 1
fi
check_docs_quality
if [ "$event_name" = "pull_request" ] && [ "$license_owner_result" != "success" ]; then
echo "License file owner guard did not pass."
exit 1
fi
if [ "$event_name" = "pull_request" ]; then
if [ "$lint_result" != "success" ] || [ "$lint_strict_delta_result" != "success" ] || [ "$test_result" != "success" ] || [ "$build_result" != "success" ]; then
echo "Required PR CI jobs did not pass."
exit 1
fi
if [ "$docs_changed" = "true" ] && [ "$docs_result" != "success" ]; then
echo "PR changed docs, but docs-quality did not pass."
exit 1
fi
echo "PR required checks passed."
exit 0
fi
if [ "$lint_result" != "success" ] || [ "$lint_strict_delta_result" != "success" ] || [ "$test_result" != "success" ] || [ "$build_result" != "success" ]; then
echo "Required push CI jobs did not pass."
exit 1
fi
if [ "$flake_result" != "success" ]; then
echo "Flake probe did not pass under current blocking policy."
exit 1
fi
if [ "$docs_changed" = "true" ] && [ "$docs_result" != "success" ]; then
echo "Push changed docs, but docs-quality did not pass."
exit 1
fi
echo "Push required checks passed."
echo "All required checks passed."

View File

@ -2,27 +2,18 @@ name: Feature Matrix
on:
push:
branches: [dev, main]
branches: [dev]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "tests/**"
- "scripts/ci/nightly_matrix_report.py"
- ".github/release/nightly-owner-routing.json"
- ".github/workflows/feature-matrix.yml"
pull_request:
branches: [dev, main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "tests/**"
- "scripts/ci/nightly_matrix_report.py"
- ".github/release/nightly-owner-routing.json"
- ".github/workflows/feature-matrix.yml"
types: [labeled]
merge_group:
branches: [dev, main]
schedule:
@ -132,6 +123,10 @@ jobs:
feature-check:
name: ${{ needs.resolve-profile.outputs.lane_job_prefix }} (${{ matrix.name }})
needs: [resolve-profile]
if: >-
github.event_name != 'pull_request' ||
contains(github.event.pull_request.labels.*.name, 'ci:full') ||
contains(github.event.pull_request.labels.*.name, 'ci:feature-matrix')
runs-on: [self-hosted, aws-india]
timeout-minutes: ${{ fromJSON(needs.resolve-profile.outputs.lane_timeout_minutes) }}
strategy:
@ -156,14 +151,12 @@ jobs:
install_libudev: true
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: feature-matrix-${{ matrix.name }}

View File

@ -101,8 +101,8 @@ Notes:
4. Approval gate possibility:
- if Actions settings require maintainer approval for fork workflows, the `pull_request` run stays in `action_required`/waiting state until approved.
5. Event fan-out after labeling:
- `pr-labeler.yml` and manual label changes emit `labeled`/`unlabeled` events.
- those events retrigger `pull_request_target` automation (`pr-labeler.yml` and `pr-auto-response.yml`), creating extra run volume/noise.
- manual label changes emit `labeled`/`unlabeled` events.
- those events retrigger only label-driven `pull_request_target` automation (`pr-auto-response.yml`); `pr-labeler.yml` now runs only on PR lifecycle events (`opened`/`reopened`/`synchronize`/`ready_for_review`) to reduce churn.
6. When contributor pushes new commits to fork branch (`synchronize`):
- reruns: `pr-intake-checks.yml`, `pr-labeler.yml`, `ci-run.yml`, `sec-audit.yml`, and matching path-scoped PR workflows.
- does not rerun `pr-auto-response.yml` unless label/open events occur.
@ -133,7 +133,7 @@ Notes:
1. Commit reaches `dev` or `main` (usually from a merged PR), or merge queue creates a `merge_group` validation commit.
2. `ci-run.yml` runs on `push` and `merge_group`.
3. `feature-matrix.yml` runs on `push` for Rust/workflow paths and on `merge_group`.
3. `feature-matrix.yml` runs on `push` to `dev` for Rust/workflow paths and on `merge_group`.
4. `sec-audit.yml` runs on `push` and `merge_group`.
5. `sec-codeql.yml` runs on `push`/`merge_group` when Rust/codeql paths change (path-scoped on push).
6. `ci-supply-chain-provenance.yml` runs on push when Rust/build provenance paths change.
@ -151,7 +151,7 @@ Workflow: `.github/workflows/pub-docker-img.yml`
1. Triggered on `pull_request` to `dev` or `main` when Docker build-input paths change.
2. Runs `PR Docker Smoke` job:
- Builds local smoke image with Blacksmith builder.
- Builds local smoke image with Buildx builder.
- Verifies container with `docker run ... --version`.
3. Typical runtime in recent sample: ~240.4s.
4. No registry push happens on PR events.

View File

@ -54,7 +54,7 @@ jobs:
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: nightly-all-features-${{ matrix.name }}

View File

@ -7,6 +7,10 @@ on:
branches: [dev, main]
types: [opened, labeled, unlabeled]
concurrency:
group: pr-auto-response-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }}
cancel-in-progress: true
permissions: {}
env:

View File

@ -3,7 +3,7 @@ name: PR Intake Checks
on:
pull_request_target:
branches: [dev, main]
types: [opened, reopened, synchronize, edited, ready_for_review]
types: [opened, reopened, synchronize, ready_for_review]
concurrency:
group: pr-intake-checks-${{ github.event.pull_request.number || github.run_id }}

View File

@ -3,7 +3,7 @@ name: PR Labeler
on:
pull_request_target:
branches: [dev, main]
types: [opened, reopened, synchronize, edited, labeled, unlabeled]
types: [opened, reopened, synchronize, ready_for_review]
workflow_dispatch:
inputs:
mode:

View File

@ -189,7 +189,7 @@ jobs:
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: prerelease-${{ needs.prerelease-guard.outputs.release_tag }}
cache-targets: true

View File

@ -265,7 +265,7 @@ jobs:
toolchain: 1.92.0
targets: ${{ matrix.target }}
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
if: runner.os != 'Windows'
- name: Install cross for cross-built targets

View File

@ -163,7 +163,7 @@ jobs:
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
with:
prefix-key: sec-audit-security-regressions
- name: Run security regression suite

View File

@ -29,7 +29,7 @@ jobs:
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
- name: Run benchmarks
run: cargo bench --locked 2>&1 | tee benchmark_output.txt

View File

@ -3,6 +3,14 @@ name: Test E2E
on:
push:
branches: [dev, main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "tests/**"
- "scripts/**"
- ".github/workflows/test-e2e.yml"
workflow_dispatch:
concurrency:
@ -28,6 +36,6 @@ jobs:
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
- name: Run integration / E2E tests
run: cargo test --test agent_e2e --locked --verbose

View File

@ -53,7 +53,7 @@ jobs:
- name: Restore Rust cache
if: inputs.use_cache
uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v3
- name: Run command
shell: bash

1075
README.md

File diff suppressed because it is too large Load Diff

View File

@ -297,7 +297,7 @@ on: [push, pull_request]
jobs:
test:
runs-on: blacksmith-2vcpu-ubuntu-2404
runs-on: [self-hosted, aws-india]
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1

View File

@ -23,7 +23,7 @@ Selected allowlist patterns:
- `softprops/action-gh-release@*`
- `sigstore/cosign-installer@*`
- `Checkmarx/vorpal-reviewdog-github-action@*`
- `useblacksmith/*` (Blacksmith self-hosted runner infrastructure)
- `Swatinem/rust-cache@*`
## Change Control Export
@ -78,13 +78,11 @@ Latest sweep notes:
- 2026-02-21: Added manual Vorpal reviewdog workflow for targeted secure-coding checks on supported file types
- Added allowlist pattern: `Checkmarx/vorpal-reviewdog-github-action@*`
- Workflow uses pinned source: `Checkmarx/vorpal-reviewdog-github-action@8cc292f337a2f1dea581b4f4bd73852e7becb50d` (v1.2.0)
- 2026-02-17: Rust dependency cache migrated from `Swatinem/rust-cache` to `useblacksmith/rust-cache`
- No new allowlist pattern required (`useblacksmith/*` already allowlisted)
- 2026-02-26: Standardized runner/action sources for cache and Docker build paths
- Added allowlist pattern: `Swatinem/rust-cache@*`
- Docker build jobs use `docker/setup-buildx-action` and `docker/build-push-action`
- 2026-02-16: Hidden dependency discovered in `release.yml`: `sigstore/cosign-installer@...`
- Added allowlist pattern: `sigstore/cosign-installer@*`
- 2026-02-16: Blacksmith migration blocked workflow execution
- Added allowlist pattern: `useblacksmith/*` for self-hosted runner infrastructure
- Actions: `useblacksmith/setup-docker-builder@v1`, `useblacksmith/build-push-action@v2`
- 2026-02-17: Security audit reproducibility/freshness balance update
- Added allowlist pattern: `rustsec/audit-check@*`
- Replaced inline `cargo install cargo-audit` execution with pinned `rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998` in `security.yml`

View File

@ -13,11 +13,12 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- `.github/workflows/ci-run.yml` (`CI`)
- Purpose: Rust validation (`cargo fmt --all -- --check`, `cargo clippy --locked --all-targets -- -D clippy::correctness`, strict delta lint gate on changed Rust lines, `test`, release build smoke) + docs quality checks when docs change (`markdownlint` blocks only issues on changed lines; link check scans only links added on changed lines)
- Additional behavior: for Rust-impacting PRs and pushes, `CI Required Gate` requires `lint` + `test` + `build` (no PR build-only bypass)
- Additional behavior: rust-cache is partitioned per job role via `prefix-key` to reduce cache churn across lint/test/build/flake-probe lanes
- Additional behavior: emits `test-flake-probe` artifact from single-retry probe when tests fail; optional blocking can be enabled with repository variable `CI_BLOCK_ON_FLAKE_SUSPECTED=true`
- Additional behavior: `lint`, `test`, and `build` run in parallel (all depend only on `changes` job) to minimize critical path duration
- Additional behavior: rust-cache is shared between `lint` and `test` via unified `prefix-key` (`ci-run-check`) to reduce redundant compilation; `build` uses a separate key for release-fast profile
- Additional behavior: flake detection is integrated into the `test` job via single-retry probe; emits `test-flake-probe` artifact when flake is suspected; optional blocking can be enabled with repository variable `CI_BLOCK_ON_FLAKE_SUSPECTED=true`
- Additional behavior: PRs that change `.github/workflows/**` require at least one approving review from a login in `WORKFLOW_OWNER_LOGINS` (repository variable fallback: `theonlyhennygod,willsarg`)
- Additional behavior: PRs that change root license files (`LICENSE-APACHE`, `LICENSE-MIT`) must be authored by `willsarg`
- Additional behavior: lint gates run before `test`/`build`; when lint/docs gates fail on PRs, CI posts an actionable feedback comment with failing gate names and local fix commands
- Additional behavior: when lint/docs gates fail on PRs, CI posts an actionable feedback comment with failing gate names and local fix commands
- Merge gate: `CI Required Gate`
- `.github/workflows/workflow-sanity.yml` (`Workflow Sanity`)
- Purpose: lint GitHub workflow files (`actionlint`, tab checks)
@ -35,6 +36,8 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- Additional behavior: `ghcr_vulnerability_gate.py` enforces policy-driven Trivy gate + parity checks from `.github/release/ghcr-vulnerability-policy.json` and emits `ghcr-vulnerability-gate` audit evidence
- `.github/workflows/feature-matrix.yml` (`Feature Matrix`)
- Purpose: compile-time matrix validation for `default`, `whatsapp-web`, `browser-native`, and `nightly-all-features` lanes
- Additional behavior: push-triggered matrix runs are limited to `dev` branch Rust/workflow-path changes to avoid duplicate post-merge fan-out on `main`
- Additional behavior: on PRs, lanes only run when `ci:full` or `ci:feature-matrix` label is applied (push-to-dev and schedules run unconditionally)
- Additional behavior: each lane emits machine-readable result artifacts; summary lane aggregates owner routing from `.github/release/nightly-owner-routing.json`
- Additional behavior: supports `compile` (merge-gate) and `nightly` (integration-oriented) profiles with bounded retry policy and trend snapshot artifact (`nightly-history.json`)
- Additional behavior: required-check mapping is anchored to stable job name `Feature Matrix Summary`; lane jobs stay informational
@ -45,10 +48,6 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- Purpose: dependency advisories (`rustsec/audit-check`, pinned SHA), policy/license checks (`cargo deny`), gitleaks-based secrets governance (allowlist policy metadata + expiry guard), and SBOM snapshot artifacts (`CycloneDX` + `SPDX`)
- `.github/workflows/sec-codeql.yml` (`CodeQL Analysis`)
- Purpose: static analysis for security findings on PR/push (Rust/codeql paths) plus scheduled/manual runs
- `.github/workflows/ci-connectivity-probes.yml` (`Connectivity Probes`)
- Purpose: legacy manual wrapper for provider endpoint probe diagnostics (delegates to config-driven probe engine)
- Output: uploads `connectivity-report.json` and `connectivity-summary.md`
- Usage: prefer `CI Provider Connectivity` for scheduled + PR/push coverage
- `.github/workflows/ci-change-audit.yml` (`CI/CD Change Audit`)
- Purpose: machine-auditable diff report for CI/security workflow changes (line churn, new `uses:` references, unpinned action-policy violations, pipe-to-shell policy violations, broad `permissions: write-all` grants, new `pull_request_target` trigger introductions, new secret references)
- `.github/workflows/ci-provider-connectivity.yml` (`CI Provider Connectivity`)
@ -81,7 +80,7 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- Additional behavior: final label set is priority-sorted (`risk:*` first, then `size:*`, then contributor tier, then module/path labels)
- Additional behavior: managed label colors follow display order to produce a smooth left-to-right gradient when many labels are present
- Manual governance: supports `workflow_dispatch` with `mode=audit|repair` to inspect/fix managed label metadata drift across the whole repository
- Additional behavior: risk + size labels are auto-corrected on manual PR label edits (`labeled`/`unlabeled` events); apply `risk: manual` when maintainers intentionally override automated risk selection
- Additional behavior: risk + size labels are recomputed on PR lifecycle events (`opened`/`reopened`/`synchronize`/`ready_for_review`); maintainers can use manual `workflow_dispatch` (`mode=repair`) to re-sync managed label metadata after exceptional manual edits
- High-risk heuristic paths: `src/security/**`, `src/runtime/**`, `src/gateway/**`, `src/tools/**`, `.github/workflows/**`
- Guardrail: maintainers can apply `risk: manual` to freeze automated risk recalculation
- `.github/workflows/pr-auto-response.yml` (`PR Auto Responder`)
@ -100,7 +99,7 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- `CI`: push to `dev` and `main`, PRs to `dev` and `main`, merge queue `merge_group` for `dev`/`main`
- `Docker`: tag push (`v*`) for publish, matching PRs to `dev`/`main` for smoke build, manual dispatch for smoke only
- `Feature Matrix`: PR/push on Rust + workflow paths, merge queue, weekly schedule, manual dispatch
- `Feature Matrix`: push on Rust + workflow paths to `dev`, merge queue, weekly schedule, manual dispatch; PRs only when `ci:full` or `ci:feature-matrix` label is applied
- `Nightly All-Features`: daily schedule and manual dispatch
- `Release`: tag push (`v*`), weekly schedule (verification-only), manual dispatch (verification or publish)
- `Security Audit`: push to `dev` and `main`, PRs to `dev` and `main`, weekly schedule
@ -108,10 +107,11 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- `Workflow Sanity`: PR/push when `.github/workflows/**`, `.github/*.yml`, or `.github/*.yaml` change
- `Main Promotion Gate`: PRs to `main` only; requires PR author `willsarg`/`theonlyhennygod` and head branch `dev` in the same repository
- `Dependabot`: all update PRs target `dev` (not `main`)
- `PR Intake Checks`: `pull_request_target` on opened/reopened/synchronize/edited/ready_for_review
- `PR Intake Checks`: `pull_request_target` on opened/reopened/synchronize/ready_for_review
- `Label Policy Sanity`: PR/push when `.github/label-policy.json`, `.github/workflows/pr-labeler.yml`, or `.github/workflows/pr-auto-response.yml` changes
- `PR Labeler`: `pull_request_target` lifecycle events
- `PR Labeler`: `pull_request_target` on opened/reopened/synchronize/ready_for_review
- `PR Auto Responder`: issue opened/labeled, `pull_request_target` opened/labeled
- `Test E2E`: push to `dev`/`main` for Rust-impacting paths (`Cargo*`, `src/**`, `crates/**`, `tests/**`, `scripts/**`) and manual dispatch
- `Stale PR Check`: daily schedule, manual dispatch
- `PR Hygiene`: every 12 hours schedule, manual dispatch
@ -154,6 +154,7 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
- Keep Actions source policy restricted to approved allowlist patterns (see `docs/actions-source-policy.md`).
- Use path filters for expensive workflows when practical.
- Keep docs quality checks low-noise (incremental markdown + incremental added-link checks).
- Use `scripts/ci/queue_hygiene.py` for controlled cleanup of obsolete or superseded queued runs during runner-pressure incidents.
- Keep dependency update volume controlled (grouping + PR limits).
- Install third-party CI tooling through repository-managed pinned installers with checksum verification (for example `scripts/ci/install_gitleaks.sh`, `scripts/ci/install_syft.sh`); avoid remote `curl | sh` patterns.
- Avoid mixing onboarding/community automation with merge-gating logic.

View File

@ -25,7 +25,7 @@
- `softprops/action-gh-release@*`
- `sigstore/cosign-installer@*`
- `Checkmarx/vorpal-reviewdog-github-action@*`
- `useblacksmith/*` (Υποδομή Blacksmith)
- `Swatinem/rust-cache@*`
## Διαδικασία Ελέγχου Αλλαγών
@ -74,7 +74,9 @@ gh api repos/zeroclaw-labs/zeroclaw/actions/permissions/selected-actions
## Ιστορικό Αλλαγών
- **2026-02-21**: Προσθήκη `Checkmarx/vorpal-reviewdog-github-action@*` για στοχευμένους ελέγχους ασφαλείας.
- **2026-02-17**: Μετάβαση στο `useblacksmith/rust-cache` για τη διαχείριση προσωρινής μνήμης Rust.
- **2026-02-26**: Τυποποίηση cache Rust και Docker build actions.
- Προστέθηκε allowlist μοτίβο: `Swatinem/rust-cache@*`.
- Τα Docker build jobs χρησιμοποιούν `docker/setup-buildx-action` και `docker/build-push-action`.
- **2026-02-16**: Προσθήκη `sigstore/cosign-installer@*` για την υπογραφή εκδόσεων.
- **2026-02-17**: Αντικατάσταση του `cargo install cargo-audit` με την ενέργεια `rustsec/audit-check@*`.

View File

@ -22,7 +22,7 @@ Các mẫu allowlist được chọn:
- `rhysd/actionlint@*`
- `softprops/action-gh-release@*`
- `sigstore/cosign-installer@*`
- `useblacksmith/*` (cơ sở hạ tầng self-hosted runner Blacksmith)
- `Swatinem/rust-cache@*`
## Xuất kiểm soát thay đổi
@ -74,13 +74,11 @@ Nếu gặp phải, chỉ thêm action tin cậy còn thiếu cụ thể đó, c
Ghi chú quét gần đây nhất:
- 2026-02-17: Cache phụ thuộc Rust được migrate từ `Swatinem/rust-cache` sang `useblacksmith/rust-cache`
- Không cần mẫu allowlist mới (`useblacksmith/*` đã có trong allowlist)
- 2026-02-26: Chuẩn hóa runner/action cho cache Rust và Docker build
- Đã thêm mẫu allowlist: `Swatinem/rust-cache@*`
- Docker build dùng `docker/setup-buildx-action``docker/build-push-action`
- 2026-02-16: Phụ thuộc ẩn được phát hiện trong `release.yml`: `sigstore/cosign-installer@...`
- Đã thêm mẫu allowlist: `sigstore/cosign-installer@*`
- 2026-02-16: Migration Blacksmith chặn thực thi workflow
- Đã thêm mẫu allowlist: `useblacksmith/*` cho cơ sở hạ tầng self-hosted runner
- Actions: `useblacksmith/setup-docker-builder@v1`, `useblacksmith/build-push-action@v2`
- 2026-02-17: Cập nhật cân bằng tính tái tạo/độ tươi của security audit
- Đã thêm mẫu allowlist: `rustsec/audit-check@*`
- Thay thế thực thi nội tuyến `cargo install cargo-audit` bằng `rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998` được pin trong `security.yml`

View File

@ -10,10 +10,6 @@ Primary workflow:
- `.github/workflows/ci-provider-connectivity.yml`
Legacy compatibility wrapper (manual only):
- `.github/workflows/ci-connectivity-probes.yml`
Probe engine and config:
- `scripts/ci/provider_connectivity_matrix.py`
@ -47,18 +43,12 @@ Enforcement policy:
- critical endpoint unreachable + `fail_on_critical=true` -> workflow fails
- non-critical endpoint unreachable -> reported but non-blocking
`Connectivity Probes (Legacy Wrapper)` behavior:
- manual dispatch only
- accepts `enforcement_mode=enforce|report-only`
- delegates to the same `providers.json` probe engine
## CI Artifacts
Per run artifacts include:
- `provider-connectivity-matrix.json` or `connectivity-report.json`
- `provider-connectivity-matrix.md` or `connectivity-summary.md`
- `provider-connectivity-matrix.json`
- `provider-connectivity-matrix.md`
- normalized audit event JSON when emitted by workflow
Markdown summary is appended to `GITHUB_STEP_SUMMARY`.

View File

@ -0,0 +1,120 @@
# Self-Hosted Runner Remediation Runbook
This runbook provides operational steps for self-hosted runner capacity incidents.
## Scope
Use this when CI jobs remain queued, runner availability drops, or runner hosts fill disk.
## Scripts
- `scripts/ci/runner_health_report.py`
- Queries GitHub Actions runner state and workflow queue pressure.
- Produces console summary and optional JSON report.
- `scripts/ci/runner_disk_cleanup.sh`
- Reclaims stale runner workspace/temp/diag files.
- Defaults to dry-run mode and requires explicit `--apply`.
- `scripts/ci/queue_hygiene.py`
- Removes queued-run backlog from obsolete workflows and stale duplicate runs.
- Defaults to dry-run mode; use `--apply` to execute cancellations.
## 1) Health Check
```bash
python3 scripts/ci/runner_health_report.py \
--repo zeroclaw-labs/zeroclaw \
--require-label self-hosted \
--require-label aws-india \
--min-online 3 \
--min-available 1 \
--max-queued-runs 20 \
--output-json artifacts/runner-health.json
```
Auth note:
- The script reads token from `--token`, then `GH_TOKEN`/`GITHUB_TOKEN`, then falls back to `gh auth token`.
Recommended alert thresholds:
- `online < 3` (critical)
- `available < 1` (critical)
- `queued runs > 20` (critical)
- `busy ratio > 90%` (warning)
## 2) Disk Cleanup (Dry-Run First)
```bash
scripts/ci/runner_disk_cleanup.sh \
--runner-root /home/ubuntu/actions-runner-pool \
--work-retention-days 2 \
--diag-retention-days 7
```
Apply mode (after draining jobs):
```bash
scripts/ci/runner_disk_cleanup.sh \
--runner-root /home/ubuntu/actions-runner-pool \
--work-retention-days 2 \
--diag-retention-days 7 \
--apply
```
Optional with Docker cleanup:
```bash
scripts/ci/runner_disk_cleanup.sh \
--runner-root /home/ubuntu/actions-runner-pool \
--apply \
--docker-prune
```
Safety behavior:
- `--apply` aborts if runner worker/listener processes are detected, unless `--force` is provided.
- default mode is non-destructive.
## 3) Recovery Sequence
1. Pause or reduce non-blocking workflows if queue pressure is high.
2. Run health report and capture JSON artifact.
3. Run disk cleanup in dry-run mode, review candidate list.
4. Drain runners, then apply cleanup.
5. Re-run health report and confirm queue/availability recovery.
## 4) Queue Hygiene (Dry-Run First)
Dry-run example:
```bash
python3 scripts/ci/queue_hygiene.py \
--repo zeroclaw-labs/zeroclaw \
--obsolete-workflow "CI Build (Fast)" \
--dedupe-workflow "CI Run" \
--output-json artifacts/queue-hygiene.json
```
Apply mode:
```bash
python3 scripts/ci/queue_hygiene.py \
--repo zeroclaw-labs/zeroclaw \
--obsolete-workflow "CI Build (Fast)" \
--dedupe-workflow "CI Run" \
--max-cancel 200 \
--apply \
--output-json artifacts/queue-hygiene-applied.json
```
Safety behavior:
- At least one policy is required (`--obsolete-workflow` or `--dedupe-workflow`).
- `--apply` is opt-in; default is non-destructive preview.
- Deduplication is PR-only by default; use `--dedupe-include-non-pr` only when explicitly handling push/manual backlog.
- Cancellations are bounded by `--max-cancel`.
## Notes
- These scripts are operational tools and do not change merge-gating policy.
- Keep threshold values aligned with observed runner pool size and traffic profile.

426
scripts/ci/queue_hygiene.py Executable file
View File

@ -0,0 +1,426 @@
#!/usr/bin/env python3
"""Queue hygiene helper for GitHub Actions workflow runs.
Default behavior is non-destructive (`dry-run`). Use `--apply` to cancel runs.
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from collections import Counter, defaultdict
from datetime import datetime, timezone
from typing import Any
def parse_args() -> argparse.Namespace:
    """Parse CLI options for the queue-hygiene tool (dry-run by default)."""
    parser = argparse.ArgumentParser(
        description="Cancel obsolete or superseded queued workflow runs safely.",
    )
    # Option table: (flag, add_argument kwargs), in --help display order.
    option_specs: list[tuple[str, dict[str, Any]]] = [
        (
            "--repo",
            {
                "default": os.getenv("GITHUB_REPOSITORY", "zeroclaw-labs/zeroclaw"),
                "help": "GitHub repository in owner/repo form.",
            },
        ),
        (
            "--api-url",
            {
                "default": os.getenv("GITHUB_API_URL", "https://api.github.com"),
                "help": "GitHub API base URL.",
            },
        ),
        (
            "--token",
            {
                "default": "",
                "help": "GitHub token (default: GH_TOKEN/GITHUB_TOKEN, then `gh auth token`).",
            },
        ),
        (
            "--status",
            {
                "default": "queued",
                "choices": ["queued", "in_progress", "requested", "waiting"],
                "help": "Workflow run status to inspect (default: queued).",
            },
        ),
        (
            "--runs-json",
            {
                "default": "",
                "help": "Optional local JSON fixture for offline dry-run/testing (list or {workflow_runs:[...]}).",
            },
        ),
        (
            "--obsolete-workflow",
            {
                "action": "append",
                "default": [],
                "help": "Workflow name to cancel unconditionally (repeatable).",
            },
        ),
        (
            "--dedupe-workflow",
            {
                "action": "append",
                "default": [],
                "help": "Workflow name to dedupe by event+branch+PR-key, keeping newest run only (repeatable).",
            },
        ),
        (
            "--dedupe-include-non-pr",
            {
                "action": "store_true",
                "help": "Also dedupe non-PR runs (push/manual). Default dedupe scope is PR-originated runs only.",
            },
        ),
        (
            "--max-cancel",
            {
                "type": int,
                "default": 200,
                "help": "Maximum number of runs to cancel/apply in one execution.",
            },
        ),
        (
            "--apply",
            {
                "action": "store_true",
                "help": "Apply cancel operations. Default is dry-run.",
            },
        ),
        (
            "--output-json",
            {
                "default": "",
                "help": "Optional path to write structured report JSON.",
            },
        ),
        (
            "--verbose",
            {
                "action": "store_true",
                "help": "Print selected run details.",
            },
        ),
    ]
    for flag, kwargs in option_specs:
        parser.add_argument(flag, **kwargs)
    return parser.parse_args()
class GitHubApi:
    """Minimal GitHub REST v3 client for listing and cancelling workflow runs.

    Only the endpoints this script needs are wrapped; there is no retry or
    rate-limit handling.
    """

    def __init__(self, api_url: str, token: str | None) -> None:
        # Strip trailing slash so f"{api_url}{path}" concatenation is predictable.
        self.api_url = api_url.rstrip("/")
        # Empty/None token sends unauthenticated requests.
        self.token = token

    def _request(
        self,
        method: str,
        path: str,
        params: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Issue one API request and decode the JSON body.

        Returns an empty dict for an empty response body (e.g. the run-cancel
        endpoint). Raises urllib.error.HTTPError on non-2xx responses.
        """
        query = urllib.parse.urlencode(params or {}, doseq=True)
        url = f"{self.api_url}{path}"
        if query:
            url = f"{url}?{query}"
        req = urllib.request.Request(url, method=method)
        req.add_header("Accept", "application/vnd.github+json")
        req.add_header("X-GitHub-Api-Version", "2022-11-28")
        if self.token:
            req.add_header("Authorization", f"Bearer {self.token}")
        with urllib.request.urlopen(req, timeout=30) as resp:
            body = resp.read().decode("utf-8")
        if not body:
            return {}
        return json.loads(body)

    def get(self, path: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
        """GET `path` with optional query parameters."""
        return self._request("GET", path, params=params)

    def post(self, path: str) -> dict[str, Any]:
        """POST to `path` with no request body (used for run cancellation)."""
        return self._request("POST", path)

    def paginate(self, path: str, key: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Collect all pages of a list endpoint, reading `key` from each page.

        Stops at the first empty or short (<100 item) page.
        """
        results: list[dict[str, Any]] = []
        page = 1
        while True:
            query = {"per_page": 100, "page": page}
            if params:
                query.update(params)
            payload = self.get(path, params=query)
            items = payload.get(key, [])
            if not items:
                break
            results.extend(items)
            if len(items) < 100:
                break
            page += 1
        return results
def split_repo(repo: str) -> tuple[str, str]:
    """Split an ``owner/repo`` string into its two components.

    Raises ValueError when the separator is missing or either side is empty.
    """
    owner, sep, name = repo.partition("/")
    if not sep or not owner or not name:
        raise ValueError(f"Invalid --repo value: {repo!r}. Expected owner/repo.")
    return owner, name
def normalize_values(values: list[str]) -> set[str]:
    """Return the unique, whitespace-stripped, non-empty entries of ``values``."""
    return {stripped for stripped in (value.strip() for value in values) if stripped}
def parse_timestamp(value: str | None) -> datetime:
if not value:
return datetime.fromtimestamp(0, tz=timezone.utc)
try:
return datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return datetime.fromtimestamp(0, tz=timezone.utc)
def run_identity_key(run: dict[str, Any]) -> tuple[str, str, str, str]:
    """Build the grouping key used to detect superseded duplicate runs.

    PR-originated runs are keyed by PR number so stale runs from earlier
    `synchronize` pushes of the same PR collapse into one group; push/manual
    runs are keyed by commit SHA so distinct commits never collapse.
    """
    workflow = str(run.get("name", ""))
    event = str(run.get("event", ""))
    pr_entries = run.get("pull_requests")
    if isinstance(pr_entries, list) and pr_entries:
        head = pr_entries[0]
        if isinstance(head, dict) and head.get("number") is not None:
            # PR traffic: group across commits of the same pull request.
            return (workflow, event, f"pr:{head.get('number')}", "")
    # Push/manual traffic: keep distinct commits separate via the head SHA.
    return (workflow, event, str(run.get("head_branch", "")), str(run.get("head_sha", "")))
def collect_candidates(
    runs: list[dict[str, Any]],
    obsolete_workflows: set[str],
    dedupe_workflows: set[str],
    *,
    include_non_pr: bool,
) -> tuple[list[dict[str, Any]], Counter[str]]:
    """Select runs to cancel and tally cancellation reasons.

    Two policies are applied:
    - every run whose workflow name is in ``obsolete_workflows`` is selected
      with reason ``obsolete-workflow``;
    - runs of workflows in ``dedupe_workflows`` are grouped by
      ``run_identity_key``; within each group only the newest run (by
      ``created_at``, then id) is kept, the rest get reason
      ``dedupe-superseded-by:<kept id>``.

    With ``include_non_pr`` False (the default policy), dedupe only considers
    ``pull_request``/``pull_request_target`` runs that carry PR context.

    Returns the selected runs (sorted oldest-first by created_at, then id)
    as summary dicts, plus a Counter of reason occurrences. A run may carry
    multiple reasons but appears once in the selection.
    """
    reasons_by_id: dict[int, set[str]] = defaultdict(set)
    runs_by_id: dict[int, dict[str, Any]] = {}
    # Pass 1: index runs by integer id and mark obsolete-workflow hits.
    for run in runs:
        run_id_raw = run.get("id")
        if run_id_raw is None:
            continue
        try:
            run_id = int(run_id_raw)
        except (TypeError, ValueError):
            # Runs without a usable integer id cannot be cancelled; skip them.
            continue
        runs_by_id[run_id] = run
        if str(run.get("name", "")) in obsolete_workflows:
            reasons_by_id[run_id].add("obsolete-workflow")
    # Pass 2: group dedupe-eligible runs by workflow name and identity key.
    by_workflow: dict[str, dict[tuple[str, str, str, str], list[dict[str, Any]]]] = defaultdict(
        lambda: defaultdict(list)
    )
    for run in runs:
        name = str(run.get("name", ""))
        if name not in dedupe_workflows:
            continue
        event = str(run.get("event", ""))
        is_pr_event = event in {"pull_request", "pull_request_target"}
        if not is_pr_event and not include_non_pr:
            continue
        pull_requests = run.get("pull_requests")
        has_pr_context = isinstance(pull_requests, list) and len(pull_requests) > 0
        # PR-event runs without PR metadata are ambiguous; only dedupe them
        # when the operator explicitly opted into the wider scope.
        if is_pr_event and not has_pr_context and not include_non_pr:
            continue
        key = run_identity_key(run)
        by_workflow[name][key].append(run)
    # Pass 3: within each group, keep the newest run and mark the rest stale.
    for groups in by_workflow.values():
        for group_runs in groups.values():
            if len(group_runs) <= 1:
                continue
            sorted_group = sorted(
                group_runs,
                key=lambda item: (
                    parse_timestamp(str(item.get("created_at", ""))),
                    int(item.get("id", 0)),
                ),
                reverse=True,
            )
            keep_id = int(sorted_group[0].get("id", 0))
            for stale in sorted_group[1:]:
                stale_id = int(stale.get("id", 0))
                reasons_by_id[stale_id].add(f"dedupe-superseded-by:{keep_id}")
    # Pass 4: flatten reasons into summary records and reason counts.
    reason_counter: Counter[str] = Counter()
    selected: list[dict[str, Any]] = []
    for run_id, reasons in reasons_by_id.items():
        run = runs_by_id.get(run_id)
        if run is None:
            continue
        for reason in reasons:
            reason_counter[reason] += 1
        selected.append(
            {
                "id": run_id,
                "name": str(run.get("name", "")),
                "event": str(run.get("event", "")),
                "head_branch": str(run.get("head_branch", "")),
                "created_at": str(run.get("created_at", "")),
                "html_url": str(run.get("html_url", "")),
                "reasons": sorted(reasons),
            }
        )
    # Oldest-first ordering so the --max-cancel cap drops the newest first.
    selected.sort(
        key=lambda item: (
            parse_timestamp(item.get("created_at", "")),
            int(item.get("id", 0)),
        )
    )
    return selected, reason_counter
def resolve_token(explicit_token: str) -> str:
    """Resolve a GitHub token: CLI flag, then GH_TOKEN/GITHUB_TOKEN env vars,
    then the `gh auth token` CLI as a last resort.

    Returns an empty string when no source yields a token.
    """
    for candidate in (explicit_token, os.getenv("GH_TOKEN"), os.getenv("GITHUB_TOKEN")):
        if candidate:
            return candidate
    try:
        output = subprocess.check_output(
            ["gh", "auth", "token"],
            text=True,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        # `gh` missing or not authenticated: fall through to unauthenticated.
        return ""
    return output.strip()
def load_runs_from_json(path: str) -> list[dict[str, Any]]:
    """Load workflow runs from a local JSON fixture file.

    Accepts either a bare JSON list of run objects or an object with a
    ``workflow_runs`` list (the shape returned by the GitHub API). Non-dict
    entries are silently dropped.

    Raises ValueError for any other top-level shape; propagates OSError /
    json.JSONDecodeError for unreadable or invalid files.
    """
    # Use a context manager so the fixture handle is closed deterministically
    # (the original `open(path).read()` left closing to garbage collection).
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if isinstance(payload, list):
        return [item for item in payload if isinstance(item, dict)]
    if isinstance(payload, dict):
        items = payload.get("workflow_runs", [])
        if isinstance(items, list):
            return [item for item in items if isinstance(item, dict)]
    raise ValueError("--runs-json must contain a list or an object with `workflow_runs` list.")
def main() -> int:
    """CLI entry point.

    Builds the cancellation candidate list from the configured policies,
    prints a console report, optionally cancels the capped candidates
    (``--apply``), and optionally writes a JSON artifact (``--output-json``).

    Returns 0 on success, 2 on usage errors or when apply-mode cancellations
    failed with unexpected HTTP errors.
    """
    args = parse_args()
    obsolete_workflows = normalize_values(args.obsolete_workflow)
    dedupe_workflows = normalize_values(args.dedupe_workflow)
    # At least one policy must be configured, otherwise nothing is selectable.
    if not obsolete_workflows and not dedupe_workflows:
        print(
            "queue_hygiene: no policy configured. Provide --obsolete-workflow and/or --dedupe-workflow.",
            file=sys.stderr,
        )
        return 2
    owner, repo = split_repo(args.repo)
    token = resolve_token(args.token)
    api = GitHubApi(args.api_url, token)
    if args.runs_json:
        # Offline fixture path for dry-run/testing; no API access required.
        runs = load_runs_from_json(args.runs_json)
    else:
        runs = api.paginate(
            f"/repos/{owner}/{repo}/actions/runs",
            key="workflow_runs",
            params={"status": args.status},
        )
    selected, reason_counter = collect_candidates(
        runs,
        obsolete_workflows,
        dedupe_workflows,
        include_non_pr=args.dedupe_include_non_pr,
    )
    # Bound how many runs a single execution may act on.
    capped = selected[: max(0, args.max_cancel)]
    skipped_by_cap = max(0, len(selected) - len(capped))
    # Structured report; also serialized to --output-json when requested.
    report: dict[str, Any] = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repository": f"{owner}/{repo}",
        "status_scope": args.status,
        "mode": "apply" if args.apply else "dry-run",
        "policies": {
            "obsolete_workflows": sorted(obsolete_workflows),
            "dedupe_workflows": sorted(dedupe_workflows),
            "dedupe_include_non_pr": args.dedupe_include_non_pr,
            "max_cancel": args.max_cancel,
        },
        "counts": {
            "runs_in_scope": len(runs),
            "candidate_runs_before_cap": len(selected),
            "candidate_runs_after_cap": len(capped),
            "skipped_by_cap": skipped_by_cap,
        },
        "reason_counts": dict(sorted(reason_counter.items())),
        "planned_actions": capped,
        "results": {
            "canceled": 0,
            "skipped": 0,
            "failed": 0,
            "failures": [],
        },
    }
    # Console summary (printed in both dry-run and apply mode).
    print("Queue Hygiene Report")
    print(f"repo: {report['repository']}")
    print(f"status_scope: {args.status}")
    print(
        "runs: in_scope={runs_in_scope} candidate_before_cap={before} candidate_after_cap={after} skipped_by_cap={skipped}".format(
            runs_in_scope=report["counts"]["runs_in_scope"],
            before=report["counts"]["candidate_runs_before_cap"],
            after=report["counts"]["candidate_runs_after_cap"],
            skipped=report["counts"]["skipped_by_cap"],
        )
    )
    if reason_counter:
        print("reason_counts:")
        for reason, count in sorted(reason_counter.items()):
            print(f" - {reason}: {count}")
    if args.verbose:
        for item in capped:
            reasons = ",".join(item.get("reasons", []))
            print(
                f" run_id={item['id']} workflow={item['name']} branch={item['head_branch']} "
                f"created_at={item['created_at']} reasons={reasons}"
            )
    # Fixture runs do not exist server-side, so they cannot be cancelled.
    if args.apply and args.runs_json:
        print("queue_hygiene: --apply cannot be used with --runs-json offline fixture.", file=sys.stderr)
        return 2
    if args.apply:
        for item in capped:
            run_id = int(item["id"])
            try:
                api.post(f"/repos/{owner}/{repo}/actions/runs/{run_id}/cancel")
                report["results"]["canceled"] += 1
            except urllib.error.HTTPError as exc:
                body = exc.read().decode("utf-8", errors="replace")
                # 404/409/422: run vanished or already finished -> benign skip.
                if exc.code in (404, 409, 422):
                    report["results"]["skipped"] += 1
                else:
                    report["results"]["failed"] += 1
                    report["results"]["failures"].append(
                        {
                            "run_id": run_id,
                            "status_code": exc.code,
                            "body": body[:500],  # truncate to keep report small
                        }
                    )
        print(
            "apply_results: canceled={canceled} skipped={skipped} failed={failed}".format(
                canceled=report["results"]["canceled"],
                skipped=report["results"]["skipped"],
                failed=report["results"]["failed"],
            )
        )
    if args.output_json:
        with open(args.output_json, "w", encoding="utf-8") as handle:
            json.dump(report, handle, indent=2, sort_keys=True)
            handle.write("\n")
    # Surface partial apply failures as a non-zero exit for CI visibility.
    if args.apply and report["results"]["failed"] > 0:
        return 2
    return 0
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

171
scripts/ci/runner_disk_cleanup.sh Executable file
View File

@ -0,0 +1,171 @@
#!/usr/bin/env bash
# Reclaim disk space on self-hosted GitHub Actions runner hosts.
# Non-destructive (dry-run) by default; see --apply / --force below.
set -euo pipefail

# Print CLI usage to stdout. The quoted 'EOF' delimiter suppresses variable
# expansion so the help text is emitted verbatim.
usage() {
  cat <<'EOF'
Usage: scripts/ci/runner_disk_cleanup.sh [options]

Safely reclaim disk space on self-hosted runner hosts.
Defaults to dry-run mode.

Options:
  --runner-root <path>       Runner root (default: $RUNNER_ROOT or /home/ubuntu/actions-runner-pool)
  --work-retention-days <n>  Keep workspace dirs newer than n days (default: 2)
  --diag-retention-days <n>  Keep diagnostic logs newer than n days (default: 7)
  --docker-prune             Include docker system prune -af --volumes
  --apply                    Execute deletions (default: dry-run)
  --force                    Allow apply even if runner worker/listener processes are detected
  -h, --help                 Show this help text
EOF
}
# Defaults. RUNNER_ROOT honors a pre-set environment variable of the same name.
RUNNER_ROOT="${RUNNER_ROOT:-/home/ubuntu/actions-runner-pool}"
WORK_RETENTION_DAYS=2
DIAG_RETENTION_DAYS=7
DOCKER_PRUNE=false
APPLY=false
FORCE=false

# Flag parsing. Value-taking flags consume the following argument.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --runner-root)
      RUNNER_ROOT="${2:-}"
      shift 2
      ;;
    --work-retention-days)
      WORK_RETENTION_DAYS="${2:-}"
      shift 2
      ;;
    --diag-retention-days)
      DIAG_RETENTION_DAYS="${2:-}"
      shift 2
      ;;
    --docker-prune)
      DOCKER_PRUNE=true
      shift
      ;;
    --apply)
      APPLY=true
      shift
      ;;
    --force)
      FORCE=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

# Input validation: root must exist; retention values must be non-negative integers.
if [[ ! -d "$RUNNER_ROOT" ]]; then
  echo "Runner root does not exist: $RUNNER_ROOT" >&2
  exit 2
fi
if ! [[ "$WORK_RETENTION_DAYS" =~ ^[0-9]+$ ]]; then
  echo "Invalid --work-retention-days: $WORK_RETENTION_DAYS" >&2
  exit 2
fi
if ! [[ "$DIAG_RETENTION_DAYS" =~ ^[0-9]+$ ]]; then
  echo "Invalid --diag-retention-days: $DIAG_RETENTION_DAYS" >&2
  exit 2
fi

# Safety gate: refuse destructive mode while a runner process is executing
# jobs, unless the operator explicitly forces it after draining.
if [[ "$APPLY" == true && "$FORCE" != true ]]; then
  if pgrep -fa 'Runner\.Worker|Runner\.Listener' >/dev/null 2>&1; then
    echo "Active runner processes detected. Re-run with --force only after draining jobs." >&2
    exit 3
  fi
fi
# Write cleanup candidate paths (one per line, deduplicated) into the file
# named by $1. Each find tolerates permission errors and never fails the
# script (|| true under set -e).
collect_candidates() {
  local list_file="$1"
  : > "$list_file"
  # Old diagnostic logs.
  find "$RUNNER_ROOT" -type f -path '*/_diag/*' -mtime +"$DIAG_RETENTION_DAYS" -print 2>/dev/null >> "$list_file" || true
  # Stale temp artifacts.
  find "$RUNNER_ROOT" -type f -path '*/_work/_temp/*' -mtime +1 -print 2>/dev/null >> "$list_file" || true
  find "$RUNNER_ROOT" -type d -path '*/_work/_temp/*' -mtime +1 -print 2>/dev/null >> "$list_file" || true
  # Stale repository workspaces under _work (exclude internal underscore dirs).
  find "$RUNNER_ROOT" -mindepth 3 -maxdepth 3 -type d -path '*/_work/*' ! -name '_*' -mtime +"$WORK_RETENTION_DAYS" -print 2>/dev/null >> "$list_file" || true
  sort -u -o "$list_file" "$list_file"
}
# Format a byte count as a human-readable size, e.g. "1.50 KiB".
human_bytes() {
  awk -v size="$1" 'BEGIN {
    split("B KiB MiB GiB TiB PiB", units, " ")
    idx = 1
    while (size >= 1024 && idx < 6) {
      size /= 1024
      idx++
    }
    printf "%.2f %s\n", size, units[idx]
  }'
}
# Main flow: gather candidates, print a summary, then (apply mode only) delete.
CANDIDATES_FILE="$(mktemp)"
trap 'rm -f "$CANDIDATES_FILE"' EXIT
collect_candidates "$CANDIDATES_FILE"

# Count candidates that still exist (paths may disappear between scan and now).
TOTAL_BYTES=0
COUNT=0
while IFS= read -r path; do
  [[ -z "$path" ]] && continue
  if [[ ! -e "$path" ]]; then
    continue
  fi
  COUNT=$((COUNT + 1))
done < "$CANDIDATES_FILE"
# NUL-delimit for xargs so whitespace in names is safe; du -sb is GNU du.
if [[ "$COUNT" -gt 0 ]]; then
  TOTAL_BYTES="$(tr '\n' '\0' < "$CANDIDATES_FILE" | xargs -0 -r du -sb 2>/dev/null | awk '{s+=$1} END{print s+0}')"
fi

echo "Runner root: $RUNNER_ROOT"
echo "Mode: $([[ "$APPLY" == true ]] && echo apply || echo dry-run)"
echo "Retention: workspace>${WORK_RETENTION_DAYS}d diag>${DIAG_RETENTION_DAYS}d"
echo "Candidates: $COUNT"
echo "Estimated reclaim: $(human_bytes "$TOTAL_BYTES")"
if [[ "$COUNT" -gt 0 ]]; then
  echo "Sample candidates:"
  sed -n '1,20p' "$CANDIDATES_FILE"
  if [[ "$COUNT" -gt 20 ]]; then
    echo "... ($((COUNT - 20)) more)"
  fi
fi

if [[ "$APPLY" != true ]]; then
  echo "Dry-run only. Re-run with --apply to execute cleanup."
  exit 0
fi

# Apply mode: delete each surviving candidate path.
while IFS= read -r path; do
  [[ -z "$path" ]] && continue
  if [[ -e "$path" ]]; then
    rm -rf "$path"
  fi
done < "$CANDIDATES_FILE"

# Optional Docker cleanup; a prune failure is non-fatal.
if [[ "$DOCKER_PRUNE" == true ]]; then
  if command -v docker >/dev/null 2>&1; then
    docker system prune -af --volumes || true
  else
    echo "docker command not found; skipping docker prune." >&2
  fi
fi

echo "Cleanup completed."

View File

@ -0,0 +1,328 @@
#!/usr/bin/env python3
"""Self-hosted runner pool health report for a GitHub repository.
This script queries GitHub Actions runner and workflow-run state, then prints a
human-readable summary and optional JSON artifact.
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
import urllib.parse
import urllib.request
from datetime import datetime, timezone
from typing import Any
def parse_args() -> argparse.Namespace:
    """Parse CLI options for the runner health report.

    Note on ``--require-label``: argparse's ``action="append"`` appends user
    values onto a non-None default instead of replacing it, which made the
    built-in labels impossible to override (a custom pool query would always
    also require self-hosted/aws-india). The default is therefore None here
    and applied after parsing only when the flag was never supplied.
    """
    parser = argparse.ArgumentParser(
        description="Report self-hosted runner pool health and queue pressure.",
    )
    parser.add_argument(
        "--repo",
        default=os.getenv("GITHUB_REPOSITORY", "zeroclaw-labs/zeroclaw"),
        help="GitHub repository in owner/repo form (default: env GITHUB_REPOSITORY or zeroclaw-labs/zeroclaw).",
    )
    parser.add_argument(
        "--api-url",
        default=os.getenv("GITHUB_API_URL", "https://api.github.com"),
        help="GitHub API base URL.",
    )
    parser.add_argument(
        "--token",
        default="",
        help="GitHub token (default: GH_TOKEN/GITHUB_TOKEN, then `gh auth token` fallback).",
    )
    parser.add_argument(
        "--require-label",
        action="append",
        default=None,
        help="Required runner label; repeatable.",
    )
    parser.add_argument(
        "--min-online",
        type=int,
        default=3,
        help="Minimum required online runners matching labels.",
    )
    parser.add_argument(
        "--min-available",
        type=int,
        default=1,
        help="Minimum required online and idle runners matching labels.",
    )
    parser.add_argument(
        "--max-queued-runs",
        type=int,
        default=20,
        help="Maximum acceptable queued workflow runs.",
    )
    parser.add_argument(
        "--max-busy-ratio",
        type=float,
        default=0.90,
        help="Maximum acceptable busy ratio among online runners.",
    )
    parser.add_argument(
        "--output-json",
        default="",
        help="Optional path to write structured JSON report.",
    )
    parser.add_argument(
        "--fail-on-threshold",
        action="store_true",
        help="Exit non-zero if any threshold is violated.",
    )
    args = parser.parse_args()
    if args.require_label is None:
        # Flag never supplied: fall back to the documented default pool labels.
        args.require_label = ["self-hosted", "aws-india"]
    return args
class GitHubApi:
    """Minimal read-only GitHub REST v3 client (runner list + workflow runs)."""

    def __init__(self, api_url: str, token: str | None) -> None:
        # Strip trailing slash so f"{api_url}{path}" concatenation is predictable.
        self.api_url = api_url.rstrip("/")
        # Empty/None token sends unauthenticated requests (low rate limits).
        self.token = token

    def get(self, path: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
        """GET `path` with optional query params and decode the JSON body.

        Raises urllib.error.HTTPError on non-2xx responses.
        """
        query = urllib.parse.urlencode(params or {}, doseq=True)
        url = f"{self.api_url}{path}"
        if query:
            url = f"{url}?{query}"
        req = urllib.request.Request(url)
        req.add_header("Accept", "application/vnd.github+json")
        req.add_header("X-GitHub-Api-Version", "2022-11-28")
        if self.token:
            req.add_header("Authorization", f"Bearer {self.token}")
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode("utf-8"))

    def paginate(self, path: str, key: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Collect every page of a list endpoint, reading `key` from each page.

        Stops at the first empty or short (<100 item) page.
        """
        page = 1
        results: list[dict[str, Any]] = []
        while True:
            query = {"per_page": 100, "page": page}
            if params:
                query.update(params)
            payload = self.get(path, query)
            items = payload.get(key, [])
            if not items:
                break
            results.extend(items)
            if len(items) < 100:
                break
            page += 1
        return results
def split_repo(repo: str) -> tuple[str, str]:
    """Split ``owner/repo`` into a 2-tuple, validating both halves."""
    owner, _, name = repo.partition("/")
    if owner and name:
        return owner, name
    raise ValueError(f"Invalid --repo value: {repo!r}. Expected owner/repo.")
def normalize_labels(labels: list[str]) -> list[str]:
    """Strip whitespace, drop empties, and dedupe while preserving order."""
    cleaned = (label.strip() for label in labels)
    # dict.fromkeys preserves first-seen order while removing duplicates.
    return list(dict.fromkeys(label for label in cleaned if label))
def collect_report(args: argparse.Namespace) -> dict[str, Any]:
    """Query GitHub and assemble the full health-report dict.

    Performs three API queries (runner list, queued runs, in-progress runs),
    filters runners to those carrying every required label, and evaluates the
    thresholds from `args` into an ``alerts`` list.

    Raises ValueError for a malformed --repo; urllib HTTP errors propagate to
    the caller.
    """
    owner, repo = split_repo(args.repo)
    required_labels = normalize_labels(args.require_label)
    # Token resolution order: --token flag, env vars, then `gh auth token`.
    token = args.token or os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN")
    if not token:
        try:
            token = subprocess.check_output(
                ["gh", "auth", "token"],
                text=True,
                stderr=subprocess.DEVNULL,
            ).strip()
        except Exception:
            # `gh` missing or not authenticated: proceed unauthenticated.
            token = ""
    api = GitHubApi(args.api_url, token)
    runners = api.paginate(
        f"/repos/{owner}/{repo}/actions/runners",
        key="runners",
    )
    # A runner matches only when it carries every required label.
    matching_runners: list[dict[str, Any]] = []
    for runner in runners:
        names = {entry.get("name", "") for entry in runner.get("labels", [])}
        if all(label in names for label in required_labels):
            matching_runners.append(runner)
    queued_runs = api.paginate(
        f"/repos/{owner}/{repo}/actions/runs",
        key="workflow_runs",
        params={"status": "queued"},
    )
    in_progress_runs = api.paginate(
        f"/repos/{owner}/{repo}/actions/runs",
        key="workflow_runs",
        params={"status": "in_progress"},
    )
    total = len(matching_runners)
    online = sum(1 for runner in matching_runners if runner.get("status") == "online")
    offline = total - online
    online_busy = sum(
        1
        for runner in matching_runners
        if runner.get("status") == "online" and bool(runner.get("busy"))
    )
    available = online - online_busy
    # With zero online runners, treat the pool as fully saturated (ratio 1.0).
    busy_ratio = (online_busy / online) if online else 1.0
    # Threshold evaluation; each violation becomes one alert record.
    alerts: list[dict[str, Any]] = []
    if online < args.min_online:
        alerts.append(
            {
                "id": "low-online-runners",
                "severity": "critical",
                "message": f"Online runners below threshold: {online} < {args.min_online}",
            }
        )
    if available < args.min_available:
        alerts.append(
            {
                "id": "low-available-runners",
                "severity": "critical",
                "message": f"Available runners below threshold: {available} < {args.min_available}",
            }
        )
    if len(queued_runs) > args.max_queued_runs:
        alerts.append(
            {
                "id": "queue-pressure",
                "severity": "critical",
                "message": f"Queued runs above threshold: {len(queued_runs)} > {args.max_queued_runs}",
            }
        )
    if busy_ratio > args.max_busy_ratio:
        alerts.append(
            {
                "id": "high-busy-ratio",
                "severity": "warning",
                "message": f"Busy ratio above threshold: {busy_ratio:.2%} > {args.max_busy_ratio:.2%}",
            }
        )
    if offline > 0:
        alerts.append(
            {
                "id": "offline-runners",
                "severity": "warning",
                "message": f"{offline} runners are offline in the target label pool.",
            }
        )
    # Keep the report compact: only the first 10 queued runs are sampled.
    queued_examples = [
        {
            "id": item.get("id"),
            "name": item.get("name"),
            "head_branch": item.get("head_branch"),
            "event": item.get("event"),
            "created_at": item.get("created_at"),
            "html_url": item.get("html_url"),
        }
        for item in queued_runs[:10]
    ]
    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repository": f"{owner}/{repo}",
        "required_labels": required_labels,
        "runner_counts": {
            "total_matching": total,
            "online": online,
            "offline": offline,
            "online_busy": online_busy,
            "online_available": available,
            "online_busy_ratio": round(busy_ratio, 4),
        },
        "workflow_run_counts": {
            "queued": len(queued_runs),
            "in_progress": len(in_progress_runs),
        },
        "thresholds": {
            "min_online": args.min_online,
            "min_available": args.min_available,
            "max_queued_runs": args.max_queued_runs,
            "max_busy_ratio": args.max_busy_ratio,
        },
        "queued_run_examples": queued_examples,
        "alerts": alerts,
    }
def print_summary(report: dict[str, Any]) -> None:
    """Print the human-readable health summary for a collect_report() dict."""
    runner_counts = report["runner_counts"]
    run_counts = report["workflow_run_counts"]
    header_lines = [
        "Runner Pool Health Report",
        f"repo: {report['repository']}",
        f"labels: {', '.join(report['required_labels'])}",
        (
            f"runners: total={runner_counts['total_matching']} "
            f"online={runner_counts['online']} "
            f"available={runner_counts['online_available']} "
            f"busy={runner_counts['online_busy']} "
            f"offline={runner_counts['offline']}"
        ),
        f"workflows: queued={run_counts['queued']} in_progress={run_counts['in_progress']}",
        f"generated_at: {report['generated_at']}",
    ]
    for line in header_lines:
        print(line)
    alerts = report["alerts"]
    if not alerts:
        print("alerts: none")
        return
    print("alerts:")
    for alert in alerts:
        print(f" - [{alert['severity']}] {alert['id']}: {alert['message']}")
def main() -> int:
    """CLI entry point.

    Returns 0 on success, 1 when --fail-on-threshold is set and alerts fired,
    and 2 on argument, API, or unexpected errors.
    """
    # Explicit import: the module header only imports urllib.parse and
    # urllib.request; `urllib.error` being reachable as an attribute is only a
    # side effect of importing urllib.request. Import it directly so the
    # except clause below does not depend on that implicit behavior.
    import urllib.error

    args = parse_args()
    try:
        report = collect_report(args)
    except ValueError as exc:
        # Bad --repo value (raised by split_repo).
        print(f"error: {exc}", file=sys.stderr)
        return 2
    except urllib.error.HTTPError as exc:
        body = exc.read().decode("utf-8", errors="replace")
        print(
            f"error: GitHub API request failed ({exc.code} {exc.reason}): {body}",
            file=sys.stderr,
        )
        return 2
    except Exception as exc:  # pragma: no cover - defensive surface
        print(f"error: unexpected failure: {exc}", file=sys.stderr)
        return 2
    print_summary(report)
    if args.output_json:
        # Create parent directories so artifact paths like artifacts/x.json work.
        output_dir = os.path.dirname(args.output_json)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(args.output_json, "w", encoding="utf-8") as handle:
            json.dump(report, handle, ensure_ascii=False, indent=2)
            handle.write("\n")
    if args.fail_on_threshold and report["alerts"]:
        return 1
    return 0
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

View File

@ -3365,6 +3365,164 @@ class CiScriptsBehaviorTest(unittest.TestCase):
self.assertIn("required_checks.rc", joined)
self.assertIn("required_checks.stable", joined)
def test_queue_hygiene_dry_run_selects_obsolete_and_superseded_runs(self) -> None:
    """Dry-run selects obsolete-workflow runs plus PR-superseded duplicates.

    Fixture layout:
    - runs 11/12: obsolete workflow "CI Build (Fast)" -> both selected.
    - runs 21/22: same PR (#1001) of "CI Run" -> older run 21 superseded by 22.
    - run 23: different PR (#1002) -> kept (its group has one member).
    - runs 24/25: push events -> outside the default PR-only dedupe scope.
    """
    runs_json = self.tmp / "runs.json"
    output_json = self.tmp / "queue-hygiene.json"
    runs_json.write_text(
        json.dumps(
            {
                "workflow_runs": [
                    {
                        "id": 11,
                        "name": "CI Build (Fast)",
                        "event": "push",
                        "head_branch": "main",
                        "head_sha": "sha-11",
                        "created_at": "2026-02-27T20:00:00Z",
                    },
                    {
                        "id": 12,
                        "name": "CI Build (Fast)",
                        "event": "pull_request",
                        "head_branch": "feature-a",
                        "head_sha": "sha-12",
                        "created_at": "2026-02-27T20:01:00Z",
                        "pull_requests": [{"number": 1001}],
                    },
                    {
                        "id": 21,
                        "name": "CI Run",
                        "event": "pull_request",
                        "head_branch": "feature-a",
                        "head_sha": "sha-21",
                        "created_at": "2026-02-27T20:02:00Z",
                        "pull_requests": [{"number": 1001}],
                    },
                    {
                        "id": 22,
                        "name": "CI Run",
                        "event": "pull_request",
                        "head_branch": "feature-a",
                        "head_sha": "sha-22",
                        "created_at": "2026-02-27T20:03:00Z",
                        "pull_requests": [{"number": 1001}],
                    },
                    {
                        "id": 23,
                        "name": "CI Run",
                        "event": "pull_request",
                        "head_branch": "feature-a",
                        "head_sha": "sha-23",
                        "created_at": "2026-02-27T20:04:00Z",
                        "pull_requests": [{"number": 1002}],
                    },
                    {
                        "id": 24,
                        "name": "CI Run",
                        "event": "push",
                        "head_branch": "main",
                        "head_sha": "sha-24",
                        "created_at": "2026-02-27T20:05:00Z",
                    },
                    {
                        "id": 25,
                        "name": "CI Run",
                        "event": "push",
                        "head_branch": "main",
                        "head_sha": "sha-25",
                        "created_at": "2026-02-27T20:06:00Z",
                    },
                ]
            }
        )
        + "\n",
        encoding="utf-8",
    )
    # Offline dry-run against the fixture; no --apply, no network.
    proc = run_cmd(
        [
            "python3",
            self._script("queue_hygiene.py"),
            "--runs-json",
            str(runs_json),
            "--obsolete-workflow",
            "CI Build (Fast)",
            "--dedupe-workflow",
            "CI Run",
            "--output-json",
            str(output_json),
        ]
    )
    self.assertEqual(proc.returncode, 0, msg=proc.stderr)
    report = json.loads(output_json.read_text(encoding="utf-8"))
    # All 7 fixture runs are in scope; 3 are selected for cancellation.
    self.assertEqual(report["counts"]["runs_in_scope"], 7)
    self.assertEqual(report["counts"]["candidate_runs_before_cap"], 3)
    # Planned actions are ordered oldest-first by created_at.
    planned_ids = [item["id"] for item in report["planned_actions"]]
    self.assertEqual(planned_ids, [11, 12, 21])
    reasons_by_id = {item["id"]: item["reasons"] for item in report["planned_actions"]}
    self.assertIn("obsolete-workflow", reasons_by_id[11])
    self.assertIn("obsolete-workflow", reasons_by_id[12])
    # Run 21 is superseded by the newer run 22 of the same PR.
    self.assertTrue(any(reason.startswith("dedupe-superseded-by:22") for reason in reasons_by_id[21]))
def test_queue_hygiene_respects_max_cancel_cap(self) -> None:
    """``--max-cancel`` must cap planned cancellations at the N oldest runs.

    Three runs of the obsolete workflow qualify for cancellation; with
    ``--max-cancel 2`` only the two with the earliest ``created_at``
    (ids 101 and 102) may be planned, and the report must account for the
    one run skipped by the cap.
    """
    runs_json = self.tmp / "runs-cap.json"
    output_json = self.tmp / "queue-hygiene-cap.json"
    # Three candidate runs of the obsolete workflow, oldest first.
    runs_json.write_text(
        json.dumps(
            {
                "workflow_runs": [
                    {
                        "id": 101,
                        "name": "CI Build (Fast)",
                        "event": "push",
                        "head_branch": "main",
                        "created_at": "2026-02-27T20:00:00Z",
                    },
                    {
                        "id": 102,
                        "name": "CI Build (Fast)",
                        "event": "push",
                        "head_branch": "main",
                        "created_at": "2026-02-27T20:01:00Z",
                    },
                    {
                        "id": 103,
                        "name": "CI Build (Fast)",
                        "event": "push",
                        "head_branch": "main",
                        "created_at": "2026-02-27T20:02:00Z",
                    },
                ]
            }
        )
        + "\n",
        encoding="utf-8",
    )
    proc = run_cmd(
        [
            "python3",
            self._script("queue_hygiene.py"),
            "--runs-json",
            str(runs_json),
            "--obsolete-workflow",
            "CI Build (Fast)",
            "--max-cancel",
            "2",
            "--output-json",
            str(output_json),
        ]
    )
    self.assertEqual(proc.returncode, 0, msg=proc.stderr)
    report = json.loads(output_json.read_text(encoding="utf-8"))
    # All three runs qualified before the cap; the cap trimmed exactly one.
    self.assertEqual(report["counts"]["candidate_runs_before_cap"], 3)
    self.assertEqual(report["counts"]["candidate_runs_after_cap"], 2)
    self.assertEqual(report["counts"]["skipped_by_cap"], 1)
    planned_ids = [item["id"] for item in report["planned_actions"]]
    # The two oldest candidates survive the cap.
    self.assertEqual(planned_ids, [101, 102])
# Allow running this test module directly (outside the test runner).
if __name__ == "__main__":  # pragma: no cover
    unittest.main(verbosity=2)

View File

@ -60,6 +60,20 @@ const DEFAULT_MAX_TOOL_ITERATIONS: usize = 20;
/// Matches the channel-side constant in `channels/mod.rs`.
const AUTOSAVE_MIN_MESSAGE_CHARS: usize = 20;
/// Decide whether `provider` should be trusted with image inputs.
///
/// The provider's own capability probe wins when it reports support.
/// Anthropic routes are additionally allow-listed: issue #2107 reported
/// false negatives from capability probing even though Claude models accept
/// image inputs, so the preflight stays permissive for those routes and the
/// upstream API remains the final validator for any specific model.
fn should_treat_provider_as_vision_capable(provider_name: &str, provider: &dyn Provider) -> bool {
    if provider.supports_vision() {
        return true;
    }
    let route = provider_name.trim().to_ascii_lowercase();
    route == "anthropic" || route.starts_with("anthropic-custom:")
}
/// Slash-command definitions for interactive-mode completion.
/// Each entry: (trigger aliases, display label, description).
const SLASH_COMMANDS: &[(&[&str], &str, &str)] = &[
@ -173,6 +187,43 @@ static SENSITIVE_KV_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"(?i)(token|api[_-]?key|password|secret|user[_-]?key|bearer|credential)["']?\s*[:=]\s*(?:"([^"]{8,})"|'([^']{8,})'|([a-zA-Z0-9_\-\.]{8,}))"#).unwrap()
});
/// Detect "I'll do X" style deferred-action replies that often indicate a missing
/// follow-up tool call in agentic flows.
///
/// Uses `(?ix)` — case-insensitive with insignificant whitespace — so the
/// alternations stay readable. Matches an intent cue ("I'll", "let me", ...)
/// followed within up to 160 non-sentence-ending characters by an action
/// verb ("check", "run", "fetch", ...).
static DEFERRED_ACTION_WITHOUT_TOOL_CALL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?ix)
        \b(
            i(?:'ll|\s+will)|
            i\s+am\s+going\s+to|
            let\s+me|
            let(?:'s|\s+us)|
            we(?:'ll|\s+will)
        )\b
        [^.!?\n]{0,160}
        \b(
            check|look|search|browse|open|read|write|run|execute|call|
            inspect|analy(?:s|z)e|verify|list|fetch|try|see|continue
        )\b",
    )
    .unwrap()
});

/// Detect common CJK deferred-action phrases (e.g., Chinese "让我…查看")
/// that imply a follow-up tool call should occur.
static CJK_DEFERRED_ACTION_CUE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(让我|我来|我会|我们来|我们会|我先|先让我|马上)").unwrap());

/// Action verbs commonly used when promising to perform tool-backed work in CJK text.
static CJK_DEFERRED_ACTION_VERB_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(查看|检查|搜索|查找|浏览|打开|读取|写入|运行|执行|调用|分析|验证|列出|获取|尝试|试试|继续|处理|修复|看看|看一看|看一下)").unwrap()
});

/// Fast check for CJK scripts (Han/Hiragana/Katakana/Hangul) so we only run
/// the additional CJK regexes when non-Latin text is present.
static CJK_SCRIPT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]").unwrap()
});
/// Scrub credentials from tool output to prevent accidental exfiltration.
/// Replaces known credential patterns with a redacted placeholder while preserving
/// a small prefix for context.
@ -241,6 +292,7 @@ const AUTO_CRON_DELIVERY_CHANNELS: &[&str] = &[
/// Upper bound on how long a non-CLI channel waits for a human approval
/// decision before timing out.
const NON_CLI_APPROVAL_WAIT_TIMEOUT_SECS: u64 = 300;
/// Polling cadence used while waiting for that approval decision.
const NON_CLI_APPROVAL_POLL_INTERVAL_MS: u64 = 250;
/// Corrective prompt injected as a user message when the model's reply
/// implied an imminent action but no valid tool call was emitted.
const MISSING_TOOL_CALL_RETRY_PROMPT: &str = "Internal correction: your last reply indicated you were about to take an action, but no valid tool call was emitted. If a tool is needed, emit it now using the required <tool_call>...</tool_call> format. If no tool is needed, provide the complete final answer now and do not defer action.";
#[derive(Debug, Clone)]
pub(crate) struct NonCliApprovalPrompt {
@ -276,6 +328,21 @@ fn truncate_tool_args_for_progress(name: &str, args: &serde_json::Value, max_len
}
}
/// Heuristic: does `text` promise a follow-up action ("I'll check...",
/// "让我查看...") without having emitted a tool call?
///
/// English detection is a single combined regex; CJK detection requires a
/// CJK-script hit plus both a cue phrase and an action verb, and the script
/// pre-check keeps the extra regexes off the hot path for Latin-only text.
fn looks_like_deferred_action_without_tool_call(text: &str) -> bool {
    let candidate = text.trim();
    !candidate.is_empty()
        && (DEFERRED_ACTION_WITHOUT_TOOL_CALL_REGEX.is_match(candidate)
            || (CJK_SCRIPT_REGEX.is_match(candidate)
                && CJK_DEFERRED_ACTION_CUE_REGEX.is_match(candidate)
                && CJK_DEFERRED_ACTION_VERB_REGEX.is_match(candidate)))
}
fn maybe_inject_cron_add_delivery(
tool_name: &str,
tool_args: &mut serde_json::Value,
@ -726,6 +793,8 @@ pub(crate) async fn run_tool_call_loop(
let use_native_tools = provider.supports_native_tools() && !tool_specs.is_empty();
let turn_id = Uuid::new_v4().to_string();
let mut seen_tool_signatures: HashSet<(String, String)> = HashSet::new();
let mut missing_tool_call_retry_used = false;
let mut missing_tool_call_retry_prompt: Option<String> = None;
let bypass_non_cli_approval_for_turn =
approval.is_some_and(|mgr| channel_name != "cli" && mgr.consume_non_cli_allow_all_once());
if bypass_non_cli_approval_for_turn {
@ -750,7 +819,9 @@ pub(crate) async fn run_tool_call_loop(
}
let image_marker_count = multimodal::count_image_markers(history);
if image_marker_count > 0 && !provider.supports_vision() {
let provider_supports_vision =
should_treat_provider_as_vision_capable(provider_name, provider);
if image_marker_count > 0 && !provider_supports_vision {
return Err(ProviderCapabilityError {
provider: provider_name.to_string(),
capability: "vision".to_string(),
@ -763,6 +834,10 @@ pub(crate) async fn run_tool_call_loop(
let prepared_messages =
multimodal::prepare_messages_for_provider(history, multimodal_config).await?;
let mut request_messages = prepared_messages.messages.clone();
if let Some(prompt) = missing_tool_call_retry_prompt.take() {
request_messages.push(ChatMessage::user(prompt));
}
// ── Progress: LLM thinking ────────────────────────────
if let Some(ref tx) = on_delta {
@ -810,7 +885,7 @@ pub(crate) async fn run_tool_call_loop(
let chat_future = provider.chat(
ChatRequest {
messages: &prepared_messages.messages,
messages: &request_messages,
tools: request_tools,
},
model,
@ -826,138 +901,145 @@ pub(crate) async fn run_tool_call_loop(
chat_future.await
};
let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) =
match chat_result {
Ok(resp) => {
let (resp_input_tokens, resp_output_tokens) = resp
.usage
.as_ref()
.map(|u| (u.input_tokens, u.output_tokens))
.unwrap_or((None, None));
let (
response_text,
parsed_text,
tool_calls,
assistant_history_content,
native_tool_calls,
parse_issue_detected,
) = match chat_result {
Ok(resp) => {
let (resp_input_tokens, resp_output_tokens) = resp
.usage
.as_ref()
.map(|u| (u.input_tokens, u.output_tokens))
.unwrap_or((None, None));
observer.record_event(&ObserverEvent::LlmResponse {
provider: provider_name.to_string(),
model: model.to_string(),
duration: llm_started_at.elapsed(),
success: true,
error_message: None,
input_tokens: resp_input_tokens,
output_tokens: resp_output_tokens,
});
observer.record_event(&ObserverEvent::LlmResponse {
provider: provider_name.to_string(),
model: model.to_string(),
duration: llm_started_at.elapsed(),
success: true,
error_message: None,
input_tokens: resp_input_tokens,
output_tokens: resp_output_tokens,
});
let response_text = resp.text_or_empty().to_string();
// First try native structured tool calls (OpenAI-format).
// Fall back to text-based parsing (XML tags, markdown blocks,
// GLM format) only if the provider returned no native calls —
// this ensures we support both native and prompt-guided models.
let mut calls = parse_structured_tool_calls(&resp.tool_calls);
let mut parsed_text = String::new();
let response_text = resp.text_or_empty().to_string();
// First try native structured tool calls (OpenAI-format).
// Fall back to text-based parsing (XML tags, markdown blocks,
// GLM format) only if the provider returned no native calls —
// this ensures we support both native and prompt-guided models.
let mut calls = parse_structured_tool_calls(&resp.tool_calls);
let mut parsed_text = String::new();
if calls.is_empty() {
let (fallback_text, fallback_calls) = parse_tool_calls(&response_text);
if !fallback_text.is_empty() {
parsed_text = fallback_text;
}
calls = fallback_calls;
if calls.is_empty() {
let (fallback_text, fallback_calls) = parse_tool_calls(&response_text);
if !fallback_text.is_empty() {
parsed_text = fallback_text;
}
if let Some(parse_issue) = detect_tool_call_parse_issue(&response_text, &calls)
{
runtime_trace::record_event(
"tool_call_parse_issue",
Some(channel_name),
Some(provider_name),
Some(model),
Some(&turn_id),
Some(false),
Some(&parse_issue),
serde_json::json!({
"iteration": iteration + 1,
"response_excerpt": truncate_with_ellipsis(
&scrub_credentials(&response_text),
600
),
}),
);
}
runtime_trace::record_event(
"llm_response",
Some(channel_name),
Some(provider_name),
Some(model),
Some(&turn_id),
Some(true),
None,
serde_json::json!({
"iteration": iteration + 1,
"duration_ms": llm_started_at.elapsed().as_millis(),
"input_tokens": resp_input_tokens,
"output_tokens": resp_output_tokens,
"raw_response": scrub_credentials(&response_text),
"native_tool_calls": resp.tool_calls.len(),
"parsed_tool_calls": calls.len(),
}),
);
// Preserve native tool call IDs in assistant history so role=tool
// follow-up messages can reference the exact call id.
let reasoning_content = resp.reasoning_content.clone();
let assistant_history_content = if resp.tool_calls.is_empty() {
if use_native_tools {
build_native_assistant_history_from_parsed_calls(
&response_text,
&calls,
reasoning_content.as_deref(),
)
.unwrap_or_else(|| response_text.clone())
} else {
response_text.clone()
}
} else {
build_native_assistant_history(
&response_text,
&resp.tool_calls,
reasoning_content.as_deref(),
)
};
let native_calls = resp.tool_calls;
(
response_text,
parsed_text,
calls,
assistant_history_content,
native_calls,
)
calls = fallback_calls;
}
Err(e) => {
let safe_error = crate::providers::sanitize_api_error(&e.to_string());
observer.record_event(&ObserverEvent::LlmResponse {
provider: provider_name.to_string(),
model: model.to_string(),
duration: llm_started_at.elapsed(),
success: false,
error_message: Some(safe_error.clone()),
input_tokens: None,
output_tokens: None,
});
let parse_issue = detect_tool_call_parse_issue(&response_text, &calls);
if let Some(parse_issue) = parse_issue.as_deref() {
runtime_trace::record_event(
"llm_response",
"tool_call_parse_issue",
Some(channel_name),
Some(provider_name),
Some(model),
Some(&turn_id),
Some(false),
Some(&safe_error),
Some(&parse_issue),
serde_json::json!({
"iteration": iteration + 1,
"duration_ms": llm_started_at.elapsed().as_millis(),
"response_excerpt": truncate_with_ellipsis(
&scrub_credentials(&response_text),
600
),
}),
);
return Err(e);
}
};
runtime_trace::record_event(
"llm_response",
Some(channel_name),
Some(provider_name),
Some(model),
Some(&turn_id),
Some(true),
None,
serde_json::json!({
"iteration": iteration + 1,
"duration_ms": llm_started_at.elapsed().as_millis(),
"input_tokens": resp_input_tokens,
"output_tokens": resp_output_tokens,
"raw_response": scrub_credentials(&response_text),
"native_tool_calls": resp.tool_calls.len(),
"parsed_tool_calls": calls.len(),
}),
);
// Preserve native tool call IDs in assistant history so role=tool
// follow-up messages can reference the exact call id.
let reasoning_content = resp.reasoning_content.clone();
let assistant_history_content = if resp.tool_calls.is_empty() {
if use_native_tools {
build_native_assistant_history_from_parsed_calls(
&response_text,
&calls,
reasoning_content.as_deref(),
)
.unwrap_or_else(|| response_text.clone())
} else {
response_text.clone()
}
} else {
build_native_assistant_history(
&response_text,
&resp.tool_calls,
reasoning_content.as_deref(),
)
};
let native_calls = resp.tool_calls;
(
response_text,
parsed_text,
calls,
assistant_history_content,
native_calls,
parse_issue.is_some(),
)
}
Err(e) => {
let safe_error = crate::providers::sanitize_api_error(&e.to_string());
observer.record_event(&ObserverEvent::LlmResponse {
provider: provider_name.to_string(),
model: model.to_string(),
duration: llm_started_at.elapsed(),
success: false,
error_message: Some(safe_error.clone()),
input_tokens: None,
output_tokens: None,
});
runtime_trace::record_event(
"llm_response",
Some(channel_name),
Some(provider_name),
Some(model),
Some(&turn_id),
Some(false),
Some(&safe_error),
serde_json::json!({
"iteration": iteration + 1,
"duration_ms": llm_started_at.elapsed().as_millis(),
}),
);
return Err(e);
}
};
let display_text = if parsed_text.is_empty() {
response_text.clone()
@ -979,6 +1061,46 @@ pub(crate) async fn run_tool_call_loop(
}
if tool_calls.is_empty() {
let missing_tool_call_followthrough = !missing_tool_call_retry_used
&& iteration + 1 < max_iterations
&& !tool_specs.is_empty()
&& (parse_issue_detected
|| looks_like_deferred_action_without_tool_call(&display_text));
if missing_tool_call_followthrough {
missing_tool_call_retry_used = true;
missing_tool_call_retry_prompt = Some(MISSING_TOOL_CALL_RETRY_PROMPT.to_string());
let retry_reason = if parse_issue_detected {
"parse_issue_detected"
} else {
"deferred_action_text_detected"
};
runtime_trace::record_event(
"tool_call_followthrough_retry",
Some(channel_name),
Some(provider_name),
Some(model),
Some(&turn_id),
Some(true),
Some("llm response implied follow-up action but emitted no tool call"),
serde_json::json!({
"iteration": iteration + 1,
"reason": retry_reason,
"response_excerpt": truncate_with_ellipsis(&scrub_credentials(&display_text), 600),
}),
);
if let Some(ref tx) = on_delta {
let _ = tx
.send(format!(
"{DRAFT_PROGRESS_SENTINEL}\u{21bb} Retrying: response deferred action without a tool call\n"
))
.await;
}
continue;
}
runtime_trace::record_event(
"turn_final_response",
Some(channel_name),
@ -2641,6 +2763,39 @@ mod tests {
assert_eq!(calls.load(Ordering::SeqCst), 0);
}
// Regression coverage: an anthropic route must stay usable for image input
// even when the provider's vision-capability probe reports a false negative
// (see `should_treat_provider_as_vision_capable`).
#[tokio::test]
async fn run_tool_call_loop_allows_anthropic_route_on_vision_probe_false_negative() {
    // NOTE(review): assumes ScriptedProvider reports supports_vision() ==
    // false so the anthropic allow-list is what lets this pass — confirm.
    let provider = ScriptedProvider::from_text_responses(vec!["vision-ok"]);
    // The image marker in history triggers the vision preflight check.
    let mut history = vec![ChatMessage::user(
        "please inspect [IMAGE:data:image/png;base64,iVBORw0KGgo=]".to_string(),
    )];
    let tools_registry: Vec<Box<dyn Tool>> = Vec::new();
    let observer = NoopObserver;
    let result = run_tool_call_loop(
        &provider,
        &mut history,
        &tools_registry,
        &observer,
        "anthropic",
        "opus-4-6",
        0.0,
        true,
        None,
        "cli",
        &crate::config::MultimodalConfig::default(),
        3,
        None,
        None,
        None,
        &[],
    )
    .await
    .expect("anthropic route should not fail on a false-negative vision capability probe");
    assert_eq!(result, "vision-ok");
}
#[tokio::test]
async fn run_tool_call_loop_rejects_oversized_image_payload() {
let calls = Arc::new(AtomicUsize::new(0));
@ -3249,6 +3404,57 @@ mod tests {
);
}
// The loop should inject the missing-tool-call correction prompt once when a
// reply defers action without emitting a tool call, then proceed normally.
#[tokio::test]
async fn run_tool_call_loop_retries_once_when_response_defers_action_without_tool_call() {
    // Reply 1 defers ("I'll check...") with no tool call; reply 2 emits a
    // real tool call after the injected correction; reply 3 is the final
    // answer returned to the caller.
    let provider = ScriptedProvider::from_text_responses(vec![
        "I'll check that right away.",
        r#"<tool_call>
{"name":"count_tool","arguments":{"value":"retry"}}
</tool_call>"#,
        "done after tool",
    ]);
    // Counts actual tool executions so we can assert the retry led to one.
    let invocations = Arc::new(AtomicUsize::new(0));
    let tools_registry: Vec<Box<dyn Tool>> = vec![Box::new(CountingTool::new(
        "count_tool",
        Arc::clone(&invocations),
    ))];
    let mut history = vec![
        ChatMessage::system("test-system"),
        ChatMessage::user("please check the workspace"),
    ];
    let observer = NoopObserver;
    let result = run_tool_call_loop(
        &provider,
        &mut history,
        &tools_registry,
        &observer,
        "mock-provider",
        "mock-model",
        0.0,
        true,
        None,
        "cli",
        &crate::config::MultimodalConfig::default(),
        5,
        None,
        None,
        None,
        &[],
    )
    .await
    .expect("loop should recover after one deferred-action reply");
    assert_eq!(result, "done after tool");
    assert_eq!(
        invocations.load(Ordering::SeqCst),
        1,
        "the fallback retry should lead to an actual tool execution"
    );
}
#[test]
fn parse_tool_calls_extracts_single_call() {
let response = r#"Let me check that.
@ -4148,6 +4354,32 @@ Done."#;
assert!(issue.is_none());
}
// Positive cases: an intent cue plus an action verb, in English and Chinese,
// must trip the deferred-action heuristic.
#[test]
fn looks_like_deferred_action_without_tool_call_detects_action_promises() {
    // English: "let's ... see" and "Let me try ...".
    assert!(looks_like_deferred_action_without_tool_call(
        "Webpage opened, let's see what's new here."
    ));
    assert!(looks_like_deferred_action_without_tool_call(
        "It seems absolute paths are blocked. Let me try using a relative path."
    ));
    // Chinese: cue "让我" plus action verbs "尝试" / "获取".
    assert!(looks_like_deferred_action_without_tool_call(
        "看起来绝对路径不可用,让我尝试使用当前目录的相对路径。"
    ));
    assert!(looks_like_deferred_action_without_tool_call(
        "页面已打开,让我获取快照查看详细信息。"
    ));
}

// Negative cases: completed answers carry no action promise and must not
// trigger the retry heuristic.
#[test]
fn looks_like_deferred_action_without_tool_call_ignores_final_answers() {
    assert!(!looks_like_deferred_action_without_tool_call(
        "The latest update is already shown above."
    ));
    assert!(!looks_like_deferred_action_without_tool_call(
        "最新结果已经在上面整理完成。"
    ));
}
#[test]
fn parse_tool_calls_handles_whitespace_only_name() {
// Recovery: Whitespace-only tool name should return None

View File

@ -115,6 +115,13 @@ impl PromptSection for IdentitySection {
inject_workspace_file(&mut prompt, ctx.workspace_dir, "MEMORY.md");
}
let extra_files = ctx.identity_config.map_or(&[][..], |cfg| cfg.extra_files.as_slice());
for file in extra_files {
if let Some(safe_relative) = normalize_openclaw_identity_extra_file(file) {
inject_workspace_file(&mut prompt, ctx.workspace_dir, safe_relative);
}
}
Ok(prompt)
}
}
@ -260,6 +267,29 @@ fn inject_workspace_file(prompt: &mut String, workspace_dir: &Path, filename: &s
}
}
/// Validate one `identity.extra_files` entry, returning the trimmed path
/// when it is a safe workspace-relative path.
///
/// Rejected: empty/whitespace-only entries, absolute paths, and any path
/// containing parent-directory (`..`), root, or drive-prefix components.
fn normalize_openclaw_identity_extra_file(raw: &str) -> Option<&str> {
    use std::path::{Component, Path};

    let candidate = raw.trim();
    if candidate.is_empty() {
        return None;
    }

    let as_path = Path::new(candidate);
    if as_path.is_absolute() {
        return None;
    }

    // Only plain segments and `.` are allowed; `..`, root, and Windows
    // prefixes could escape the workspace.
    let safe = as_path
        .components()
        .all(|c| matches!(c, Component::Normal(_) | Component::CurDir));
    safe.then_some(candidate)
}
#[cfg(test)]
mod tests {
use super::*;
@ -307,6 +337,7 @@ mod tests {
let identity_config = crate::config::IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: Some(r#"{"identity":{"names":{"first":"Nova"}}}"#.into()),
};
@ -337,6 +368,96 @@ mod tests {
let _ = std::fs::remove_dir_all(workspace);
}
// The identity section must inject configured extra_files (top-level and
// nested) alongside the baseline bootstrap files.
#[test]
fn identity_section_openclaw_injects_extra_files() {
    // Unique temp workspace so parallel test runs cannot collide.
    let workspace = std::env::temp_dir().join(format!(
        "zeroclaw_prompt_extra_files_test_{}",
        uuid::Uuid::new_v4()
    ));
    std::fs::create_dir_all(workspace.join("memory")).unwrap();
    // Baseline bootstrap files.
    std::fs::write(workspace.join("AGENTS.md"), "agent baseline").unwrap();
    std::fs::write(workspace.join("SOUL.md"), "soul baseline").unwrap();
    std::fs::write(workspace.join("TOOLS.md"), "tools baseline").unwrap();
    std::fs::write(workspace.join("IDENTITY.md"), "identity baseline").unwrap();
    std::fs::write(workspace.join("USER.md"), "user baseline").unwrap();
    // Extra files: one top-level, one nested in a subdirectory.
    std::fs::write(workspace.join("FRAMEWORK.md"), "framework context").unwrap();
    std::fs::write(workspace.join("memory").join("notes.md"), "memory notes").unwrap();

    let identity_config = crate::config::IdentityConfig {
        format: "openclaw".into(),
        extra_files: vec!["FRAMEWORK.md".into(), "memory/notes.md".into()],
        aieos_path: None,
        aieos_inline: None,
    };

    let tools: Vec<Box<dyn Tool>> = vec![];
    let ctx = PromptContext {
        workspace_dir: &workspace,
        model_name: "test-model",
        tools: &tools,
        skills: &[],
        skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
        identity_config: Some(&identity_config),
        dispatcher_instructions: "",
    };

    let section = IdentitySection;
    let output = section.build(&ctx).unwrap();
    // Both extra files appear with their headers and contents.
    assert!(output.contains("### FRAMEWORK.md"));
    assert!(output.contains("framework context"));
    assert!(output.contains("### memory/notes.md"));
    assert!(output.contains("memory notes"));
    // Best-effort cleanup; failure is non-fatal.
    let _ = std::fs::remove_dir_all(workspace);
}
// Traversal and absolute-path entries in extra_files must be dropped while
// safe entries are still injected.
#[test]
fn identity_section_openclaw_rejects_unsafe_extra_files() {
    let workspace = std::env::temp_dir().join(format!(
        "zeroclaw_prompt_extra_files_unsafe_test_{}",
        uuid::Uuid::new_v4()
    ));
    std::fs::create_dir_all(&workspace).unwrap();
    // Baseline bootstrap files plus one legitimately injectable extra file.
    std::fs::write(workspace.join("AGENTS.md"), "agent baseline").unwrap();
    std::fs::write(workspace.join("SOUL.md"), "soul baseline").unwrap();
    std::fs::write(workspace.join("TOOLS.md"), "tools baseline").unwrap();
    std::fs::write(workspace.join("IDENTITY.md"), "identity baseline").unwrap();
    std::fs::write(workspace.join("USER.md"), "user baseline").unwrap();
    std::fs::write(workspace.join("SAFE.md"), "safe context").unwrap();

    let identity_config = crate::config::IdentityConfig {
        format: "openclaw".into(),
        // One safe entry followed by a traversal and an absolute path.
        extra_files: vec![
            "SAFE.md".into(),
            "../outside.md".into(),
            "/tmp/absolute.md".into(),
        ],
        aieos_path: None,
        aieos_inline: None,
    };

    let tools: Vec<Box<dyn Tool>> = vec![];
    let ctx = PromptContext {
        workspace_dir: &workspace,
        model_name: "test-model",
        tools: &tools,
        skills: &[],
        skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
        identity_config: Some(&identity_config),
        dispatcher_instructions: "",
    };

    let section = IdentitySection;
    let output = section.build(&ctx).unwrap();
    assert!(output.contains("### SAFE.md"));
    // Unsafe entries leave no trace in the generated prompt.
    assert!(!output.contains("outside.md"));
    assert!(!output.contains("absolute.md"));
    let _ = std::fs::remove_dir_all(workspace);
}
#[test]
fn prompt_builder_assembles_sections() {
let tools: Vec<Box<dyn Tool>> = vec![Box::new(TestTool)];

View File

@ -130,6 +130,36 @@ const CHANNEL_HOOK_MAX_OUTBOUND_CHARS: usize = 20_000;
type ProviderCacheMap = Arc<Mutex<HashMap<String, Arc<dyn Provider>>>>;
type RouteSelectionMap = Arc<Mutex<HashMap<String, ChannelRouteSelection>>>;
/// Lazily-initialized, process-wide registry of live channels, keyed by
/// lowercase channel name.
fn live_channels_registry() -> &'static Mutex<HashMap<String, Arc<dyn Channel>>> {
    static REGISTRY: OnceLock<Mutex<HashMap<String, Arc<dyn Channel>>>> = OnceLock::new();
    REGISTRY.get_or_init(|| Mutex::new(HashMap::new()))
}
/// Replace the live-channel registry contents with the given channel map.
///
/// Keys are lowercased for case-insensitive lookup; a poisoned mutex is
/// recovered with `into_inner` rather than panicking.
fn register_live_channels(channels_by_name: &HashMap<String, Arc<dyn Channel>>) {
    let mut guard = live_channels_registry()
        .lock()
        .unwrap_or_else(|e| e.into_inner());
    // Drop any previously registered handles before inserting the new set.
    guard.clear();
    for (name, channel) in channels_by_name {
        guard.insert(name.to_ascii_lowercase(), Arc::clone(channel));
    }
}
/// Drop every registered channel handle so stale handles are never reused
/// across a restart.
fn clear_live_channels() {
    live_channels_registry()
        .lock()
        .unwrap_or_else(|e| e.into_inner())
        .clear();
}
/// Case-insensitive lookup of a live channel by name.
///
/// Returns a cloned handle, or `None` when no channel with that name is
/// currently registered.
pub(crate) fn get_live_channel(name: &str) -> Option<Arc<dyn Channel>> {
    live_channels_registry()
        .lock()
        .unwrap_or_else(|e| e.into_inner())
        .get(&name.to_ascii_lowercase())
        .cloned()
}
/// Clamp a configured channel message timeout up to the supported floor so a
/// misconfigured tiny value cannot be used.
fn effective_channel_message_timeout_secs(configured: u64) -> u64 {
    std::cmp::max(configured, MIN_CHANNEL_MESSAGE_TIMEOUT_SECS)
}
@ -1079,6 +1109,11 @@ async fn load_runtime_defaults_from_config_file(
if let Some(zeroclaw_dir) = path.parent() {
let store = crate::security::SecretStore::new(zeroclaw_dir, parsed.secrets.encrypt);
decrypt_optional_secret_for_runtime_reload(&store, &mut parsed.api_key, "config.api_key")?;
decrypt_optional_secret_for_runtime_reload(
&store,
&mut parsed.transcription.api_key,
"config.transcription.api_key",
)?;
}
parsed.apply_env_overrides();
@ -3815,6 +3850,7 @@ fn load_openclaw_bootstrap_files(
prompt: &mut String,
workspace_dir: &std::path::Path,
max_chars_per_file: usize,
identity_config: Option<&crate::config::IdentityConfig>,
) {
prompt.push_str(
"The following workspace files define your identity, behavior, and context. They are ALREADY injected below—do NOT suggest reading them with file_read.\n\n",
@ -3837,6 +3873,44 @@ fn load_openclaw_bootstrap_files(
if memory_path.exists() {
inject_workspace_file(prompt, workspace_dir, "MEMORY.md", max_chars_per_file);
}
let extra_files = identity_config.map_or(&[][..], |cfg| cfg.extra_files.as_slice());
for file in extra_files {
match normalize_openclaw_identity_extra_file(file) {
Some(safe_relative) => {
inject_workspace_file(prompt, workspace_dir, safe_relative, max_chars_per_file);
}
None => {
tracing::warn!(
file = file.as_str(),
"Ignoring unsafe identity.extra_files entry; expected workspace-relative path without traversal"
);
}
}
}
}
/// Validate one `identity.extra_files` entry, returning the trimmed path
/// when it is a safe workspace-relative path.
///
/// Rejected: empty/whitespace-only entries, absolute paths, and any path
/// containing parent-directory (`..`), root, or drive-prefix components.
fn normalize_openclaw_identity_extra_file(raw: &str) -> Option<&str> {
    use std::path::{Component, Path};

    let candidate = raw.trim();
    if candidate.is_empty() {
        return None;
    }

    let as_path = Path::new(candidate);
    if as_path.is_absolute() {
        return None;
    }

    // Only plain segments and `.` are allowed; `..`, root, and Windows
    // prefixes could escape the workspace.
    let safe = as_path
        .components()
        .all(|c| matches!(c, Component::Normal(_) | Component::CurDir));
    safe.then_some(candidate)
}
/// Load workspace identity files and build a system prompt.
@ -3982,7 +4056,12 @@ pub fn build_system_prompt_with_mode(
// No AIEOS identity loaded (shouldn't happen if is_aieos_configured returned true)
// Fall back to OpenClaw bootstrap files
let max_chars = bootstrap_max_chars.unwrap_or(BOOTSTRAP_MAX_CHARS);
load_openclaw_bootstrap_files(&mut prompt, workspace_dir, max_chars);
load_openclaw_bootstrap_files(
&mut prompt,
workspace_dir,
max_chars,
identity_config,
);
}
Err(e) => {
// Log error but don't fail - fall back to OpenClaw
@ -3990,18 +4069,23 @@ pub fn build_system_prompt_with_mode(
"Warning: Failed to load AIEOS identity: {e}. Using OpenClaw format."
);
let max_chars = bootstrap_max_chars.unwrap_or(BOOTSTRAP_MAX_CHARS);
load_openclaw_bootstrap_files(&mut prompt, workspace_dir, max_chars);
load_openclaw_bootstrap_files(
&mut prompt,
workspace_dir,
max_chars,
identity_config,
);
}
}
} else {
// OpenClaw format
let max_chars = bootstrap_max_chars.unwrap_or(BOOTSTRAP_MAX_CHARS);
load_openclaw_bootstrap_files(&mut prompt, workspace_dir, max_chars);
load_openclaw_bootstrap_files(&mut prompt, workspace_dir, max_chars, identity_config);
}
} else {
// No identity config - use OpenClaw format
let max_chars = bootstrap_max_chars.unwrap_or(BOOTSTRAP_MAX_CHARS);
load_openclaw_bootstrap_files(&mut prompt, workspace_dir, max_chars);
load_openclaw_bootstrap_files(&mut prompt, workspace_dir, max_chars, identity_config);
}
// ── 6. Date & Time ──────────────────────────────────────────
@ -4714,6 +4798,9 @@ pub async fn doctor_channels(config: Config) -> Result<()> {
/// Start all configured channels and route messages to the agent
#[allow(clippy::too_many_lines)]
pub async fn start_channels(config: Config) -> Result<()> {
// Ensure stale channel handles are never reused across restarts.
clear_live_channels();
let provider_name = resolved_default_provider(&config);
let provider_runtime_options = providers::ProviderRuntimeOptions {
auth_profile_override: None,
@ -5039,6 +5126,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
.map(|ch| (ch.name().to_string(), Arc::clone(ch)))
.collect::<HashMap<_, _>>(),
);
register_live_channels(channels_by_name.as_ref());
let max_in_flight_messages = compute_max_in_flight_messages(channels.len());
println!(" 🚦 In-flight message limit: {max_in_flight_messages}");
@ -5113,6 +5201,8 @@ pub async fn start_channels(config: Config) -> Result<()> {
let _ = h.await;
}
clear_live_channels();
Ok(())
}
@ -9965,6 +10055,7 @@ BTC is currently around $65,000 based on latest tool output."#;
// Create identity config pointing to the file
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some("aieos_identity.json".into()),
aieos_inline: None,
};
@ -9999,6 +10090,7 @@ BTC is currently around $65,000 based on latest tool output."#;
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: Some(r#"{"identity":{"names":{"first":"Claw"}}}"#.into()),
};
@ -10022,6 +10114,7 @@ BTC is currently around $65,000 based on latest tool output."#;
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some("nonexistent.json".into()),
aieos_inline: None,
};
@ -10041,6 +10134,7 @@ BTC is currently around $65,000 based on latest tool output."#;
// Format is "aieos" but neither path nor inline is set
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: None,
};
@ -10059,6 +10153,7 @@ BTC is currently around $65,000 based on latest tool output."#;
let config = IdentityConfig {
format: "openclaw".into(),
extra_files: Vec::new(),
aieos_path: Some("identity.json".into()),
aieos_inline: None,
};
@ -10072,6 +10167,63 @@ BTC is currently around $65,000 based on latest tool output."#;
assert!(!prompt.contains("## Identity"));
}
// build_system_prompt must inject configured extra_files (top-level and
// nested) when using the OpenClaw identity format.
#[test]
fn openclaw_extra_files_are_injected() {
    use crate::config::IdentityConfig;

    let ws = make_workspace();
    // One top-level extra file and one nested in a subdirectory.
    std::fs::write(
        ws.path().join("FRAMEWORK.md"),
        "# Framework\nSession-level context.",
    )
    .unwrap();
    std::fs::create_dir_all(ws.path().join("memory")).unwrap();
    std::fs::write(
        ws.path().join("memory").join("notes.md"),
        "# Notes\nSupplemental context.",
    )
    .unwrap();

    let config = IdentityConfig {
        format: "openclaw".into(),
        extra_files: vec!["FRAMEWORK.md".into(), "memory/notes.md".into()],
        aieos_path: None,
        aieos_inline: None,
    };

    let prompt = build_system_prompt(ws.path(), "model", &[], &[], Some(&config), None);
    // Both files appear with their headers and contents.
    assert!(prompt.contains("### FRAMEWORK.md"));
    assert!(prompt.contains("Session-level context."));
    assert!(prompt.contains("### memory/notes.md"));
    assert!(prompt.contains("Supplemental context."));
}
// Traversal and absolute-path extra_files entries are ignored by
// build_system_prompt while safe entries are still injected.
#[test]
fn openclaw_extra_files_reject_unsafe_paths() {
    use crate::config::IdentityConfig;

    let ws = make_workspace();
    std::fs::write(ws.path().join("SAFE.md"), "safe").unwrap();

    let config = IdentityConfig {
        format: "openclaw".into(),
        // One safe entry followed by a traversal and an absolute path.
        extra_files: vec![
            "SAFE.md".into(),
            "../outside.md".into(),
            "/tmp/absolute.md".into(),
        ],
        aieos_path: None,
        aieos_inline: None,
    };

    let prompt = build_system_prompt(ws.path(), "model", &[], &[], Some(&config), None);
    assert!(prompt.contains("### SAFE.md"));
    // Unsafe entries leave no trace in the generated prompt.
    assert!(!prompt.contains("outside.md"));
    assert!(!prompt.contains("absolute.md"));
}
#[test]
fn none_identity_config_uses_openclaw() {
let ws = make_workspace();

View File

@ -33,9 +33,14 @@ fn normalize_audio_filename(file_name: &str) -> String {
/// Transcribe audio bytes via a Whisper-compatible transcription API.
///
/// Returns the transcribed text on success. Requires `GROQ_API_KEY` in the
/// environment. The caller is responsible for enforcing duration limits
/// *before* downloading the file; this function enforces the byte-size cap.
/// Returns the transcribed text on success.
///
/// Credential resolution order:
/// 1. `config.transcription.api_key`
/// 2. `GROQ_API_KEY` environment variable (backward compatibility)
///
/// The caller is responsible for enforcing duration limits *before* downloading
/// the file; this function enforces the byte-size cap.
pub async fn transcribe_audio(
audio_data: Vec<u8>,
file_name: &str,
@ -59,9 +64,21 @@ pub async fn transcribe_audio(
)
})?;
let api_key = std::env::var("GROQ_API_KEY").context(
"GROQ_API_KEY environment variable is not set — required for voice transcription",
)?;
let api_key = config
.api_key
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.map(ToOwned::to_owned)
.or_else(|| {
std::env::var("GROQ_API_KEY")
.ok()
.map(|value| value.trim().to_string())
.filter(|value| !value.is_empty())
})
.context(
"Missing transcription API key: set [transcription].api_key or GROQ_API_KEY environment variable",
)?;
let client = crate::config::build_runtime_proxy_client("transcription.groq");
@ -125,7 +142,7 @@ mod tests {
#[tokio::test]
async fn rejects_missing_api_key() {
// Ensure the key is absent for this test
// Ensure fallback env key is absent for this test.
std::env::remove_var("GROQ_API_KEY");
let data = vec![0u8; 100];
@ -135,11 +152,29 @@ mod tests {
.await
.unwrap_err();
assert!(
err.to_string().contains("GROQ_API_KEY"),
err.to_string().contains("transcription API key"),
"expected missing-key error, got: {err}"
);
}
#[tokio::test]
async fn uses_config_api_key_without_groq_env() {
    // Prove that `[transcription].api_key` alone satisfies key resolution,
    // with no GROQ_API_KEY fallback present in the environment.
    std::env::remove_var("GROQ_API_KEY");
    let data = vec![0u8; 100];
    let mut config = TranscriptionConfig::default();
    config.api_key = Some("transcription-key".to_string());
    // Keep invalid extension so we fail before network, but after key resolution.
    let err = transcribe_audio(data, "recording.aac", &config)
        .await
        .unwrap_err();
    // A format error (not a missing-key error) shows the config key was accepted.
    assert!(
        err.to_string().contains("Unsupported audio format"),
        "expected unsupported-format error, got: {err}"
    );
}
#[test]
fn mime_for_audio_maps_accepted_formats() {
let cases = [

View File

@ -12,15 +12,15 @@ pub use schema::{
DockerRuntimeConfig, EmbeddingRouteConfig, EstopConfig, FeishuConfig, GatewayConfig,
GroupReplyConfig, GroupReplyMode, HardwareConfig, HardwareTransport, HeartbeatConfig,
HooksConfig, HttpRequestConfig, IMessageConfig, IdentityConfig, LarkConfig, MatrixConfig,
MemoryConfig, ModelRouteConfig, MultimodalConfig, NextcloudTalkConfig,
NonCliNaturalLanguageApprovalMode, ObservabilityConfig, OtpChallengeDelivery, OtpConfig,
OtpMethod, PeripheralBoardConfig, PeripheralsConfig, PerplexityFilterConfig, PluginEntryConfig,
PluginsConfig, ProviderConfig, ProxyConfig, ProxyScope, QdrantConfig,
QueryClassificationConfig, ReliabilityConfig, ResearchPhaseConfig, ResearchTrigger,
ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig,
SecretsConfig, SecurityConfig, SecurityRoleConfig, SkillsConfig, SkillsPromptInjectionMode,
SlackConfig, StorageConfig, StorageProviderConfig, StorageProviderSection, StreamMode,
SyscallAnomalyConfig, TelegramConfig, TranscriptionConfig, TunnelConfig, UrlAccessConfig,
MemoryConfig, ModelRouteConfig, MultimodalConfig, NextcloudTalkConfig, ObservabilityConfig,
OtpChallengeDelivery, OtpConfig, OtpMethod, PeripheralBoardConfig, PeripheralsConfig,
NonCliNaturalLanguageApprovalMode, PerplexityFilterConfig, PluginEntryConfig, PluginsConfig,
ProviderConfig, ProxyConfig, ProxyScope, QdrantConfig, QueryClassificationConfig,
ReliabilityConfig, ResearchPhaseConfig, ResearchTrigger, ResourceLimitsConfig, RuntimeConfig,
SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig,
SecurityRoleConfig, SkillsConfig, SkillsPromptInjectionMode, SlackConfig, StorageConfig,
StorageProviderConfig, StorageProviderSection, StreamMode, SyscallAnomalyConfig,
TelegramConfig, TranscriptionConfig, TunnelConfig, UrlAccessConfig,
WasmCapabilityEscalationMode, WasmConfig, WasmModuleHashPolicy, WasmRuntimeConfig,
WasmSecurityConfig, WebFetchConfig, WebSearchConfig, WebhookConfig,
};

View File

@ -508,6 +508,11 @@ pub struct TranscriptionConfig {
/// Enable voice transcription for channels that support it.
#[serde(default)]
pub enabled: bool,
/// API key used for transcription requests.
///
/// If unset, runtime falls back to `GROQ_API_KEY` for backward compatibility.
#[serde(default)]
pub api_key: Option<String>,
/// Whisper API endpoint URL.
#[serde(default = "default_transcription_api_url")]
pub api_url: String,
@ -526,6 +531,7 @@ impl Default for TranscriptionConfig {
fn default() -> Self {
Self {
enabled: false,
api_key: None,
api_url: default_transcription_api_url(),
model: default_transcription_model(),
language: None,
@ -948,6 +954,11 @@ pub struct IdentityConfig {
/// Identity format: "openclaw" (default) or "aieos"
#[serde(default = "default_identity_format")]
pub format: String,
/// Additional workspace files injected for the OpenClaw identity format.
///
/// Paths are resolved relative to the workspace root.
#[serde(default)]
pub extra_files: Vec<String>,
/// Path to AIEOS JSON file (relative to workspace)
#[serde(default)]
pub aieos_path: Option<String>,
@ -964,6 +975,7 @@ impl Default for IdentityConfig {
fn default() -> Self {
Self {
format: default_identity_format(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: None,
}
@ -2187,7 +2199,7 @@ impl Default for StorageProviderConfig {
/// Controls conversation memory storage, embeddings, hybrid search, response caching,
/// and memory snapshot/hydration.
/// Configuration for Qdrant vector database backend (`[memory.qdrant]`).
/// Used when `[memory].backend = "qdrant"`.
/// Used when `[memory].backend = "qdrant"` or `"sqlite_qdrant_hybrid"`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct QdrantConfig {
/// Qdrant server URL (e.g. "http://localhost:6333").
@ -2221,10 +2233,10 @@ impl Default for QdrantConfig {
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[allow(clippy::struct_excessive_bools)]
pub struct MemoryConfig {
/// "sqlite" | "lucid" | "postgres" | "qdrant" | "markdown" | "none" (`none` = explicit no-op memory)
/// "sqlite" | "sqlite_qdrant_hybrid" | "lucid" | "postgres" | "qdrant" | "markdown" | "none" (`none` = explicit no-op memory)
///
/// `postgres` requires `[storage.provider.config]` with `db_url` (`dbURL` alias supported).
/// `qdrant` uses `[memory.qdrant]` config or `QDRANT_URL` env var.
/// `qdrant` and `sqlite_qdrant_hybrid` use `[memory.qdrant]` config or `QDRANT_URL` env var.
pub backend: String,
/// Auto-save user-stated conversation input to memory (assistant output is excluded)
pub auto_save: bool,
@ -2297,7 +2309,7 @@ pub struct MemoryConfig {
// ── Qdrant backend options ─────────────────────────────────
/// Configuration for Qdrant vector database backend.
/// Only used when `backend = "qdrant"`.
/// Used when `backend = "qdrant"` or `backend = "sqlite_qdrant_hybrid"`.
#[serde(default)]
pub qdrant: QdrantConfig,
}
@ -5985,6 +5997,11 @@ impl Config {
config.workspace_dir = workspace_dir;
let store = crate::security::SecretStore::new(&zeroclaw_dir, config.secrets.encrypt);
decrypt_optional_secret(&store, &mut config.api_key, "config.api_key")?;
decrypt_optional_secret(
&store,
&mut config.transcription.api_key,
"config.transcription.api_key",
)?;
decrypt_optional_secret(
&store,
&mut config.composio.api_key,
@ -6997,6 +7014,11 @@ impl Config {
let store = crate::security::SecretStore::new(zeroclaw_dir, self.secrets.encrypt);
encrypt_optional_secret(&store, &mut config_to_save.api_key, "config.api_key")?;
encrypt_optional_secret(
&store,
&mut config_to_save.transcription.api_key,
"config.transcription.api_key",
)?;
encrypt_optional_secret(
&store,
&mut config_to_save.composio.api_key,
@ -8087,6 +8109,7 @@ tool_dispatcher = "xml"
config.workspace_dir = dir.join("workspace");
config.config_path = dir.join("config.toml");
config.api_key = Some("root-credential".into());
config.transcription.api_key = Some("transcription-credential".into());
config.composio.api_key = Some("composio-credential".into());
config.proxy.http_proxy = Some("http://user:pass@proxy.internal:8080".into());
config.proxy.https_proxy = Some("https://user:pass@proxy.internal:8443".into());
@ -8134,6 +8157,15 @@ tool_dispatcher = "xml"
assert!(crate::security::SecretStore::is_encrypted(root_encrypted));
assert_eq!(store.decrypt(root_encrypted).unwrap(), "root-credential");
let transcription_encrypted = stored.transcription.api_key.as_deref().unwrap();
assert!(crate::security::SecretStore::is_encrypted(
transcription_encrypted
));
assert_eq!(
store.decrypt(transcription_encrypted).unwrap(),
"transcription-credential"
);
let composio_encrypted = stored.composio.api_key.as_deref().unwrap();
assert!(crate::security::SecretStore::is_encrypted(
composio_encrypted
@ -10665,6 +10697,7 @@ default_model = "legacy-model"
async fn transcription_config_defaults() {
let tc = TranscriptionConfig::default();
assert!(!tc.enabled);
assert!(tc.api_key.is_none());
assert!(tc.api_url.contains("groq.com"));
assert_eq!(tc.model, "whisper-large-v3-turbo");
assert!(tc.language.is_none());
@ -10675,12 +10708,17 @@ default_model = "legacy-model"
// Round-trip the whole Config through TOML and verify the transcription
// section (enabled flag, api_key, language, default model) survives intact.
async fn config_roundtrip_with_transcription() {
    let mut config = Config::default();
    config.transcription.enabled = true;
    config.transcription.api_key = Some("transcription-key".into());
    config.transcription.language = Some("en".into());
    // Serialize then re-parse: serde attributes must preserve every field.
    let toml_str = toml::to_string_pretty(&config).unwrap();
    let parsed: Config = toml::from_str(&toml_str).unwrap();
    assert!(parsed.transcription.enabled);
    assert_eq!(
        parsed.transcription.api_key.as_deref(),
        Some("transcription-key")
    );
    assert_eq!(parsed.transcription.language.as_deref(), Some("en"));
    // Model was not set explicitly, so the default must round-trip too.
    assert_eq!(parsed.transcription.model, "whisper-large-v3-turbo");
}

View File

@ -2,7 +2,7 @@
use crate::channels::LarkChannel;
use crate::channels::{
Channel, DiscordChannel, EmailChannel, MattermostChannel, QQChannel, SendMessage, SlackChannel,
TelegramChannel,
TelegramChannel, WhatsAppChannel,
};
use crate::config::Config;
use crate::cron::{
@ -308,7 +308,8 @@ pub(crate) async fn deliver_announcement(
target: &str,
output: &str,
) -> Result<()> {
match channel.to_ascii_lowercase().as_str() {
let normalized = channel.to_ascii_lowercase();
match normalized.as_str() {
"telegram" => {
let tg = config
.channels_config
@ -383,6 +384,31 @@ pub(crate) async fn deliver_announcement(
);
channel.send(&SendMessage::new(output, target)).await?;
}
"whatsapp_web" | "whatsapp" => {
let wa = config
.channels_config
.whatsapp
.as_ref()
.ok_or_else(|| anyhow::anyhow!("whatsapp channel not configured"))?;
// WhatsApp Web requires the connected channel instance from the
// channel runtime. Fall back to cloud mode if configured.
if let Some(live_channel) = crate::channels::get_live_channel("whatsapp") {
live_channel.send(&SendMessage::new(output, target)).await?;
} else if wa.is_cloud_config() {
let channel = WhatsAppChannel::new(
wa.access_token.clone().unwrap_or_default(),
wa.phone_number_id.clone().unwrap_or_default(),
wa.verify_token.clone().unwrap_or_default(),
wa.allowed_numbers.clone(),
);
channel.send(&SendMessage::new(output, target)).await?;
} else {
anyhow::bail!(
"whatsapp_web delivery requires an active channels runtime session; start daemon/channels with whatsapp web enabled"
);
}
}
"lark" => {
#[cfg(feature = "channel-lark")]
{
@ -1106,4 +1132,33 @@ mod tests {
let err = deliver_if_configured(&config, &job, "x").await.unwrap_err();
assert!(err.to_string().contains("unsupported delivery channel"));
}
#[tokio::test]
async fn deliver_if_configured_whatsapp_web_requires_live_session_in_web_mode() {
    // Web-mode WhatsApp config (session_path set, no cloud credentials) with
    // no live channel session: announcement delivery must fail with a clear
    // "requires an active channels runtime session" error.
    let tmp = TempDir::new().unwrap();
    let mut config = test_config(&tmp).await;
    config.channels_config.whatsapp = Some(crate::config::schema::WhatsAppConfig {
        access_token: None,
        phone_number_id: None,
        verify_token: None,
        app_secret: None,
        session_path: Some("~/.zeroclaw/state/whatsapp-web/session.db".into()),
        pair_phone: None,
        pair_code: None,
        allowed_numbers: vec!["*".into()],
    });
    let mut job = test_job("echo ok");
    // Route the job's announcement through the whatsapp_web channel.
    job.delivery = DeliveryConfig {
        mode: "announce".into(),
        channel: Some("whatsapp_web".into()),
        to: Some("+15551234567".into()),
        best_effort: true,
    };
    let err = deliver_if_configured(&config, &job, "x").await.unwrap_err();
    assert!(err
        .to_string()
        .contains("requires an active channels runtime session"));
}
}

View File

@ -299,7 +299,8 @@ fn heartbeat_delivery_target(config: &Config) -> Result<Option<(String, String)>
}
fn validate_heartbeat_channel_config(config: &Config, channel: &str) -> Result<()> {
match channel.to_ascii_lowercase().as_str() {
let normalized = channel.to_ascii_lowercase();
match normalized.as_str() {
"telegram" => {
if config.channels_config.telegram.is_none() {
anyhow::bail!(
@ -328,6 +329,19 @@ fn validate_heartbeat_channel_config(config: &Config, channel: &str) -> Result<(
);
}
}
"whatsapp" | "whatsapp_web" => {
let wa = config.channels_config.whatsapp.as_ref().ok_or_else(|| {
anyhow::anyhow!(
"heartbeat.target is set to {channel} but channels_config.whatsapp is not configured"
)
})?;
if normalized == "whatsapp_web" && wa.is_cloud_config() && !wa.is_web_config() {
anyhow::bail!(
"heartbeat.target is set to whatsapp_web but channels_config.whatsapp is configured for cloud mode (set session_path for web mode)"
);
}
}
other => anyhow::bail!("unsupported heartbeat.target channel: {other}"),
}
@ -607,4 +621,47 @@ mod tests {
let target = heartbeat_delivery_target(&config).unwrap();
assert_eq!(target, Some(("telegram".to_string(), "123456".to_string())));
}
#[test]
fn heartbeat_delivery_target_accepts_whatsapp_web_target_in_web_mode() {
    // With a web-mode WhatsApp config (session_path set, no cloud creds),
    // a whatsapp_web heartbeat target must validate and be returned as-is.
    let mut config = Config::default();
    config.heartbeat.target = Some("whatsapp_web".into());
    config.heartbeat.to = Some("+15551234567".into());
    config.channels_config.whatsapp = Some(crate::config::schema::WhatsAppConfig {
        access_token: None,
        phone_number_id: None,
        verify_token: None,
        app_secret: None,
        session_path: Some("~/.zeroclaw/state/whatsapp-web/session.db".into()),
        pair_phone: None,
        pair_code: None,
        allowed_numbers: vec!["*".into()],
    });
    let target = heartbeat_delivery_target(&config).unwrap();
    assert_eq!(
        target,
        Some(("whatsapp_web".to_string(), "+15551234567".to_string()))
    );
}
#[test]
fn heartbeat_delivery_target_rejects_whatsapp_web_target_in_cloud_mode() {
    // A whatsapp_web heartbeat target combined with a cloud-mode config
    // (access_token/phone_number_id set, no session_path) must be rejected.
    let mut config = Config::default();
    config.heartbeat.target = Some("whatsapp_web".into());
    config.heartbeat.to = Some("+15551234567".into());
    config.channels_config.whatsapp = Some(crate::config::schema::WhatsAppConfig {
        access_token: Some("token".into()),
        phone_number_id: Some("123456".into()),
        verify_token: Some("verify".into()),
        app_secret: None,
        session_path: None,
        pair_phone: None,
        pair_code: None,
        allowed_numbers: vec!["*".into()],
    });
    let err = heartbeat_delivery_target(&config).unwrap_err();
    assert!(err.to_string().contains("configured for cloud mode"));
}
}

View File

@ -607,6 +607,7 @@ fn mask_sensitive_fields(config: &crate::config::Config) -> crate::config::Confi
mask_optional_secret(&mut masked.proxy.http_proxy);
mask_optional_secret(&mut masked.proxy.https_proxy);
mask_optional_secret(&mut masked.proxy.all_proxy);
mask_optional_secret(&mut masked.transcription.api_key);
mask_optional_secret(&mut masked.browser.computer_use.api_key);
mask_optional_secret(&mut masked.web_fetch.api_key);
mask_optional_secret(&mut masked.web_search.api_key);
@ -705,6 +706,7 @@ fn restore_masked_sensitive_fields(
restore_optional_secret(&mut incoming.proxy.http_proxy, &current.proxy.http_proxy);
restore_optional_secret(&mut incoming.proxy.https_proxy, &current.proxy.https_proxy);
restore_optional_secret(&mut incoming.proxy.all_proxy, &current.proxy.all_proxy);
restore_optional_secret(&mut incoming.transcription.api_key, &current.transcription.api_key);
restore_optional_secret(
&mut incoming.browser.computer_use.api_key,
&current.browser.computer_use.api_key,
@ -917,6 +919,7 @@ mod tests {
current.config_path = std::path::PathBuf::from("/tmp/current/config.toml");
current.workspace_dir = std::path::PathBuf::from("/tmp/current/workspace");
current.api_key = Some("real-key".to_string());
current.transcription.api_key = Some("transcription-real-key".to_string());
current.reliability.api_keys = vec!["r1".to_string(), "r2".to_string()];
let mut incoming = mask_sensitive_fields(&current);
@ -929,6 +932,7 @@ mod tests {
assert_eq!(hydrated.config_path, current.config_path);
assert_eq!(hydrated.workspace_dir, current.workspace_dir);
assert_eq!(hydrated.api_key, current.api_key);
assert_eq!(hydrated.transcription.api_key, current.transcription.api_key);
assert_eq!(hydrated.default_model.as_deref(), Some("gpt-4.1-mini"));
assert_eq!(
hydrated.reliability.api_keys,
@ -964,6 +968,7 @@ mod tests {
cfg.proxy.http_proxy = Some("http://user:pass@proxy.internal:8080".to_string());
cfg.proxy.https_proxy = Some("https://user:pass@proxy.internal:8443".to_string());
cfg.proxy.all_proxy = Some("socks5://user:pass@proxy.internal:1080".to_string());
cfg.transcription.api_key = Some("transcription-real-key".to_string());
cfg.tunnel.cloudflare = Some(CloudflareTunnelConfig {
token: "cloudflare-real-token".to_string(),
});
@ -998,6 +1003,7 @@ mod tests {
assert_eq!(masked.proxy.http_proxy.as_deref(), Some(MASKED_SECRET));
assert_eq!(masked.proxy.https_proxy.as_deref(), Some(MASKED_SECRET));
assert_eq!(masked.proxy.all_proxy.as_deref(), Some(MASKED_SECRET));
assert_eq!(masked.transcription.api_key.as_deref(), Some(MASKED_SECRET));
assert_eq!(
masked
.tunnel

View File

@ -1316,6 +1316,7 @@ mod tests {
fn is_aieos_configured_true_with_path() {
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some("identity.json".into()),
aieos_inline: None,
};
@ -1326,6 +1327,7 @@ mod tests {
fn is_aieos_configured_true_with_inline() {
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: Some("{\"identity\":{}}".into()),
};
@ -1336,6 +1338,7 @@ mod tests {
fn is_aieos_configured_false_openclaw_format() {
let config = IdentityConfig {
format: "openclaw".into(),
extra_files: Vec::new(),
aieos_path: Some("identity.json".into()),
aieos_inline: None,
};
@ -1346,6 +1349,7 @@ mod tests {
fn is_aieos_configured_false_no_config() {
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: None,
};
@ -1520,6 +1524,7 @@ mod tests {
let config = IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some("identity.json".into()),
aieos_inline: None,
};

View File

@ -1,6 +1,7 @@
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum MemoryBackendKind {
Sqlite,
SqliteQdrantHybrid,
Lucid,
Postgres,
Qdrant,
@ -65,6 +66,15 @@ const QDRANT_PROFILE: MemoryBackendProfile = MemoryBackendProfile {
optional_dependency: false,
};
const SQLITE_QDRANT_HYBRID_PROFILE: MemoryBackendProfile = MemoryBackendProfile {
key: "sqlite_qdrant_hybrid",
label: "SQLite + Qdrant hybrid — SQLite metadata/FTS with Qdrant semantic ranking",
auto_save_default: true,
uses_sqlite_hygiene: true,
sqlite_based: true,
optional_dependency: false,
};
const NONE_PROFILE: MemoryBackendProfile = MemoryBackendProfile {
key: "none",
label: "None — disable persistent memory",
@ -101,6 +111,7 @@ pub fn default_memory_backend_key() -> &'static str {
pub fn classify_memory_backend(backend: &str) -> MemoryBackendKind {
match backend {
"sqlite" => MemoryBackendKind::Sqlite,
"sqlite_qdrant_hybrid" | "hybrid" => MemoryBackendKind::SqliteQdrantHybrid,
"lucid" => MemoryBackendKind::Lucid,
"postgres" => MemoryBackendKind::Postgres,
"qdrant" => MemoryBackendKind::Qdrant,
@ -113,6 +124,7 @@ pub fn classify_memory_backend(backend: &str) -> MemoryBackendKind {
pub fn memory_backend_profile(backend: &str) -> MemoryBackendProfile {
match classify_memory_backend(backend) {
MemoryBackendKind::Sqlite => SQLITE_PROFILE,
MemoryBackendKind::SqliteQdrantHybrid => SQLITE_QDRANT_HYBRID_PROFILE,
MemoryBackendKind::Lucid => LUCID_PROFILE,
MemoryBackendKind::Postgres => POSTGRES_PROFILE,
MemoryBackendKind::Qdrant => QDRANT_PROFILE,
@ -129,6 +141,10 @@ mod tests {
#[test]
fn classify_known_backends() {
assert_eq!(classify_memory_backend("sqlite"), MemoryBackendKind::Sqlite);
assert_eq!(
classify_memory_backend("sqlite_qdrant_hybrid"),
MemoryBackendKind::SqliteQdrantHybrid
);
assert_eq!(classify_memory_backend("lucid"), MemoryBackendKind::Lucid);
assert_eq!(
classify_memory_backend("postgres"),
@ -141,6 +157,14 @@ mod tests {
assert_eq!(classify_memory_backend("none"), MemoryBackendKind::None);
}
#[test]
fn hybrid_profile_is_sqlite_based() {
let profile = memory_backend_profile("sqlite_qdrant_hybrid");
assert_eq!(profile.key, "sqlite_qdrant_hybrid");
assert!(profile.sqlite_based);
assert!(profile.uses_sqlite_hygiene);
}
#[test]
fn classify_unknown_backend() {
assert_eq!(classify_memory_backend("redis"), MemoryBackendKind::Unknown);

332
src/memory/hybrid.rs Normal file
View File

@ -0,0 +1,332 @@
use super::traits::{Memory, MemoryCategory, MemoryEntry};
use anyhow::Result;
use async_trait::async_trait;
use std::collections::HashSet;
use std::sync::Arc;
/// Composite memory backend:
/// - SQLite remains authoritative for metadata/content/filtering.
/// - Qdrant provides semantic ranking candidates.
///
/// All reads of record-of-truth data (`get`, `list`, `count`) go to SQLite;
/// Qdrant is consulted only for semantic `recall` ranking and is kept in
/// sync on a best-effort basis (see the `Memory` impl).
pub struct SqliteQdrantHybridMemory {
    // Authoritative store for entry content and metadata.
    sqlite: Arc<dyn Memory>,
    // Vector store used for semantic candidate ranking.
    qdrant: Arc<dyn Memory>,
}
impl SqliteQdrantHybridMemory {
    /// Compose a hybrid backend from an authoritative SQLite store and a
    /// Qdrant-backed vector store.
    pub fn new(sqlite: Arc<dyn Memory>, qdrant: Arc<dyn Memory>) -> Self {
        Self { sqlite, qdrant }
    }
}
#[async_trait]
impl Memory for SqliteQdrantHybridMemory {
    /// Backend identifier reported to callers and diagnostics.
    fn name(&self) -> &str {
        "sqlite_qdrant_hybrid"
    }

    /// Store an entry in both backends.
    ///
    /// SQLite is authoritative: a SQLite failure propagates. The Qdrant
    /// write is best-effort — on failure it is logged as a warning and the
    /// call still succeeds, since the SQLite row was persisted.
    async fn store(
        &self,
        key: &str,
        content: &str,
        category: MemoryCategory,
        session_id: Option<&str>,
    ) -> Result<()> {
        // SQLite is authoritative. Fail only if local persistence fails.
        self.sqlite
            .store(key, content, category.clone(), session_id)
            .await?;
        // Best-effort vector sync to Qdrant.
        if let Err(err) = self.qdrant.store(key, content, category, session_id).await {
            tracing::warn!(
                key,
                error = %err,
                "Hybrid memory vector sync failed; SQLite entry was stored"
            );
        }
        Ok(())
    }

    /// Recall entries matching `query`.
    ///
    /// Strategy:
    /// 1. Blank query → plain SQLite recall.
    /// 2. Ask Qdrant for up to `3 * max(limit, 1)` candidates; on error,
    ///    fall back to SQLite recall.
    /// 3. Join candidates back to SQLite rows by key (first occurrence of a
    ///    key wins), dropping rows missing from SQLite or not matching
    ///    `session_id`, carrying Qdrant's score onto the SQLite entry.
    /// 4. If Qdrant returned nothing or the join produced nothing, fall back
    ///    to SQLite recall.
    async fn recall(
        &self,
        query: &str,
        limit: usize,
        session_id: Option<&str>,
    ) -> Result<Vec<MemoryEntry>> {
        let trimmed_query = query.trim();
        if trimmed_query.is_empty() {
            return self.sqlite.recall(query, limit, session_id).await;
        }
        // Over-fetch (3x, saturating) so post-join filtering can still fill `limit`.
        let qdrant_candidates = match self
            .qdrant
            .recall(trimmed_query, limit.max(1).saturating_mul(3), session_id)
            .await
        {
            Ok(candidates) => candidates,
            Err(err) => {
                tracing::warn!(
                    query = trimmed_query,
                    error = %err,
                    "Hybrid memory semantic recall failed; falling back to SQLite recall"
                );
                return self.sqlite.recall(trimmed_query, limit, session_id).await;
            }
        };
        if qdrant_candidates.is_empty() {
            return self.sqlite.recall(trimmed_query, limit, session_id).await;
        }
        let mut seen_keys = HashSet::new();
        let mut merged = Vec::with_capacity(limit);
        for candidate in qdrant_candidates {
            // Deduplicate by key; only the first occurrence is considered.
            if !seen_keys.insert(candidate.key.clone()) {
                continue;
            }
            match self.sqlite.get(&candidate.key).await {
                Ok(Some(mut entry)) => {
                    // Re-check the session filter against the authoritative row.
                    if let Some(filter_sid) = session_id {
                        if entry.session_id.as_deref() != Some(filter_sid) {
                            continue;
                        }
                    }
                    // Keep SQLite content but surface Qdrant's ranking score.
                    entry.score = candidate.score;
                    merged.push(entry);
                    if merged.len() >= limit {
                        break;
                    }
                }
                Ok(None) => {
                    // Ignore Qdrant candidates that no longer exist in SQLite.
                }
                Err(err) => {
                    tracing::warn!(
                        key = candidate.key,
                        error = %err,
                        "Hybrid memory failed to load SQLite row for Qdrant candidate"
                    );
                }
            }
        }
        if merged.is_empty() {
            return self.sqlite.recall(trimmed_query, limit, session_id).await;
        }
        Ok(merged)
    }

    /// Point lookup — served by the authoritative SQLite store only.
    async fn get(&self, key: &str) -> Result<Option<MemoryEntry>> {
        self.sqlite.get(key).await
    }

    /// Listing — served by the authoritative SQLite store only.
    async fn list(
        &self,
        category: Option<&MemoryCategory>,
        session_id: Option<&str>,
    ) -> Result<Vec<MemoryEntry>> {
        self.sqlite.list(category, session_id).await
    }

    /// Delete from both backends.
    ///
    /// Returns the SQLite delete result; a Qdrant delete failure is logged
    /// but does not change the outcome.
    async fn forget(&self, key: &str) -> Result<bool> {
        let removed = self.sqlite.forget(key).await?;
        if let Err(err) = self.qdrant.forget(key).await {
            tracing::warn!(
                key,
                error = %err,
                "Hybrid memory vector delete failed; SQLite delete result preserved"
            );
        }
        Ok(removed)
    }

    /// Entry count from the authoritative SQLite store.
    async fn count(&self) -> Result<usize> {
        self.sqlite.count().await
    }

    /// Healthy iff SQLite is healthy. A failing Qdrant check is logged as a
    /// warning but does not mark the hybrid backend unhealthy, since SQLite
    /// can still serve all authoritative operations.
    async fn health_check(&self) -> bool {
        let sqlite_ok = self.sqlite.health_check().await;
        if !sqlite_ok {
            return false;
        }
        if !self.qdrant.health_check().await {
            tracing::warn!("Hybrid memory Qdrant health check failed; SQLite remains available");
        }
        true
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory::{Memory, MemoryCategory, MemoryEntry, SqliteMemory};
    use std::sync::Mutex;
    use tempfile::TempDir;

    /// Test double standing in for the Qdrant side of the hybrid backend,
    /// with canned recall results and injectable failures.
    struct StubQdrantMemory {
        // Entries returned verbatim from `recall`.
        recall_results: Vec<MemoryEntry>,
        // When true, `store` fails with a simulated error.
        fail_store: bool,
        // When true, `recall` fails with a simulated error.
        fail_recall: bool,
        // Records every key passed to `forget`.
        forget_calls: Mutex<Vec<String>>,
    }

    impl StubQdrantMemory {
        /// Build a stub with the given canned results and failure flags.
        fn new(recall_results: Vec<MemoryEntry>, fail_store: bool, fail_recall: bool) -> Self {
            Self {
                recall_results,
                fail_store,
                fail_recall,
                forget_calls: Mutex::new(Vec::new()),
            }
        }
    }

    #[async_trait]
    impl Memory for StubQdrantMemory {
        fn name(&self) -> &str {
            "qdrant_stub"
        }

        // Succeeds unless `fail_store` is set.
        async fn store(
            &self,
            _key: &str,
            _content: &str,
            _category: MemoryCategory,
            _session_id: Option<&str>,
        ) -> Result<()> {
            if self.fail_store {
                anyhow::bail!("simulated qdrant store failure");
            }
            Ok(())
        }

        // Returns the canned results unless `fail_recall` is set.
        async fn recall(
            &self,
            _query: &str,
            _limit: usize,
            _session_id: Option<&str>,
        ) -> Result<Vec<MemoryEntry>> {
            if self.fail_recall {
                anyhow::bail!("simulated qdrant recall failure");
            }
            Ok(self.recall_results.clone())
        }

        // Always misses: the hybrid backend must never serve `get` from Qdrant.
        async fn get(&self, _key: &str) -> Result<Option<MemoryEntry>> {
            Ok(None)
        }

        async fn list(
            &self,
            _category: Option<&MemoryCategory>,
            _session_id: Option<&str>,
        ) -> Result<Vec<MemoryEntry>> {
            Ok(Vec::new())
        }

        // Records the key; recovers from a poisoned mutex via `into_inner`.
        async fn forget(&self, key: &str) -> Result<bool> {
            self.forget_calls
                .lock()
                .unwrap_or_else(|e| e.into_inner())
                .push(key.to_string());
            Ok(true)
        }

        async fn count(&self) -> Result<usize> {
            Ok(self.recall_results.len())
        }

        async fn health_check(&self) -> bool {
            true
        }
    }

    /// Fresh SQLite memory in a temp dir. The `TempDir` guard is returned so
    /// callers keep the directory alive for the test's duration.
    fn temp_sqlite() -> (TempDir, Arc<dyn Memory>) {
        let tmp = TempDir::new().unwrap();
        let sqlite = SqliteMemory::new(tmp.path()).unwrap();
        (tmp, Arc::new(sqlite))
    }

    /// Minimal Qdrant-style candidate with the given key and similarity score.
    fn make_qdrant_entry(key: &str, score: f64) -> MemoryEntry {
        MemoryEntry {
            id: format!("vec-{key}"),
            key: key.to_string(),
            content: "vector payload".to_string(),
            category: MemoryCategory::Core,
            timestamp: "2026-02-27T00:00:00Z".to_string(),
            session_id: None,
            score: Some(score),
        }
    }

    #[tokio::test]
    async fn store_keeps_sqlite_when_qdrant_sync_fails() {
        let (_tmp, sqlite) = temp_sqlite();
        // Qdrant stub configured to fail its store call.
        let qdrant: Arc<dyn Memory> = Arc::new(StubQdrantMemory::new(Vec::new(), true, false));
        let hybrid = SqliteQdrantHybridMemory::new(Arc::clone(&sqlite), qdrant);
        // Store must succeed despite the vector-sync failure...
        hybrid
            .store("fav_lang", "Rust", MemoryCategory::Core, None)
            .await
            .unwrap();
        // ...and the row must be present in the authoritative SQLite store.
        let stored = sqlite.get("fav_lang").await.unwrap();
        assert!(stored.is_some(), "SQLite should remain authoritative");
    }

    #[tokio::test]
    async fn recall_joins_qdrant_ranking_with_sqlite_rows() {
        let (_tmp, sqlite) = temp_sqlite();
        sqlite
            .store("a", "alpha from sqlite", MemoryCategory::Core, None)
            .await
            .unwrap();
        sqlite
            .store("b", "beta from sqlite", MemoryCategory::Core, None)
            .await
            .unwrap();
        // Qdrant lists "b" before "a"; hybrid recall should keep that order
        // while serving entry content from SQLite and scores from Qdrant.
        let qdrant: Arc<dyn Memory> = Arc::new(StubQdrantMemory::new(
            vec![make_qdrant_entry("b", 0.91), make_qdrant_entry("a", 0.72)],
            false,
            false,
        ));
        let hybrid = SqliteQdrantHybridMemory::new(Arc::clone(&sqlite), qdrant);
        let recalled = hybrid.recall("rank semantically", 2, None).await.unwrap();
        assert_eq!(recalled.len(), 2);
        assert_eq!(recalled[0].key, "b");
        assert_eq!(recalled[0].content, "beta from sqlite");
        assert_eq!(recalled[0].score, Some(0.91));
        assert_eq!(recalled[1].key, "a");
        assert_eq!(recalled[1].score, Some(0.72));
    }

    #[tokio::test]
    async fn recall_falls_back_to_sqlite_when_qdrant_fails() {
        let (_tmp, sqlite) = temp_sqlite();
        sqlite
            .store(
                "topic",
                "hybrid fallback should still find this",
                MemoryCategory::Core,
                None,
            )
            .await
            .unwrap();
        // Qdrant stub configured to fail its recall call.
        let qdrant: Arc<dyn Memory> = Arc::new(StubQdrantMemory::new(Vec::new(), false, true));
        let hybrid = SqliteQdrantHybridMemory::new(Arc::clone(&sqlite), qdrant);
        let recalled = hybrid.recall("fallback", 5, None).await.unwrap();
        assert!(
            recalled.iter().any(|entry| entry.key == "topic"),
            "SQLite fallback should provide recall results when Qdrant is unavailable"
        );
    }
}

View File

@ -2,6 +2,7 @@ pub mod backend;
pub mod chunker;
pub mod cli;
pub mod embeddings;
pub mod hybrid;
pub mod hygiene;
pub mod lucid;
pub mod markdown;
@ -20,6 +21,7 @@ pub use backend::{
classify_memory_backend, default_memory_backend_key, memory_backend_profile,
selectable_memory_backends, MemoryBackendKind, MemoryBackendProfile,
};
pub use hybrid::SqliteQdrantHybridMemory;
pub use lucid::LucidMemory;
pub use markdown::MarkdownMemory;
pub use none::NoneMemory;
@ -49,7 +51,9 @@ where
G: FnMut() -> anyhow::Result<Box<dyn Memory>>,
{
match classify_memory_backend(backend_name) {
MemoryBackendKind::Sqlite => Ok(Box::new(sqlite_builder()?)),
MemoryBackendKind::Sqlite | MemoryBackendKind::SqliteQdrantHybrid => {
Ok(Box::new(sqlite_builder()?))
}
MemoryBackendKind::Lucid => {
let local = sqlite_builder()?;
Ok(Box::new(LucidMemory::new(workspace_dir, local)))
@ -210,7 +214,9 @@ pub fn create_memory_with_storage_and_routes(
&& config.snapshot_on_hygiene
&& matches!(
backend_kind,
MemoryBackendKind::Sqlite | MemoryBackendKind::Lucid
MemoryBackendKind::Sqlite
| MemoryBackendKind::SqliteQdrantHybrid
| MemoryBackendKind::Lucid
)
{
if let Err(e) = snapshot::export_snapshot(workspace_dir) {
@ -223,7 +229,9 @@ pub fn create_memory_with_storage_and_routes(
if config.auto_hydrate
&& matches!(
backend_kind,
MemoryBackendKind::Sqlite | MemoryBackendKind::Lucid
MemoryBackendKind::Sqlite
| MemoryBackendKind::SqliteQdrantHybrid
| MemoryBackendKind::Lucid
)
&& snapshot::should_hydrate(workspace_dir)
{
@ -299,7 +307,10 @@ pub fn create_memory_with_storage_and_routes(
);
}
if matches!(backend_kind, MemoryBackendKind::Qdrant) {
fn build_qdrant_memory(
config: &MemoryConfig,
resolved_embedding: &ResolvedEmbeddingConfig,
) -> anyhow::Result<QdrantMemory> {
let url = config
.qdrant
.url
@ -332,12 +343,26 @@ pub fn create_memory_with_storage_and_routes(
url,
collection
);
return Ok(Box::new(QdrantMemory::new_lazy(
Ok(QdrantMemory::new_lazy(
&url,
&collection,
qdrant_api_key,
embedder,
)));
))
}
if matches!(backend_kind, MemoryBackendKind::Qdrant) {
return Ok(Box::new(build_qdrant_memory(config, &resolved_embedding)?));
}
if matches!(backend_kind, MemoryBackendKind::SqliteQdrantHybrid) {
let sqlite: Arc<dyn Memory> = Arc::new(build_sqlite_memory(
config,
workspace_dir,
&resolved_embedding,
)?);
let qdrant: Arc<dyn Memory> = Arc::new(build_qdrant_memory(config, &resolved_embedding)?);
return Ok(Box::new(SqliteQdrantHybridMemory::new(sqlite, qdrant)));
}
create_memory_with_builders(
@ -451,6 +476,21 @@ mod tests {
assert_eq!(mem.name(), "lucid");
}
#[test]
fn factory_sqlite_qdrant_hybrid() {
    // The factory should build the hybrid backend whenever a Qdrant URL is
    // configured alongside the "sqlite_qdrant_hybrid" backend key.
    let workspace = TempDir::new().unwrap();
    let qdrant_config = crate::config::QdrantConfig {
        url: Some("http://localhost:6333".to_string()),
        ..crate::config::QdrantConfig::default()
    };
    let memory_config = MemoryConfig {
        backend: "sqlite_qdrant_hybrid".to_string(),
        qdrant: qdrant_config,
        ..MemoryConfig::default()
    };
    let memory = create_memory(&memory_config, workspace.path(), None).unwrap();
    assert_eq!(memory.name(), "sqlite_qdrant_hybrid");
}
#[test]
fn factory_none_uses_noop_memory() {
let tmp = TempDir::new().unwrap();
@ -526,6 +566,26 @@ mod tests {
}
}
#[test]
fn factory_hybrid_requires_qdrant_url() {
    // Without a Qdrant URL the hybrid backend must refuse to construct.
    let workspace = TempDir::new().unwrap();
    let memory_config = MemoryConfig {
        backend: "sqlite_qdrant_hybrid".to_string(),
        qdrant: crate::config::QdrantConfig {
            url: None,
            ..crate::config::QdrantConfig::default()
        },
        ..MemoryConfig::default()
    };
    let result = create_memory(&memory_config, workspace.path(), None);
    // `.err().expect(...)` rather than `unwrap_err()` — presumably the Ok
    // value (boxed Memory) does not implement Debug; verify before changing.
    let error = result
        .err()
        .expect("hybrid backend should require qdrant url");
    let message = error.to_string();
    assert!(message.contains("Qdrant memory backend requires url"));
}
#[test]
fn resolve_embedding_config_uses_base_config_when_model_is_not_hint() {
let cfg = MemoryConfig {

View File

@ -779,6 +779,7 @@ fn default_model_for_provider(provider: &str) -> String {
"ollama" => "llama3.2".into(),
"llamacpp" => "ggml-org/gpt-oss-20b-GGUF".into(),
"sglang" | "vllm" | "osaurus" => "default".into(),
"copilot" => "default".into(),
"gemini" => "gemini-2.5-pro".into(),
"kimi-code" => "kimi-for-coding".into(),
"bedrock" => "anthropic.claude-sonnet-4-5-20250929-v1:0".into(),
@ -1225,6 +1226,10 @@ fn curated_models_for_provider(provider_name: &str) -> Vec<(String, String)> {
"Gemini 2.5 Flash-Lite (lowest cost)".to_string(),
),
],
"copilot" => vec![(
"default".to_string(),
"Copilot default model (recommended)".to_string(),
)],
_ => vec![("default".to_string(), "Default model".to_string())],
}
}
@ -2213,7 +2218,7 @@ async fn setup_workspace() -> Result<(PathBuf, PathBuf)> {
async fn setup_provider(workspace_dir: &Path) -> Result<(String, String, String, Option<String>)> {
// ── Tier selection ──
let tiers = vec![
"⭐ Recommended (OpenRouter, Venice, Anthropic, OpenAI, Gemini)",
"⭐ Recommended (OpenRouter, Venice, Anthropic, OpenAI, Gemini, GitHub Copilot)",
"⚡ Fast inference (Groq, Fireworks, Together AI, NVIDIA NIM)",
"🌐 Gateway / proxy (Vercel AI, Cloudflare AI, Amazon Bedrock)",
"🔬 Specialized (Moonshot/Kimi, GLM/Zhipu, MiniMax, Qwen/DashScope, Qianfan, Z.AI, Synthetic, OpenCode Zen, Cohere)",
@ -2240,6 +2245,10 @@ async fn setup_provider(workspace_dir: &Path) -> Result<(String, String, String,
"openai-codex",
"OpenAI Codex (ChatGPT subscription OAuth, no API key)",
),
(
"copilot",
"GitHub Copilot — OAuth device flow (Copilot subscription)",
),
("deepseek", "DeepSeek — V3 & R1 (affordable)"),
("mistral", "Mistral — Large & Codestral"),
("xai", "xAI — Grok 3 & 4"),
@ -2536,6 +2545,24 @@ async fn setup_provider(workspace_dir: &Path) -> Result<(String, String, String,
));
}
key
} else if canonical_provider_name(provider_name) == "copilot" {
print_bullet("GitHub Copilot uses GitHub OAuth device flow.");
print_bullet("Press Enter to keep setup keyless and authenticate on first run.");
print_bullet("Optional: paste a GitHub token now to skip the first-run device prompt.");
println!();
let key: String = Input::new()
.with_prompt(" Paste your GitHub token (optional; Enter = device flow)")
.allow_empty(true)
.interact_text()?;
if key.trim().is_empty() {
print_bullet(
"No token provided. ZeroClaw will open the GitHub device login flow on first use.",
);
}
key
} else if canonical_provider_name(provider_name) == "gemini" {
// Special handling for Gemini: check for CLI auth first
@ -3649,6 +3676,7 @@ fn setup_identity_backend() -> Result<IdentityConfig> {
);
IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some(default_path),
aieos_inline: None,
}
@ -3660,6 +3688,7 @@ fn setup_identity_backend() -> Result<IdentityConfig> {
);
IdentityConfig {
format: "openclaw".into(),
extra_files: Vec::new(),
aieos_path: None,
aieos_inline: None,
}
@ -6094,8 +6123,19 @@ fn print_summary(config: &Config) {
let mut step = 1u8;
let provider = config.default_provider.as_deref().unwrap_or("openrouter");
let canonical_provider = canonical_provider_name(provider);
if config.api_key.is_none() && !provider_supports_keyless_local_usage(provider) {
if provider == "openai-codex" {
if canonical_provider == "copilot" {
println!(
" {} Authenticate GitHub Copilot:",
style(format!("{step}.")).cyan().bold()
);
println!(" {}", style("zeroclaw agent -m \"Hello!\"").yellow());
println!(
" {}",
style("(device/OAuth prompt appears automatically on first run)").dim()
);
} else if canonical_provider == "openai-codex" {
println!(
" {} Authenticate OpenAI Codex:",
style(format!("{step}.")).cyan().bold()
@ -6104,7 +6144,7 @@ fn print_summary(config: &Config) {
" {}",
style("zeroclaw auth login --provider openai-codex --device-code").yellow()
);
} else if provider == "anthropic" {
} else if canonical_provider == "anthropic" {
println!(
" {} Configure Anthropic auth:",
style(format!("{step}.")).cyan().bold()
@ -6576,6 +6616,7 @@ mod tests {
};
let identity_config = crate::config::IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some("identity.aieos.json".into()),
aieos_inline: None,
};
@ -6605,6 +6646,7 @@ mod tests {
let ctx = ProjectContext::default();
let identity_config = crate::config::IdentityConfig {
format: "aieos".into(),
extra_files: Vec::new(),
aieos_path: Some("identity.aieos.json".into()),
aieos_inline: None,
};
@ -7250,6 +7292,7 @@ mod tests {
assert_eq!(default_model_for_provider("zai-cn"), "glm-5");
assert_eq!(default_model_for_provider("gemini"), "gemini-2.5-pro");
assert_eq!(default_model_for_provider("google"), "gemini-2.5-pro");
assert_eq!(default_model_for_provider("copilot"), "default");
assert_eq!(default_model_for_provider("kimi-code"), "kimi-for-coding");
assert_eq!(
default_model_for_provider("bedrock"),
@ -7343,6 +7386,18 @@ mod tests {
assert!(ids.contains(&"gpt-5.2-codex".to_string()));
}
#[test]
fn curated_models_for_copilot_have_default_entry() {
    // The Copilot provider exposes exactly one curated entry: the
    // subscription-selected "default" model with its recommendation label.
    let expected = vec![(
        "default".to_string(),
        "Copilot default model (recommended)".to_string(),
    )];
    assert_eq!(curated_models_for_provider("copilot"), expected);
}
#[test]
fn curated_models_for_openrouter_use_valid_anthropic_id() {
let ids: Vec<String> = curated_models_for_provider("openrouter")