fix(agent): add system prompt budgeting for small-context models (#4185)

For models with small context windows (e.g. glm-4.5-air, ~8K tokens), the system prompt alone can exceed the limit. This adds:

- `max_system_prompt_chars` config option (default 0 = unlimited)
- `compact_context` now also compacts the system prompt: it skips the Channel Capabilities section and shows only tool names
- Truncation with a marker when the prompt exceeds the budget

Users can set `max_system_prompt_chars = 8000` in the `[agent]` config to cap the system prompt for small-context models.

Closes #4124
fix(approval): support wildcard * in auto_approve and always_ask (#4184)
2026-03-21 19:40:21 -04:00 · 2026-03-21 19:38:11 -04:00 · 2026-03-21 19:06:37 -04:00 · 2026-03-21 18:03:38 -04:00 · 2026-03-21 16:20:50 -04:00 · 2026-03-21 14:15:21 -04:00
31 changed files with 737 additions and 97 deletions
@@ -102,6 +102,22 @@ jobs:
      - name: Clean web build artifacts
        run: rm -rf web/node_modules web/src web/package.json web/package-lock.json web/tsconfig*.json web/vite.config.ts web/index.html

+      - name: Publish aardvark-sys to crates.io
+        shell: bash
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+        run: |
+          OUTPUT=$(cargo publish --locked --allow-dirty --no-verify -p aardvark-sys 2>&1) && exit 0
+          echo "$OUTPUT"
+          if echo "$OUTPUT" | grep -q 'already exists'; then
+            echo "::notice::aardvark-sys already on crates.io — skipping"
+            exit 0
+          fi
+          exit 1
+
+      - name: Wait for aardvark-sys to index
+        run: sleep 15
+
      - name: Publish to crates.io
        shell: bash
        env:
@@ -67,6 +67,24 @@ jobs:
      - name: Clean web build artifacts
        run: rm -rf web/node_modules web/src web/package.json web/package-lock.json web/tsconfig*.json web/vite.config.ts web/index.html

+      - name: Publish aardvark-sys to crates.io
+        if: "!inputs.dry_run"
+        shell: bash
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+        run: |
+          OUTPUT=$(cargo publish --locked --allow-dirty --no-verify -p aardvark-sys 2>&1) && exit 0
+          echo "$OUTPUT"
+          if echo "$OUTPUT" | grep -q 'already exists'; then
+            echo "::notice::aardvark-sys already on crates.io — skipping"
+            exit 0
+          fi
+          exit 1
+
+      - name: Wait for aardvark-sys to index
+        if: "!inputs.dry_run"
+        run: sleep 15
+
      - name: Publish (dry run)
        if: inputs.dry_run
        run: cargo publish --dry-run --locked --allow-dirty --no-verify
@@ -323,6 +323,21 @@ jobs:
      - name: Clean web build artifacts
        run: rm -rf web/node_modules web/src web/package.json web/package-lock.json web/tsconfig*.json web/vite.config.ts web/index.html

+      - name: Publish aardvark-sys to crates.io
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+        run: |
+          OUTPUT=$(cargo publish --locked --allow-dirty --no-verify -p aardvark-sys 2>&1) && exit 0
+          echo "$OUTPUT"
+          if echo "$OUTPUT" | grep -q 'already exists'; then
+            echo "::notice::aardvark-sys already on crates.io — skipping"
+            exit 0
+          fi
+          exit 1
+
+      - name: Wait for aardvark-sys to index
+        run: sleep 15
+
      - name: Publish to crates.io
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
@@ -9203,7 +9203,7 @@ dependencies = [

 [[package]]
 name = "zeroclawlabs"
-version = "0.5.5"
+version = "0.5.6"
 dependencies = [
 "aardvark-sys",
 "anyhow",
@@ -4,7 +4,7 @@ resolver = "2"

 [package]
 name = "zeroclawlabs"
-version = "0.5.5"
+version = "0.5.6"
 edition = "2021"
 authors = ["theonlyhennygod"]
 license = "MIT OR Apache-2.0"
@@ -97,7 +97,7 @@ anyhow = "1.0"
 thiserror = "2.0"

 # Aardvark I2C/SPI/GPIO USB adapter (Total Phase) — stub when SDK absent
-aardvark-sys = { path = "crates/aardvark-sys" }
+aardvark-sys = { path = "crates/aardvark-sys", version = "0.1.0" }

 # UUID generation
 uuid = { version = "1.22", default-features = false, features = ["v4", "std"] }
@@ -27,6 +27,7 @@ COPY Cargo.toml Cargo.lock ./
 # Previously we used sed to drop `crates/robot-kit`, which made the manifest disagree
 # with the lockfile and caused `cargo --locked` to fail (Cargo refused to rewrite the lock).
 COPY crates/robot-kit/ crates/robot-kit/
+COPY crates/aardvark-sys/ crates/aardvark-sys/
 # Create dummy targets declared in Cargo.toml so manifest parsing succeeds.
 RUN mkdir -p src benches \
    && echo "fn main() {}" > src/main.rs \
@@ -1,6 +1,6 @@
 pkgbase = zeroclaw
 	pkgdesc = Zero overhead. Zero compromise. 100% Rust. The fastest, smallest AI assistant.
-	pkgver = 0.5.5
+	pkgver = 0.5.6
 	pkgrel = 1
 	url = https://github.com/zeroclaw-labs/zeroclaw
 	arch = x86_64
@@ -10,7 +10,7 @@ pkgbase = zeroclaw
 	makedepends = git
 	depends = gcc-libs
 	depends = openssl
-	source = zeroclaw-0.5.5.tar.gz::https://github.com/zeroclaw-labs/zeroclaw/archive/refs/tags/v0.5.5.tar.gz
+	source = zeroclaw-0.5.6.tar.gz::https://github.com/zeroclaw-labs/zeroclaw/archive/refs/tags/v0.5.6.tar.gz
 	sha256sums = SKIP

 pkgname = zeroclaw
@@ -1,6 +1,6 @@
 # Maintainer: zeroclaw-labs <bot@zeroclaw.dev>
 pkgname=zeroclaw
-pkgver=0.5.5
+pkgver=0.5.6
 pkgrel=1
 pkgdesc="Zero overhead. Zero compromise. 100% Rust. The fastest, smallest AI assistant."
 arch=('x86_64')
@@ -1,11 +1,11 @@
 {
-    "version": "0.5.5",
+    "version": "0.5.6",
    "description": "Zero overhead. Zero compromise. 100% Rust. The fastest, smallest AI assistant.",
    "homepage": "https://github.com/zeroclaw-labs/zeroclaw",
    "license": "MIT|Apache-2.0",
    "architecture": {
        "64bit": {
-            "url": "https://github.com/zeroclaw-labs/zeroclaw/releases/download/v0.5.5/zeroclaw-x86_64-pc-windows-msvc.zip",
+            "url": "https://github.com/zeroclaw-labs/zeroclaw/releases/download/v0.5.6/zeroclaw-x86_64-pc-windows-msvc.zip",
            "hash": "",
            "bin": "zeroclaw.exe"
        }
@@ -122,6 +122,34 @@ tools = ["mcp_browser_*"]
 keywords = ["browse", "navigate", "open url", "screenshot"]
 ```

+## `[pacing]`
+
+Pacing controls for slow/local LLM workloads (Ollama, llama.cpp, vLLM). All keys are optional; when absent, existing behavior is preserved.
+
+| Key | Default | Purpose |
+|---|---|---|
+| `step_timeout_secs` | _none_ | Per-step timeout: maximum seconds for a single LLM inference turn. Catches a truly hung model without terminating the overall task loop |
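+| `loop_detection_min_elapsed_secs` | _none_ | Minimum elapsed seconds before identical-output loop detection activates. Until a task has been running this long, repeated identical tool outputs are not counted toward a loop abort; after the threshold, repeated identical outputs abort the loop. When unset, identical-output detection is disabled |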
+| `loop_ignore_tools` | `[]` | Tool names excluded from identical-output loop detection. Useful for browser workflows where `browser_screenshot` structurally resembles a loop |
+| `message_timeout_scale_max` | `4` | Override for the hardcoded timeout scaling cap. The channel message timeout budget is `message_timeout_secs * min(max_tool_iterations, message_timeout_scale_max)` |
+
+Notes:
+
+- These settings are intended for local/slow LLM deployments. Cloud-provider users typically do not need them.
+- `step_timeout_secs` operates independently of the total channel message timeout budget. A step timeout abort does not consume the overall budget; the loop simply stops.
+- `loop_detection_min_elapsed_secs` delays identical-output loop-detection counting, not the task itself. When the key is unset (the default), identical-output detection is disabled and only the other safety limits (max iterations, overall timeout) apply.
+- `loop_ignore_tools` only suppresses tool-output-based loop detection for the listed tools. Other safety features (max iterations, overall timeout) remain active.
+- `message_timeout_scale_max` must be >= 1. Setting it higher than `max_tool_iterations` has no additional effect (the formula uses `min()`).
+- Example configuration for a slow local Ollama deployment:
+
+```toml
+[pacing]
+step_timeout_secs = 120
+loop_detection_min_elapsed_secs = 60
+loop_ignore_tools = ["browser_screenshot", "browser_navigate"]
+message_timeout_scale_max = 8
+```
+
 ## `[security.otp]`

 | Key | Default | Purpose |
@@ -425,6 +453,12 @@ Notes:
 | `port` | `42617` | gateway listen port |
 | `require_pairing` | `true` | require pairing before bearer auth |
 | `allow_public_bind` | `false` | block accidental public exposure |
+| `path_prefix` | _(none)_ | URL path prefix for reverse-proxy deployments (e.g. `"/zeroclaw"`) |
+
+When deploying behind a reverse proxy that maps ZeroClaw to a sub-path,
+set `path_prefix` to that sub-path (e.g. `"/zeroclaw"`). All gateway
+routes will be served under this prefix. The value must start with `/`
+and must not end with `/`.

 ## `[autonomy]`

@@ -597,7 +631,7 @@ Top-level channel options are configured under `channels_config`.

 | Key | Default | Purpose |
 |---|---|---|
-| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x) |
+| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x, overridable via `[pacing].message_timeout_scale_max`) |

 Examples:

@@ -612,7 +646,7 @@ Examples:
 Notes:

 - Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs.
- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, 4)` and a minimum of `1`.
+- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, cap)` and a minimum of `1`. The default cap is `4`; override with `[pacing].message_timeout_scale_max`.
 - This scaling avoids false timeouts when the first LLM turn is slow/retried but later tool-loop turns still need to complete.
 - If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower.
 - Values below `30` are clamped to `30` to avoid immediate timeout churn.
@@ -569,11 +569,29 @@ MSG
        exit 0
      fi
      # Detect un-accepted Xcode/CLT license (causes `cc` to exit 69).
-      if ! /usr/bin/xcrun --show-sdk-path >/dev/null 2>&1; then
-        warn "Xcode license has not been accepted. Run:"
-        warn "  sudo xcodebuild -license accept"
-        warn "then re-run this installer."
-        exit 1
+      # xcrun --show-sdk-path can succeed even without an accepted license,
+      # so we test-compile a trivial C file which reliably triggers the error.
+      _xcode_test_file="$(mktemp /tmp/zeroclaw-xcode-check.XXXXXX.c)"
+      printf 'int main(){return 0;}\n' > "$_xcode_test_file"
+      if ! cc -x c "$_xcode_test_file" -o /dev/null 2>/dev/null; then
+        rm -f "$_xcode_test_file"
+        warn "Xcode/CLT license has not been accepted. Attempting to accept it now..."
+        _xcode_accept_ok=false
+        if [[ "$(id -u)" -eq 0 ]]; then
+          xcodebuild -license accept && _xcode_accept_ok=true
+        elif [[ -c /dev/tty ]] && have_cmd sudo; then
+          sudo xcodebuild -license accept < /dev/tty && _xcode_accept_ok=true
+        fi
+        if [[ "$_xcode_accept_ok" == true ]]; then
+          step_ok "Xcode license accepted"
+        else
+          error "Could not accept Xcode license. Run manually:"
+          error "  sudo xcodebuild -license accept"
+          error "then re-run this installer."
+          exit 1
+        fi
+      else
+        rm -f "$_xcode_test_file"
      fi
      if ! have_cmd git; then
        warn "git is not available. Install git (e.g., Homebrew) and re-run bootstrap."
@@ -1175,6 +1193,43 @@ else
    install_system_deps
  fi

+  # Always check Xcode/CLT license on macOS, regardless of --install-system-deps.
+  # An un-accepted license causes `cc` to exit 69, breaking all Rust builds.
+  if [[ "$OS_NAME" == "Darwin" ]]; then
+    _xcode_test_file="$(mktemp /tmp/zeroclaw-xcode-check.XXXXXX.c)"
+    printf 'int main(){return 0;}\n' > "$_xcode_test_file"
+    if ! cc -x c "$_xcode_test_file" -o /dev/null 2>/dev/null; then
+      rm -f "$_xcode_test_file"
+      warn "Xcode/CLT license has not been accepted. Attempting to accept it now..."
+      # Use /dev/tty so sudo can prompt for a password even in a curl|bash pipe.
+      _xcode_accept_ok=false
+      if [[ "$(id -u)" -eq 0 ]]; then
+        xcodebuild -license accept && _xcode_accept_ok=true
+      elif [[ -c /dev/tty ]] && have_cmd sudo; then
+        sudo xcodebuild -license accept < /dev/tty && _xcode_accept_ok=true
+      fi
+      if [[ "$_xcode_accept_ok" == true ]]; then
+        step_ok "Xcode license accepted"
+        # Re-test compilation to confirm it's fixed.
+        _xcode_test_file="$(mktemp /tmp/zeroclaw-xcode-check.XXXXXX.c)"
+        printf 'int main(){return 0;}\n' > "$_xcode_test_file"
+        if ! cc -x c "$_xcode_test_file" -o /dev/null 2>/dev/null; then
+          rm -f "$_xcode_test_file"
+          error "C compiler still failing after license accept. Check your Xcode/CLT installation."
+          exit 1
+        fi
+        rm -f "$_xcode_test_file"
+      else
+        error "Could not accept Xcode license. Run manually:"
+        error "  sudo xcodebuild -license accept"
+        error "then re-run this installer."
+        exit 1
+      fi
+    else
+      rm -f "$_xcode_test_file"
+    fi
+  fi
+
  if [[ "$INSTALL_RUST" == true ]]; then
    install_rust_toolchain
  fi
@@ -1467,25 +1522,6 @@ if [[ -n "$ZEROCLAW_BIN" ]]; then
    if "$ZEROCLAW_BIN" service restart 2>/dev/null; then
      step_ok "Gateway service restarted"

-      # Fetch and display pairing code from running gateway
-      PAIR_CODE=""
-      for i in 1 2 3 4 5; do
-        sleep 2
-        if PAIR_CODE=$("$ZEROCLAW_BIN" gateway get-paircode 2>/dev/null | grep -oE '[0-9]{6}'); then
-          break
-        fi
-      done
-      if [[ -n "$PAIR_CODE" ]]; then
-        echo
-        echo -e "  ${BOLD_BLUE}🔐 Gateway Pairing Code${RESET}"
-        echo
-        echo -e "  ${BOLD_BLUE}┌──────────────┐${RESET}"
-        echo -e "  ${BOLD_BLUE}│${RESET}  ${BOLD}${PAIR_CODE}${RESET}  ${BOLD_BLUE}│${RESET}"
-        echo -e "  ${BOLD_BLUE}└──────────────┘${RESET}"
-        echo
-        echo -e "  ${DIM}Enter this code in the dashboard to pair your device.${RESET}"
-        echo -e "  ${DIM}Run 'zeroclaw gateway get-paircode --new' anytime to generate a fresh code.${RESET}"
-      fi
    else
      step_fail "Gateway service restart failed — re-run with zeroclaw service start"
    fi
@@ -1532,7 +1568,6 @@ GATEWAY_PORT=42617
 DASHBOARD_URL="http://127.0.0.1:${GATEWAY_PORT}"
 echo
 echo -e "${BOLD}Dashboard URL:${RESET} ${BLUE}${DASHBOARD_URL}${RESET}"
-echo -e "${DIM}  Run 'zeroclaw gateway get-paircode' to get your pairing code.${RESET}"

 # --- Copy to clipboard ---
 COPIED_TO_CLIPBOARD=false
@@ -2331,6 +2331,7 @@ pub(crate) async fn agent_turn(
        dedup_exempt_tools,
        activated_tools,
        model_switch_callback,
+        &crate::config::PacingConfig::default(),
    )
    .await
 }
@@ -2640,6 +2641,7 @@ pub(crate) async fn run_tool_call_loop(
    dedup_exempt_tools: &[String],
    activated_tools: Option<&std::sync::Arc<std::sync::Mutex<crate::tools::ActivatedToolSet>>>,
    model_switch_callback: Option<ModelSwitchCallback>,
+    pacing: &crate::config::PacingConfig,
 ) -> Result<String> {
    let max_iterations = if max_tool_iterations == 0 {
        DEFAULT_MAX_TOOL_ITERATIONS
@@ -2648,6 +2650,14 @@ pub(crate) async fn run_tool_call_loop(
    };

    let turn_id = Uuid::new_v4().to_string();
+    let loop_started_at = Instant::now();
+    let loop_ignore_tools: HashSet<&str> = pacing
+        .loop_ignore_tools
+        .iter()
+        .map(String::as_str)
+        .collect();
+    let mut consecutive_identical_outputs: usize = 0;
+    let mut last_tool_output_hash: Option<u64> = None;

    for iteration in 0..max_iterations {
        let mut seen_tool_signatures: HashSet<(String, String)> = HashSet::new();
@@ -2777,13 +2787,43 @@ pub(crate) async fn run_tool_call_loop(
            temperature,
        );

-        let chat_result = if let Some(token) = cancellation_token.as_ref() {
-            tokio::select! {
-                () = token.cancelled() => return Err(ToolLoopCancelled.into()),
-                result = chat_future => result,
+        // Wrap the LLM call with an optional per-step timeout from pacing config.
+        // This catches a truly hung model response without terminating the overall
+        // task loop (the per-message budget handles that separately).
+        let chat_result = match pacing.step_timeout_secs {
+            Some(step_secs) if step_secs > 0 => {
+                let step_timeout = Duration::from_secs(step_secs);
+                if let Some(token) = cancellation_token.as_ref() {
+                    tokio::select! {
+                        () = token.cancelled() => return Err(ToolLoopCancelled.into()),
+                        result = tokio::time::timeout(step_timeout, chat_future) => {
+                            match result {
+                                Ok(inner) => inner,
+                                Err(_) => anyhow::bail!(
+                                    "LLM inference step timed out after {step_secs}s (step_timeout_secs)"
+                                ),
+                            }
+                        },
+                    }
+                } else {
+                    match tokio::time::timeout(step_timeout, chat_future).await {
+                        Ok(inner) => inner,
+                        Err(_) => anyhow::bail!(
+                            "LLM inference step timed out after {step_secs}s (step_timeout_secs)"
+                        ),
+                    }
+                }
+            }
+            _ => {
+                if let Some(token) = cancellation_token.as_ref() {
+                    tokio::select! {
+                        () = token.cancelled() => return Err(ToolLoopCancelled.into()),
+                        result = chat_future => result,
+                    }
+                } else {
+                    chat_future.await
+                }
            }
-        } else {
-            chat_future.await
        };

        let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) =
@@ -3282,7 +3322,13 @@ pub(crate) async fn run_tool_call_loop(
            ordered_results[*idx] = Some((call.name.clone(), call.tool_call_id.clone(), outcome));
        }

+        // Collect tool results and build per-tool output for loop detection.
+        // Only non-ignored tool outputs contribute to the identical-output hash.
+        let mut detection_relevant_output = String::new();
        for (tool_name, tool_call_id, outcome) in ordered_results.into_iter().flatten() {
+            if !loop_ignore_tools.contains(tool_name.as_str()) {
+                detection_relevant_output.push_str(&outcome.output);
+            }
            individual_results.push((tool_call_id, outcome.output.clone()));
            let _ = writeln!(
                tool_results,
@@ -3291,6 +3337,53 @@ pub(crate) async fn run_tool_call_loop(
            );
        }

+        // ── Time-gated loop detection ──────────────────────────
+        // When pacing.loop_detection_min_elapsed_secs is set, identical-output
+        // loop detection activates after the task has been running that long.
+        // This avoids false-positive aborts on long-running browser/research
+        // workflows while keeping aggressive protection for quick tasks.
+        // When not configured, identical-output detection is disabled (preserving
+        // existing behavior where only max_iterations prevents runaway loops).
+        let loop_detection_active = match pacing.loop_detection_min_elapsed_secs {
+            Some(min_secs) => loop_started_at.elapsed() >= Duration::from_secs(min_secs),
+            None => false, // disabled when not configured (backwards compatible)
+        };
+
+        if loop_detection_active && !detection_relevant_output.is_empty() {
+            use std::hash::{Hash, Hasher};
+            let mut hasher = std::collections::hash_map::DefaultHasher::new();
+            detection_relevant_output.hash(&mut hasher);
+            let current_hash = hasher.finish();
+
+            if last_tool_output_hash == Some(current_hash) {
+                consecutive_identical_outputs += 1;
+            } else {
+                consecutive_identical_outputs = 0;
+                last_tool_output_hash = Some(current_hash);
+            }
+
+            // Bail if we see 3+ consecutive identical tool outputs (clear runaway).
+            if consecutive_identical_outputs >= 3 {
+                runtime_trace::record_event(
+                    "tool_loop_identical_output_abort",
+                    Some(channel_name),
+                    Some(provider_name),
+                    Some(model),
+                    Some(&turn_id),
+                    Some(false),
+                    Some("identical tool output detected 3 consecutive times"),
+                    serde_json::json!({
+                        "iteration": iteration + 1,
+                        "consecutive_identical": consecutive_identical_outputs,
+                    }),
+                );
+                anyhow::bail!(
+                    "Agent loop aborted: identical tool output detected {} consecutive times",
+                    consecutive_identical_outputs
+                );
+            }
+        }
+
        // Add assistant message with tool calls + tool results to history.
        // Native mode: use JSON-structured messages so convert_messages() can
        // reconstruct proper OpenAI-format tool_calls and tool result messages.
@@ -3740,6 +3833,8 @@ pub async fn run(
        Some(&config.autonomy),
        native_tools,
        config.skills.prompt_injection_mode,
+        config.agent.compact_context,
+        config.agent.max_system_prompt_chars,
    );

    // Append structured tool-use instructions with schemas (only for non-native providers)
@@ -3840,6 +3935,7 @@ pub async fn run(
                &config.agent.tool_call_dedup_exempt,
                activated_handle.as_ref(),
                Some(model_switch_callback.clone()),
+                &config.pacing,
            )
            .await
            {
@@ -4067,6 +4163,7 @@ pub async fn run(
                    &config.agent.tool_call_dedup_exempt,
                    activated_handle.as_ref(),
                    Some(model_switch_callback.clone()),
+                    &config.pacing,
                )
                .await
                {
@@ -4397,6 +4494,8 @@ pub async fn process_message(
        Some(&config.autonomy),
        native_tools,
        config.skills.prompt_injection_mode,
+        config.agent.compact_context,
+        config.agent.max_system_prompt_chars,
    );
    if !native_tools {
        system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs)));
@@ -4964,6 +5063,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect_err("provider without vision support should fail");
@@ -5014,6 +5114,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect_err("oversized payload must fail");
@@ -5058,6 +5159,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("valid multimodal payload should pass");
@@ -5188,6 +5290,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("parallel execution should complete");
@@ -5258,6 +5361,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("cron_add delivery defaults should be injected");
@@ -5320,6 +5424,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("explicit delivery mode should be preserved");
@@ -5377,6 +5482,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("loop should finish after deduplicating repeated calls");
@@ -5446,6 +5552,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("non-interactive shell should succeed for low-risk command");
@@ -5506,6 +5613,7 @@ mod tests {
            &exempt,
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("loop should finish with exempt tool executing twice");
@@ -5586,6 +5694,7 @@ mod tests {
            &exempt,
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("loop should complete");
@@ -5643,6 +5752,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("native fallback id flow should complete");
@@ -5724,6 +5834,7 @@ mod tests {
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("native tool-call text should be relayed through on_delta");
@@ -7709,6 +7820,7 @@ Let me check the result."#;
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("tool loop should complete");
@@ -7856,6 +7968,7 @@ Let me check the result."#;
                    &[],
                    None,
                    None,
+                    &crate::config::PacingConfig::default(),
                ),
            )
            .await
@@ -7934,6 +8047,7 @@ Let me check the result."#;
                    &[],
                    None,
                    None,
+                    &crate::config::PacingConfig::default(),
                ),
            )
            .await
@@ -7988,6 +8102,7 @@ Let me check the result."#;
            &[],
            None,
            None,
+            &crate::config::PacingConfig::default(),
        )
        .await
        .expect("should succeed without cost scope");
@@ -122,7 +122,7 @@ impl ApprovalManager {
        }

        // always_ask overrides everything.
-        if self.always_ask.contains(tool_name) {
+        if self.always_ask.contains("*") || self.always_ask.contains(tool_name) {
            return true;
        }

@@ -136,7 +136,7 @@ impl ApprovalManager {
        }

        // auto_approve skips the prompt.
-        if self.auto_approve.contains(tool_name) {
+        if self.auto_approve.contains("*") || self.auto_approve.contains(tool_name) {
            return false;
        }

@@ -222,9 +222,21 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 {
 fn channel_message_timeout_budget_secs(
    message_timeout_secs: u64,
    max_tool_iterations: usize,
+) -> u64 {
+    channel_message_timeout_budget_secs_with_cap(
+        message_timeout_secs,
+        max_tool_iterations,
+        CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP,
+    )
+}
+
+fn channel_message_timeout_budget_secs_with_cap(
+    message_timeout_secs: u64,
+    max_tool_iterations: usize,
+    scale_cap: u64,
 ) -> u64 {
    let iterations = max_tool_iterations.max(1) as u64;
-    let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
+    let scale = iterations.min(scale_cap);
    message_timeout_secs.saturating_mul(scale)
 }

@@ -362,6 +374,7 @@ struct ChannelRuntimeContext {
    approval_manager: Arc<ApprovalManager>,
    activated_tools: Option<std::sync::Arc<std::sync::Mutex<crate::tools::ActivatedToolSet>>>,
    cost_tracking: Option<ChannelCostTrackingState>,
+    pacing: crate::config::PacingConfig,
 }

 #[derive(Clone)]
@@ -2402,8 +2415,15 @@ async fn process_channel_message(
    }

    let model_switch_callback = get_model_switch_state();
-    let timeout_budget_secs =
-        channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations);
+    let scale_cap = ctx
+        .pacing
+        .message_timeout_scale_max
+        .unwrap_or(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
+    let timeout_budget_secs = channel_message_timeout_budget_secs_with_cap(
+        ctx.message_timeout_secs,
+        ctx.max_tool_iterations,
+        scale_cap,
+    );
    let cost_tracking_context = ctx.cost_tracking.clone().map(|state| {
        crate::agent::loop_::ToolLoopCostTrackingContext::new(state.tracker, state.prices)
    });
@@ -2445,6 +2465,7 @@ async fn process_channel_message(
                    ctx.tool_call_dedup_exempt.as_ref(),
                    ctx.activated_tools.as_ref(),
                    Some(model_switch_callback.clone()),
+                    &ctx.pacing,
                ),
                ),
            ) => LlmExecutionResult::Completed(result),
@@ -3107,9 +3128,12 @@ pub fn build_system_prompt_with_mode(
        Some(&autonomy_cfg),
        native_tools,
        skills_prompt_mode,
+        false,
+        0,
    )
 }

+#[allow(clippy::too_many_arguments)]
 pub fn build_system_prompt_with_mode_and_autonomy(
    workspace_dir: &std::path::Path,
    model_name: &str,
@@ -3120,6 +3144,8 @@ pub fn build_system_prompt_with_mode_and_autonomy(
    autonomy_config: Option<&crate::config::AutonomyConfig>,
    native_tools: bool,
    skills_prompt_mode: crate::config::SkillsPromptInjectionMode,
+    compact_context: bool,
+    max_system_prompt_chars: usize,
 ) -> String {
    use std::fmt::Write;
    let mut prompt = String::with_capacity(8192);
@@ -3146,11 +3172,19 @@ pub fn build_system_prompt_with_mode_and_autonomy(
    // ── 1. Tooling ──────────────────────────────────────────────
    if !tools.is_empty() {
        prompt.push_str("## Tools\n\n");
-        prompt.push_str("You have access to the following tools:\n\n");
-        for (name, desc) in tools {
-            let _ = writeln!(prompt, "- **{name}**: {desc}");
+        if compact_context {
+            // Compact mode: tool names only, no descriptions/schemas
+            prompt.push_str("Available tools: ");
+            let names: Vec<&str> = tools.iter().map(|(name, _)| *name).collect();
+            prompt.push_str(&names.join(", "));
+            prompt.push_str("\n\n");
+        } else {
+            prompt.push_str("You have access to the following tools:\n\n");
+            for (name, desc) in tools {
+                let _ = writeln!(prompt, "- **{name}**: {desc}");
+            }
+            prompt.push('\n');
        }
-        prompt.push('\n');
    }

    // ── 1b. Hardware (when gpio/arduino tools present) ───────────
@@ -3294,11 +3328,13 @@ pub fn build_system_prompt_with_mode_and_autonomy(
        std::env::consts::OS,
    );

-    // ── 8. Channel Capabilities ─────────────────────────────────────
-    prompt.push_str("## Channel Capabilities\n\n");
-    prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
-    prompt.push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
-    prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
+    // ── 8. Channel Capabilities (skipped in compact_context mode) ──
+    if !compact_context {
+        prompt.push_str("## Channel Capabilities\n\n");
+        prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
+        prompt
+            .push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
+        prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
        Some(crate::security::AutonomyLevel::Full) => {
            "- If the runtime policy already allows a tool, use it directly; do not ask the user for extra approval.\n\
             - Never pretend you are waiting for a human approval click or confirmation when the runtime policy already permits the action.\n\
@@ -3312,10 +3348,23 @@ pub fn build_system_prompt_with_mode_and_autonomy(
             - If there is no approval path for this channel or the runtime blocks an action, explain that restriction directly instead of simulating an approval flow.\n"
        }
    });
-    prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
-    prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
-    prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
-    prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
+        prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
+        prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
+        prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
+        prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
+    } // end if !compact_context (Channel Capabilities)
+
+    // ── 9. Truncation (max_system_prompt_chars budget) ──────────
+    if max_system_prompt_chars > 0 && prompt.len() > max_system_prompt_chars {
+        // Truncate on a char boundary, keeping the top portion (identity + safety).
+        let mut end = max_system_prompt_chars;
+        // Ensure we don't split a multi-byte UTF-8 character.
+        while !prompt.is_char_boundary(end) && end > 0 {
+            end -= 1;
+        }
+        prompt.truncate(end);
+        prompt.push_str("\n\n[System prompt truncated to fit context budget]\n");
+    }

    if prompt.is_empty() {
        "You are ZeroClaw, a fast and efficient AI assistant built in Rust. Be helpful, concise, and direct."
@@ -4431,6 +4480,8 @@ pub async fn start_channels(config: Config) -> Result<()> {
        Some(&config.autonomy),
        native_tools,
        config.skills.prompt_injection_mode,
+        config.agent.compact_context,
+        config.agent.max_system_prompt_chars,
    );
    if !native_tools {
        system_prompt.push_str(&build_tool_instructions(
@@ -4641,6 +4692,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
            tracker,
            prices: Arc::new(config.cost.prices.clone()),
        }),
+        pacing: config.pacing.clone(),
    });

    // Hydrate in-memory conversation histories from persisted JSONL session files.
@@ -4737,6 +4789,49 @@ mod tests {
        );
    }

+    #[test]
+    fn channel_message_timeout_budget_with_custom_scale_cap() {
+        // Each row: (base timeout secs, tool iterations, scale cap, expected budget secs).
+        let cases = [
+            // Iterations equal to the cap.
+            (300, 8, 8, 300 * 8),
+            // Iterations above the cap.
+            (300, 20, 8, 300 * 8),
+            // A cap of 1 leaves the base timeout unscaled.
+            (300, 10, 1, 300),
+        ];
+
+        for (base_secs, iterations, scale_cap, expected_secs) in cases {
+            assert_eq!(
+                channel_message_timeout_budget_secs_with_cap(base_secs, iterations, scale_cap),
+                expected_secs
+            );
+        }
+    }
+
+    #[test]
+    fn pacing_config_defaults_preserve_existing_behavior() {
+        // Destructure exhaustively so every pacing knob is checked by name.
+        let crate::config::PacingConfig {
+            step_timeout_secs,
+            loop_detection_min_elapsed_secs,
+            loop_ignore_tools,
+            message_timeout_scale_max,
+        } = crate::config::PacingConfig::default();
+
+        assert!(step_timeout_secs.is_none());
+        assert!(loop_detection_min_elapsed_secs.is_none());
+        assert!(loop_ignore_tools.is_empty());
+        assert!(message_timeout_scale_max.is_none());
+    }
+
+    #[test]
+    fn pacing_message_timeout_scale_max_overrides_default_cap() {
+        // With 10 iterations, a custom cap of 8 yields eight times the base timeout.
+        let custom_cap_budget = channel_message_timeout_budget_secs_with_cap(300, 10, 8);
+        assert_eq!(custom_cap_budget, 300 * 8);
+
+        // Passing the built-in cap constant reproduces the standard budget.
+        let default_cap_budget = channel_message_timeout_budget_secs_with_cap(
+            300,
+            10,
+            CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP,
+        );
+        assert_eq!(default_cap_budget, 300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
+    }
+
    #[test]
    fn context_window_overflow_error_detector_matches_known_messages() {
        let overflow_err = anyhow::anyhow!(
@@ -4941,6 +5036,7 @@ mod tests {
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        };

        assert!(compact_sender_history(&ctx, &sender));
@@ -5057,6 +5153,7 @@ mod tests {
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        };

        append_sender_turn(&ctx, &sender, ChatMessage::user("hello"));
@@ -5129,6 +5226,7 @@ mod tests {
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        };

        assert!(rollback_orphan_user_turn(&ctx, &sender, "pending"));
@@ -5220,6 +5318,7 @@ mod tests {
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        };

        assert!(rollback_orphan_user_turn(
@@ -5761,6 +5860,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -5842,6 +5942,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -5937,6 +6038,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6017,6 +6119,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6107,6 +6210,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6218,6 +6322,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6310,6 +6415,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6417,6 +6523,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6509,6 +6616,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6591,6 +6699,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -6788,6 +6897,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(4);
@@ -6890,6 +7000,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(8);
@@ -7007,6 +7118,7 @@ BTC is currently around $65,000 based on latest tool output."#
            activated_tools: None,
            cost_tracking: None,
            query_classification: crate::config::QueryClassificationConfig::default(),
+            pacing: crate::config::PacingConfig::default(),
        });

        let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(8);
@@ -7121,6 +7233,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(8);
@@ -7217,6 +7330,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -7297,6 +7411,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -7689,6 +7804,8 @@ BTC is currently around $65,000 based on latest tool output."#
            Some(&config),
            false,
            crate::config::SkillsPromptInjectionMode::Full,
+            false,
+            0,
        );

        assert!(
@@ -7718,6 +7835,8 @@ BTC is currently around $65,000 based on latest tool output."#
            Some(&config),
            false,
            crate::config::SkillsPromptInjectionMode::Full,
+            false,
+            0,
        );

        assert!(
@@ -8063,6 +8182,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -8194,6 +8314,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -8365,6 +8486,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -8473,6 +8595,7 @@ BTC is currently around $65,000 based on latest tool output."#
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -9045,6 +9168,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        // Simulate a photo attachment message with [IMAGE:] marker.
@@ -9132,6 +9256,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -9294,6 +9419,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -9405,6 +9531,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -9508,6 +9635,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -9631,6 +9759,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        process_channel_message(
@@ -9892,6 +10021,7 @@ This is an example JSON object for profile settings."#;
            )),
            activated_tools: None,
            cost_tracking: None,
+            pacing: crate::config::PacingConfig::default(),
        });

        let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(8);
@@ -21,7 +21,7 @@ pub use schema::{
    MatrixConfig, McpConfig, McpServerConfig, McpTransport, MemoryConfig, Microsoft365Config,
    ModelRouteConfig, MultimodalConfig, NextcloudTalkConfig, NodeTransportConfig, NodesConfig,
    NotionConfig, ObservabilityConfig, OpenAiSttConfig, OpenAiTtsConfig, OpenVpnTunnelConfig,
-    OtpConfig, OtpMethod, PeripheralBoardConfig, PeripheralsConfig, PluginsConfig,
+    OtpConfig, OtpMethod, PacingConfig, PeripheralBoardConfig, PeripheralsConfig, PluginsConfig,
    ProjectIntelConfig, ProxyConfig, ProxyScope, QdrantConfig, QueryClassificationConfig,
    ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig,
    SchedulerConfig, SecretsConfig, SecurityConfig, SecurityOpsConfig, SkillCreationConfig,
@@ -165,6 +165,10 @@ pub struct Config {
    #[serde(default)]
    pub agent: AgentConfig,

+    /// Pacing controls for slow/local LLM workloads (`[pacing]`).
+    #[serde(default)]
+    pub pacing: PacingConfig,
+
    /// Skills loading and community repository behavior (`[skills]`).
    #[serde(default)]
    pub skills: SkillsConfig,
@@ -1244,6 +1248,12 @@ pub struct AgentConfig {
    /// Default: `[]` (no filtering — all tools included).
    #[serde(default)]
    pub tool_filter_groups: Vec<ToolFilterGroup>,
+    /// Byte budget (UTF-8 length, not a character count) for the assembled system
+    /// prompt. When `> 0`, a longer prompt is cut back to at most this many bytes,
+    /// keeping the top portion (which contains identity and safety instructions);
+    /// a short truncation notice is then appended and is not counted against the
+    /// limit. `0` means unlimited.
+    /// Useful for small-context models (e.g. glm-4.5-air ~8K tokens → set to 8000).
+    #[serde(default = "default_max_system_prompt_chars")]
+    pub max_system_prompt_chars: usize,
 }

 fn default_agent_max_tool_iterations() -> usize {
@@ -1262,6 +1272,10 @@ fn default_agent_tool_dispatcher() -> String {
    "auto".into()
 }

+/// Serde default for `AgentConfig::max_system_prompt_chars`.
+///
+/// Returns `0`, which that field documents as "unlimited" (no truncation).
+fn default_max_system_prompt_chars() -> usize {
+    0
+}
+
 impl Default for AgentConfig {
    fn default() -> Self {
        Self {
@@ -1273,10 +1287,48 @@ impl Default for AgentConfig {
            tool_dispatcher: default_agent_tool_dispatcher(),
            tool_call_dedup_exempt: Vec::new(),
            tool_filter_groups: Vec::new(),
+            max_system_prompt_chars: default_max_system_prompt_chars(),
        }
    }
 }

+// ── Pacing ────────────────────────────────────────────────────────
+
+/// Pacing controls for slow/local LLM workloads (`[pacing]` section).
+///
+/// All fields are optional and default to values that preserve existing
+/// behavior. When set, they extend — not replace — the existing timeout
+/// and loop-detection subsystems.
+///
+/// Every field carries `#[serde(default)]`, so an omitted key deserializes to
+/// `None` (for the `Option` fields) or an empty list (for `loop_ignore_tools`).
+///
+/// Example:
+///
+/// ```toml
+/// [pacing]
+/// step_timeout_secs = 120
+/// loop_detection_min_elapsed_secs = 60
+/// loop_ignore_tools = ["browser_screenshot", "browser_navigate"]
+/// message_timeout_scale_max = 8
+/// ```
+#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
+pub struct PacingConfig {
+    /// Per-step timeout in seconds: the maximum time allowed for a single
+    /// LLM inference turn, independent of the total message budget.
+    /// `None` means no per-step timeout (existing behavior).
+    #[serde(default)]
+    pub step_timeout_secs: Option<u64>,
+
+    /// Minimum elapsed seconds before loop detection activates.
+    /// Tasks completing under this threshold get aggressive loop protection;
+    /// longer-running tasks receive a grace period before the detector starts
+    /// counting. `None` means loop detection is always active (existing behavior).
+    #[serde(default)]
+    pub loop_detection_min_elapsed_secs: Option<u64>,
+
+    /// Tool names excluded from identical-output / alternating-pattern loop
+    /// detection. Useful for browser workflows where `browser_screenshot`
+    /// structurally resembles a loop even when making progress.
+    /// Empty by default (no tools are exempt).
+    #[serde(default)]
+    pub loop_ignore_tools: Vec<String>,
+
+    /// Override for the hardcoded timeout scaling cap (default: 4).
+    /// The channel message timeout budget is computed as:
+    ///   `message_timeout_secs * min(max_tool_iterations, message_timeout_scale_max)`
+    /// Raising this value lets long multi-step tasks with slow local models
+    /// receive a proportionally larger budget without inflating the base timeout.
+    /// `None` leaves the override unset.
+    #[serde(default)]
+    pub message_timeout_scale_max: Option<u64>,
+}
+
 /// Skills loading configuration (`[skills]` section).
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
 #[serde(rename_all = "snake_case")]
@@ -6727,6 +6779,7 @@ impl Default for Config {
            reliability: ReliabilityConfig::default(),
            scheduler: SchedulerConfig::default(),
            agent: AgentConfig::default(),
+            pacing: PacingConfig::default(),
            skills: SkillsConfig::default(),
            model_routes: Vec::new(),
            embedding_routes: Vec::new(),
@@ -9673,6 +9726,7 @@ default_temperature = 0.7
            google_workspace: GoogleWorkspaceConfig::default(),
            proxy: ProxyConfig::default(),
            agent: AgentConfig::default(),
+            pacing: PacingConfig::default(),
            identity: IdentityConfig::default(),
            cost: CostConfig::default(),
            peripherals: PeripheralsConfig::default(),
@@ -9944,6 +9998,47 @@ tool_dispatcher = "xml"
        assert_eq!(parsed.agent.tool_dispatcher, "xml");
    }

+    #[test]
+    async fn pacing_config_defaults_are_all_none_or_empty() {
+        // A default-constructed pacing section must leave every knob unset.
+        let defaults = PacingConfig::default();
+
+        let timeouts_unset = defaults.step_timeout_secs.is_none()
+            && defaults.loop_detection_min_elapsed_secs.is_none()
+            && defaults.message_timeout_scale_max.is_none();
+        assert!(timeouts_unset);
+        assert!(defaults.loop_ignore_tools.is_empty());
+    }
+
+    #[test]
+    async fn pacing_config_deserializes_from_toml() {
+        // A fully populated `[pacing]` table must round-trip into every field.
+        let toml_src = r#"
+default_temperature = 0.7
+[pacing]
+step_timeout_secs = 120
+loop_detection_min_elapsed_secs = 60
+loop_ignore_tools = ["browser_screenshot", "browser_navigate"]
+message_timeout_scale_max = 8
+"#;
+        let pacing = toml::from_str::<Config>(toml_src).unwrap().pacing;
+
+        assert_eq!(pacing.step_timeout_secs, Some(120));
+        assert_eq!(pacing.loop_detection_min_elapsed_secs, Some(60));
+        assert_eq!(
+            pacing.loop_ignore_tools,
+            vec!["browser_screenshot", "browser_navigate"]
+        );
+        assert_eq!(pacing.message_timeout_scale_max, Some(8));
+    }
+
+    #[test]
+    async fn pacing_config_absent_preserves_defaults() {
+        // No `[pacing]` table at all: the section must fall back to its defaults.
+        let toml_src = r#"
+default_temperature = 0.7
+"#;
+        let pacing = toml::from_str::<Config>(toml_src).unwrap().pacing;
+
+        assert!(pacing.step_timeout_secs.is_none());
+        assert!(pacing.loop_detection_min_elapsed_secs.is_none());
+        assert!(pacing.loop_ignore_tools.is_empty());
+        assert!(pacing.message_timeout_scale_max.is_none());
+    }
+
    #[tokio::test]
    async fn sync_directory_handles_existing_directory() {
        let dir = std::env::temp_dir().join(format!(
@@ -10012,6 +10107,7 @@ tool_dispatcher = "xml"
            google_workspace: GoogleWorkspaceConfig::default(),
            proxy: ProxyConfig::default(),
            agent: AgentConfig::default(),
+            pacing: PacingConfig::default(),
            identity: IdentityConfig::default(),
            cost: CostConfig::default(),
            peripherals: PeripheralsConfig::default(),
@@ -1438,6 +1438,7 @@ mod tests {
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
+            path_prefix: String::new(),
        }
    }

@@ -348,6 +348,8 @@ pub struct AppState {
    pub shutdown_tx: tokio::sync::watch::Sender<bool>,
    /// Registry of dynamically connected nodes
    pub node_registry: Arc<nodes::NodeRegistry>,
+    /// Path prefix for reverse-proxy deployments (empty string = no prefix)
+    pub path_prefix: String,
    /// Session backend for persisting gateway WS chat sessions
    pub session_backend: Option<Arc<dyn SessionBackend>>,
    /// Device registry for paired device management
@@ -673,6 +675,13 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
        idempotency_max_keys,
    ));

+    // Resolve optional path prefix for reverse-proxy deployments.
+    let path_prefix: Option<&str> = config
+        .gateway
+        .path_prefix
+        .as_deref()
+        .filter(|p| !p.is_empty());
+
    // ── Tunnel ────────────────────────────────────────────────
    let tunnel = crate::tunnel::create_tunnel(&config.tunnel)?;
    let mut tunnel_url: Option<String> = None;
@@ -691,18 +700,19 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
        }
    }

-    println!("🦀 ZeroClaw Gateway listening on http://{display_addr}");
+    let pfx = path_prefix.unwrap_or("");
+    println!("🦀 ZeroClaw Gateway listening on http://{display_addr}{pfx}");
    if let Some(ref url) = tunnel_url {
        println!("  🌐 Public URL: {url}");
    }
-    println!("  🌐 Web Dashboard: http://{display_addr}/");
+    println!("  🌐 Web Dashboard: http://{display_addr}{pfx}/");
    if let Some(code) = pairing.pairing_code() {
        println!();
        println!("  🔐 PAIRING REQUIRED — use this one-time code:");
        println!("     ┌──────────────┐");
        println!("     │  {code}  │");
        println!("     └──────────────┘");
-        println!();
+        println!("     Send: POST {pfx}/pair with header X-Pairing-Code: {code}");
    } else if pairing.require_pairing() {
        println!("  🔒 Pairing: ACTIVE (bearer token required)");
        println!("     To pair a new device: zeroclaw gateway get-paircode --new");
@@ -711,29 +721,29 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
        println!("  ⚠️  Pairing: DISABLED (all requests accepted)");
        println!();
    }
-    println!("  POST /pair      — pair a new client (X-Pairing-Code header)");
-    println!("  POST /webhook   — {{\"message\": \"your prompt\"}}");
+    println!("  POST {pfx}/pair      — pair a new client (X-Pairing-Code header)");
+    println!("  POST {pfx}/webhook   — {{\"message\": \"your prompt\"}}");
    if whatsapp_channel.is_some() {
-        println!("  GET  /whatsapp  — Meta webhook verification");
-        println!("  POST /whatsapp  — WhatsApp message webhook");
+        println!("  GET  {pfx}/whatsapp  — Meta webhook verification");
+        println!("  POST {pfx}/whatsapp  — WhatsApp message webhook");
    }
    if linq_channel.is_some() {
-        println!("  POST /linq      — Linq message webhook (iMessage/RCS/SMS)");
+        println!("  POST {pfx}/linq      — Linq message webhook (iMessage/RCS/SMS)");
    }
    if wati_channel.is_some() {
-        println!("  GET  /wati      — WATI webhook verification");
-        println!("  POST /wati      — WATI message webhook");
+        println!("  GET  {pfx}/wati      — WATI webhook verification");
+        println!("  POST {pfx}/wati      — WATI message webhook");
    }
    if nextcloud_talk_channel.is_some() {
-        println!("  POST /nextcloud-talk — Nextcloud Talk bot webhook");
+        println!("  POST {pfx}/nextcloud-talk — Nextcloud Talk bot webhook");
    }
-    println!("  GET  /api/*     — REST API (bearer token required)");
-    println!("  GET  /ws/chat   — WebSocket agent chat");
+    println!("  GET  {pfx}/api/*     — REST API (bearer token required)");
+    println!("  GET  {pfx}/ws/chat   — WebSocket agent chat");
    if config.nodes.enabled {
-        println!("  GET  /ws/nodes  — WebSocket node discovery");
+        println!("  GET  {pfx}/ws/nodes  — WebSocket node discovery");
    }
-    println!("  GET  /health    — health check");
-    println!("  GET  /metrics   — Prometheus metrics");
+    println!("  GET  {pfx}/health    — health check");
+    println!("  GET  {pfx}/metrics   — Prometheus metrics");
    println!("  Press Ctrl+C to stop.\n");

    crate::health::mark_component_ok("gateway");
@@ -799,6 +809,7 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
        session_backend,
        device_registry,
        pending_pairings,
+        path_prefix: path_prefix.unwrap_or("").to_string(),
    };

    // Config PUT needs larger body limit (1MB)
@@ -807,7 +818,7 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
        .layer(RequestBodyLimitLayer::new(1_048_576));

    // Build router with middleware
-    let app = Router::new()
+    let inner = Router::new()
        // ── Admin routes (for CLI management) ──
        .route("/admin/shutdown", post(handle_admin_shutdown))
        .route("/admin/paircode", get(handle_admin_paircode))
@@ -867,12 +878,12 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {

    // ── Plugin management API (requires plugins-wasm feature) ──
    #[cfg(feature = "plugins-wasm")]
-    let app = app.route(
+    let inner = inner.route(
        "/api/plugins",
        get(api_plugins::plugin_routes::list_plugins),
    );

-    let app = app
+    let inner = inner
        // ── SSE event stream ──
        .route("/api/events", get(sse::handle_sse_events))
        // ── WebSocket agent chat ──
@@ -883,14 +894,27 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
        .route("/_app/{*path}", get(static_files::handle_static))
        // ── Config PUT with larger body limit ──
        .merge(config_put_router)
+        // ── SPA fallback: non-API GET requests serve index.html ──
+        .fallback(get(static_files::handle_spa_fallback))
        .with_state(state)
        .layer(RequestBodyLimitLayer::new(MAX_BODY_SIZE))
        .layer(TimeoutLayer::with_status_code(
            StatusCode::REQUEST_TIMEOUT,
            Duration::from_secs(gateway_request_timeout_secs()),
-        ))
-        // ── SPA fallback: non-API GET requests serve index.html ──
-        .fallback(get(static_files::handle_spa_fallback));
+        ));
+
+    // Nest under path prefix when configured (axum strips prefix before routing).
+    // nest() at "/prefix" handles both "/prefix" and "/prefix/*" but not "/prefix/"
+    // with a trailing slash, so we add a fallback redirect for that case.
+    let app = if let Some(prefix) = path_prefix {
+        let redirect_target = prefix.to_string();
+        Router::new().nest(prefix, inner).route(
+            &format!("{prefix}/"),
+            get(|| async move { axum::response::Redirect::permanent(&redirect_target) }),
+        )
+    } else {
+        inner
+    };

    // Run the server with graceful shutdown
    axum::serve(
@@ -1982,6 +2006,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2037,6 +2062,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2421,6 +2447,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2490,6 +2517,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2571,6 +2599,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2624,6 +2653,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2682,6 +2712,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2745,6 +2776,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -2804,6 +2836,7 @@ mod tests {
            event_tx: tokio::sync::broadcast::channel(16).0,
            shutdown_tx: tokio::sync::watch::channel(false).0,
            node_registry: Arc::new(nodes::NodeRegistry::new(16)),
+            path_prefix: String::new(),
            session_backend: None,
            device_registry: None,
            pending_pairings: None,
@@ -3,11 +3,14 @@
 //! Uses `rust-embed` to bundle the `web/dist/` directory into the binary at compile time.

 use axum::{
+    extract::State,
    http::{header, StatusCode, Uri},
    response::{IntoResponse, Response},
 };
 use rust_embed::Embed;

+use super::AppState;
+
 #[derive(Embed)]
 #[folder = "web/dist/"]
 struct WebAssets;
@@ -23,16 +26,41 @@ pub async fn handle_static(uri: Uri) -> Response {
    serve_embedded_file(path)
 }

-/// SPA fallback: serve index.html for any non-API, non-static GET request
-pub async fn handle_spa_fallback() -> Response {
-    if WebAssets::get("index.html").is_none() {
+/// SPA fallback: serve index.html for any non-API, non-static GET request.
+/// Injects `window.__ZEROCLAW_BASE__` (once) so the frontend knows the path prefix.
+pub async fn handle_spa_fallback(State(state): State<AppState>) -> Response {
+    let Some(content) = WebAssets::get("index.html") else {
        return (
            StatusCode::SERVICE_UNAVAILABLE,
            "Web dashboard not available. Build it with: cd web && npm ci && npm run build",
        )
            .into_response();
-    }
-    serve_embedded_file("index.html")
+    };
+
+    let html = String::from_utf8_lossy(&content.data);
+
+    // No prefix: serve the HTML unchanged. Otherwise inject the prefix and rewrite asset paths.
+    let html = if state.path_prefix.is_empty() {
+        html.into_owned()
+    } else {
+        let pfx = &state.path_prefix;
+        // JSON-encode the prefix to safely embed in a <script> block
+        let json_pfx = serde_json::to_string(pfx).unwrap_or_else(|_| "\"\"".to_string());
+        let script = format!("<script>window.__ZEROCLAW_BASE__={json_pfx};</script>");
+        // Rewrite /_app/ before injecting (keeps the script untouched), then inject at the first <head> only
+        html.replace("/_app/", &format!("{pfx}/_app/"))
+            .replacen("<head>", &format!("<head>{script}"), 1)
+    };
+
+    (
+        StatusCode::OK,
+        [
+            (header::CONTENT_TYPE, "text/html; charset=utf-8".to_string()),
+            (header::CACHE_CONTROL, "no-cache".to_string()),
+        ],
+        html,
+    )
+        .into_response()
 }

 fn serve_embedded_file(path: &str) -> Response {
@@ -407,7 +407,11 @@ mod tests {
        // Simpler: write a temp script.
        let dir = tempfile::tempdir().unwrap();
        let script_path = dir.path().join("tool.sh");
-        std::fs::write(&script_path, format!("#!/bin/sh\necho '{}'\n", result_json)).unwrap();
+        std::fs::write(
+            &script_path,
+            format!("#!/bin/sh\ncat > /dev/null\necho '{}'\n", result_json),
+        )
+        .unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
@@ -154,6 +154,7 @@ pub async fn run_wizard(force: bool) -> Result<Config> {
        reliability: crate::config::ReliabilityConfig::default(),
        scheduler: crate::config::schema::SchedulerConfig::default(),
        agent: crate::config::schema::AgentConfig::default(),
+        pacing: crate::config::PacingConfig::default(),
        skills: crate::config::SkillsConfig::default(),
        model_routes: Vec::new(),
        embedding_routes: Vec::new(),
@@ -576,6 +577,7 @@ async fn run_quick_setup_with_home(
        reliability: crate::config::ReliabilityConfig::default(),
        scheduler: crate::config::schema::SchedulerConfig::default(),
        agent: crate::config::schema::AgentConfig::default(),
+        pacing: crate::config::PacingConfig::default(),
        skills: crate::config::SkillsConfig::default(),
        model_routes: Vec::new(),
        embedding_routes: Vec::new(),
@@ -108,6 +108,7 @@ fn is_context_window_exceeded(err: &anyhow::Error) -> bool {
        "token limit exceeded",
        "prompt is too long",
        "input is too long",
+        "prompt exceeds max length",
    ];

    hints.iter().any(|hint| lower.contains(hint))
@@ -530,6 +530,7 @@ impl DelegateTool {
                &[],
                None,
                None,
+                &crate::config::PacingConfig::default(),
            ),
        )
        .await;
@@ -100,6 +100,10 @@ fn gateway_config_defaults_are_secure() {
        !gw.trust_forwarded_headers,
        "forwarded headers should be untrusted by default"
    );
+    assert!(
+        gw.path_prefix.is_none(),
+        "path_prefix should default to None"
+    );
 }

 #[test]
@@ -124,6 +128,7 @@ fn gateway_config_toml_roundtrip() {
        host: "0.0.0.0".into(),
        require_pairing: false,
        pair_rate_limit_per_minute: 5,
+        path_prefix: Some("/zeroclaw".into()),
        ..Default::default()
    };

@@ -134,6 +139,7 @@ fn gateway_config_toml_roundtrip() {
    assert_eq!(parsed.host, "0.0.0.0");
    assert!(!parsed.require_pairing);
    assert_eq!(parsed.pair_rate_limit_per_minute, 5);
+    assert_eq!(parsed.path_prefix.as_deref(), Some("/zeroclaw"));
 }

 #[test]
@@ -163,6 +169,93 @@ port = 9090
    assert_eq!(parsed.gateway.pair_rate_limit_per_minute, 10);
 }

+// ─────────────────────────────────────────────────────────────────────────────
+// GatewayConfig path_prefix validation
+// ─────────────────────────────────────────────────────────────────────────────
+
+#[test]
+fn gateway_path_prefix_rejects_missing_leading_slash() {
+    let mut config = Config::default();
+    config.gateway.path_prefix = Some("zeroclaw".into());
+    let err = config.validate().unwrap_err();
+    assert!(
+        err.to_string().contains("must start with '/'"),
+        "expected leading-slash error, got: {err}"
+    );
+}
+
+#[test]
+fn gateway_path_prefix_rejects_trailing_slash() {
+    let mut config = Config::default();
+    config.gateway.path_prefix = Some("/zeroclaw/".into());
+    let err = config.validate().unwrap_err();
+    assert!(
+        err.to_string().contains("must not end with '/'"),
+        "expected trailing-slash error, got: {err}"
+    );
+}
+
+#[test]
+fn gateway_path_prefix_rejects_bare_slash() {
+    let mut config = Config::default();
+    config.gateway.path_prefix = Some("/".into());
+    let err = config.validate().unwrap_err();
+    assert!(
+        err.to_string().contains("must not end with '/'"),
+        "expected bare-slash error, got: {err}"
+    );
+}
+
+#[test]
+fn gateway_path_prefix_accepts_valid_prefixes() {
+    for prefix in ["/zeroclaw", "/apps/zeroclaw", "/api/hassio_ingress/abc123"] {
+        let mut config = Config::default();
+        config.gateway.path_prefix = Some(prefix.into());
+        config
+            .validate()
+            .unwrap_or_else(|e| panic!("prefix {prefix:?} should be valid, got: {e}"));
+    }
+}
+
+#[test]
+fn gateway_path_prefix_rejects_unsafe_characters() {
+    for prefix in [
+        "/zero claw",
+        "/zero<claw",
+        "/zero>claw",
+        "/zero\"claw",
+        "/zero?query",
+        "/zero#frag",
+    ] {
+        let mut config = Config::default();
+        config.gateway.path_prefix = Some(prefix.into());
+        let err = config.validate().unwrap_err();
+        assert!(
+            err.to_string().contains("invalid character"),
+            "prefix {prefix:?} should be rejected, got: {err}"
+        );
+    }
+    // Leading/trailing whitespace is rejected by the starts_with('/') or
+    // invalid-character check — either way it must not pass validation.
+    for prefix in [" /zeroclaw ", " /zeroclaw"] {
+        let mut config = Config::default();
+        config.gateway.path_prefix = Some(prefix.into());
+        assert!(
+            config.validate().is_err(),
+            "whitespace-padded prefix {prefix:?} should be rejected"
+        );
+    }
+}
+
+#[test]
+fn gateway_path_prefix_accepts_none() {
+    let config = Config::default();
+    assert!(config.gateway.path_prefix.is_none());
+    config
+        .validate()
+        .expect("absent path_prefix should be valid");
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // SecurityConfig boundary tests
 // ─────────────────────────────────────────────────────────────────────────────
@@ -16,6 +16,7 @@ import Pairing from './pages/Pairing';
 import { AuthProvider, useAuth } from './hooks/useAuth';
 import { DraftContext, useDraftStore } from './hooks/useDraft';
 import { setLocale, type Locale } from './lib/i18n';
+import { basePath } from './lib/basePath';
 import { getAdminPairCode } from './lib/api';

 // Locale context
@@ -131,7 +132,7 @@ function PairingDialog({ onPair }: { onPair: (code: string) => Promise<void> })

        <div className="text-center mb-8">
          <img
-            src="/_app/zeroclaw-trans.png"
+            src={`${basePath}/_app/zeroclaw-trans.png`}
            alt="ZeroClaw"
            className="h-20 w-20 rounded-2xl object-cover mx-auto mb-4 animate-float"
            onError={(e) => { e.currentTarget.style.display = 'none'; }}
@@ -1,4 +1,5 @@
 import { NavLink } from 'react-router-dom';
+import { basePath } from '../../lib/basePath';
 import {
  LayoutDashboard,
  MessageSquare,
@@ -34,7 +35,7 @@ export default function Sidebar() {
        <div className="relative shrink-0">
          <div className="absolute -inset-1.5 rounded-xl" style={{ background: 'linear-gradient(135deg, rgba(var(--pc-accent-rgb), 0.15), rgba(var(--pc-accent-rgb), 0.05))' }} />
          <img
-            src="/_app/zeroclaw-trans.png"
+            src={`${basePath}/_app/zeroclaw-trans.png`}
            alt="ZeroClaw"
            className="relative h-9 w-9 rounded-xl object-cover"
            onError={(e) => {
@@ -11,6 +11,7 @@ import type {
  HealthSnapshot,
 } from '../types/api';
 import { clearToken, getToken, setToken } from './auth';
+import { basePath } from './basePath';

 // ---------------------------------------------------------------------------
 // Base fetch wrapper
@@ -42,7 +43,7 @@ export async function apiFetch<T = unknown>(
    headers.set('Content-Type', 'application/json');
  }

-  const response = await fetch(path, { ...options, headers });
+  const response = await fetch(`${basePath}${path}`, { ...options, headers });

  if (response.status === 401) {
    clearToken();
@@ -78,7 +79,7 @@ function unwrapField<T>(value: T | Record<string, T>, key: string): T {
 // ---------------------------------------------------------------------------

 export async function pair(code: string): Promise<{ token: string }> {
-  const response = await fetch('/pair', {
+  const response = await fetch(`${basePath}/pair`, {
    method: 'POST',
    headers: { 'X-Pairing-Code': code },
  });
@@ -106,7 +107,7 @@ export async function getAdminPairCode(): Promise<{ pairing_code: string | null;
 // ---------------------------------------------------------------------------

 export async function getPublicHealth(): Promise<{ require_pairing: boolean; paired: boolean }> {
-  const response = await fetch('/health');
+  const response = await fetch(`${basePath}/health`);
  if (!response.ok) {
    throw new Error(`Health check failed (${response.status})`);
  }
@@ -0,0 +1,11 @@
+// Runtime base path injected by the Rust gateway into index.html.
+// Allows the SPA to work under a reverse-proxy path prefix.
+
+declare global {
+  interface Window {
+    __ZEROCLAW_BASE__?: string;
+  }
+}
+
+/** Gateway path prefix (e.g. "/zeroclaw"), or empty string when served at root. */
+export const basePath: string = (window.__ZEROCLAW_BASE__ ?? '').replace(/\/+$/, '');
@@ -1,5 +1,6 @@
 import type { SSEEvent } from '../types/api';
 import { getToken } from './auth';
+import { basePath } from './basePath';

 export type SSEEventHandler = (event: SSEEvent) => void;
 export type SSEErrorHandler = (error: Event | Error) => void;
@@ -41,7 +42,7 @@ export class SSEClient {
  private readonly autoReconnect: boolean;

  constructor(options: SSEClientOptions = {}) {
-    this.path = options.path ?? '/api/events';
+    this.path = options.path ?? `${basePath}/api/events`;
    this.reconnectDelay = options.reconnectDelay ?? DEFAULT_RECONNECT_DELAY;
    this.maxReconnectDelay = options.maxReconnectDelay ?? MAX_RECONNECT_DELAY;
    this.autoReconnect = options.autoReconnect ?? true;
@@ -1,5 +1,6 @@
 import type { WsMessage } from '../types/api';
 import { getToken } from './auth';
+import { basePath } from './basePath';
 import { generateUUID } from './uuid';

 export type WsMessageHandler = (msg: WsMessage) => void;
@@ -69,7 +70,7 @@ export class WebSocketClient {
    const params = new URLSearchParams();
    if (token) params.set('token', token);
    params.set('session_id', sessionId);
-    const url = `${this.baseUrl}/ws/chat?${params.toString()}`;
+    const url = `${this.baseUrl}${basePath}/ws/chat?${params.toString()}`;

    const protocols: string[] = ['zeroclaw.v1'];
    if (token) protocols.push(`bearer.${token}`);
@@ -2,12 +2,13 @@ import React from 'react';
 import ReactDOM from 'react-dom/client';
 import { BrowserRouter } from 'react-router-dom';
 import App from './App';
+import { basePath } from './lib/basePath';
 import './index.css';

 ReactDOM.createRoot(document.getElementById('root')!).render(
  <React.StrictMode>
-    {/* Vite base '/_app/' scopes static asset URLs only; app routes stay rooted at '/' for SPA fallback. */}
-    <BrowserRouter basename="/">
+    {/* basePath is injected by the Rust gateway at serve time for reverse-proxy prefix support. */}
+    <BrowserRouter basename={basePath || '/'}>
      <App />
    </BrowserRouter>
  </React.StrictMode>
Author	SHA1	Message	Date
Argenis	9069bc3c1f	fix(agent): add system prompt budgeting for small-context models (#4185 ) For models with small context windows (e.g. glm-4.5-air ~8K tokens), the system prompt alone can exceed the limit. This adds: - max_system_prompt_chars config option (default 0 = unlimited) - compact_context now also compacts the system prompt: skips the Channel Capabilities section and shows only tool names - Truncation with marker when prompt exceeds the budget Users can set `max_system_prompt_chars = 8000` in [agent] config to cap the system prompt for small-context models. Closes #4124	2026-03-21 19:40:21 -04:00
Argenis	9319fe18da	fix(approval): support wildcard `*` in auto_approve and always_ask (#4184 ) auto_approve = ["*"] was doing exact string matching, so only the literal tool name "*" was matched. Users expecting wildcard semantics had every tool blocked in supervised mode. Also adds "prompt exceeds max length" to the context-window error detection hints (fixes GLM/ZAI error 1261 detection). Closes #4127	2026-03-21 19:38:11 -04:00
Argenis	cc454a86c8	fix(install): remove pairing code display from installer (#4176 ) The gateway pairing code is now shown in the dashboard, so displaying it in the installer output is redundant and cluttered (showed 3 codes).	2026-03-21 19:06:37 -04:00
Argenis	256e8ccebf	chore: bump version to v0.5.6 (#4174 ) Update version across all distribution manifests: - Cargo.toml / Cargo.lock - dist/aur/PKGBUILD + .SRCINFO - dist/scoop/zeroclaw.json	2026-03-21 18:03:38 -04:00
Argenis	72c9e6b6ca	fix(publish): publish aardvark-sys dep before main crate (#4172 ) * fix(publish): add aardvark-sys version and publish it before main crate - Add version = "0.1.0" to aardvark-sys path dependency in Cargo.toml - Update all three publish workflows to publish aardvark-sys first - Add aardvark-sys COPY to Dockerfile for workspace builds - Fixes cargo publish failure: "dependency aardvark-sys does not specify a version" * ci: publish aardvark-sys before main crate in all publish workflows All three crates.io publish workflows now publish aardvark-sys first, wait for indexing, then publish the main zeroclawlabs crate.	2026-03-21 16:20:50 -04:00
Argenis	755a129ca2	fix(install): use /dev/tty for sudo in curl|bash Xcode license accept (#4169 ) When run via `curl | bash`, stdin is the curl pipe, so sudo cannot prompt for a password. Redirect sudo's stdin from /dev/tty to reach the real terminal, allowing the password prompt to work in piped invocations.	2026-03-21 14:15:21 -04:00
Argenis	8b0d3684c5	fix(install): auto-accept Xcode license instead of bailing out (#4165 ) Instead of exiting with a manual remediation step, the installer now attempts to accept the Xcode/CLT license automatically via `sudo xcodebuild -license accept`. Falls back to a clear error message only if sudo fails (e.g. no terminal or password).	2026-03-21 13:57:38 -04:00
Argenis	a38a4d132e	fix(hardware): drain stdin in subprocess test to prevent broken pipe flake (#4161 ) * fix(hardware): drain stdin in subprocess test to prevent broken pipe flake The test script did not consume stdin, so SubprocessTool's stdin write raced against the process exit, causing intermittent EPIPE failures. Add `cat > /dev/null` to drain stdin before producing output. * style: format subprocess test	2026-03-21 12:19:53 -04:00
Argenis	48aba73d3a	fix(install): always check Xcode license on macOS, not just with --install-system-deps (#4153 ) The Xcode license test-compile was inside install_system_deps(), which only runs when --install-system-deps is passed. On macOS the default path skipped this entirely, so users hit `cc` exit code 69 deep in cargo build. Move the check into the unconditional main flow so it always fires on Darwin.	2026-03-21 11:29:36 -04:00
Argenis	a1ab1e1a11	fix(install): use test-compile instead of xcrun for Xcode license detection (#4151 ) xcrun --show-sdk-path can succeed even when the Xcode/CLT license has not been accepted, so the previous check was ineffective. Replace it with an actual test-compilation of a trivial C file, which reliably triggers the exit-code-69 failure when the license is pending.	2026-03-21 11:03:07 -04:00
SimianAstronaut7	87b5bca449	feat(config): add configurable pacing controls for slow/local LLM workloads (#3343 ) * feat(config): add configurable pacing controls for slow/local LLM workloads (#2963) Add a new `[pacing]` config section with four opt-in parameters that let users tune timeout and loop-detection behavior for local LLMs (Ollama, llama.cpp, vLLM) without disabling safety features entirely: - `step_timeout_secs`: per-step LLM inference timeout independent of the overall message budget, catching hung model responses early. - `loop_detection_min_elapsed_secs`: time-gated loop detection that only activates after a configurable grace period, avoiding false positives on long-running browser/research workflows. - `loop_ignore_tools`: per-tool loop-detection exclusions so tools like `browser_screenshot` that structurally resemble loops are not counted toward identical-output detection. - `message_timeout_scale_max`: overrides the hardcoded 4x ceiling in the channel message timeout scaling formula. All parameters are strictly optional with no effect when absent, preserving full backwards compatibility. Closes #2963 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(config): add missing pacing fields in tests and call sites * fix(config): add pacing arg to remaining cost-tracking test call sites --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: argenis de la rosa <theonlyhennygod@gmail.com>	2026-03-21 08:54:08 -04:00
Argenis	be40c0c5a5	Merge pull request #4145 from zeroclaw-labs/feat/gateway-path-prefix feat(gateway): add path_prefix for reverse-proxy deployments	2026-03-21 08:48:56 -04:00
argenis de la rosa	6527871928	fix: add path_prefix to test AppState in gateway/api.rs	2026-03-21 08:14:28 -04:00
argenis de la rosa	0bda80de9c	feat(gateway): add path_prefix for reverse-proxy deployments Adopted from #3709 by @slayer with minor cleanup. Supersedes #3709	2026-03-21 08:14:28 -04:00