From 98760a441cd088c118de89f07d5b5fb3d6d73d15 Mon Sep 17 00:00:00 2001 From: SimianAstronaut7 <79373020+SimianAstronaut7@users.noreply.github.com> Date: Sat, 21 Mar 2026 08:54:08 -0400 Subject: [PATCH] feat(config): add configurable pacing controls for slow/local LLM workloads (#3343) * feat(config): add configurable pacing controls for slow/local LLM workloads (#2963) Add a new `[pacing]` config section with four opt-in parameters that let users tune timeout and loop-detection behavior for local LLMs (Ollama, llama.cpp, vLLM) without disabling safety features entirely: - `step_timeout_secs`: per-step LLM inference timeout independent of the overall message budget, catching hung model responses early. - `loop_detection_min_elapsed_secs`: time-gated loop detection that only activates after a configurable grace period, avoiding false positives on long-running browser/research workflows. - `loop_ignore_tools`: per-tool loop-detection exclusions so tools like `browser_screenshot` that structurally resemble loops are not counted toward identical-output detection. - `message_timeout_scale_max`: overrides the hardcoded 4x ceiling in the channel message timeout scaling formula. All parameters are strictly optional with no effect when absent, preserving full backwards compatibility. 
Closes #2963 Co-Authored-By: Claude Opus 4.6 * fix(config): add missing pacing fields in tests and call sites * fix(config): add pacing arg to remaining cost-tracking test call sites --------- Co-authored-by: Claude Opus 4.6 Co-authored-by: argenis de la rosa --- docs/reference/api/config-reference.md | 32 ++++++- src/agent/loop_.rs | 123 +++++++++++++++++++++++-- src/channels/mod.rs | 102 +++++++++++++++++++- src/config/mod.rs | 2 +- src/config/schema.rs | 85 +++++++++++++++++ src/onboard/wizard.rs | 2 + src/tools/delegate.rs | 1 + 7 files changed, 335 insertions(+), 12 deletions(-) diff --git a/docs/reference/api/config-reference.md b/docs/reference/api/config-reference.md index ae84a2aec..5436ff3db 100644 --- a/docs/reference/api/config-reference.md +++ b/docs/reference/api/config-reference.md @@ -122,6 +122,34 @@ tools = ["mcp_browser_*"] keywords = ["browse", "navigate", "open url", "screenshot"] ``` +## `[pacing]` + +Pacing controls for slow/local LLM workloads (Ollama, llama.cpp, vLLM). All keys are optional; when absent, existing behavior is preserved. + +| Key | Default | Purpose | +|---|---|---| +| `step_timeout_secs` | _none_ | Per-step timeout: maximum seconds for a single LLM inference turn. Catches a truly hung model without terminating the overall task loop | +| `loop_detection_min_elapsed_secs` | _none_ | Minimum elapsed seconds before identical-output loop detection activates. The detector starts counting only after this grace period has elapsed; when unset, identical-output detection is disabled | +| `loop_ignore_tools` | `[]` | Tool names excluded from identical-output loop detection. Useful for browser workflows where `browser_screenshot` structurally resembles a loop | +| `message_timeout_scale_max` | `4` | Override for the hardcoded timeout scaling cap. The channel message timeout budget is `message_timeout_secs * min(max_tool_iterations, message_timeout_scale_max)` | + +Notes: + +- These settings are intended for local/slow LLM deployments. 
Cloud-provider users typically do not need them. +- `step_timeout_secs` operates independently of the total channel message timeout budget. A step timeout abort does not consume the overall budget; the loop simply stops. +- `loop_detection_min_elapsed_secs` gates identical-output loop detection on elapsed time, not the task itself. When the key is unset (the default), identical-output detection is disabled entirely; other safety features (max iterations, overall timeout) always remain active. +- `loop_ignore_tools` only suppresses tool-output-based loop detection for the listed tools. Other safety features (max iterations, overall timeout) remain active. +- `message_timeout_scale_max` must be >= 1. Setting it higher than `max_tool_iterations` has no additional effect (the formula uses `min()`). +- Example configuration for a slow local Ollama deployment: + +```toml +[pacing] +step_timeout_secs = 120 +loop_detection_min_elapsed_secs = 60 +loop_ignore_tools = ["browser_screenshot", "browser_navigate"] +message_timeout_scale_max = 8 +``` + ## `[security.otp]` | Key | Default | Purpose | @@ -603,7 +631,7 @@ Top-level channel options are configured under `channels_config`. | Key | Default | Purpose | |---|---|---| -| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x) | +| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x, overridable via `[pacing].message_timeout_scale_max`) | Examples: @@ -618,7 +646,7 @@ Examples: Notes: - Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs. -- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, 4)` and a minimum of `1`. +- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, cap)` and a minimum of `1`. The default cap is `4`; override with `[pacing].message_timeout_scale_max`. 
- This scaling avoids false timeouts when the first LLM turn is slow/retried but later tool-loop turns still need to complete. - If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower. - Values below `30` are clamped to `30` to avoid immediate timeout churn. diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 2fe00e753..4c6d4a166 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -2331,6 +2331,7 @@ pub(crate) async fn agent_turn( dedup_exempt_tools, activated_tools, model_switch_callback, + &crate::config::PacingConfig::default(), ) .await } @@ -2640,6 +2641,7 @@ pub(crate) async fn run_tool_call_loop( dedup_exempt_tools: &[String], activated_tools: Option<&std::sync::Arc>>, model_switch_callback: Option, + pacing: &crate::config::PacingConfig, ) -> Result { let max_iterations = if max_tool_iterations == 0 { DEFAULT_MAX_TOOL_ITERATIONS @@ -2648,6 +2650,14 @@ pub(crate) async fn run_tool_call_loop( }; let turn_id = Uuid::new_v4().to_string(); + let loop_started_at = Instant::now(); + let loop_ignore_tools: HashSet<&str> = pacing + .loop_ignore_tools + .iter() + .map(String::as_str) + .collect(); + let mut consecutive_identical_outputs: usize = 0; + let mut last_tool_output_hash: Option = None; for iteration in 0..max_iterations { let mut seen_tool_signatures: HashSet<(String, String)> = HashSet::new(); @@ -2777,13 +2787,43 @@ pub(crate) async fn run_tool_call_loop( temperature, ); - let chat_result = if let Some(token) = cancellation_token.as_ref() { - tokio::select! { - () = token.cancelled() => return Err(ToolLoopCancelled.into()), - result = chat_future => result, + // Wrap the LLM call with an optional per-step timeout from pacing config. + // This catches a truly hung model response without terminating the overall + // task loop (the per-message budget handles that separately). 
+ let chat_result = match pacing.step_timeout_secs { + Some(step_secs) if step_secs > 0 => { + let step_timeout = Duration::from_secs(step_secs); + if let Some(token) = cancellation_token.as_ref() { + tokio::select! { + () = token.cancelled() => return Err(ToolLoopCancelled.into()), + result = tokio::time::timeout(step_timeout, chat_future) => { + match result { + Ok(inner) => inner, + Err(_) => anyhow::bail!( + "LLM inference step timed out after {step_secs}s (step_timeout_secs)" + ), + } + }, + } + } else { + match tokio::time::timeout(step_timeout, chat_future).await { + Ok(inner) => inner, + Err(_) => anyhow::bail!( + "LLM inference step timed out after {step_secs}s (step_timeout_secs)" + ), + } + } + } + _ => { + if let Some(token) = cancellation_token.as_ref() { + tokio::select! { + () = token.cancelled() => return Err(ToolLoopCancelled.into()), + result = chat_future => result, + } + } else { + chat_future.await + } } - } else { - chat_future.await }; let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) = @@ -3282,7 +3322,13 @@ pub(crate) async fn run_tool_call_loop( ordered_results[*idx] = Some((call.name.clone(), call.tool_call_id.clone(), outcome)); } + // Collect tool results and build per-tool output for loop detection. + // Only non-ignored tool outputs contribute to the identical-output hash. + let mut detection_relevant_output = String::new(); for (tool_name, tool_call_id, outcome) in ordered_results.into_iter().flatten() { + if !loop_ignore_tools.contains(tool_name.as_str()) { + detection_relevant_output.push_str(&outcome.output); + } individual_results.push((tool_call_id, outcome.output.clone())); let _ = writeln!( tool_results, @@ -3291,6 +3337,53 @@ pub(crate) async fn run_tool_call_loop( ); } + // ── Time-gated loop detection ────────────────────────── + // When pacing.loop_detection_min_elapsed_secs is set, identical-output + // loop detection activates after the task has been running that long. 
+ // This avoids false-positive aborts on long-running browser/research + // workflows while keeping aggressive protection for quick tasks. + // When not configured, identical-output detection is disabled (preserving + // existing behavior where only max_iterations prevents runaway loops). + let loop_detection_active = match pacing.loop_detection_min_elapsed_secs { + Some(min_secs) => loop_started_at.elapsed() >= Duration::from_secs(min_secs), + None => false, // disabled when not configured (backwards compatible) + }; + + if loop_detection_active && !detection_relevant_output.is_empty() { + use std::hash::{Hash, Hasher}; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + detection_relevant_output.hash(&mut hasher); + let current_hash = hasher.finish(); + + if last_tool_output_hash == Some(current_hash) { + consecutive_identical_outputs += 1; + } else { + consecutive_identical_outputs = 0; + last_tool_output_hash = Some(current_hash); + } + + // Bail if we see 3+ consecutive identical tool outputs (clear runaway). + if consecutive_identical_outputs >= 3 { + runtime_trace::record_event( + "tool_loop_identical_output_abort", + Some(channel_name), + Some(provider_name), + Some(model), + Some(&turn_id), + Some(false), + Some("identical tool output detected 3 consecutive times"), + serde_json::json!({ + "iteration": iteration + 1, + "consecutive_identical": consecutive_identical_outputs, + }), + ); + anyhow::bail!( + "Agent loop aborted: identical tool output detected {} consecutive times", + consecutive_identical_outputs + ); + } + } + // Add assistant message with tool calls + tool results to history. // Native mode: use JSON-structured messages so convert_messages() can // reconstruct proper OpenAI-format tool_calls and tool result messages. 
@@ -3841,6 +3934,7 @@ pub async fn run( &config.agent.tool_call_dedup_exempt, activated_handle.as_ref(), Some(model_switch_callback.clone()), + &config.pacing, ) .await { @@ -4068,6 +4162,7 @@ pub async fn run( &config.agent.tool_call_dedup_exempt, activated_handle.as_ref(), Some(model_switch_callback.clone()), + &config.pacing, ) .await { @@ -4966,6 +5061,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect_err("provider without vision support should fail"); @@ -5016,6 +5112,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect_err("oversized payload must fail"); @@ -5060,6 +5157,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("valid multimodal payload should pass"); @@ -5190,6 +5288,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("parallel execution should complete"); @@ -5260,6 +5359,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("cron_add delivery defaults should be injected"); @@ -5322,6 +5422,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("explicit delivery mode should be preserved"); @@ -5379,6 +5480,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("loop should finish after deduplicating repeated calls"); @@ -5448,6 +5550,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("non-interactive shell should succeed for low-risk command"); @@ -5508,6 +5611,7 @@ mod tests { &exempt, None, None, + &crate::config::PacingConfig::default(), ) .await .expect("loop should finish with exempt tool executing twice"); @@ -5588,6 +5692,7 @@ mod tests { &exempt, None, None, + &crate::config::PacingConfig::default(), ) .await .expect("loop should complete"); @@ -5645,6 +5750,7 @@ mod tests { &[], None, None, + 
&crate::config::PacingConfig::default(), ) .await .expect("native fallback id flow should complete"); @@ -5726,6 +5832,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("native tool-call text should be relayed through on_delta"); @@ -7711,6 +7818,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("tool loop should complete"); @@ -7858,6 +7966,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ), ) .await @@ -7936,6 +8045,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ), ) .await @@ -7990,6 +8100,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("should succeed without cost scope"); diff --git a/src/channels/mod.rs b/src/channels/mod.rs index 2b11eefa6..1a0690e52 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -222,9 +222,21 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 { fn channel_message_timeout_budget_secs( message_timeout_secs: u64, max_tool_iterations: usize, +) -> u64 { + channel_message_timeout_budget_secs_with_cap( + message_timeout_secs, + max_tool_iterations, + CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP, + ) +} + +fn channel_message_timeout_budget_secs_with_cap( + message_timeout_secs: u64, + max_tool_iterations: usize, + scale_cap: u64, ) -> u64 { let iterations = max_tool_iterations.max(1) as u64; - let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP); + let scale = iterations.min(scale_cap); message_timeout_secs.saturating_mul(scale) } @@ -362,6 +374,7 @@ struct ChannelRuntimeContext { approval_manager: Arc, activated_tools: Option>>, cost_tracking: Option, + pacing: crate::config::PacingConfig, } #[derive(Clone)] @@ -2402,8 +2415,15 @@ async fn process_channel_message( } let model_switch_callback = get_model_switch_state(); - let timeout_budget_secs = - 
channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations); + let scale_cap = ctx + .pacing + .message_timeout_scale_max + .unwrap_or(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP); + let timeout_budget_secs = channel_message_timeout_budget_secs_with_cap( + ctx.message_timeout_secs, + ctx.max_tool_iterations, + scale_cap, + ); let cost_tracking_context = ctx.cost_tracking.clone().map(|state| { crate::agent::loop_::ToolLoopCostTrackingContext::new(state.tracker, state.prices) }); @@ -2445,6 +2465,7 @@ async fn process_channel_message( ctx.tool_call_dedup_exempt.as_ref(), ctx.activated_tools.as_ref(), Some(model_switch_callback.clone()), + &ctx.pacing, ), ), ) => LlmExecutionResult::Completed(result), @@ -4638,6 +4659,7 @@ pub async fn start_channels(config: Config) -> Result<()> { tracker, prices: Arc::new(config.cost.prices.clone()), }), + pacing: config.pacing.clone(), }); // Hydrate in-memory conversation histories from persisted JSONL session files. @@ -4734,6 +4756,49 @@ mod tests { ); } + #[test] + fn channel_message_timeout_budget_with_custom_scale_cap() { + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 8, 8), + 300 * 8 + ); + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 20, 8), + 300 * 8 + ); + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 10, 1), + 300 + ); + } + + #[test] + fn pacing_config_defaults_preserve_existing_behavior() { + let pacing = crate::config::PacingConfig::default(); + assert!(pacing.step_timeout_secs.is_none()); + assert!(pacing.loop_detection_min_elapsed_secs.is_none()); + assert!(pacing.loop_ignore_tools.is_empty()); + assert!(pacing.message_timeout_scale_max.is_none()); + } + + #[test] + fn pacing_message_timeout_scale_max_overrides_default_cap() { + // Custom cap of 8 scales budget proportionally + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 10, 8), + 300 * 8 + ); + // Default cap produces the standard behavior + assert_eq!( + 
channel_message_timeout_budget_secs_with_cap( + 300, + 10, + CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP + ), + 300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP + ); + } + #[test] fn context_window_overflow_error_detector_matches_known_messages() { let overflow_err = anyhow::anyhow!( @@ -4938,6 +5003,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; assert!(compact_sender_history(&ctx, &sender)); @@ -5054,6 +5120,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; append_sender_turn(&ctx, &sender, ChatMessage::user("hello")); @@ -5126,6 +5193,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; assert!(rollback_orphan_user_turn(&ctx, &sender, "pending")); @@ -5217,6 +5285,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; assert!(rollback_orphan_user_turn( @@ -5755,6 +5824,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -5836,6 +5906,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -5931,6 +6002,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6011,6 +6083,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6101,6 +6174,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + 
pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6212,6 +6286,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6304,6 +6379,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6411,6 +6487,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6503,6 +6580,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6585,6 +6663,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6782,6 +6861,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(4); @@ -6884,6 +6964,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); @@ -7001,6 +7082,7 @@ BTC is currently around $65,000 based on latest tool output."# activated_tools: None, cost_tracking: None, query_classification: crate::config::QueryClassificationConfig::default(), + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); @@ -7115,6 +7197,7 @@ BTC is currently around $65,000 based on latest tool output."# 
)), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); @@ -7211,6 +7294,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -7291,6 +7375,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8057,6 +8142,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8188,6 +8274,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8359,6 +8446,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8467,6 +8555,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9039,6 +9128,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); // Simulate a photo attachment message with [IMAGE:] marker. 
@@ -9126,6 +9216,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9288,6 +9379,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9399,6 +9491,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9502,6 +9595,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9625,6 +9719,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9886,6 +9981,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); diff --git a/src/config/mod.rs b/src/config/mod.rs index 5d186ad7a..ba92ad14c 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -21,7 +21,7 @@ pub use schema::{ MatrixConfig, McpConfig, McpServerConfig, McpTransport, MemoryConfig, Microsoft365Config, ModelRouteConfig, MultimodalConfig, NextcloudTalkConfig, NodeTransportConfig, NodesConfig, NotionConfig, ObservabilityConfig, OpenAiSttConfig, OpenAiTtsConfig, OpenVpnTunnelConfig, - OtpConfig, OtpMethod, PeripheralBoardConfig, PeripheralsConfig, PluginsConfig, + OtpConfig, OtpMethod, PacingConfig, PeripheralBoardConfig, PeripheralsConfig, PluginsConfig, ProjectIntelConfig, ProxyConfig, ProxyScope, QdrantConfig, QueryClassificationConfig, ReliabilityConfig, ResourceLimitsConfig, 
RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig, SecurityOpsConfig, SkillCreationConfig, diff --git a/src/config/schema.rs b/src/config/schema.rs index 280ed8c3a..5de9d8402 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -165,6 +165,10 @@ pub struct Config { #[serde(default)] pub agent: AgentConfig, + /// Pacing controls for slow/local LLM workloads (`[pacing]`). + #[serde(default)] + pub pacing: PacingConfig, + /// Skills loading and community repository behavior (`[skills]`). #[serde(default)] pub skills: SkillsConfig, @@ -1277,6 +1281,43 @@ impl Default for AgentConfig { } } +// ── Pacing ──────────────────────────────────────────────────────── + +/// Pacing controls for slow/local LLM workloads (`[pacing]` section). +/// +/// All fields are optional and default to values that preserve existing +/// behavior. When set, they extend — not replace — the existing timeout +/// and loop-detection subsystems. +#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] +pub struct PacingConfig { + /// Per-step timeout in seconds: the maximum time allowed for a single + /// LLM inference turn, independent of the total message budget. + /// `None` means no per-step timeout (existing behavior). + #[serde(default)] + pub step_timeout_secs: Option, + + /// Minimum elapsed seconds before identical-output loop detection + /// activates. The detector only starts counting once the task has been + /// running at least this long, giving long-running workflows a grace + /// period. `None` disables identical-output detection (existing behavior). + #[serde(default)] + pub loop_detection_min_elapsed_secs: Option, + + /// Tool names excluded from identical-output loop + /// detection. Useful for browser workflows where `browser_screenshot` + /// structurally resembles a loop even when making progress. 
+ #[serde(default)] + pub loop_ignore_tools: Vec, + + /// Override for the hardcoded timeout scaling cap (default: 4). + /// The channel message timeout budget is computed as: + /// `message_timeout_secs * min(max_tool_iterations, message_timeout_scale_max)` + /// Raising this value lets long multi-step tasks with slow local models + /// receive a proportionally larger budget without inflating the base timeout. + #[serde(default)] + pub message_timeout_scale_max: Option, +} + /// Skills loading configuration (`[skills]` section). #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "snake_case")] @@ -6727,6 +6768,7 @@ impl Default for Config { reliability: ReliabilityConfig::default(), scheduler: SchedulerConfig::default(), agent: AgentConfig::default(), + pacing: PacingConfig::default(), skills: SkillsConfig::default(), model_routes: Vec::new(), embedding_routes: Vec::new(), @@ -9672,6 +9714,7 @@ default_temperature = 0.7 google_workspace: GoogleWorkspaceConfig::default(), proxy: ProxyConfig::default(), agent: AgentConfig::default(), + pacing: PacingConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), peripherals: PeripheralsConfig::default(), @@ -9943,6 +9986,47 @@ tool_dispatcher = "xml" assert_eq!(parsed.agent.tool_dispatcher, "xml"); } + #[test] + async fn pacing_config_defaults_are_all_none_or_empty() { + let cfg = PacingConfig::default(); + assert!(cfg.step_timeout_secs.is_none()); + assert!(cfg.loop_detection_min_elapsed_secs.is_none()); + assert!(cfg.loop_ignore_tools.is_empty()); + assert!(cfg.message_timeout_scale_max.is_none()); + } + + #[test] + async fn pacing_config_deserializes_from_toml() { + let raw = r#" +default_temperature = 0.7 +[pacing] +step_timeout_secs = 120 +loop_detection_min_elapsed_secs = 60 +loop_ignore_tools = ["browser_screenshot", "browser_navigate"] +message_timeout_scale_max = 8 +"#; + let parsed: Config = toml::from_str(raw).unwrap(); + 
assert_eq!(parsed.pacing.step_timeout_secs, Some(120)); + assert_eq!(parsed.pacing.loop_detection_min_elapsed_secs, Some(60)); + assert_eq!( + parsed.pacing.loop_ignore_tools, + vec!["browser_screenshot", "browser_navigate"] + ); + assert_eq!(parsed.pacing.message_timeout_scale_max, Some(8)); + } + + #[test] + async fn pacing_config_absent_preserves_defaults() { + let raw = r#" +default_temperature = 0.7 +"#; + let parsed: Config = toml::from_str(raw).unwrap(); + assert!(parsed.pacing.step_timeout_secs.is_none()); + assert!(parsed.pacing.loop_detection_min_elapsed_secs.is_none()); + assert!(parsed.pacing.loop_ignore_tools.is_empty()); + assert!(parsed.pacing.message_timeout_scale_max.is_none()); + } + #[tokio::test] async fn sync_directory_handles_existing_directory() { let dir = std::env::temp_dir().join(format!( @@ -10011,6 +10095,7 @@ tool_dispatcher = "xml" google_workspace: GoogleWorkspaceConfig::default(), proxy: ProxyConfig::default(), agent: AgentConfig::default(), + pacing: PacingConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), peripherals: PeripheralsConfig::default(), diff --git a/src/onboard/wizard.rs b/src/onboard/wizard.rs index 156cad8ae..36a83a1f4 100644 --- a/src/onboard/wizard.rs +++ b/src/onboard/wizard.rs @@ -154,6 +154,7 @@ pub async fn run_wizard(force: bool) -> Result { reliability: crate::config::ReliabilityConfig::default(), scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), + pacing: crate::config::PacingConfig::default(), skills: crate::config::SkillsConfig::default(), model_routes: Vec::new(), embedding_routes: Vec::new(), @@ -576,6 +577,7 @@ async fn run_quick_setup_with_home( reliability: crate::config::ReliabilityConfig::default(), scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), + pacing: crate::config::PacingConfig::default(), skills: 
crate::config::SkillsConfig::default(), model_routes: Vec::new(), embedding_routes: Vec::new(), diff --git a/src/tools/delegate.rs b/src/tools/delegate.rs index 0e329ce2a..0970da5b7 100644 --- a/src/tools/delegate.rs +++ b/src/tools/delegate.rs @@ -530,6 +530,7 @@ impl DelegateTool { &[], None, None, + &crate::config::PacingConfig::default(), ), ) .await;