From 98760a441cd088c118de89f07d5b5fb3d6d73d15 Mon Sep 17 00:00:00 2001 From: SimianAstronaut7 <79373020+SimianAstronaut7@users.noreply.github.com> Date: Sat, 21 Mar 2026 08:54:08 -0400 Subject: [PATCH] feat(config): add configurable pacing controls for slow/local LLM workloads (#3343) * feat(config): add configurable pacing controls for slow/local LLM workloads (#2963) Add a new `[pacing]` config section with four opt-in parameters that let users tune timeout and loop-detection behavior for local LLMs (Ollama, llama.cpp, vLLM) without disabling safety features entirely: - `step_timeout_secs`: per-step LLM inference timeout independent of the overall message budget, catching hung model responses early. - `loop_detection_min_elapsed_secs`: time-gated loop detection that only activates after a configurable grace period, avoiding false positives on long-running browser/research workflows. - `loop_ignore_tools`: per-tool loop-detection exclusions so tools like `browser_screenshot` that structurally resemble loops are not counted toward identical-output detection. - `message_timeout_scale_max`: overrides the hardcoded 4x ceiling in the channel message timeout scaling formula. All parameters are strictly optional with no effect when absent, preserving full backwards compatibility. 
Closes #2963 Co-Authored-By: Claude Opus 4.6 * fix(config): add missing pacing fields in tests and call sites * fix(config): add pacing arg to remaining cost-tracking test call sites --------- Co-authored-by: Claude Opus 4.6 Co-authored-by: argenis de la rosa --- docs/reference/api/config-reference.md | 32 ++++++- src/agent/loop_.rs | 123 +++++++++++++++++++++++-- src/channels/mod.rs | 102 +++++++++++++++++++- src/config/mod.rs | 2 +- src/config/schema.rs | 85 +++++++++++++++++ src/onboard/wizard.rs | 2 + src/tools/delegate.rs | 1 + 7 files changed, 335 insertions(+), 12 deletions(-) diff --git a/docs/reference/api/config-reference.md b/docs/reference/api/config-reference.md index ae84a2aec..5436ff3db 100644 --- a/docs/reference/api/config-reference.md +++ b/docs/reference/api/config-reference.md @@ -122,6 +122,34 @@ tools = ["mcp_browser_*"] keywords = ["browse", "navigate", "open url", "screenshot"] ``` +## `[pacing]` + +Pacing controls for slow/local LLM workloads (Ollama, llama.cpp, vLLM). All keys are optional; when absent, existing behavior is preserved. + +| Key | Default | Purpose | +|---|---|---| +| `step_timeout_secs` | _none_ | Per-step timeout: maximum seconds for a single LLM inference turn. Catches a truly hung model without terminating the overall task loop | +| `loop_detection_min_elapsed_secs` | _none_ | Minimum elapsed seconds before identical-output loop detection activates. The detector starts counting only after this grace period has elapsed; when unset, identical-output detection is disabled | +| `loop_ignore_tools` | `[]` | Tool names excluded from identical-output loop detection. Useful for browser workflows where `browser_screenshot` structurally resembles a loop | +| `message_timeout_scale_max` | `4` | Override for the hardcoded timeout scaling cap. The channel message timeout budget is `message_timeout_secs * min(max_tool_iterations, message_timeout_scale_max)` | + +Notes: + +- These settings are intended for local/slow LLM deployments. 
Cloud-provider users typically do not need them. +- `step_timeout_secs` operates independently of the total channel message timeout budget. A step timeout abort does not consume the overall budget; the loop simply stops. +- `loop_detection_min_elapsed_secs` gates identical-output loop detection on elapsed time, not the task itself. When the key is unset (the default), identical-output detection is disabled entirely; other safety features (max iterations, overall timeout) always remain active. +- `loop_ignore_tools` only suppresses tool-output-based loop detection for the listed tools. Other safety features (max iterations, overall timeout) remain active. +- `message_timeout_scale_max` must be >= 1. Setting it higher than `max_tool_iterations` has no additional effect (the formula uses `min()`). +- Example configuration for a slow local Ollama deployment: + +```toml +[pacing] +step_timeout_secs = 120 +loop_detection_min_elapsed_secs = 60 +loop_ignore_tools = ["browser_screenshot", "browser_navigate"] +message_timeout_scale_max = 8 +``` + ## `[security.otp]` | Key | Default | Purpose | @@ -603,7 +631,7 @@ Top-level channel options are configured under `channels_config`. | Key | Default | Purpose | |---|---|---| -| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x) | +| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x, overridable via `[pacing].message_timeout_scale_max`) | Examples: @@ -618,7 +646,7 @@ Examples: Notes: - Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs. -- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, 4)` and a minimum of `1`. +- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, cap)` and a minimum of `1`. The default cap is `4`; override with `[pacing].message_timeout_scale_max`. 
- This scaling avoids false timeouts when the first LLM turn is slow/retried but later tool-loop turns still need to complete. - If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower. - Values below `30` are clamped to `30` to avoid immediate timeout churn. diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 2fe00e753..4c6d4a166 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -2331,6 +2331,7 @@ pub(crate) async fn agent_turn( dedup_exempt_tools, activated_tools, model_switch_callback, + &crate::config::PacingConfig::default(), ) .await } @@ -2640,6 +2641,7 @@ pub(crate) async fn run_tool_call_loop( dedup_exempt_tools: &[String], activated_tools: Option<&std::sync::Arc>>, model_switch_callback: Option, + pacing: &crate::config::PacingConfig, ) -> Result { let max_iterations = if max_tool_iterations == 0 { DEFAULT_MAX_TOOL_ITERATIONS @@ -2648,6 +2650,14 @@ pub(crate) async fn run_tool_call_loop( }; let turn_id = Uuid::new_v4().to_string(); + let loop_started_at = Instant::now(); + let loop_ignore_tools: HashSet<&str> = pacing + .loop_ignore_tools + .iter() + .map(String::as_str) + .collect(); + let mut consecutive_identical_outputs: usize = 0; + let mut last_tool_output_hash: Option = None; for iteration in 0..max_iterations { let mut seen_tool_signatures: HashSet<(String, String)> = HashSet::new(); @@ -2777,13 +2787,43 @@ pub(crate) async fn run_tool_call_loop( temperature, ); - let chat_result = if let Some(token) = cancellation_token.as_ref() { - tokio::select! { - () = token.cancelled() => return Err(ToolLoopCancelled.into()), - result = chat_future => result, + // Wrap the LLM call with an optional per-step timeout from pacing config. + // This catches a truly hung model response without terminating the overall + // task loop (the per-message budget handles that separately). 
+ let chat_result = match pacing.step_timeout_secs { + Some(step_secs) if step_secs > 0 => { + let step_timeout = Duration::from_secs(step_secs); + if let Some(token) = cancellation_token.as_ref() { + tokio::select! { + () = token.cancelled() => return Err(ToolLoopCancelled.into()), + result = tokio::time::timeout(step_timeout, chat_future) => { + match result { + Ok(inner) => inner, + Err(_) => anyhow::bail!( + "LLM inference step timed out after {step_secs}s (step_timeout_secs)" + ), + } + }, + } + } else { + match tokio::time::timeout(step_timeout, chat_future).await { + Ok(inner) => inner, + Err(_) => anyhow::bail!( + "LLM inference step timed out after {step_secs}s (step_timeout_secs)" + ), + } + } + } + _ => { + if let Some(token) = cancellation_token.as_ref() { + tokio::select! { + () = token.cancelled() => return Err(ToolLoopCancelled.into()), + result = chat_future => result, + } + } else { + chat_future.await + } } - } else { - chat_future.await }; let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) = @@ -3282,7 +3322,13 @@ pub(crate) async fn run_tool_call_loop( ordered_results[*idx] = Some((call.name.clone(), call.tool_call_id.clone(), outcome)); } + // Collect tool results and build per-tool output for loop detection. + // Only non-ignored tool outputs contribute to the identical-output hash. + let mut detection_relevant_output = String::new(); for (tool_name, tool_call_id, outcome) in ordered_results.into_iter().flatten() { + if !loop_ignore_tools.contains(tool_name.as_str()) { + detection_relevant_output.push_str(&outcome.output); + } individual_results.push((tool_call_id, outcome.output.clone())); let _ = writeln!( tool_results, @@ -3291,6 +3337,53 @@ pub(crate) async fn run_tool_call_loop( ); } + // ── Time-gated loop detection ────────────────────────── + // When pacing.loop_detection_min_elapsed_secs is set, identical-output + // loop detection activates after the task has been running that long. 
+ // This avoids false-positive aborts on long-running browser/research + // workflows while keeping aggressive protection for quick tasks. + // When not configured, identical-output detection is disabled (preserving + // existing behavior where only max_iterations prevents runaway loops). + let loop_detection_active = match pacing.loop_detection_min_elapsed_secs { + Some(min_secs) => loop_started_at.elapsed() >= Duration::from_secs(min_secs), + None => false, // disabled when not configured (backwards compatible) + }; + + if loop_detection_active && !detection_relevant_output.is_empty() { + use std::hash::{Hash, Hasher}; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + detection_relevant_output.hash(&mut hasher); + let current_hash = hasher.finish(); + + if last_tool_output_hash == Some(current_hash) { + consecutive_identical_outputs += 1; + } else { + consecutive_identical_outputs = 0; + last_tool_output_hash = Some(current_hash); + } + + // Bail if we see 3+ consecutive identical tool outputs (clear runaway). + if consecutive_identical_outputs >= 3 { + runtime_trace::record_event( + "tool_loop_identical_output_abort", + Some(channel_name), + Some(provider_name), + Some(model), + Some(&turn_id), + Some(false), + Some("identical tool output detected 3 consecutive times"), + serde_json::json!({ + "iteration": iteration + 1, + "consecutive_identical": consecutive_identical_outputs, + }), + ); + anyhow::bail!( + "Agent loop aborted: identical tool output detected {} consecutive times", + consecutive_identical_outputs + ); + } + } + // Add assistant message with tool calls + tool results to history. // Native mode: use JSON-structured messages so convert_messages() can // reconstruct proper OpenAI-format tool_calls and tool result messages. 
@@ -3841,6 +3934,7 @@ pub async fn run( &config.agent.tool_call_dedup_exempt, activated_handle.as_ref(), Some(model_switch_callback.clone()), + &config.pacing, ) .await { @@ -4068,6 +4162,7 @@ pub async fn run( &config.agent.tool_call_dedup_exempt, activated_handle.as_ref(), Some(model_switch_callback.clone()), + &config.pacing, ) .await { @@ -4966,6 +5061,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect_err("provider without vision support should fail"); @@ -5016,6 +5112,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect_err("oversized payload must fail"); @@ -5060,6 +5157,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("valid multimodal payload should pass"); @@ -5190,6 +5288,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("parallel execution should complete"); @@ -5260,6 +5359,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("cron_add delivery defaults should be injected"); @@ -5322,6 +5422,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("explicit delivery mode should be preserved"); @@ -5379,6 +5480,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("loop should finish after deduplicating repeated calls"); @@ -5448,6 +5550,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("non-interactive shell should succeed for low-risk command"); @@ -5508,6 +5611,7 @@ mod tests { &exempt, None, None, + &crate::config::PacingConfig::default(), ) .await .expect("loop should finish with exempt tool executing twice"); @@ -5588,6 +5692,7 @@ mod tests { &exempt, None, None, + &crate::config::PacingConfig::default(), ) .await .expect("loop should complete"); @@ -5645,6 +5750,7 @@ mod tests { &[], None, None, + 
&crate::config::PacingConfig::default(), ) .await .expect("native fallback id flow should complete"); @@ -5726,6 +5832,7 @@ mod tests { &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("native tool-call text should be relayed through on_delta"); @@ -7711,6 +7818,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("tool loop should complete"); @@ -7858,6 +7966,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ), ) .await @@ -7936,6 +8045,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ), ) .await @@ -7990,6 +8100,7 @@ Let me check the result."#; &[], None, None, + &crate::config::PacingConfig::default(), ) .await .expect("should succeed without cost scope"); diff --git a/src/channels/mod.rs b/src/channels/mod.rs index 2b11eefa6..1a0690e52 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -222,9 +222,21 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 { fn channel_message_timeout_budget_secs( message_timeout_secs: u64, max_tool_iterations: usize, +) -> u64 { + channel_message_timeout_budget_secs_with_cap( + message_timeout_secs, + max_tool_iterations, + CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP, + ) +} + +fn channel_message_timeout_budget_secs_with_cap( + message_timeout_secs: u64, + max_tool_iterations: usize, + scale_cap: u64, ) -> u64 { let iterations = max_tool_iterations.max(1) as u64; - let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP); + let scale = iterations.min(scale_cap); message_timeout_secs.saturating_mul(scale) } @@ -362,6 +374,7 @@ struct ChannelRuntimeContext { approval_manager: Arc, activated_tools: Option>>, cost_tracking: Option, + pacing: crate::config::PacingConfig, } #[derive(Clone)] @@ -2402,8 +2415,15 @@ async fn process_channel_message( } let model_switch_callback = get_model_switch_state(); - let timeout_budget_secs = - 
channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations); + let scale_cap = ctx + .pacing + .message_timeout_scale_max + .unwrap_or(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP); + let timeout_budget_secs = channel_message_timeout_budget_secs_with_cap( + ctx.message_timeout_secs, + ctx.max_tool_iterations, + scale_cap, + ); let cost_tracking_context = ctx.cost_tracking.clone().map(|state| { crate::agent::loop_::ToolLoopCostTrackingContext::new(state.tracker, state.prices) }); @@ -2445,6 +2465,7 @@ async fn process_channel_message( ctx.tool_call_dedup_exempt.as_ref(), ctx.activated_tools.as_ref(), Some(model_switch_callback.clone()), + &ctx.pacing, ), ), ) => LlmExecutionResult::Completed(result), @@ -4638,6 +4659,7 @@ pub async fn start_channels(config: Config) -> Result<()> { tracker, prices: Arc::new(config.cost.prices.clone()), }), + pacing: config.pacing.clone(), }); // Hydrate in-memory conversation histories from persisted JSONL session files. @@ -4734,6 +4756,49 @@ mod tests { ); } + #[test] + fn channel_message_timeout_budget_with_custom_scale_cap() { + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 8, 8), + 300 * 8 + ); + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 20, 8), + 300 * 8 + ); + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 10, 1), + 300 + ); + } + + #[test] + fn pacing_config_defaults_preserve_existing_behavior() { + let pacing = crate::config::PacingConfig::default(); + assert!(pacing.step_timeout_secs.is_none()); + assert!(pacing.loop_detection_min_elapsed_secs.is_none()); + assert!(pacing.loop_ignore_tools.is_empty()); + assert!(pacing.message_timeout_scale_max.is_none()); + } + + #[test] + fn pacing_message_timeout_scale_max_overrides_default_cap() { + // Custom cap of 8 scales budget proportionally + assert_eq!( + channel_message_timeout_budget_secs_with_cap(300, 10, 8), + 300 * 8 + ); + // Default cap produces the standard behavior + assert_eq!( + 
channel_message_timeout_budget_secs_with_cap( + 300, + 10, + CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP + ), + 300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP + ); + } + #[test] fn context_window_overflow_error_detector_matches_known_messages() { let overflow_err = anyhow::anyhow!( @@ -4938,6 +5003,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; assert!(compact_sender_history(&ctx, &sender)); @@ -5054,6 +5120,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; append_sender_turn(&ctx, &sender, ChatMessage::user("hello")); @@ -5126,6 +5193,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; assert!(rollback_orphan_user_turn(&ctx, &sender, "pending")); @@ -5217,6 +5285,7 @@ mod tests { )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }; assert!(rollback_orphan_user_turn( @@ -5755,6 +5824,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -5836,6 +5906,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -5931,6 +6002,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6011,6 +6083,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6101,6 +6174,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + 
pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6212,6 +6286,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6304,6 +6379,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6411,6 +6487,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6503,6 +6580,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6585,6 +6663,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -6782,6 +6861,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(4); @@ -6884,6 +6964,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); @@ -7001,6 +7082,7 @@ BTC is currently around $65,000 based on latest tool output."# activated_tools: None, cost_tracking: None, query_classification: crate::config::QueryClassificationConfig::default(), + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); @@ -7115,6 +7197,7 @@ BTC is currently around $65,000 based on latest tool output."# 
)), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); @@ -7211,6 +7294,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -7291,6 +7375,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8057,6 +8142,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8188,6 +8274,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8359,6 +8446,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -8467,6 +8555,7 @@ BTC is currently around $65,000 based on latest tool output."# )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9039,6 +9128,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); // Simulate a photo attachment message with [IMAGE:] marker. 
@@ -9126,6 +9216,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9288,6 +9379,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9399,6 +9491,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9502,6 +9595,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9625,6 +9719,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); process_channel_message( @@ -9886,6 +9981,7 @@ This is an example JSON object for profile settings."#; )), activated_tools: None, cost_tracking: None, + pacing: crate::config::PacingConfig::default(), }); let (tx, rx) = tokio::sync::mpsc::channel::(8); diff --git a/src/config/mod.rs b/src/config/mod.rs index 5d186ad7a..ba92ad14c 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -21,7 +21,7 @@ pub use schema::{ MatrixConfig, McpConfig, McpServerConfig, McpTransport, MemoryConfig, Microsoft365Config, ModelRouteConfig, MultimodalConfig, NextcloudTalkConfig, NodeTransportConfig, NodesConfig, NotionConfig, ObservabilityConfig, OpenAiSttConfig, OpenAiTtsConfig, OpenVpnTunnelConfig, - OtpConfig, OtpMethod, PeripheralBoardConfig, PeripheralsConfig, PluginsConfig, + OtpConfig, OtpMethod, PacingConfig, PeripheralBoardConfig, PeripheralsConfig, PluginsConfig, ProjectIntelConfig, ProxyConfig, ProxyScope, QdrantConfig, QueryClassificationConfig, ReliabilityConfig, ResourceLimitsConfig, 
RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig, SecurityOpsConfig, SkillCreationConfig, diff --git a/src/config/schema.rs b/src/config/schema.rs index 280ed8c3a..5de9d8402 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -165,6 +165,10 @@ pub struct Config { #[serde(default)] pub agent: AgentConfig, + /// Pacing controls for slow/local LLM workloads (`[pacing]`). + #[serde(default)] + pub pacing: PacingConfig, + /// Skills loading and community repository behavior (`[skills]`). #[serde(default)] pub skills: SkillsConfig, @@ -1277,6 +1281,43 @@ impl Default for AgentConfig { } } +// ── Pacing ──────────────────────────────────────────────────────── + +/// Pacing controls for slow/local LLM workloads (`[pacing]` section). +/// +/// All fields are optional and default to values that preserve existing +/// behavior. When set, they extend — not replace — the existing timeout +/// and loop-detection subsystems. +#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] +pub struct PacingConfig { + /// Per-step timeout in seconds: the maximum time allowed for a single + /// LLM inference turn, independent of the total message budget. + /// `None` means no per-step timeout (existing behavior). + #[serde(default)] + pub step_timeout_secs: Option, + + /// Minimum elapsed seconds before identical-output loop detection + /// activates. The detector only starts counting once the task has been + /// running at least this long, giving long-running workflows a grace + /// period. `None` disables identical-output detection (existing behavior). + #[serde(default)] + pub loop_detection_min_elapsed_secs: Option, + + /// Tool names excluded from identical-output loop + /// detection. Useful for browser workflows where `browser_screenshot` + /// structurally resembles a loop even when making progress. 
+ #[serde(default)] + pub loop_ignore_tools: Vec, + + /// Override for the hardcoded timeout scaling cap (default: 4). + /// The channel message timeout budget is computed as: + /// `message_timeout_secs * min(max_tool_iterations, message_timeout_scale_max)` + /// Raising this value lets long multi-step tasks with slow local models + /// receive a proportionally larger budget without inflating the base timeout. + #[serde(default)] + pub message_timeout_scale_max: Option, +} + /// Skills loading configuration (`[skills]` section). #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "snake_case")] @@ -6727,6 +6768,7 @@ impl Default for Config { reliability: ReliabilityConfig::default(), scheduler: SchedulerConfig::default(), agent: AgentConfig::default(), + pacing: PacingConfig::default(), skills: SkillsConfig::default(), model_routes: Vec::new(), embedding_routes: Vec::new(), @@ -9672,6 +9714,7 @@ default_temperature = 0.7 google_workspace: GoogleWorkspaceConfig::default(), proxy: ProxyConfig::default(), agent: AgentConfig::default(), + pacing: PacingConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), peripherals: PeripheralsConfig::default(), @@ -9943,6 +9986,47 @@ tool_dispatcher = "xml" assert_eq!(parsed.agent.tool_dispatcher, "xml"); } + #[test] + async fn pacing_config_defaults_are_all_none_or_empty() { + let cfg = PacingConfig::default(); + assert!(cfg.step_timeout_secs.is_none()); + assert!(cfg.loop_detection_min_elapsed_secs.is_none()); + assert!(cfg.loop_ignore_tools.is_empty()); + assert!(cfg.message_timeout_scale_max.is_none()); + } + + #[test] + async fn pacing_config_deserializes_from_toml() { + let raw = r#" +default_temperature = 0.7 +[pacing] +step_timeout_secs = 120 +loop_detection_min_elapsed_secs = 60 +loop_ignore_tools = ["browser_screenshot", "browser_navigate"] +message_timeout_scale_max = 8 +"#; + let parsed: Config = toml::from_str(raw).unwrap(); + 
assert_eq!(parsed.pacing.step_timeout_secs, Some(120)); + assert_eq!(parsed.pacing.loop_detection_min_elapsed_secs, Some(60)); + assert_eq!( + parsed.pacing.loop_ignore_tools, + vec!["browser_screenshot", "browser_navigate"] + ); + assert_eq!(parsed.pacing.message_timeout_scale_max, Some(8)); + } + + #[test] + async fn pacing_config_absent_preserves_defaults() { + let raw = r#" +default_temperature = 0.7 +"#; + let parsed: Config = toml::from_str(raw).unwrap(); + assert!(parsed.pacing.step_timeout_secs.is_none()); + assert!(parsed.pacing.loop_detection_min_elapsed_secs.is_none()); + assert!(parsed.pacing.loop_ignore_tools.is_empty()); + assert!(parsed.pacing.message_timeout_scale_max.is_none()); + } + #[tokio::test] async fn sync_directory_handles_existing_directory() { let dir = std::env::temp_dir().join(format!( @@ -10011,6 +10095,7 @@ tool_dispatcher = "xml" google_workspace: GoogleWorkspaceConfig::default(), proxy: ProxyConfig::default(), agent: AgentConfig::default(), + pacing: PacingConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), peripherals: PeripheralsConfig::default(), diff --git a/src/onboard/wizard.rs b/src/onboard/wizard.rs index 156cad8ae..36a83a1f4 100644 --- a/src/onboard/wizard.rs +++ b/src/onboard/wizard.rs @@ -154,6 +154,7 @@ pub async fn run_wizard(force: bool) -> Result { reliability: crate::config::ReliabilityConfig::default(), scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), + pacing: crate::config::PacingConfig::default(), skills: crate::config::SkillsConfig::default(), model_routes: Vec::new(), embedding_routes: Vec::new(), @@ -576,6 +577,7 @@ async fn run_quick_setup_with_home( reliability: crate::config::ReliabilityConfig::default(), scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), + pacing: crate::config::PacingConfig::default(), skills: 
crate::config::SkillsConfig::default(), model_routes: Vec::new(), embedding_routes: Vec::new(), diff --git a/src/tools/delegate.rs b/src/tools/delegate.rs index 0e329ce2a..0970da5b7 100644 --- a/src/tools/delegate.rs +++ b/src/tools/delegate.rs @@ -530,6 +530,7 @@ impl DelegateTool { &[], None, None, + &crate::config::PacingConfig::default(), ), ) .await;