From 1e8c09d34afe00fa1878c8d8a102c335288c10eb Mon Sep 17 00:00:00 2001
From: argenis de la rosa <theonlyhennygod@gmail.com>
Date: Wed, 25 Feb 2026 17:33:32 -0500
Subject: [PATCH] fix(agent): improve iteration-limit recovery and defaults

---
 docs/config-reference.md         |  4 +--
 docs/i18n/fr/config-reference.md |  1 +
 docs/i18n/vi/config-reference.md |  4 +--
 src/agent/loop_.rs               | 20 +++++++++++++-
 src/channels/mod.rs              | 47 +++++++++++++++++++++++++++++++-
 src/config/schema.rs             |  8 +++---
 tests/agent_loop_robustness.rs   | 10 +++----
 tests/config_persistence.rs      |  6 ++--
 tests/config_schema.rs           |  4 +--
 9 files changed, 84 insertions(+), 20 deletions(-)
diff --git a/docs/config-reference.md b/docs/config-reference.md
index 99cd7c034..587f6977b 100644
--- a/docs/config-reference.md
+++ b/docs/config-reference.md
@@ -90,14 +90,14 @@ Operational note for container users:
 | Key | Default | Purpose |
 |---|---|---|
 | `compact_context` | `false` | When true: bootstrap_max_chars=6000, rag_chunk_limit=2. Use for 13B or smaller models |
-| `max_tool_iterations` | `10` | Maximum tool-call loop turns per user message across CLI, gateway, and channels |
+| `max_tool_iterations` | `20` | Maximum tool-call loop turns per user message across CLI, gateway, and channels |
 | `max_history_messages` | `50` | Maximum conversation history messages retained per session |
 | `parallel_tools` | `false` | Enable parallel tool execution within a single iteration |
 | `tool_dispatcher` | `auto` | Tool dispatch strategy |
 
 Notes:
 
-- Setting `max_tool_iterations = 0` falls back to safe default `10`.
+- Setting `max_tool_iterations = 0` falls back to safe default `20`.
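-- If a channel message exceeds this value, the runtime returns: `Agent exceeded maximum tool iterations (<value>)`.
+- If a channel message exceeds this value, the runtime pauses the turn and replies with a notice starting `⚠️ Reached tool-iteration limit (<value>)` instead of an error; the user can reply `continue` to resume or raise `max_tool_iterations`.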
 - In CLI, gateway, and channel tool loops, multiple independent tool calls are executed concurrently by default when the pending calls do not require approval gating; result order remains stable.
 - `parallel_tools` applies to the `Agent::turn()` API surface. It does not gate the runtime loop used by CLI, gateway, or channel handlers.
diff --git a/docs/i18n/fr/config-reference.md b/docs/i18n/fr/config-reference.md
index 72758e0dc..43672a73f 100644
--- a/docs/i18n/fr/config-reference.md
+++ b/docs/i18n/fr/config-reference.md
@@ -20,3 +20,4 @@ Source anglaise:
 ## Notes de mise à jour
 
 - Ajout de `provider.reasoning_level` (OpenAI Codex `/responses`). Voir la source anglaise pour les détails.
+- Valeur par défaut de `agent.max_tool_iterations` augmentée à `20` (fallback sûr si `0`).
diff --git a/docs/i18n/vi/config-reference.md b/docs/i18n/vi/config-reference.md
index cc09d5160..bdcec1561 100644
--- a/docs/i18n/vi/config-reference.md
+++ b/docs/i18n/vi/config-reference.md
@@ -74,14 +74,14 @@ Lưu ý cho người dùng container:
 | Khóa | Mặc định | Mục đích |
 |---|---|---|
 | `compact_context` | `false` | Khi bật: bootstrap_max_chars=6000, rag_chunk_limit=2. Dùng cho model 13B trở xuống |
-| `max_tool_iterations` | `10` | Số vòng lặp tool-call tối đa mỗi tin nhắn trên CLI, gateway và channels |
+| `max_tool_iterations` | `20` | Số vòng lặp tool-call tối đa mỗi tin nhắn trên CLI, gateway và channels |
 | `max_history_messages` | `50` | Số tin nhắn lịch sử tối đa giữ lại mỗi phiên |
 | `parallel_tools` | `false` | Bật thực thi tool song song trong một lượt |
 | `tool_dispatcher` | `auto` | Chiến lược dispatch tool |
 
 Lưu ý:
 
-- Đặt `max_tool_iterations = 0` sẽ dùng giá trị mặc định an toàn `10`.
+- Đặt `max_tool_iterations = 0` sẽ dùng giá trị mặc định an toàn `20`.
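-- Nếu tin nhắn kênh vượt giá trị này, runtime trả về: `Agent exceeded maximum tool iterations (<value>)`.
+- Nếu tin nhắn kênh vượt giá trị này, runtime tạm dừng lượt và gửi thông báo bắt đầu bằng `⚠️ Reached tool-iteration limit (<value>)` thay vì báo lỗi; người dùng có thể trả lời `continue` để tiếp tục hoặc tăng `max_tool_iterations`.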
 - Trong vòng lặp tool của CLI, gateway và channel, các lời gọi tool độc lập được thực thi đồng thời mặc định khi không cần phê duyệt; thứ tự kết quả giữ ổn định.
 - `parallel_tools` áp dụng cho API `Agent::turn()`. Không ảnh hưởng đến vòng lặp runtime của CLI, gateway hay channel.
diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs
index a38e3a3fa..7c504bf17 100644
--- a/src/agent/loop_.rs
+++ b/src/agent/loop_.rs
@@ -46,7 +46,7 @@ const STREAM_CHUNK_MIN_CHARS: usize = 80;
 
 /// Default maximum agentic tool-use iterations per user message to prevent runaway loops.
 /// Used as a safe fallback when `max_tool_iterations` is unset or configured as zero.
-const DEFAULT_MAX_TOOL_ITERATIONS: usize = 10;
+const DEFAULT_MAX_TOOL_ITERATIONS: usize = 20;
 
 /// Minimum user-message length (in chars) for auto-save to memory.
 /// Matches the channel-side constant in `channels/mod.rs`.
@@ -272,6 +272,14 @@ pub(crate) fn is_tool_loop_cancelled(err: &anyhow::Error) -> bool {
     err.chain().any(|source| source.is::<ToolLoopCancelled>())
 }
 
+/// Returns `true` when any error in `err`'s chain renders text containing the
+/// tool-iteration-limit message. Detection is by message text rather than by type,
+/// so `MARKER` must stay in sync with the message emitted when the limit is hit.
+pub(crate) fn is_tool_iteration_limit_error(err: &anyhow::Error) -> bool {
+    const MARKER: &str = "Agent exceeded maximum tool iterations";
+    err.chain().any(|cause| cause.to_string().contains(MARKER))
+}
+
 /// Execute a single turn of the agent loop: send messages, parse tool calls,
 /// execute tools, and loop until the LLM produces a final text response.
 /// When `silent` is true, suppresses stdout (for channel use).
@@ -1568,6 +1576,22 @@ pub async fn run(
             {
                 Ok(resp) => resp,
                 Err(e) => {
+                    if is_tool_iteration_limit_error(&e) {
+                        // Report the limit per the documented fallback rule: only `0`
+                        // maps to the default; any other configured value (including
+                        // one below the default) is shown as configured.
+                        let effective_limit = match config.agent.max_tool_iterations {
+                            0 => DEFAULT_MAX_TOOL_ITERATIONS,
+                            configured => configured,
+                        };
+                        let pause_notice = format!(
+                            "⚠️ Reached tool-iteration limit ({effective_limit}). Context and progress are preserved. \
+                            Reply \"continue\" to resume, or increase `agent.max_tool_iterations` in config."
+                        );
+                        history.push(ChatMessage::assistant(&pause_notice));
+                        eprintln!("\n{pause_notice}\n");
+                        continue;
+                    }
                     eprintln!("\nError: {e}\n");
                     continue;
                 }
diff --git a/src/channels/mod.rs b/src/channels/mod.rs
index 6317d448d..564e7d991 100644
--- a/src/channels/mod.rs
+++ b/src/channels/mod.rs
@@ -1593,6 +1593,10 @@ fn is_context_window_overflow_error(err: &anyhow::Error) -> bool {
     .any(|hint| lower.contains(hint))
 }
 
+fn is_tool_iteration_limit_error(err: &anyhow::Error) -> bool {
+    crate::agent::loop_::is_tool_iteration_limit_error(err) // delegate to the agent loop's detector
+}
+
 fn load_cached_model_preview(workspace_dir: &Path, provider_name: &str) -> Vec<String> {
     let cache_path = workspace_dir.join("state").join(MODEL_CACHE_FILE);
     let Ok(raw) = std::fs::read_to_string(cache_path) else {
@@ -3222,6 +3226,46 @@ async fn process_channel_message(
                             .await;
                     }
                 }
+            } else if is_tool_iteration_limit_error(&e) {
+                let limit = ctx.max_tool_iterations.max(1);
+                let pause_text = format!(
+                    "⚠️ Reached tool-iteration limit ({limit}) for this turn. Context and progress were preserved. Reply \"continue\" to resume, or increase `agent.max_tool_iterations`."
+                );
+                runtime_trace::record_event(
+                    "channel_message_error",
+                    Some(msg.channel.as_str()),
+                    Some(route.provider.as_str()),
+                    Some(route.model.as_str()),
+                    None,
+                    Some(false),
+                    Some("tool iteration limit reached"),
+                    serde_json::json!({
+                        "sender": msg.sender,
+                        "elapsed_ms": started_at.elapsed().as_millis(),
+                        "max_tool_iterations": limit,
+                    }),
+                );
+                append_sender_turn(
+                    ctx.as_ref(),
+                    &history_key,
+                    ChatMessage::assistant(
+                        "[Task paused at tool-iteration limit — context preserved. Ask to continue.]",
+                    ),
+                );
+                if let Some(channel) = target_channel.as_ref() {
+                    if let Some(ref draft_id) = draft_message_id {
+                        let _ = channel
+                            .finalize_draft(&msg.reply_target, draft_id, &pause_text)
+                            .await;
+                    } else {
+                        let _ = channel
+                            .send(
+                                &SendMessage::new(pause_text, &msg.reply_target)
+                                    .in_thread(msg.thread_ts.clone()),
+                            )
+                            .await;
+                    }
+                }
             } else {
                 eprintln!(
                     "  ❌ LLM error after {}ms: {e}",
@@ -7928,7 +7972,8 @@ BTC is currently around $65,000 based on latest tool output."#
         let sent_messages = channel_impl.sent_messages.lock().await;
         assert_eq!(sent_messages.len(), 1);
         assert!(sent_messages[0].starts_with("chat-iter-fail:"));
-        assert!(sent_messages[0].contains("⚠️ Error: Agent exceeded maximum tool iterations (3)"));
+        assert!(sent_messages[0].contains("⚠️ Reached tool-iteration limit (3)"));
+        assert!(sent_messages[0].contains("Context and progress were preserved"));
     }
 
     struct NoopMemory;
diff --git a/src/config/schema.rs b/src/config/schema.rs
index 99aa2612e..d43fa00dd 100644
--- a/src/config/schema.rs
+++ b/src/config/schema.rs
@@ -628,8 +628,8 @@ pub struct AgentConfig {
     /// When true: bootstrap_max_chars=6000, rag_chunk_limit=2. Use for 13B or smaller models.
     #[serde(default)]
     pub compact_context: bool,
-    /// Maximum tool-call loop turns per user message. Default: `10`.
-    /// Setting to `0` falls back to the safe default of `10`.
+    /// Maximum tool-call loop turns per user message. Default: `20`.
+    /// Setting to `0` falls back to the safe default of `20`.
     #[serde(default = "default_agent_max_tool_iterations")]
     pub max_tool_iterations: usize,
     /// Maximum conversation history messages retained per session. Default: `50`.
@@ -644,7 +644,7 @@ pub struct AgentConfig {
 }
 
 fn default_agent_max_tool_iterations() -> usize {
-    10
+    20 // keep equal to `DEFAULT_MAX_TOOL_ITERATIONS` in `src/agent/loop_.rs`
 }
 
 fn default_agent_max_history_messages() -> usize {
@@ -7169,7 +7169,7 @@ reasoning_level = "high"
     async fn agent_config_defaults() {
         let cfg = AgentConfig::default();
         assert!(!cfg.compact_context);
-        assert_eq!(cfg.max_tool_iterations, 10);
+        assert_eq!(cfg.max_tool_iterations, 20);
         assert_eq!(cfg.max_history_messages, 50);
         assert!(!cfg.parallel_tools);
         assert_eq!(cfg.tool_dispatcher, "auto");
diff --git a/tests/agent_loop_robustness.rs b/tests/agent_loop_robustness.rs
index 1e679dc2a..beb8bc886 100644
--- a/tests/agent_loop_robustness.rs
+++ b/tests/agent_loop_robustness.rs
@@ -315,14 +315,14 @@ async fn agent_handles_mixed_tool_success_and_failure() {
 // TG4.3: Iteration limit enforcement (#777)
 // ═════════════════════════════════════════════════════════════════════════════
 
-/// Agent should not exceed max_tool_iterations (default=10) even with
+/// Agent should not exceed max_tool_iterations (default=20) even with
 /// a provider that keeps returning tool calls
 #[tokio::test]
 async fn agent_respects_max_tool_iterations() {
     let (counting_tool, count) = CountingTool::new();
 
-    // Create 20 tool call responses - more than the default limit of 10
-    let mut responses: Vec<ChatResponse> = (0..20)
+    // Create 30 tool call responses - more than the default limit of 20
+    let mut responses: Vec<ChatResponse> = (0..30)
         .map(|i| {
             tool_response(vec![ToolCall {
                 id: format!("tc_{i}"),
@@ -344,8 +344,8 @@ async fn agent_respects_max_tool_iterations() {
 
     let invocations = *count.lock().unwrap();
     assert!(
-        invocations <= 10,
-        "tool invocations ({invocations}) should not exceed default max_tool_iterations (10)"
+        invocations <= 20,
+        "tool invocations ({invocations}) should not exceed default max_tool_iterations (20)"
     );
 }
 
diff --git a/tests/config_persistence.rs b/tests/config_persistence.rs
index d70010eba..45f862f40 100644
--- a/tests/config_persistence.rs
+++ b/tests/config_persistence.rs
@@ -49,8 +49,8 @@ fn config_default_temperature_positive() {
 fn agent_config_default_max_tool_iterations() {
     let agent = AgentConfig::default();
     assert_eq!(
-        agent.max_tool_iterations, 10,
-        "default max_tool_iterations should be 10"
+        agent.max_tool_iterations, 20,
+        "default max_tool_iterations should be 20"
     );
 }
 
@@ -199,7 +199,7 @@ default_temperature = 0.7
     let parsed: Config = toml::from_str(minimal_toml).expect("minimal TOML should parse");
 
     // Agent config should use defaults
-    assert_eq!(parsed.agent.max_tool_iterations, 10);
+    assert_eq!(parsed.agent.max_tool_iterations, 20);
     assert_eq!(parsed.agent.max_history_messages, 50);
     assert!(!parsed.agent.compact_context);
 }
diff --git a/tests/config_schema.rs b/tests/config_schema.rs
index e56ab6751..f1a927395 100644
--- a/tests/config_schema.rs
+++ b/tests/config_schema.rs
@@ -231,7 +231,7 @@ fn config_empty_toml_requires_temperature() {
 fn config_minimal_toml_with_temperature_uses_defaults() {
     let toml_str = "default_temperature = 0.7\n";
     let parsed: Config = toml::from_str(toml_str).expect("minimal TOML should parse");
-    assert_eq!(parsed.agent.max_tool_iterations, 10);
+    assert_eq!(parsed.agent.max_tool_iterations, 20);
     assert_eq!(parsed.gateway.port, 42617);
 }
 
@@ -240,7 +240,7 @@ fn config_only_temperature_parses() {
     let toml_str = "default_temperature = 1.2\n";
     let parsed: Config = toml::from_str(toml_str).expect("temperature-only TOML should parse");
     assert!((parsed.default_temperature - 1.2).abs() < f64::EPSILON);
-    assert_eq!(parsed.agent.max_tool_iterations, 10);
+    assert_eq!(parsed.agent.max_tool_iterations, 20);
 }
 
 #[test]