From 483d3c0853b698692fef7bed5be4f3a2e3f02e22 Mon Sep 17 00:00:00 2001
From: Argenis <theonlyhennygod@gmail.com>
Date: Wed, 11 Mar 2026 20:35:33 -0400
Subject: [PATCH] fix(ollama): strip <think> tags from Qwen responses and
 validate tool calls (#3079) (#3241)

- Strip `<think>...</think>` blocks in parse_tool_calls(), XmlToolDispatcher,
  and OllamaProvider before processing tool-call XML
- Add effective_content() fallback: when content is empty after stripping
  think tags, check the thinking field for tool-call XML
- Add strip_think_tags() to ollama.rs, loop_.rs, and dispatcher.rs
- Add comprehensive tests for think-tag stripping and tool-call parsing

Fixes #3079

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/agent/dispatcher.rs |  56 +++++++++-
 src/agent/loop_.rs      | 100 ++++++++++++++++++
 src/providers/ollama.rs | 226 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 369 insertions(+), 13 deletions(-)
diff --git a/src/agent/dispatcher.rs b/src/agent/dispatcher.rs
index 16084cc94..c160d1cd8 100644
--- a/src/agent/dispatcher.rs
+++ b/src/agent/dispatcher.rs
@@ -31,9 +31,12 @@ pub struct XmlToolDispatcher;
 
 impl XmlToolDispatcher {
     fn parse_xml_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
+        // Strip `<think>...</think>` blocks before parsing tool calls.
+        // Qwen and other reasoning models may embed chain-of-thought inline.
+        let cleaned = Self::strip_think_tags(response);
         let mut text_parts = Vec::new();
         let mut calls = Vec::new();
-        let mut remaining = response;
+        let mut remaining = cleaned.as_str();
 
         while let Some(start) = remaining.find("<tool_call>") {
             let before = &remaining[..start];
@@ -81,6 +84,26 @@ impl XmlToolDispatcher {
         (text_parts.join("\n"), calls)
     }
 
+    /// Remove `<think>...</think>` blocks from model output.
+    fn strip_think_tags(s: &str) -> String {
+        let mut result = String::with_capacity(s.len());
+        let mut rest = s;
+        loop {
+            if let Some(start) = rest.find("<think>") {
+                result.push_str(&rest[..start]);
+                if let Some(end) = rest[start..].find("</think>") {
+                    rest = &rest[start + end + "</think>".len()..];
+                } else {
+                    break;
+                }
+            } else {
+                result.push_str(rest);
+                break;
+            }
+        }
+        result
+    }
+
     pub fn tool_specs(tools: &[Box<dyn Tool>]) -> Vec<ToolSpec> {
         tools.iter().map(|tool| tool.spec()).collect()
     }
@@ -259,6 +282,37 @@ mod tests {
         assert_eq!(calls[0].name, "shell");
     }
 
+    #[test]
+    fn xml_dispatcher_strips_think_before_tool_call() {
+        let response = ChatResponse {
+            text: Some(
+                "<think>I should list files</think>\n<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}</tool_call>"
+                    .into(),
+            ),
+            tool_calls: vec![],
+            usage: None,
+            reasoning_content: None,
+        };
+        let dispatcher = XmlToolDispatcher;
+        let (text, calls) = dispatcher.parse_response(&response);
+        assert_eq!(calls.len(), 1);
+        assert_eq!(calls[0].name, "shell");
+        assert!(!text.contains("<think>"), "think tags should be stripped from text");
+    }
+
+    #[test]
+    fn xml_dispatcher_think_only_returns_no_calls() {
+        let response = ChatResponse {
+            text: Some("<think>Just thinking</think>".into()),
+            tool_calls: vec![],
+            usage: None,
+            reasoning_content: None,
+        };
+        let dispatcher = XmlToolDispatcher;
+        let (_, calls) = dispatcher.parse_response(&response);
+        assert!(calls.is_empty());
+    }
+
     #[test]
     fn native_dispatcher_roundtrip() {
         let response = ChatResponse {
diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs
index 4f8e51bf4..f0cb2c0e7 100644
--- a/src/agent/loop_.rs
+++ b/src/agent/loop_.rs
@@ -1312,6 +1312,13 @@ fn parse_glm_shortened_body(body: &str) -> Option<ParsedToolCall> {
 ///
 /// Also supports JSON with `tool_calls` array from OpenAI-format responses.
 fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
+    // Strip `<think>...</think>` blocks before parsing.  Qwen and other
+    // reasoning models embed chain-of-thought inline in the response text;
+    // these tags can interfere with `<tool_call>` extraction and must be
+    // removed first.
+    let cleaned = strip_think_tags(response);
+    let response = cleaned.as_str();
+
     let mut text_parts = Vec::new();
     let mut calls = Vec::new();
     let mut remaining = response;
@@ -1694,6 +1701,30 @@ fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
     (text_parts.join("\n"), calls)
 }
 
+/// Remove `<think>...</think>` blocks from model output.
+/// Qwen and other reasoning models embed chain-of-thought inline in the
+/// response text using `<think>` tags.  These must be removed before parsing
+/// tool-call tags or displaying output.
+fn strip_think_tags(s: &str) -> String {
+    let mut result = String::with_capacity(s.len());
+    let mut rest = s;
+    loop {
+        if let Some(start) = rest.find("<think>") {
+            result.push_str(&rest[..start]);
+            if let Some(end) = rest[start..].find("</think>") {
+                rest = &rest[start + end + "</think>".len()..];
+            } else {
+                // Unclosed tag: drop the rest to avoid leaking partial reasoning.
+                break;
+            }
+        } else {
+            result.push_str(rest);
+            break;
+        }
+    }
+    result.trim().to_string()
+}
+
 /// Strip prompt-guided tool artifacts from visible output while preserving
 /// raw model text in history for future turns.
 fn strip_tool_result_blocks(text: &str) -> String {
@@ -1701,6 +1732,8 @@ fn strip_tool_result_blocks(text: &str) -> String {
         LazyLock::new(|| Regex::new(r"(?s)<tool_result[^>]*>.*?</tool_result>").unwrap());
     static THINKING_RE: LazyLock<Regex> =
         LazyLock::new(|| Regex::new(r"(?s)<thinking>.*?</thinking>").unwrap());
+    static THINK_RE: LazyLock<Regex> =
+        LazyLock::new(|| Regex::new(r"(?s)<think>.*?</think>").unwrap());
     static TOOL_RESULTS_PREFIX_RE: LazyLock<Regex> =
         LazyLock::new(|| Regex::new(r"(?m)^\[Tool results\]\s*\n?").unwrap());
     static EXCESS_BLANK_LINES_RE: LazyLock<Regex> =
@@ -1708,6 +1741,7 @@ fn strip_tool_result_blocks(text: &str) -> String {
 
     let result = TOOL_RESULT_RE.replace_all(text, "");
     let result = THINKING_RE.replace_all(&result, "");
+    let result = THINK_RE.replace_all(&result, "");
     let result = TOOL_RESULTS_PREFIX_RE.replace_all(&result, "");
     let result = EXCESS_BLANK_LINES_RE.replace_all(result.trim(), "\n\n");
 
@@ -4874,6 +4908,72 @@ Final answer."#;
         assert_eq!(strip_tool_result_blocks(input), "Here is the answer.");
     }
 
+    #[test]
+    fn strip_tool_result_blocks_removes_think_tags() {
+        let input = "<think>\nLet me reason...\n</think>\nHere is the answer.";
+        assert_eq!(strip_tool_result_blocks(input), "Here is the answer.");
+    }
+
+    #[test]
+    fn strip_think_tags_removes_single_block() {
+        assert_eq!(
+            strip_think_tags("<think>reasoning</think>Hello"),
+            "Hello"
+        );
+    }
+
+    #[test]
+    fn strip_think_tags_removes_multiple_blocks() {
+        assert_eq!(
+            strip_think_tags("<think>a</think>X<think>b</think>Y"),
+            "XY"
+        );
+    }
+
+    #[test]
+    fn strip_think_tags_handles_unclosed_block() {
+        assert_eq!(strip_think_tags("visible<think>hidden"), "visible");
+    }
+
+    #[test]
+    fn strip_think_tags_preserves_text_without_tags() {
+        assert_eq!(strip_think_tags("plain text"), "plain text");
+    }
+
+    #[test]
+    fn parse_tool_calls_strips_think_before_tool_call() {
+        // Qwen regression: <think> tags before <tool_call> tags should be
+        // stripped, allowing the tool call to be parsed correctly.
+        let response = "<think>I need to list files to understand the project</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n</tool_call>";
+        let (text, calls) = parse_tool_calls(response);
+        assert_eq!(calls.len(), 1, "should parse tool call after stripping think tags");
+        assert_eq!(calls[0].name, "shell");
+        assert_eq!(
+            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
+            "ls"
+        );
+        assert!(text.is_empty(), "think content should not appear as text");
+    }
+
+    #[test]
+    fn parse_tool_calls_strips_think_only_returns_empty() {
+        // When response is only <think> tags with no tool calls, should
+        // return empty text and no calls.
+        let response = "<think>Just thinking, no action needed</think>";
+        let (text, calls) = parse_tool_calls(response);
+        assert!(calls.is_empty());
+        assert!(text.is_empty());
+    }
+
+    #[test]
+    fn parse_tool_calls_handles_qwen_think_with_multiple_tool_calls() {
+        let response = "<think>I need to check two things</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n</tool_call>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"pwd\"}}\n</tool_call>";
+        let (_, calls) = parse_tool_calls(response);
+        assert_eq!(calls.len(), 2);
+        assert_eq!(calls[0].arguments.get("command").unwrap().as_str().unwrap(), "date");
+        assert_eq!(calls[1].arguments.get("command").unwrap().as_str().unwrap(), "pwd");
+    }
+
     #[test]
     fn strip_tool_result_blocks_preserves_clean_text() {
         let input = "Hello, this is a normal response.";
diff --git a/src/providers/ollama.rs b/src/providers/ollama.rs
index 0e2310904..097ff0348 100644
--- a/src/providers/ollama.rs
+++ b/src/providers/ollama.rs
@@ -169,13 +169,66 @@ impl OllamaProvider {
     }
 
     fn normalize_response_text(content: String) -> Option<String> {
-        if content.trim().is_empty() {
+        let stripped = Self::strip_think_tags(&content);
+        if stripped.trim().is_empty() {
             None
         } else {
-            Some(content)
+            Some(stripped)
         }
     }
 
+    /// Remove `<think>...</think>` blocks from model output.
+    /// Qwen and other reasoning models may embed chain-of-thought inline
+    /// in the `content` field using `<think>` tags.  These must be stripped
+    /// before returning text to the user or parsing for tool calls.
+    fn strip_think_tags(s: &str) -> String {
+        let mut result = String::with_capacity(s.len());
+        let mut rest = s;
+        loop {
+            if let Some(start) = rest.find("<think>") {
+                result.push_str(&rest[..start]);
+                if let Some(end) = rest[start..].find("</think>") {
+                    rest = &rest[start + end + "</think>".len()..];
+                } else {
+                    // Unclosed tag: drop the rest to avoid leaking partial reasoning.
+                    break;
+                }
+            } else {
+                result.push_str(rest);
+                break;
+            }
+        }
+        result.trim().to_string()
+    }
+
+    /// Derive the effective text content from a response, stripping `<think>` tags
+    /// and falling back to the `thinking` field when `content` is empty after
+    /// stripping.  This ensures that tool-call XML tags embedded alongside (or
+    /// after) thinking blocks are preserved for downstream parsing.
+    fn effective_content(content: &str, thinking: Option<&str>) -> Option<String> {
+        // First try the content field with think tags stripped.
+        let stripped = Self::strip_think_tags(content);
+        if !stripped.trim().is_empty() {
+            return Some(stripped);
+        }
+
+        // Content was empty or only thinking — check the thinking field.
+        // Some models (Qwen) put the full output including tool-call XML in
+        // the thinking field when `think: true` is set.
+        if let Some(thinking) = thinking.map(str::trim).filter(|t| !t.is_empty()) {
+            let stripped_thinking = Self::strip_think_tags(thinking);
+            if !stripped_thinking.trim().is_empty() {
+                tracing::debug!(
+                    "Ollama: using thinking field as effective content ({} chars)",
+                    stripped_thinking.len()
+                );
+                return Some(stripped_thinking);
+            }
+        }
+
+        None
+    }
+
     fn fallback_text_for_empty_content(model: &str, thinking: Option<&str>) -> String {
         if let Some(thinking) = thinking.map(str::trim).filter(|value| !value.is_empty()) {
             let thinking_log_excerpt: String = thinking.chars().take(100).collect();
@@ -537,9 +590,11 @@ impl Provider for OllamaProvider {
             return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
         }
 
-        // Plain text response
-        let content = response.message.content;
-        if let Some(content) = Self::normalize_response_text(content) {
+        // Plain text response — strip <think> tags and fall back to thinking field.
+        if let Some(content) = Self::effective_content(
+            &response.message.content,
+            response.message.thinking.as_deref(),
+        ) {
             return Ok(content);
         }
 
@@ -578,9 +633,11 @@ impl Provider for OllamaProvider {
             return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
         }
 
-        // Plain text response
-        let content = response.message.content;
-        if let Some(content) = Self::normalize_response_text(content) {
+        // Plain text response — strip <think> tags and fall back to thinking field.
+        if let Some(content) = Self::effective_content(
+            &response.message.content,
+            response.message.thinking.as_deref(),
+        ) {
             return Ok(content);
         }
 
@@ -652,9 +709,15 @@ impl Provider for OllamaProvider {
             });
         }
 
-        // Plain text response.
-        let content = response.message.content;
-        let text = if let Some(content) = Self::normalize_response_text(content) {
+        // No native tool calls — use the effective content (content with
+        // `<think>` tags stripped, falling back to thinking field).
+        // The loop_.rs `parse_tool_calls` will extract any XML-style tool
+        // calls from the text, so preserve `<tool_call>` tags here.
+        let effective = Self::effective_content(
+            &response.message.content,
+            response.message.thinking.as_deref(),
+        );
+        let text = if let Some(content) = effective {
             content
         } else {
             Self::fallback_text_for_empty_content(
@@ -868,7 +931,27 @@ mod tests {
         );
         assert_eq!(
             OllamaProvider::normalize_response_text(" hello ".to_string()),
-            Some(" hello ".to_string())
+            Some("hello".to_string())
+        );
+    }
+
+    #[test]
+    fn normalize_response_text_strips_think_tags() {
+        assert_eq!(
+            OllamaProvider::normalize_response_text(
+                "<think>reasoning</think> hello".to_string()
+            ),
+            Some("hello".to_string())
+        );
+    }
+
+    #[test]
+    fn normalize_response_text_rejects_think_only_content() {
+        assert_eq!(
+            OllamaProvider::normalize_response_text(
+                "<think>only thinking here</think>".to_string()
+            ),
+            None
         );
     }
 
@@ -1069,4 +1152,123 @@ mod tests {
         assert!(resp.prompt_eval_count.is_none());
         assert!(resp.eval_count.is_none());
     }
+
+    // ═══════════════════════════════════════════════════════════════════════
+    // <think> tag stripping tests
+    // ═══════════════════════════════════════════════════════════════════════
+
+    #[test]
+    fn strip_think_tags_removes_single_block() {
+        let input = "<think>internal reasoning</think>Hello world";
+        assert_eq!(OllamaProvider::strip_think_tags(input), "Hello world");
+    }
+
+    #[test]
+    fn strip_think_tags_removes_multiple_blocks() {
+        let input = "<think>first</think>A<think>second</think>B";
+        assert_eq!(OllamaProvider::strip_think_tags(input), "AB");
+    }
+
+    #[test]
+    fn strip_think_tags_handles_unclosed_block() {
+        let input = "visible<think>hidden tail";
+        assert_eq!(OllamaProvider::strip_think_tags(input), "visible");
+    }
+
+    #[test]
+    fn strip_think_tags_preserves_text_without_tags() {
+        let input = "plain text response";
+        assert_eq!(OllamaProvider::strip_think_tags(input), "plain text response");
+    }
+
+    #[test]
+    fn strip_think_tags_returns_empty_for_think_only() {
+        let input = "<think>only thinking</think>";
+        assert_eq!(OllamaProvider::strip_think_tags(input), "");
+    }
+
+    // ═══════════════════════════════════════════════════════════════════════
+    // effective_content tests
+    // ═══════════════════════════════════════════════════════════════════════
+
+    #[test]
+    fn effective_content_strips_think_and_returns_rest() {
+        let result = OllamaProvider::effective_content(
+            "<think>reasoning</think>\n<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}</tool_call>",
+            None,
+        );
+        assert!(result.is_some());
+        let text = result.unwrap();
+        assert!(text.contains("<tool_call>"));
+        assert!(!text.contains("<think>"));
+    }
+
+    #[test]
+    fn effective_content_falls_back_to_thinking_field() {
+        let result = OllamaProvider::effective_content(
+            "",
+            Some("<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}</tool_call>"),
+        );
+        assert!(result.is_some());
+        assert!(result.unwrap().contains("<tool_call>"));
+    }
+
+    #[test]
+    fn effective_content_returns_none_when_both_empty() {
+        assert!(OllamaProvider::effective_content("", None).is_none());
+        assert!(OllamaProvider::effective_content("", Some("")).is_none());
+        assert!(OllamaProvider::effective_content(
+            "<think>only thinking</think>",
+            Some("<think>also only thinking</think>")
+        ).is_none());
+    }
+
+    #[test]
+    fn effective_content_prefers_content_over_thinking() {
+        let result = OllamaProvider::effective_content(
+            "content text",
+            Some("thinking text"),
+        );
+        assert_eq!(result, Some("content text".to_string()));
+    }
+
+    #[test]
+    fn effective_content_uses_thinking_when_content_is_think_only() {
+        let result = OllamaProvider::effective_content(
+            "<think>just reasoning</think>",
+            Some("actual useful text from thinking field"),
+        );
+        assert_eq!(result, Some("actual useful text from thinking field".to_string()));
+    }
+
+    // ═══════════════════════════════════════════════════════════════════════
+    // Qwen tool-call regression scenario tests
+    // ═══════════════════════════════════════════════════════════════════════
+
+    #[test]
+    fn qwen_think_with_tool_call_in_content_preserved() {
+        // Qwen produces <think> tags followed by <tool_call> in content,
+        // with no structured tool_calls. The <tool_call> tags must survive
+        // for downstream parse_tool_calls to extract them.
+        let content = "<think>I should list files</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n</tool_call>";
+        let result = OllamaProvider::effective_content(content, None);
+        assert!(result.is_some());
+        let text = result.unwrap();
+        assert!(text.contains("<tool_call>"));
+        assert!(text.contains("shell"));
+        assert!(!text.contains("<think>"));
+    }
+
+    #[test]
+    fn qwen_thinking_field_with_tool_call_xml_extracted() {
+        // When think=true, Ollama separates thinking, but Qwen may put tool
+        // call XML in the thinking field with empty content.
+        let content = "";
+        let thinking = "I need to check the date\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n</tool_call>";
+        let result = OllamaProvider::effective_content(content, Some(thinking));
+        assert!(result.is_some());
+        let text = result.unwrap();
+        assert!(text.contains("<tool_call>"));
+        assert!(text.contains("date"));
+    }
 }