From 483d3c0853b698692fef7bed5be4f3a2e3f02e22 Mon Sep 17 00:00:00 2001 From: Argenis Date: Wed, 11 Mar 2026 20:35:33 -0400 Subject: [PATCH] fix(ollama): strip tags from Qwen responses and validate tool calls (#3079) (#3241) - Strip `...` blocks in parse_tool_calls(), XmlToolDispatcher, and OllamaProvider before processing tool-call XML - Add effective_content() fallback: when content is empty after stripping think tags, check the thinking field for tool-call XML - Add strip_think_tags() to ollama.rs, loop_.rs, and dispatcher.rs - Add comprehensive tests for think-tag stripping and tool-call parsing Fixes #3079 Co-authored-by: Claude Opus 4.6 --- src/agent/dispatcher.rs | 56 +++++++++- src/agent/loop_.rs | 100 ++++++++++++++++++ src/providers/ollama.rs | 226 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 369 insertions(+), 13 deletions(-) diff --git a/src/agent/dispatcher.rs b/src/agent/dispatcher.rs index 16084cc94..c160d1cd8 100644 --- a/src/agent/dispatcher.rs +++ b/src/agent/dispatcher.rs @@ -31,9 +31,12 @@ pub struct XmlToolDispatcher; impl XmlToolDispatcher { fn parse_xml_tool_calls(response: &str) -> (String, Vec) { + // Strip `...` blocks before parsing tool calls. + // Qwen and other reasoning models may embed chain-of-thought inline. + let cleaned = Self::strip_think_tags(response); let mut text_parts = Vec::new(); let mut calls = Vec::new(); - let mut remaining = response; + let mut remaining = cleaned.as_str(); while let Some(start) = remaining.find("") { let before = &remaining[..start]; @@ -81,6 +84,26 @@ impl XmlToolDispatcher { (text_parts.join("\n"), calls) } + /// Remove `...` blocks from model output. + fn strip_think_tags(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut rest = s; + loop { + if let Some(start) = rest.find("") { + result.push_str(&rest[..start]); + if let Some(end) = rest[start..].find("") { + rest = &rest[start + end + "".len()..]; + } else { + break; + } + } else { + result.push_str(rest); + break; + } + } + result + } + pub fn tool_specs(tools: &[Box]) -> Vec { tools.iter().map(|tool| tool.spec()).collect() } @@ -259,6 +282,37 @@ mod tests { assert_eq!(calls[0].name, "shell"); } + #[test] + fn xml_dispatcher_strips_think_before_tool_call() { + let response = ChatResponse { + text: Some( + "I should list files\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}" + .into(), + ), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }; + let dispatcher = XmlToolDispatcher; + let (text, calls) = dispatcher.parse_response(&response); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].name, "shell"); + assert!(!text.contains(""), "think tags should be stripped from text"); + } + + #[test] + fn xml_dispatcher_think_only_returns_no_calls() { + let response = ChatResponse { + text: Some("Just thinking".into()), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }; + let dispatcher = XmlToolDispatcher; + let (_, calls) = dispatcher.parse_response(&response); + assert!(calls.is_empty()); + } + #[test] fn native_dispatcher_roundtrip() { let response = ChatResponse { diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 4f8e51bf4..f0cb2c0e7 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -1312,6 +1312,13 @@ fn parse_glm_shortened_body(body: &str) -> Option { /// /// Also supports JSON with `tool_calls` array from OpenAI-format responses. fn parse_tool_calls(response: &str) -> (String, Vec) { + // Strip `...` blocks before parsing. Qwen and other + // reasoning models embed chain-of-thought inline in the response text; + // these tags can interfere with `` extraction and must be + // removed first. + let cleaned = strip_think_tags(response); + let response = cleaned.as_str(); + let mut text_parts = Vec::new(); let mut calls = Vec::new(); let mut remaining = response; @@ -1694,6 +1701,30 @@ fn parse_tool_calls(response: &str) -> (String, Vec) { (text_parts.join("\n"), calls) } +/// Remove `...` blocks from model output. +/// Qwen and other reasoning models embed chain-of-thought inline in the +/// response text using `` tags. These must be removed before parsing +/// tool-call tags or displaying output. +fn strip_think_tags(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut rest = s; + loop { + if let Some(start) = rest.find("") { + result.push_str(&rest[..start]); + if let Some(end) = rest[start..].find("") { + rest = &rest[start + end + "".len()..]; + } else { + // Unclosed tag: drop the rest to avoid leaking partial reasoning. + break; + } + } else { + result.push_str(rest); + break; + } + } + result.trim().to_string() +} + /// Strip prompt-guided tool artifacts from visible output while preserving /// raw model text in history for future turns. fn strip_tool_result_blocks(text: &str) -> String { @@ -1701,6 +1732,8 @@ fn strip_tool_result_blocks(text: &str) -> String { LazyLock::new(|| Regex::new(r"(?s)]*>.*?").unwrap()); static THINKING_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?s).*?").unwrap()); + static THINK_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?s).*?").unwrap()); static TOOL_RESULTS_PREFIX_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?m)^\[Tool results\]\s*\n?").unwrap()); static EXCESS_BLANK_LINES_RE: LazyLock = @@ -1708,6 +1741,7 @@ fn strip_tool_result_blocks(text: &str) -> String { let result = TOOL_RESULT_RE.replace_all(text, ""); let result = THINKING_RE.replace_all(&result, ""); + let result = THINK_RE.replace_all(&result, ""); let result = TOOL_RESULTS_PREFIX_RE.replace_all(&result, ""); let result = EXCESS_BLANK_LINES_RE.replace_all(result.trim(), "\n\n"); @@ -4874,6 +4908,72 @@ Final answer."#; assert_eq!(strip_tool_result_blocks(input), "Here is the answer."); } + #[test] + fn strip_tool_result_blocks_removes_think_tags() { + let input = "\nLet me reason...\n\nHere is the answer."; + assert_eq!(strip_tool_result_blocks(input), "Here is the answer."); + } + + #[test] + fn strip_think_tags_removes_single_block() { + assert_eq!( + strip_think_tags("reasoningHello"), + "Hello" + ); + } + + #[test] + fn strip_think_tags_removes_multiple_blocks() { + assert_eq!( + strip_think_tags("aXbY"), + "XY" + ); + } + + #[test] + fn strip_think_tags_handles_unclosed_block() { + assert_eq!(strip_think_tags("visiblehidden"), "visible"); + } + + #[test] + fn strip_think_tags_preserves_text_without_tags() { + assert_eq!(strip_think_tags("plain text"), "plain text"); + } + + #[test] + fn parse_tool_calls_strips_think_before_tool_call() { + // Qwen regression: tags before tags should be + // stripped, allowing the tool call to be parsed correctly. + let response = "I need to list files to understand the project\n\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n"; + let (text, calls) = parse_tool_calls(response); + assert_eq!(calls.len(), 1, "should parse tool call after stripping think tags"); + assert_eq!(calls[0].name, "shell"); + assert_eq!( + calls[0].arguments.get("command").unwrap().as_str().unwrap(), + "ls" + ); + assert!(text.is_empty(), "think content should not appear as text"); + } + + #[test] + fn parse_tool_calls_strips_think_only_returns_empty() { + // When response is only tags with no tool calls, should + // return empty text and no calls. + let response = "Just thinking, no action needed"; + let (text, calls) = parse_tool_calls(response); + assert!(calls.is_empty()); + assert!(text.is_empty()); + } + + #[test] + fn parse_tool_calls_handles_qwen_think_with_multiple_tool_calls() { + let response = "I need to check two things\n\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n\n\n{\"name\":\"shell\",\"arguments\":{\"command\":\"pwd\"}}\n"; + let (_, calls) = parse_tool_calls(response); + assert_eq!(calls.len(), 2); + assert_eq!(calls[0].arguments.get("command").unwrap().as_str().unwrap(), "date"); + assert_eq!(calls[1].arguments.get("command").unwrap().as_str().unwrap(), "pwd"); + } + #[test] fn strip_tool_result_blocks_preserves_clean_text() { let input = "Hello, this is a normal response."; diff --git a/src/providers/ollama.rs b/src/providers/ollama.rs index 0e2310904..097ff0348 100644 --- a/src/providers/ollama.rs +++ b/src/providers/ollama.rs @@ -169,13 +169,66 @@ impl OllamaProvider { } fn normalize_response_text(content: String) -> Option { - if content.trim().is_empty() { + let stripped = Self::strip_think_tags(&content); + if stripped.trim().is_empty() { None } else { - Some(content) + Some(stripped) } } + /// Remove `...` blocks from model output. + /// Qwen and other reasoning models may embed chain-of-thought inline + /// in the `content` field using `` tags. These must be stripped + /// before returning text to the user or parsing for tool calls. + fn strip_think_tags(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut rest = s; + loop { + if let Some(start) = rest.find("") { + result.push_str(&rest[..start]); + if let Some(end) = rest[start..].find("") { + rest = &rest[start + end + "".len()..]; + } else { + // Unclosed tag: drop the rest to avoid leaking partial reasoning. + break; + } + } else { + result.push_str(rest); + break; + } + } + result.trim().to_string() + } + + /// Derive the effective text content from a response, stripping `` tags + /// and falling back to the `thinking` field when `content` is empty after + /// stripping. This ensures that tool-call XML tags embedded alongside (or + /// after) thinking blocks are preserved for downstream parsing. + fn effective_content(content: &str, thinking: Option<&str>) -> Option { + // First try the content field with think tags stripped. + let stripped = Self::strip_think_tags(content); + if !stripped.trim().is_empty() { + return Some(stripped); + } + + // Content was empty or only thinking — check the thinking field. + // Some models (Qwen) put the full output including tool-call XML in + // the thinking field when `think: true` is set. + if let Some(thinking) = thinking.map(str::trim).filter(|t| !t.is_empty()) { + let stripped_thinking = Self::strip_think_tags(thinking); + if !stripped_thinking.trim().is_empty() { + tracing::debug!( + "Ollama: using thinking field as effective content ({} chars)", + stripped_thinking.len() + ); + return Some(stripped_thinking); + } + } + + None + } + fn fallback_text_for_empty_content(model: &str, thinking: Option<&str>) -> String { if let Some(thinking) = thinking.map(str::trim).filter(|value| !value.is_empty()) { let thinking_log_excerpt: String = thinking.chars().take(100).collect(); @@ -537,9 +590,11 @@ impl Provider for OllamaProvider { return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls)); } - // Plain text response - let content = response.message.content; - if let Some(content) = Self::normalize_response_text(content) { + // Plain text response — strip tags and fall back to thinking field. + if let Some(content) = Self::effective_content( + &response.message.content, + response.message.thinking.as_deref(), + ) { return Ok(content); } @@ -578,9 +633,11 @@ impl Provider for OllamaProvider { return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls)); } - // Plain text response - let content = response.message.content; - if let Some(content) = Self::normalize_response_text(content) { + // Plain text response — strip tags and fall back to thinking field. + if let Some(content) = Self::effective_content( + &response.message.content, + response.message.thinking.as_deref(), + ) { return Ok(content); } @@ -652,9 +709,15 @@ impl Provider for OllamaProvider { }); } - // Plain text response. - let content = response.message.content; - let text = if let Some(content) = Self::normalize_response_text(content) { + // No native tool calls — use the effective content (content with + // `` tags stripped, falling back to thinking field). + // The loop_.rs `parse_tool_calls` will extract any XML-style tool + // calls from the text, so preserve `` tags here. + let effective = Self::effective_content( + &response.message.content, + response.message.thinking.as_deref(), + ); + let text = if let Some(content) = effective { content } else { Self::fallback_text_for_empty_content( @@ -868,7 +931,27 @@ mod tests { ); assert_eq!( OllamaProvider::normalize_response_text(" hello ".to_string()), - Some(" hello ".to_string()) + Some("hello".to_string()) + ); + } + + #[test] + fn normalize_response_text_strips_think_tags() { + assert_eq!( + OllamaProvider::normalize_response_text( + "reasoning hello".to_string() + ), + Some("hello".to_string()) + ); + } + + #[test] + fn normalize_response_text_rejects_think_only_content() { + assert_eq!( + OllamaProvider::normalize_response_text( + "only thinking here".to_string() + ), + None ); } @@ -1069,4 +1152,123 @@ mod tests { assert!(resp.prompt_eval_count.is_none()); assert!(resp.eval_count.is_none()); } + + // ═══════════════════════════════════════════════════════════════════════ + // tag stripping tests + // ═══════════════════════════════════════════════════════════════════════ + + #[test] + fn strip_think_tags_removes_single_block() { + let input = "internal reasoningHello world"; + assert_eq!(OllamaProvider::strip_think_tags(input), "Hello world"); + } + + #[test] + fn strip_think_tags_removes_multiple_blocks() { + let input = "firstAsecondB"; + assert_eq!(OllamaProvider::strip_think_tags(input), "AB"); + } + + #[test] + fn strip_think_tags_handles_unclosed_block() { + let input = "visiblehidden tail"; + assert_eq!(OllamaProvider::strip_think_tags(input), "visible"); + } + + #[test] + fn strip_think_tags_preserves_text_without_tags() { + let input = "plain text response"; + assert_eq!(OllamaProvider::strip_think_tags(input), "plain text response"); + } + + #[test] + fn strip_think_tags_returns_empty_for_think_only() { + let input = "only thinking"; + assert_eq!(OllamaProvider::strip_think_tags(input), ""); + } + + // ═══════════════════════════════════════════════════════════════════════ + // effective_content tests + // ═══════════════════════════════════════════════════════════════════════ + + #[test] + fn effective_content_strips_think_and_returns_rest() { + let result = OllamaProvider::effective_content( + "reasoning\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}", + None, + ); + assert!(result.is_some()); + let text = result.unwrap(); + assert!(text.contains("")); + assert!(!text.contains("")); + } + + #[test] + fn effective_content_falls_back_to_thinking_field() { + let result = OllamaProvider::effective_content( + "", + Some("{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}"), + ); + assert!(result.is_some()); + assert!(result.unwrap().contains("")); + } + + #[test] + fn effective_content_returns_none_when_both_empty() { + assert!(OllamaProvider::effective_content("", None).is_none()); + assert!(OllamaProvider::effective_content("", Some("")).is_none()); + assert!(OllamaProvider::effective_content( + "only thinking", + Some("also only thinking") + ).is_none()); + } + + #[test] + fn effective_content_prefers_content_over_thinking() { + let result = OllamaProvider::effective_content( + "content text", + Some("thinking text"), + ); + assert_eq!(result, Some("content text".to_string())); + } + + #[test] + fn effective_content_uses_thinking_when_content_is_think_only() { + let result = OllamaProvider::effective_content( + "just reasoning", + Some("actual useful text from thinking field"), + ); + assert_eq!(result, Some("actual useful text from thinking field".to_string())); + } + + // ═══════════════════════════════════════════════════════════════════════ + // Qwen tool-call regression scenario tests + // ═══════════════════════════════════════════════════════════════════════ + + #[test] + fn qwen_think_with_tool_call_in_content_preserved() { + // Qwen produces tags followed by in content, + // with no structured tool_calls. The tags must survive + // for downstream parse_tool_calls to extract them. + let content = "I should list files\n\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n"; + let result = OllamaProvider::effective_content(content, None); + assert!(result.is_some()); + let text = result.unwrap(); + assert!(text.contains("")); + assert!(text.contains("shell")); + assert!(!text.contains("")); + } + + #[test] + fn qwen_thinking_field_with_tool_call_xml_extracted() { + // When think=true, Ollama separates thinking, but Qwen may put tool + // call XML in the thinking field with empty content. + let content = ""; + let thinking = "I need to check the date\n\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n"; + let result = OllamaProvider::effective_content(content, Some(thinking)); + assert!(result.is_some()); + let text = result.unwrap(); + assert!(text.contains("")); + assert!(text.contains("date")); + } }