From 6fe8e3a5bbc421eb598721e5bf64dbb7e6055a1c Mon Sep 17 00:00:00 2001
From: Argenis
Date: Fri, 13 Mar 2026 09:16:03 -0400
Subject: [PATCH] fix: gracefully handle reasoning_enabled for unsupported
 Ollama models (#3411)

When reasoning_enabled is configured, the Ollama provider sends
think=true to all models. Models that don't support the think parameter
(e.g. qwen3.5:0.8b) cause request failures that the reliable provider
classifies as retryable, leading to an infinite retry loop.

Fix: when a request with think=true fails, automatically retry once
with think omitted. This lets the call succeed on models that lack
reasoning support while preserving thinking for capable models.

Closes #3183
Related #850

Co-authored-by: Claude Opus 4.6
---
 src/providers/ollama.rs | 90 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 81 insertions(+), 9 deletions(-)

diff --git a/src/providers/ollama.rs b/src/providers/ollama.rs
index 1e69c8e83..e91625d96 100644
--- a/src/providers/ollama.rs
+++ b/src/providers/ollama.rs
@@ -27,7 +27,7 @@ struct ChatRequest {
     tools: Option<Vec<serde_json::Value>>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Clone, Serialize)]
 struct Message {
     role: String,
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -40,14 +40,14 @@ struct Message {
     tool_name: Option<String>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Clone, Serialize)]
 struct OutgoingToolCall {
     #[serde(rename = "type")]
     kind: String,
     function: OutgoingFunction,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Clone, Serialize)]
 struct OutgoingFunction {
     name: String,
     arguments: serde_json::Value,
 }
@@ -258,13 +258,31 @@ impl OllamaProvider {
         model: &str,
         temperature: f64,
         tools: Option<&[serde_json::Value]>,
+    ) -> ChatRequest {
+        self.build_chat_request_with_think(
+            messages,
+            model,
+            temperature,
+            tools,
+            self.reasoning_enabled,
+        )
+    }
+
+    /// Build a chat request with an explicit `think` value.
+    fn build_chat_request_with_think(
+        &self,
+        messages: Vec<Message>,
+        model: &str,
+        temperature: f64,
+        tools: Option<&[serde_json::Value]>,
+        think: Option<bool>,
     ) -> ChatRequest {
         ChatRequest {
             model: model.to_string(),
             messages,
             stream: false,
             options: Options { temperature },
-            think: self.reasoning_enabled,
+            think,
             tools: tools.map(|t| t.to_vec()),
         }
     }
@@ -396,17 +414,18 @@ impl OllamaProvider {
             .collect()
     }
 
-    /// Send a request to Ollama and get the parsed response.
-    /// Pass `tools` to enable native function-calling for models that support it.
-    async fn send_request(
+    /// Send a single HTTP request to Ollama and parse the response.
+    async fn send_request_inner(
         &self,
-        messages: Vec<Message>,
+        messages: &[Message],
         model: &str,
         temperature: f64,
         should_auth: bool,
         tools: Option<&[serde_json::Value]>,
+        think: Option<bool>,
     ) -> anyhow::Result<ChatResponse> {
-        let request = self.build_chat_request(messages, model, temperature, tools);
+        let request =
+            self.build_chat_request_with_think(messages.to_vec(), model, temperature, tools, think);
 
         let url = format!("{}/api/chat", self.base_url);
 
@@ -466,6 +485,59 @@ impl OllamaProvider {
         Ok(chat_response)
     }
 
+    /// Send a request to Ollama and get the parsed response.
+    /// Pass `tools` to enable native function-calling for models that support it.
+    ///
+    /// When `reasoning_enabled` (`think`) is set to `true`, the first request
+    /// includes `think: true`. If that request fails (the model may not support
+    /// the `think` parameter), we automatically retry once with `think` omitted
+    /// so the call succeeds instead of entering an infinite retry loop.
+    async fn send_request(
+        &self,
+        messages: Vec<Message>,
+        model: &str,
+        temperature: f64,
+        should_auth: bool,
+        tools: Option<&[serde_json::Value]>,
+    ) -> anyhow::Result<ChatResponse> {
+        let result = self
+            .send_request_inner(
+                &messages,
+                model,
+                temperature,
+                should_auth,
+                tools,
+                self.reasoning_enabled,
+            )
+            .await;
+
+        match result {
+            Ok(resp) => Ok(resp),
+            Err(first_err) if self.reasoning_enabled == Some(true) => {
+                tracing::warn!(
+                    model = model,
+                    error = %first_err,
+                    "Ollama request failed with think=true; retrying without reasoning \
+                     (model may not support it)"
+                );
+                // Retry with think omitted from the request entirely.
+                self.send_request_inner(&messages, model, temperature, should_auth, tools, None)
+                    .await
+                    .map_err(|retry_err| {
+                        // Both attempts failed — return the original error for clarity.
+                        tracing::error!(
+                            model = model,
+                            original_error = %first_err,
+                            retry_error = %retry_err,
+                            "Ollama request also failed without think; returning original error"
+                        );
+                        first_err
+                    })
+            }
+            Err(e) => Err(e),
+        }
+    }
+
     /// Convert Ollama tool calls to the JSON format expected by parse_tool_calls in loop_.rs
     ///
     /// Handles quirky model behavior where tool calls are wrapped: