diff --git a/src/agent/agent.rs b/src/agent/agent.rs index d286ffc0b..0851bae80 100644 --- a/src/agent/agent.rs +++ b/src/agent/agent.rs @@ -796,6 +796,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -834,6 +836,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -874,6 +878,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }]), }); @@ -915,6 +921,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, crate::providers::ChatResponse { text: Some("done".into()), @@ -922,6 +930,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, ]), }); @@ -964,6 +974,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }]), seen_models: seen_models.clone(), }); diff --git a/src/agent/dispatcher.rs b/src/agent/dispatcher.rs index 2dda0b93a..b13591f1d 100644 --- a/src/agent/dispatcher.rs +++ b/src/agent/dispatcher.rs @@ -264,6 +264,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; let dispatcher = XmlToolDispatcher; let (_, calls) = dispatcher.parse_response(&response); @@ -283,6 +285,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; let dispatcher = NativeToolDispatcher; let (_, calls) = dispatcher.parse_response(&response); diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 568facfac..6016297e7 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -6,7 +6,8 @@ use crate::memory::{self, Memory, MemoryCategory}; use crate::multimodal; use crate::observability::{self, runtime_trace, Observer, ObserverEvent}; use crate::providers::{ - self, ChatMessage, ChatRequest, Provider, ProviderCapabilityError, ToolCall, + self, ChatMessage, ChatRequest, NormalizedStopReason, Provider, ProviderCapabilityError, + ToolCall, }; use crate::runtime; use crate::security::SecurityPolicy; @@ -61,6 +62,16 @@ const STREAM_CHUNK_MIN_CHARS: usize = 80; /// Used as a safe fallback when `max_tool_iterations` is unset or configured as zero. const DEFAULT_MAX_TOOL_ITERATIONS: usize = 20; +/// Maximum continuation retries when a provider reports max-token truncation. +const MAX_TOKENS_CONTINUATION_MAX_ATTEMPTS: usize = 3; +/// Absolute safety cap for merged continuation output. +const MAX_TOKENS_CONTINUATION_MAX_OUTPUT_CHARS: usize = 120_000; +/// Deterministic continuation instruction appended as a user message. +const MAX_TOKENS_CONTINUATION_PROMPT: &str = "Previous response was truncated by token limit.\nContinue exactly from where you left off.\nIf you intended a tool call, emit one complete tool call payload only.\nDo not repeat already-sent text."; +/// Notice appended when continuation budget is exhausted before completion. +const MAX_TOKENS_CONTINUATION_NOTICE: &str = + "\n\n[Response may be truncated due to continuation limits. Reply \"continue\" to resume.]"; + /// Minimum user-message length (in chars) for auto-save to memory. /// Matches the channel-side constant in `channels/mod.rs`. const AUTOSAVE_MIN_MESSAGE_CHARS: usize = 20; @@ -559,6 +570,43 @@ fn looks_like_deferred_action_without_tool_call(text: &str) -> bool { && CJK_DEFERRED_ACTION_VERB_REGEX.is_match(trimmed) } +fn merge_continuation_text(existing: &str, next: &str) -> String { + if next.is_empty() { + return existing.to_string(); + } + if existing.is_empty() { + return next.to_string(); + } + if existing.ends_with(next) { + return existing.to_string(); + } + if next.starts_with(existing) { + return next.to_string(); + } + format!("{existing}{next}") +} + +fn add_optional_u64(lhs: Option, rhs: Option) -> Option { + match (lhs, rhs) { + (Some(left), Some(right)) => Some(left.saturating_add(right)), + (Some(left), None) => Some(left), + (None, Some(right)) => Some(right), + (None, None) => None, + } +} + +fn stop_reason_name(reason: &NormalizedStopReason) -> &'static str { + match reason { + NormalizedStopReason::EndTurn => "end_turn", + NormalizedStopReason::ToolCall => "tool_call", + NormalizedStopReason::MaxTokens => "max_tokens", + NormalizedStopReason::ContextWindowExceeded => "context_window_exceeded", + NormalizedStopReason::SafetyBlocked => "safety_blocked", + NormalizedStopReason::Cancelled => "cancelled", + NormalizedStopReason::Unknown(_) => "unknown", + } +} + fn maybe_inject_cron_add_delivery( tool_name: &str, tool_args: &mut serde_json::Value, @@ -1340,12 +1388,171 @@ pub(crate) async fn run_tool_call_loop( parse_issue_detected, ) = match chat_result { Ok(resp) => { - let (resp_input_tokens, resp_output_tokens) = resp + let mut response_text = resp.text_or_empty().to_string(); + let mut native_calls = resp.tool_calls; + let mut reasoning_content = resp.reasoning_content.clone(); + let mut stop_reason = resp.stop_reason.clone(); + let mut raw_stop_reason = resp.raw_stop_reason.clone(); + let (mut resp_input_tokens, mut resp_output_tokens) = resp .usage .as_ref() .map(|u| (u.input_tokens, u.output_tokens)) .unwrap_or((None, None)); + if let Some(reason) = stop_reason.as_ref() { + runtime_trace::record_event( + "stop_reason_observed", + Some(channel_name), + Some(provider_name), + Some(active_model.as_str()), + Some(&turn_id), + Some(true), + None, + serde_json::json!({ + "iteration": iteration + 1, + "normalized_reason": stop_reason_name(reason), + "raw_reason": raw_stop_reason.clone(), + }), + ); + } + + let mut continuation_attempts = 0usize; + let mut continuation_termination_reason: Option<&'static str> = None; + let mut continuation_error: Option = None; + + while matches!(stop_reason, Some(NormalizedStopReason::MaxTokens)) + && native_calls.is_empty() + && continuation_attempts < MAX_TOKENS_CONTINUATION_MAX_ATTEMPTS + && response_text.chars().count() < MAX_TOKENS_CONTINUATION_MAX_OUTPUT_CHARS + { + continuation_attempts += 1; + runtime_trace::record_event( + "continuation_attempt", + Some(channel_name), + Some(provider_name), + Some(active_model.as_str()), + Some(&turn_id), + Some(true), + None, + serde_json::json!({ + "iteration": iteration + 1, + "attempt": continuation_attempts, + "output_chars": response_text.chars().count(), + "max_output_chars": MAX_TOKENS_CONTINUATION_MAX_OUTPUT_CHARS, + }), + ); + + let mut continuation_messages = request_messages.clone(); + continuation_messages.push(ChatMessage::assistant(response_text.clone())); + continuation_messages.push(ChatMessage::user( + MAX_TOKENS_CONTINUATION_PROMPT.to_string(), + )); + + let continuation_future = provider.chat( + ChatRequest { + messages: &continuation_messages, + tools: request_tools, + }, + active_model.as_str(), + temperature, + ); + let continuation_result = if let Some(token) = cancellation_token.as_ref() { + tokio::select! { + () = token.cancelled() => return Err(ToolLoopCancelled.into()), + result = continuation_future => result, + } + } else { + continuation_future.await + }; + + let continuation_resp = match continuation_result { + Ok(response) => response, + Err(error) => { + continuation_termination_reason = Some("provider_error"); + continuation_error = + Some(crate::providers::sanitize_api_error(&error.to_string())); + break; + } + }; + + if let Some(usage) = continuation_resp.usage.as_ref() { + resp_input_tokens = add_optional_u64(resp_input_tokens, usage.input_tokens); + resp_output_tokens = + add_optional_u64(resp_output_tokens, usage.output_tokens); + } + + let next_text = continuation_resp.text_or_empty().to_string(); + response_text = merge_continuation_text(&response_text, &next_text); + + if continuation_resp.reasoning_content.is_some() { + reasoning_content = continuation_resp.reasoning_content.clone(); + } + if !continuation_resp.tool_calls.is_empty() { + native_calls = continuation_resp.tool_calls; + } + stop_reason = continuation_resp.stop_reason; + raw_stop_reason = continuation_resp.raw_stop_reason; + + if let Some(reason) = stop_reason.as_ref() { + runtime_trace::record_event( + "stop_reason_observed", + Some(channel_name), + Some(provider_name), + Some(active_model.as_str()), + Some(&turn_id), + Some(true), + None, + serde_json::json!({ + "iteration": iteration + 1, + "continuation_attempt": continuation_attempts, + "normalized_reason": stop_reason_name(reason), + "raw_reason": raw_stop_reason.clone(), + }), + ); + } + } + + if continuation_attempts > 0 && continuation_termination_reason.is_none() { + continuation_termination_reason = + if matches!(stop_reason, Some(NormalizedStopReason::MaxTokens)) { + if response_text.chars().count() + >= MAX_TOKENS_CONTINUATION_MAX_OUTPUT_CHARS + { + Some("output_cap") + } else { + Some("retry_limit") + } + } else { + Some("completed") + }; + } + + if let Some(terminal_reason) = continuation_termination_reason { + runtime_trace::record_event( + "continuation_terminated", + Some(channel_name), + Some(provider_name), + Some(active_model.as_str()), + Some(&turn_id), + Some(terminal_reason == "completed"), + continuation_error.as_deref(), + serde_json::json!({ + "iteration": iteration + 1, + "attempts": continuation_attempts, + "terminal_reason": terminal_reason, + "output_chars": response_text.chars().count(), + }), + ); + } + + if continuation_attempts > 0 + && matches!(stop_reason, Some(NormalizedStopReason::MaxTokens)) + && native_calls.is_empty() + && !response_text.ends_with(MAX_TOKENS_CONTINUATION_NOTICE) + { + response_text.push_str(MAX_TOKENS_CONTINUATION_NOTICE); + } + observer.record_event(&ObserverEvent::LlmResponse { provider: provider_name.to_string(), model: active_model.clone(), @@ -1356,12 +1563,11 @@ pub(crate) async fn run_tool_call_loop( output_tokens: resp_output_tokens, }); - let response_text = resp.text_or_empty().to_string(); // First try native structured tool calls (OpenAI-format). // Fall back to text-based parsing (XML tags, markdown blocks, // GLM format) only if the provider returned no native calls — // this ensures we support both native and prompt-guided models. - let mut calls = parse_structured_tool_calls(&resp.tool_calls); + let mut calls = parse_structured_tool_calls(&native_calls); let mut parsed_text = String::new(); if calls.is_empty() { @@ -1406,15 +1612,17 @@ pub(crate) async fn run_tool_call_loop( "input_tokens": resp_input_tokens, "output_tokens": resp_output_tokens, "raw_response": scrub_credentials(&response_text), - "native_tool_calls": resp.tool_calls.len(), + "native_tool_calls": native_calls.len(), "parsed_tool_calls": calls.len(), + "continuation_attempts": continuation_attempts, + "stop_reason": stop_reason.as_ref().map(stop_reason_name), + "raw_stop_reason": raw_stop_reason, }), ); // Preserve native tool call IDs in assistant history so role=tool // follow-up messages can reference the exact call id. - let reasoning_content = resp.reasoning_content.clone(); - let assistant_history_content = if resp.tool_calls.is_empty() { + let assistant_history_content = if native_calls.is_empty() { if use_native_tools { build_native_assistant_history_from_parsed_calls( &response_text, @@ -1428,12 +1636,11 @@ pub(crate) async fn run_tool_call_loop( } else { build_native_assistant_history( &response_text, - &resp.tool_calls, + &native_calls, reasoning_content.as_deref(), ) }; - let native_calls = resp.tool_calls; ( response_text, parsed_text, @@ -3223,6 +3430,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } @@ -3233,6 +3442,13 @@ mod tests { } impl ScriptedProvider { + fn from_scripted_responses(responses: Vec) -> Self { + Self { + responses: Arc::new(Mutex::new(VecDeque::from(responses))), + capabilities: ProviderCapabilities::default(), + } + } + fn from_text_responses(responses: Vec<&str>) -> Self { let scripted = responses .into_iter() @@ -3242,12 +3458,11 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) .collect(); - Self { - responses: Arc::new(Mutex::new(scripted)), - capabilities: ProviderCapabilities::default(), - } + Self::from_scripted_responses(scripted) } fn with_native_tool_support(mut self) -> Self { @@ -4249,6 +4464,140 @@ mod tests { ); } + #[tokio::test] + async fn run_tool_call_loop_continues_when_stop_reason_is_max_tokens() { + let provider = ScriptedProvider::from_scripted_responses(vec![ + ChatResponse { + text: Some("part 1 ".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + stop_reason: Some(NormalizedStopReason::MaxTokens), + raw_stop_reason: Some("length".to_string()), + }, + ChatResponse { + text: Some("part 2".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + stop_reason: Some(NormalizedStopReason::EndTurn), + raw_stop_reason: Some("stop".to_string()), + }, + ]); + + let tools_registry: Vec> = Vec::new(); + let mut history = vec![ + ChatMessage::system("test-system"), + ChatMessage::user("continue this"), + ]; + let observer = NoopObserver; + + let result = run_tool_call_loop( + &provider, + &mut history, + &tools_registry, + &observer, + "mock-provider", + "mock-model", + 0.0, + true, + None, + "cli", + &crate::config::MultimodalConfig::default(), + 4, + None, + None, + None, + &[], + ) + .await + .expect("max-token continuation should complete"); + + assert_eq!(result, "part 1 part 2"); + assert!( + !result.contains("Response may be truncated"), + "continuation should not emit truncation notice when it ends cleanly" + ); + } + + #[tokio::test] + async fn run_tool_call_loop_appends_notice_when_continuation_budget_exhausts() { + let provider = ScriptedProvider::from_scripted_responses(vec![ + ChatResponse { + text: Some("A".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + stop_reason: Some(NormalizedStopReason::MaxTokens), + raw_stop_reason: Some("length".to_string()), + }, + ChatResponse { + text: Some("B".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + stop_reason: Some(NormalizedStopReason::MaxTokens), + raw_stop_reason: Some("length".to_string()), + }, + ChatResponse { + text: Some("C".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + stop_reason: Some(NormalizedStopReason::MaxTokens), + raw_stop_reason: Some("length".to_string()), + }, + ChatResponse { + text: Some("D".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + stop_reason: Some(NormalizedStopReason::MaxTokens), + raw_stop_reason: Some("length".to_string()), + }, + ]); + + let tools_registry: Vec> = Vec::new(); + let mut history = vec![ + ChatMessage::system("test-system"), + ChatMessage::user("long output"), + ]; + let observer = NoopObserver; + + let result = run_tool_call_loop( + &provider, + &mut history, + &tools_registry, + &observer, + "mock-provider", + "mock-model", + 0.0, + true, + None, + "cli", + &crate::config::MultimodalConfig::default(), + 4, + None, + None, + None, + &[], + ) + .await + .expect("continuation should degrade to partial output"); + + assert!(result.starts_with("ABCD")); + assert!( + result.contains("Response may be truncated due to continuation limits"), + "result should include truncation notice when continuation cap is hit" + ); + } + #[tokio::test] async fn run_tool_call_loop_preserves_failed_tool_error_for_after_hook() { let provider = ScriptedProvider::from_text_responses(vec![ diff --git a/src/agent/loop_/history.rs b/src/agent/loop_/history.rs index 8e228b4d6..f866d53a9 100644 --- a/src/agent/loop_/history.rs +++ b/src/agent/loop_/history.rs @@ -169,6 +169,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } diff --git a/src/agent/tests.rs b/src/agent/tests.rs index e59999411..f00905db3 100644 --- a/src/agent/tests.rs +++ b/src/agent/tests.rs @@ -96,6 +96,8 @@ impl Provider for ScriptedProvider { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -334,6 +336,8 @@ fn tool_response(calls: Vec) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -345,6 +349,8 @@ fn text_response(text: &str) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -358,6 +364,8 @@ fn xml_tool_response(name: &str, args: &str) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -754,6 +762,8 @@ async fn turn_handles_empty_text_response() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }])); let mut agent = build_agent_with(provider, vec![], Box::new(NativeToolDispatcher)); @@ -770,6 +780,8 @@ async fn turn_handles_none_text_response() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }])); let mut agent = build_agent_with(provider, vec![], Box::new(NativeToolDispatcher)); @@ -796,6 +808,8 @@ async fn turn_preserves_text_alongside_tool_calls() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, text_response("Here are the results"), ])); @@ -1035,6 +1049,8 @@ async fn native_dispatcher_handles_stringified_arguments() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; let (_, calls) = dispatcher.parse_response(&response); @@ -1063,6 +1079,8 @@ fn xml_dispatcher_handles_nested_json() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; let dispatcher = XmlToolDispatcher; @@ -1083,6 +1101,8 @@ fn xml_dispatcher_handles_empty_tool_call_tag() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; let dispatcher = XmlToolDispatcher; @@ -1099,6 +1119,8 @@ fn xml_dispatcher_handles_unclosed_tool_call() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; let dispatcher = XmlToolDispatcher; diff --git a/src/providers/anthropic.rs b/src/providers/anthropic.rs index b762ef5f4..42516d432 100644 --- a/src/providers/anthropic.rs +++ b/src/providers/anthropic.rs @@ -1,6 +1,6 @@ use crate::providers::traits::{ ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, - Provider, ProviderCapabilities, TokenUsage, ToolCall as ProviderToolCall, + NormalizedStopReason, Provider, ProviderCapabilities, TokenUsage, ToolCall as ProviderToolCall, }; use crate::tools::ToolSpec; use async_trait::async_trait; @@ -139,6 +139,8 @@ struct NativeChatResponse { #[serde(default)] content: Vec, #[serde(default)] + stop_reason: Option, + #[serde(default)] usage: Option, } @@ -416,6 +418,10 @@ impl AnthropicProvider { fn parse_native_response(response: NativeChatResponse) -> ProviderChatResponse { let mut text_parts = Vec::new(); let mut tool_calls = Vec::new(); + let raw_stop_reason = response.stop_reason.clone(); + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_anthropic_stop_reason); let usage = response.usage.map(|u| TokenUsage { input_tokens: u.input_tokens, @@ -459,6 +465,8 @@ impl AnthropicProvider { usage, reasoning_content: None, quota_metadata: None, + stop_reason, + raw_stop_reason, } } diff --git a/src/providers/bedrock.rs b/src/providers/bedrock.rs index d61cb8925..2dc83d891 100644 --- a/src/providers/bedrock.rs +++ b/src/providers/bedrock.rs @@ -6,8 +6,8 @@ use crate::providers::traits::{ ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, - Provider, ProviderCapabilities, StreamChunk, StreamError, StreamOptions, StreamResult, - TokenUsage, ToolCall as ProviderToolCall, ToolsPayload, + NormalizedStopReason, Provider, ProviderCapabilities, StreamChunk, StreamError, StreamOptions, + StreamResult, TokenUsage, ToolCall as ProviderToolCall, ToolsPayload, }; use crate::tools::ToolSpec; use async_trait::async_trait; @@ -512,7 +512,6 @@ struct ConverseResponse { #[serde(default)] output: Option, #[serde(default)] - #[allow(dead_code)] stop_reason: Option, #[serde(default)] usage: Option, @@ -941,6 +940,10 @@ impl BedrockProvider { fn parse_converse_response(response: ConverseResponse) -> ProviderChatResponse { let mut text_parts = Vec::new(); let mut tool_calls = Vec::new(); + let raw_stop_reason = response.stop_reason.clone(); + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_bedrock_stop_reason); let usage = response.usage.map(|u| TokenUsage { input_tokens: u.input_tokens, @@ -982,6 +985,8 @@ impl BedrockProvider { usage, reasoning_content: None, quota_metadata: None, + stop_reason, + raw_stop_reason, } } diff --git a/src/providers/compatible.rs b/src/providers/compatible.rs index 3a4bed581..9f877e975 100644 --- a/src/providers/compatible.rs +++ b/src/providers/compatible.rs @@ -5,8 +5,8 @@ use crate::multimodal; use crate::providers::traits::{ ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, - Provider, StreamChunk, StreamError, StreamOptions, StreamResult, TokenUsage, - ToolCall as ProviderToolCall, + NormalizedStopReason, Provider, StreamChunk, StreamError, StreamOptions, StreamResult, + TokenUsage, ToolCall as ProviderToolCall, }; use async_trait::async_trait; use futures_util::{stream, SinkExt, StreamExt}; @@ -479,6 +479,8 @@ struct UsageInfo { #[derive(Debug, Deserialize)] struct Choice { message: ResponseMessage, + #[serde(default)] + finish_reason: Option, } /// Remove `...` blocks from model output. @@ -968,6 +970,8 @@ fn parse_responses_chat_response(response: ResponsesResponse) -> ProviderChatRes usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -1576,7 +1580,12 @@ impl OpenAiCompatibleProvider { modified_messages } - fn parse_native_response(message: ResponseMessage) -> ProviderChatResponse { + fn parse_native_response(choice: Choice) -> ProviderChatResponse { + let raw_stop_reason = choice.finish_reason; + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_openai_finish_reason); + let message = choice.message; let text = message.effective_content_optional(); let reasoning_content = message.reasoning_content.clone(); let tool_calls = message @@ -1611,6 +1620,8 @@ impl OpenAiCompatibleProvider { usage: None, reasoning_content, quota_metadata: None, + stop_reason, + raw_stop_reason, } } @@ -1983,6 +1994,8 @@ impl Provider for OpenAiCompatibleProvider { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } }; @@ -2030,6 +2043,11 @@ impl Provider for OpenAiCompatibleProvider { .next() .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?; + let raw_stop_reason = choice.finish_reason; + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_openai_finish_reason); + let text = choice.message.effective_content_optional(); let reasoning_content = choice.message.reasoning_content; let tool_calls = choice @@ -2055,6 +2073,8 @@ impl Provider for OpenAiCompatibleProvider { usage, reasoning_content, quota_metadata: None, + stop_reason, + raw_stop_reason, }) } @@ -2176,14 +2196,13 @@ impl Provider for OpenAiCompatibleProvider { input_tokens: u.prompt_tokens, output_tokens: u.completion_tokens, }); - let message = native_response + let choice = native_response .choices .into_iter() .next() - .map(|choice| choice.message) .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?; - let mut result = Self::parse_native_response(message); + let mut result = Self::parse_native_response(choice); result.usage = usage; Ok(result) } @@ -2920,26 +2939,31 @@ mod tests { #[test] fn parse_native_response_preserves_tool_call_id() { - let message = ResponseMessage { - content: None, - tool_calls: Some(vec![ToolCall { - id: Some("call_123".to_string()), - kind: Some("function".to_string()), - function: Some(Function { - name: Some("shell".to_string()), - arguments: Some(r#"{"command":"pwd"}"#.to_string()), - }), - name: None, - arguments: None, - parameters: None, - }]), - reasoning_content: None, + let choice = Choice { + message: ResponseMessage { + content: None, + tool_calls: Some(vec![ToolCall { + id: Some("call_123".to_string()), + kind: Some("function".to_string()), + function: Some(Function { + name: Some("shell".to_string()), + arguments: Some(r#"{"command":"pwd"}"#.to_string()), + }), + name: None, + arguments: None, + parameters: None, + }]), + reasoning_content: None, + }, + finish_reason: Some("tool_calls".to_string()), }; - let parsed = OpenAiCompatibleProvider::parse_native_response(message); + let parsed = OpenAiCompatibleProvider::parse_native_response(choice); assert_eq!(parsed.tool_calls.len(), 1); assert_eq!(parsed.tool_calls[0].id, "call_123"); assert_eq!(parsed.tool_calls[0].name, "shell"); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::ToolCall)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("tool_calls")); } #[test] @@ -3968,39 +3992,49 @@ mod tests { #[test] fn parse_native_response_captures_reasoning_content() { - let message = ResponseMessage { - content: Some("answer".to_string()), - reasoning_content: Some("thinking step".to_string()), - tool_calls: Some(vec![ToolCall { - id: Some("call_1".to_string()), - kind: Some("function".to_string()), - function: Some(Function { - name: Some("shell".to_string()), - arguments: Some(r#"{"cmd":"ls"}"#.to_string()), - }), - name: None, - arguments: None, - parameters: None, - }]), + let choice = Choice { + message: ResponseMessage { + content: Some("answer".to_string()), + reasoning_content: Some("thinking step".to_string()), + tool_calls: Some(vec![ToolCall { + id: Some("call_1".to_string()), + kind: Some("function".to_string()), + function: Some(Function { + name: Some("shell".to_string()), + arguments: Some(r#"{"cmd":"ls"}"#.to_string()), + }), + name: None, + arguments: None, + parameters: None, + }]), + }, + finish_reason: Some("length".to_string()), }; - let parsed = OpenAiCompatibleProvider::parse_native_response(message); + let parsed = OpenAiCompatibleProvider::parse_native_response(choice); assert_eq!(parsed.reasoning_content.as_deref(), Some("thinking step")); assert_eq!(parsed.text.as_deref(), Some("answer")); assert_eq!(parsed.tool_calls.len(), 1); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::MaxTokens)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("length")); } #[test] fn parse_native_response_none_reasoning_content_for_normal_model() { - let message = ResponseMessage { - content: Some("hello".to_string()), - reasoning_content: None, - tool_calls: None, + let choice = Choice { + message: ResponseMessage { + content: Some("hello".to_string()), + reasoning_content: None, + tool_calls: None, + }, + finish_reason: Some("stop".to_string()), }; - let parsed = OpenAiCompatibleProvider::parse_native_response(message); + let parsed = OpenAiCompatibleProvider::parse_native_response(choice); assert!(parsed.reasoning_content.is_none()); assert_eq!(parsed.text.as_deref(), Some("hello")); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::EndTurn)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("stop")); } #[test] diff --git a/src/providers/copilot.rs b/src/providers/copilot.rs index 96103ca89..26f74e583 100644 --- a/src/providers/copilot.rs +++ b/src/providers/copilot.rs @@ -400,6 +400,8 @@ impl CopilotProvider { usage, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } diff --git a/src/providers/cursor.rs b/src/providers/cursor.rs index 583d92e47..b396a6413 100644 --- a/src/providers/cursor.rs +++ b/src/providers/cursor.rs @@ -236,6 +236,8 @@ impl Provider for CursorProvider { usage: Some(TokenUsage::default()), reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } diff --git a/src/providers/gemini.rs b/src/providers/gemini.rs index c5d269d78..f2af938f4 100644 --- a/src/providers/gemini.rs +++ b/src/providers/gemini.rs @@ -5,7 +5,9 @@ //! - Google Cloud ADC (`GOOGLE_APPLICATION_CREDENTIALS`) use crate::auth::AuthService; -use crate::providers::traits::{ChatMessage, ChatResponse, Provider, TokenUsage}; +use crate::providers::traits::{ + ChatMessage, ChatResponse, NormalizedStopReason, Provider, TokenUsage, +}; use async_trait::async_trait; use base64::Engine; use directories::UserDirs; @@ -175,6 +177,8 @@ struct InternalGenerateContentResponse { struct Candidate { #[serde(default)] content: Option, + #[serde(default, rename = "finishReason")] + finish_reason: Option, } #[derive(Debug, Deserialize)] @@ -939,7 +943,12 @@ impl GeminiProvider { system_instruction: Option, model: &str, temperature: f64, - ) -> anyhow::Result<(String, Option)> { + ) -> anyhow::Result<( + String, + Option, + Option, + Option, + )> { let auth = self.auth.as_ref().ok_or_else(|| { anyhow::anyhow!( "Gemini API key not found. Options:\n\ @@ -1132,14 +1141,21 @@ impl GeminiProvider { output_tokens: u.candidates_token_count, }); - let text = result + let candidate = result .candidates .and_then(|c| c.into_iter().next()) - .and_then(|c| c.content) + .ok_or_else(|| anyhow::anyhow!("No response from Gemini"))?; + let raw_stop_reason = candidate.finish_reason.clone(); + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_gemini_finish_reason); + + let text = candidate + .content .and_then(|c| c.effective_text()) .ok_or_else(|| anyhow::anyhow!("No response from Gemini"))?; - Ok((text, usage)) + Ok((text, usage, stop_reason, raw_stop_reason)) } } @@ -1166,7 +1182,7 @@ impl Provider for GeminiProvider { }], }]; - let (text, _usage) = self + let (text, _usage, _stop_reason, _raw_stop_reason) = self .send_generate_content(contents, system_instruction, model, temperature) .await?; Ok(text) @@ -1218,7 +1234,7 @@ impl Provider for GeminiProvider { }) }; - let (text, _usage) = self + let (text, _usage, _stop_reason, _raw_stop_reason) = self .send_generate_content(contents, system_instruction, model, temperature) .await?; Ok(text) @@ -1263,7 +1279,7 @@ impl Provider for GeminiProvider { }) }; - let (text, usage) = self + let (text, usage, stop_reason, raw_stop_reason) = self .send_generate_content(contents, system_instruction, model, temperature) .await?; @@ -1273,6 +1289,8 @@ impl Provider for GeminiProvider { usage, reasoning_content: None, quota_metadata: None, + stop_reason, + raw_stop_reason, }) } diff --git a/src/providers/mod.rs b/src/providers/mod.rs index dff6c0916..147875a0a 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -39,8 +39,8 @@ pub mod traits; #[allow(unused_imports)] pub use traits::{ is_user_or_assistant_role, ChatMessage, ChatRequest, ChatResponse, ConversationMessage, - Provider, ProviderCapabilityError, ToolCall, ToolResultMessage, ROLE_ASSISTANT, ROLE_SYSTEM, - ROLE_TOOL, ROLE_USER, + NormalizedStopReason, Provider, ProviderCapabilityError, ToolCall, ToolResultMessage, + ROLE_ASSISTANT, ROLE_SYSTEM, ROLE_TOOL, ROLE_USER, }; use crate::auth::AuthService; diff --git a/src/providers/ollama.rs b/src/providers/ollama.rs index 79f4ce255..81eb44ddb 100644 --- a/src/providers/ollama.rs +++ b/src/providers/ollama.rs @@ -650,6 +650,8 @@ impl Provider for OllamaProvider { usage, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } @@ -669,6 +671,8 @@ impl Provider for OllamaProvider { usage, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } @@ -717,6 +721,8 @@ impl Provider for OllamaProvider { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } diff --git a/src/providers/openai.rs b/src/providers/openai.rs index bb3973d6e..eed9f52ea 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -1,6 +1,6 @@ use crate::providers::traits::{ ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, - Provider, TokenUsage, ToolCall as ProviderToolCall, + NormalizedStopReason, Provider, TokenUsage, ToolCall as ProviderToolCall, }; use crate::tools::ToolSpec; use async_trait::async_trait; @@ -36,6 +36,8 @@ struct ChatResponse { #[derive(Debug, Deserialize)] struct Choice { message: ResponseMessage, + #[serde(default)] + finish_reason: Option, } #[derive(Debug, Deserialize)] @@ -145,6 +147,8 @@ struct UsageInfo { #[derive(Debug, Deserialize)] struct NativeChoice { message: NativeResponseMessage, + #[serde(default)] + finish_reason: Option, } #[derive(Debug, Deserialize)] @@ -282,7 +286,12 @@ impl OpenAiProvider { .collect() } - fn parse_native_response(message: NativeResponseMessage) -> ProviderChatResponse { + fn parse_native_response(choice: NativeChoice) -> ProviderChatResponse { + let raw_stop_reason = choice.finish_reason; + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_openai_finish_reason); + let message = choice.message; let text = message.effective_content(); let reasoning_content = message.reasoning_content.clone(); let tool_calls = message @@ -302,6 +311,8 @@ impl OpenAiProvider { usage: None, reasoning_content, quota_metadata: None, + stop_reason, + raw_stop_reason, } } @@ -407,13 +418,12 @@ impl Provider for OpenAiProvider { input_tokens: u.prompt_tokens, output_tokens: u.completion_tokens, }); - let message = native_response + let choice = native_response .choices .into_iter() .next() - .map(|c| c.message) .ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))?; - let mut result = Self::parse_native_response(message); + let mut result = Self::parse_native_response(choice); result.usage = usage; result.quota_metadata = quota_metadata; Ok(result) @@ -476,13 +486,12 @@ impl Provider for OpenAiProvider { input_tokens: u.prompt_tokens, output_tokens: u.completion_tokens, }); - let message = native_response + let choice = native_response .choices .into_iter() .next() - .map(|c| c.message) .ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))?; - let mut result = Self::parse_native_response(message); + let mut result = Self::parse_native_response(choice); result.usage = usage; result.quota_metadata = quota_metadata; Ok(result) @@ -773,21 +782,25 @@ mod tests { "content":"answer", "reasoning_content":"thinking step", "tool_calls":[{"id":"call_1","type":"function","function":{"name":"shell","arguments":"{}"}}] - }}]}"#; + },"finish_reason":"length"}]}"#; let resp: NativeChatResponse = serde_json::from_str(json).unwrap(); - let message = resp.choices.into_iter().next().unwrap().message; - let parsed = OpenAiProvider::parse_native_response(message); + let choice = resp.choices.into_iter().next().unwrap(); + let parsed = OpenAiProvider::parse_native_response(choice); assert_eq!(parsed.reasoning_content.as_deref(), Some("thinking step")); assert_eq!(parsed.tool_calls.len(), 1); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::MaxTokens)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("length")); } #[test] fn parse_native_response_none_reasoning_content_for_normal_model() { - let json = r#"{"choices":[{"message":{"content":"hello"}}]}"#; + let json = r#"{"choices":[{"message":{"content":"hello"},"finish_reason":"stop"}]}"#; let resp: NativeChatResponse = serde_json::from_str(json).unwrap(); - let message = resp.choices.into_iter().next().unwrap().message; - let parsed = OpenAiProvider::parse_native_response(message); + let choice = resp.choices.into_iter().next().unwrap(); + let parsed = OpenAiProvider::parse_native_response(choice); assert!(parsed.reasoning_content.is_none()); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::EndTurn)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("stop")); } #[test] diff --git a/src/providers/openrouter.rs b/src/providers/openrouter.rs index f02d639b4..de85ec64a 100644 --- a/src/providers/openrouter.rs +++ b/src/providers/openrouter.rs @@ -1,7 +1,7 @@ use crate::multimodal; use crate::providers::traits::{ ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, - Provider, ProviderCapabilities, TokenUsage, ToolCall as ProviderToolCall, + NormalizedStopReason, Provider, ProviderCapabilities, TokenUsage, ToolCall as ProviderToolCall, }; use crate::tools::ToolSpec; use async_trait::async_trait; @@ -55,6 +55,8 @@ struct ApiChatResponse { #[derive(Debug, Deserialize)] struct Choice { message: ResponseMessage, + #[serde(default)] + finish_reason: Option, } #[derive(Debug, Deserialize)] @@ -137,6 +139,8 @@ struct UsageInfo { #[derive(Debug, Deserialize)] struct NativeChoice { message: NativeResponseMessage, + #[serde(default)] + finish_reason: Option, } #[derive(Debug, Deserialize)] @@ -284,7 +288,12 @@ impl OpenRouterProvider { MessageContent::Parts(parts) } - fn parse_native_response(message: NativeResponseMessage) -> ProviderChatResponse { + fn parse_native_response(choice: NativeChoice) -> ProviderChatResponse { + let raw_stop_reason = choice.finish_reason; + let stop_reason = raw_stop_reason + .as_deref() + .map(NormalizedStopReason::from_openai_finish_reason); + let message = choice.message; let reasoning_content = message.reasoning_content.clone(); let tool_calls = message .tool_calls @@ -303,6 +312,8 @@ impl OpenRouterProvider { usage: None, reasoning_content, quota_metadata: None, + stop_reason, + raw_stop_reason, } } @@ -487,13 +498,12 @@ impl Provider for OpenRouterProvider { input_tokens: u.prompt_tokens, output_tokens: u.completion_tokens, }); - let message = native_response + let choice = native_response .choices .into_iter() .next() - .map(|c| c.message) .ok_or_else(|| anyhow::anyhow!("No response from OpenRouter"))?; - let mut result = Self::parse_native_response(message); + let mut result = Self::parse_native_response(choice); result.usage = usage; Ok(result) } @@ -582,13 +592,12 @@ impl Provider for OpenRouterProvider { input_tokens: u.prompt_tokens, output_tokens: u.completion_tokens, }); - let message = native_response + let choice = native_response .choices .into_iter() .next() - .map(|c| c.message) .ok_or_else(|| anyhow::anyhow!("No response from OpenRouter"))?; - let mut result = Self::parse_native_response(message); + let mut result = Self::parse_native_response(choice); result.usage = usage; Ok(result) } @@ -828,25 +837,30 @@ mod tests { #[test] fn parse_native_response_converts_to_chat_response() { - let message = NativeResponseMessage { - content: Some("Here you go.".into()), - reasoning_content: None, - tool_calls: Some(vec![NativeToolCall { - id: Some("call_789".into()), - kind: Some("function".into()), - function: NativeFunctionCall { - name: "file_read".into(), - arguments: r#"{"path":"test.txt"}"#.into(), - }, - }]), + let choice = NativeChoice { + message: NativeResponseMessage { + content: Some("Here you go.".into()), + reasoning_content: None, + tool_calls: Some(vec![NativeToolCall { + id: Some("call_789".into()), + kind: Some("function".into()), + function: NativeFunctionCall { + name: "file_read".into(), + arguments: r#"{"path":"test.txt"}"#.into(), + }, + }]), + }, + finish_reason: Some("stop".into()), }; - let response = OpenRouterProvider::parse_native_response(message); + let response = OpenRouterProvider::parse_native_response(choice); assert_eq!(response.text.as_deref(), Some("Here you go.")); assert_eq!(response.tool_calls.len(), 1); assert_eq!(response.tool_calls[0].id, "call_789"); assert_eq!(response.tool_calls[0].name, "file_read"); + assert_eq!(response.stop_reason, Some(NormalizedStopReason::EndTurn)); + assert_eq!(response.raw_stop_reason.as_deref(), Some("stop")); } #[test] @@ -942,32 +956,42 @@ mod tests { #[test] fn parse_native_response_captures_reasoning_content() { - let message = NativeResponseMessage { - content: Some("answer".into()), - reasoning_content: Some("thinking step".into()), - tool_calls: Some(vec![NativeToolCall { - id: Some("call_1".into()), - kind: Some("function".into()), - function: NativeFunctionCall { - name: "shell".into(), - arguments: "{}".into(), - }, - }]), + let choice = NativeChoice { + message: NativeResponseMessage { + content: Some("answer".into()), + reasoning_content: Some("thinking step".into()), + tool_calls: Some(vec![NativeToolCall { + id: Some("call_1".into()), + kind: Some("function".into()), + function: NativeFunctionCall { + name: "shell".into(), + arguments: "{}".into(), + }, + }]), + }, + finish_reason: Some("length".into()), }; - let parsed = OpenRouterProvider::parse_native_response(message); + let parsed = OpenRouterProvider::parse_native_response(choice); assert_eq!(parsed.reasoning_content.as_deref(), Some("thinking step")); assert_eq!(parsed.tool_calls.len(), 1); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::MaxTokens)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("length")); } #[test] fn parse_native_response_none_reasoning_content_for_normal_model() { - let message = NativeResponseMessage { - content: Some("hello".into()), - reasoning_content: None, - tool_calls: None, + let choice = NativeChoice { + message: NativeResponseMessage { + content: Some("hello".into()), + reasoning_content: None, + tool_calls: None, + }, + finish_reason: Some("stop".into()), }; - let parsed = OpenRouterProvider::parse_native_response(message); + let parsed = OpenRouterProvider::parse_native_response(choice); assert!(parsed.reasoning_content.is_none()); + assert_eq!(parsed.stop_reason, Some(NormalizedStopReason::EndTurn)); + assert_eq!(parsed.raw_stop_reason.as_deref(), Some("stop")); } #[test] diff --git a/src/providers/reliable.rs b/src/providers/reliable.rs index 56eee0bde..e714566ed 100644 --- a/src/providers/reliable.rs +++ b/src/providers/reliable.rs @@ -1876,6 +1876,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } @@ -2070,6 +2072,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } diff --git a/src/providers/traits.rs b/src/providers/traits.rs index 47e594f52..212070ec8 100644 --- a/src/providers/traits.rs +++ b/src/providers/traits.rs @@ -65,6 +65,65 @@ pub struct TokenUsage { pub output_tokens: Option, } +/// Provider-agnostic stop reasons used by the agent loop. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind", content = "value", rename_all = "snake_case")] +pub enum NormalizedStopReason { + EndTurn, + ToolCall, + MaxTokens, + ContextWindowExceeded, + SafetyBlocked, + Cancelled, + Unknown(String), +} + +impl NormalizedStopReason { + pub fn from_openai_finish_reason(raw: &str) -> Self { + match raw.trim().to_ascii_lowercase().as_str() { + "stop" => Self::EndTurn, + "tool_calls" | "function_call" => Self::ToolCall, + "length" | "max_tokens" => Self::MaxTokens, + "content_filter" => Self::SafetyBlocked, + "cancelled" | "canceled" => Self::Cancelled, + _ => Self::Unknown(raw.trim().to_string()), + } + } + + pub fn from_anthropic_stop_reason(raw: &str) -> Self { + match raw.trim().to_ascii_lowercase().as_str() { + "end_turn" | "stop_sequence" => Self::EndTurn, + "tool_use" => Self::ToolCall, + "max_tokens" => Self::MaxTokens, + "model_context_window_exceeded" => Self::ContextWindowExceeded, + "safety" => Self::SafetyBlocked, + "cancelled" | "canceled" => Self::Cancelled, + _ => Self::Unknown(raw.trim().to_string()), + } + } + + pub fn from_bedrock_stop_reason(raw: &str) -> Self { + match raw.trim().to_ascii_lowercase().as_str() { + "end_turn" => Self::EndTurn, + "tool_use" => Self::ToolCall, + "max_tokens" => Self::MaxTokens, + "guardrail_intervened" => Self::SafetyBlocked, + "cancelled" | "canceled" => Self::Cancelled, + _ => Self::Unknown(raw.trim().to_string()), + } + } + + pub fn from_gemini_finish_reason(raw: &str) -> Self { + match raw.trim().to_ascii_uppercase().as_str() { + "STOP" => Self::EndTurn, + "MAX_TOKENS" => Self::MaxTokens, + "SAFETY" | "RECITATION" => Self::SafetyBlocked, + "CANCELLED" => Self::Cancelled, + _ => Self::Unknown(raw.trim().to_string()), + } + } +} + /// An LLM response that may contain text, tool calls, or both. #[derive(Debug, Clone)] pub struct ChatResponse { @@ -82,6 +141,10 @@ pub struct ChatResponse { /// Quota metadata extracted from response headers (if available). /// Populated by providers that support quota tracking. pub quota_metadata: Option, + /// Normalized provider stop reason (if surfaced by the upstream API). + pub stop_reason: Option, + /// Raw provider-native stop reason string for diagnostics. + pub raw_stop_reason: Option, } impl ChatResponse { @@ -376,6 +439,8 @@ pub trait Provider: Send + Sync { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } } @@ -389,6 +454,8 @@ pub trait Provider: Send + Sync { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } @@ -425,6 +492,8 @@ pub trait Provider: Send + Sync { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } @@ -555,6 +624,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert!(!empty.has_tool_calls()); assert_eq!(empty.text_or_empty(), ""); @@ -569,6 +640,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert!(with_tools.has_tool_calls()); assert_eq!(with_tools.text_or_empty(), "Let me check"); @@ -592,6 +665,8 @@ mod tests { }), reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert_eq!(resp.usage.as_ref().unwrap().input_tokens, Some(100)); assert_eq!(resp.usage.as_ref().unwrap().output_tokens, Some(50)); @@ -661,6 +736,30 @@ mod tests { assert!(provider.supports_vision()); } + #[test] + fn normalized_stop_reason_mappings_cover_core_provider_values() { + assert_eq!( + NormalizedStopReason::from_openai_finish_reason("length"), + NormalizedStopReason::MaxTokens + ); + assert_eq!( + NormalizedStopReason::from_openai_finish_reason("tool_calls"), + NormalizedStopReason::ToolCall + ); + assert_eq!( + NormalizedStopReason::from_anthropic_stop_reason("model_context_window_exceeded"), + NormalizedStopReason::ContextWindowExceeded + ); + assert_eq!( + NormalizedStopReason::from_bedrock_stop_reason("guardrail_intervened"), + NormalizedStopReason::SafetyBlocked + ); + assert_eq!( + NormalizedStopReason::from_gemini_finish_reason("MAX_TOKENS"), + NormalizedStopReason::MaxTokens + ); + } + #[test] fn tools_payload_variants() { // Test Gemini variant diff --git a/src/tools/delegate.rs b/src/tools/delegate.rs index 19e6152b0..7daa4d1c7 100644 --- a/src/tools/delegate.rs +++ b/src/tools/delegate.rs @@ -881,6 +881,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } else { Ok(ChatResponse { @@ -893,6 +895,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } @@ -928,6 +932,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }) } } diff --git a/src/tools/file_read.rs b/src/tools/file_read.rs index 2b915b6d6..31094a696 100644 --- a/src/tools/file_read.rs +++ b/src/tools/file_read.rs @@ -936,6 +936,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -997,6 +999,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, // Turn 1 continued: provider sees tool result and answers ChatResponse { @@ -1005,6 +1009,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, ]); @@ -1092,6 +1098,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, ChatResponse { text: Some("The file appears to be binary data.".into()), @@ -1099,6 +1107,8 @@ mod tests { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, ]); diff --git a/tests/agent_e2e.rs b/tests/agent_e2e.rs index 47eca6696..31413dc9d 100644 --- a/tests/agent_e2e.rs +++ b/tests/agent_e2e.rs @@ -67,6 +67,8 @@ impl Provider for MockProvider { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -194,6 +196,8 @@ impl Provider for RecordingProvider { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -244,6 +248,8 @@ fn text_response(text: &str) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -254,6 +260,8 @@ fn tool_response(calls: Vec) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -380,6 +388,8 @@ async fn e2e_xml_dispatcher_tool_call() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }, text_response("XML tool executed"), ])); @@ -1019,6 +1029,8 @@ async fn e2e_agent_research_prompt_guided() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -1038,6 +1050,8 @@ async fn e2e_agent_research_prompt_guided() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; // Response 2: Research complete @@ -1047,6 +1061,8 @@ async fn e2e_agent_research_prompt_guided() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; // Response 3: Main turn response diff --git a/tests/agent_loop_robustness.rs b/tests/agent_loop_robustness.rs index 06fb7651f..1e732a87b 100644 --- a/tests/agent_loop_robustness.rs +++ b/tests/agent_loop_robustness.rs @@ -62,6 +62,8 @@ impl Provider for MockProvider { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }); } Ok(guard.remove(0)) @@ -185,6 +187,8 @@ fn text_response(text: &str) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -195,6 +199,8 @@ fn tool_response(calls: Vec) -> ChatResponse { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, } } @@ -365,6 +371,8 @@ async fn agent_handles_empty_provider_response() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }])); let mut agent = build_agent(provider, vec![Box::new(EchoTool)]); @@ -381,6 +389,8 @@ async fn agent_handles_none_text_response() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }])); let mut agent = build_agent(provider, vec![Box::new(EchoTool)]); diff --git a/tests/provider_schema.rs b/tests/provider_schema.rs index 3b775a974..97273fae0 100644 --- a/tests/provider_schema.rs +++ b/tests/provider_schema.rs @@ -156,6 +156,8 @@ fn chat_response_text_only() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert_eq!(resp.text_or_empty(), "Hello world"); @@ -174,6 +176,8 @@ fn chat_response_with_tool_calls() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert!(resp.has_tool_calls()); @@ -189,6 +193,8 @@ fn chat_response_text_or_empty_handles_none() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert_eq!(resp.text_or_empty(), ""); @@ -213,6 +219,8 @@ fn chat_response_multiple_tool_calls() { usage: None, reasoning_content: None, quota_metadata: None, + stop_reason: None, + raw_stop_reason: None, }; assert!(resp.has_tool_calls());