diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 626a5979b..0b8d25118 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -366,6 +366,95 @@ fn parse_tool_calls_from_json_value(value: &serde_json::Value) -> Vec bool { + let normalized = tag.to_ascii_lowercase(); + matches!( + normalized.as_str(), + "tool_call" + | "toolcall" + | "tool-call" + | "invoke" + | "thinking" + | "thought" + | "analysis" + | "reasoning" + | "reflection" + ) +} + +static XML_TOOL_TAG_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?s)<([a-zA-Z_][a-zA-Z0-9_-]*)>\s*(.*?)\s*").unwrap()); + +static XML_ARG_TAG_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?s)<([a-zA-Z_][a-zA-Z0-9_-]*)>\s*([^<]+?)\s*").unwrap()); + +/// Parse XML-style tool calls in `` bodies. +/// Supports both nested argument tags and JSON argument payloads: +/// - `...` +/// - `{"command":"pwd"}` +fn parse_xml_tool_calls(xml_content: &str) -> Option> { + let mut calls = Vec::new(); + let trimmed = xml_content.trim(); + + if !trimmed.starts_with('<') || !trimmed.contains('>') { + return None; + } + + for cap in XML_TOOL_TAG_RE.captures_iter(trimmed) { + let tool_name = cap[1].trim().to_string(); + if is_xml_meta_tag(&tool_name) { + continue; + } + + let inner_content = cap[2].trim(); + if inner_content.is_empty() { + continue; + } + + let mut args = serde_json::Map::new(); + + if let Some(first_json) = extract_json_values(inner_content).into_iter().next() { + match first_json { + serde_json::Value::Object(object_args) => { + args = object_args; + } + other => { + args.insert("value".to_string(), other); + } + } + } else { + for inner_cap in XML_ARG_TAG_RE.captures_iter(inner_content) { + let key = inner_cap[1].trim().to_string(); + if is_xml_meta_tag(&key) { + continue; + } + let value = inner_cap[2].trim(); + if !value.is_empty() { + args.insert(key, serde_json::Value::String(value.to_string())); + } + } + + if args.is_empty() { + args.insert( + "content".to_string(), + serde_json::Value::String(inner_content.to_string()), + ); + } + } + + calls.push(ParsedToolCall { + name: tool_name, + arguments: serde_json::Value::Object(args), + }); + } + + if calls.is_empty() { + None + } else { + Some(calls) + } +} + const TOOL_CALL_OPEN_TAGS: [&str; 4] = ["", "", "", ""]; fn find_first_tag<'a>(haystack: &str, tags: &'a [&'a str]) -> Option<(usize, &'a str)> { @@ -659,6 +748,8 @@ fn parse_tool_calls(response: &str) -> (String, Vec) { if let Some(close_idx) = after_open.find(close_tag) { let inner = &after_open[..close_idx]; let mut parsed_any = false; + + // Try JSON format first let json_values = extract_json_values(inner); for value in json_values { let parsed_calls = parse_tool_calls_from_json_value(&value); @@ -668,8 +759,18 @@ fn parse_tool_calls(response: &str) -> (String, Vec) { } } + // If JSON parsing failed, try XML format (DeepSeek/GLM style) if !parsed_any { - tracing::warn!("Malformed JSON: expected tool-call object in tag body"); + if let Some(xml_calls) = parse_xml_tool_calls(inner) { + calls.extend(xml_calls); + parsed_any = true; + } + } + + if !parsed_any { + tracing::warn!( + "Malformed : expected tool-call object in tag body (JSON/XML)" + ); } remaining = &after_open[close_idx + close_tag.len()..]; @@ -2582,6 +2683,59 @@ I will now call the tool with this payload: ); } + #[test] + fn parse_tool_calls_handles_xml_nested_tool_payload() { + let response = r#" + +project roadmap + +"#; + + let (text, calls) = parse_tool_calls(response); + assert!(text.is_empty()); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].name, "memory_recall"); + assert_eq!( + calls[0].arguments.get("query").unwrap().as_str().unwrap(), + "project roadmap" + ); + } + + #[test] + fn parse_tool_calls_ignores_xml_thinking_wrapper() { + let response = r#" +Need to inspect memory first + +recent deploy notes + +"#; + + let (text, calls) = parse_tool_calls(response); + assert!(text.is_empty()); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].name, "memory_recall"); + assert_eq!( + calls[0].arguments.get("query").unwrap().as_str().unwrap(), + "recent deploy notes" + ); + } + + #[test] + fn parse_tool_calls_handles_xml_with_json_arguments() { + let response = r#" +{"command":"pwd"} +"#; + + let (text, calls) = parse_tool_calls(response); + assert!(text.is_empty()); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].name, "shell"); + assert_eq!( + calls[0].arguments.get("command").unwrap().as_str().unwrap(), + "pwd" + ); + } + #[test] fn parse_tool_calls_handles_markdown_tool_call_fence() { let response = r#"I'll check that. diff --git a/src/providers/compatible.rs b/src/providers/compatible.rs index 615ac6d93..00a317d9c 100644 --- a/src/providers/compatible.rs +++ b/src/providers/compatible.rs @@ -356,13 +356,60 @@ struct ToolCall { #[serde(skip_serializing_if = "Option::is_none")] id: Option, #[serde(rename = "type")] + #[serde(default)] kind: Option, + #[serde(default)] function: Option, + + // Compatibility: Some providers (e.g., older GLM) may use 'name' directly + #[serde(default)] + name: Option, + #[serde(default)] + arguments: Option, + + // Compatibility: DeepSeek sometimes wraps arguments differently + #[serde(rename = "parameters", default)] + parameters: Option, +} + +impl ToolCall { + /// Extract function name with fallback logic for various provider formats + fn function_name(&self) -> Option { + // Standard OpenAI format: tool_calls[].function.name + if let Some(ref func) = self.function { + if let Some(ref name) = func.name { + return Some(name.clone()); + } + } + // Fallback: direct name field + self.name.clone() + } + + /// Extract arguments with fallback logic and type conversion + fn function_arguments(&self) -> Option { + // Standard OpenAI format: tool_calls[].function.arguments (string) + if let Some(ref func) = self.function { + if let Some(ref args) = func.arguments { + return Some(args.clone()); + } + } + // Fallback: direct arguments field + if let Some(ref args) = self.arguments { + return Some(args.clone()); + } + // Compatibility: Some providers return parameters as object instead of string + if let Some(ref params) = self.parameters { + return serde_json::to_string(params).ok(); + } + None + } } #[derive(Debug, Deserialize, Serialize)] struct Function { + #[serde(default)] name: Option, + #[serde(default)] arguments: Option, } @@ -849,26 +896,34 @@ impl OpenAiCompatibleProvider { } fn parse_native_response(message: ResponseMessage) -> ProviderChatResponse { + let text = message.effective_content_optional(); let tool_calls = message .tool_calls .unwrap_or_default() .into_iter() .filter_map(|tc| { - let function = tc.function?; - let name = function.name?; - let arguments = function.arguments.unwrap_or_else(|| "{}".to_string()); + let name = tc.function_name()?; + let arguments = tc.function_arguments().unwrap_or_else(|| "{}".to_string()); + let normalized_arguments = + if serde_json::from_str::(&arguments).is_ok() { + arguments + } else { + tracing::warn!( + function = %name, + arguments = %arguments, + "Invalid JSON in native tool-call arguments, using empty object" + ); + "{}".to_string() + }; Some(ProviderToolCall { id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()), name, - arguments, + arguments: normalized_arguments, }) }) .collect::>(); - ProviderChatResponse { - text: message.content, - tool_calls, - } + ProviderChatResponse { text, tool_calls } } fn is_native_tool_schema_unsupported(status: reqwest::StatusCode, error: &str) -> bool { @@ -1696,6 +1751,50 @@ mod tests { .contains("requires at least one non-system message")); } + #[test] + fn tool_call_function_name_falls_back_to_top_level_name() { + let call: ToolCall = serde_json::from_value(serde_json::json!({ + "name": "memory_recall", + "arguments": "{\"query\":\"latest roadmap\"}" + })) + .unwrap(); + + assert_eq!(call.function_name().as_deref(), Some("memory_recall")); + } + + #[test] + fn tool_call_function_arguments_falls_back_to_parameters_object() { + let call: ToolCall = serde_json::from_value(serde_json::json!({ + "name": "shell", + "parameters": {"command": "pwd"} + })) + .unwrap(); + + assert_eq!( + call.function_arguments().as_deref(), + Some("{\"command\":\"pwd\"}") + ); + } + + #[test] + fn tool_call_function_arguments_prefers_nested_function_field() { + let call: ToolCall = serde_json::from_value(serde_json::json!({ + "name": "ignored_name", + "arguments": "{\"query\":\"ignored\"}", + "function": { + "name": "memory_recall", + "arguments": "{\"query\":\"preferred\"}" + } + })) + .unwrap(); + + assert_eq!(call.function_name().as_deref(), Some("memory_recall")); + assert_eq!( + call.function_arguments().as_deref(), + Some("{\"query\":\"preferred\"}") + ); + } + // ---------------------------------------------------------- // Custom endpoint path tests (Issue #114) // ----------------------------------------------------------