diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 00a02f6c6..58ddfbe0e 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -132,6 +132,7 @@ const AUTO_CRON_DELIVERY_CHANNELS: &[&str] = &["telegram", "discord", "slack", " const NON_CLI_APPROVAL_WAIT_TIMEOUT_SECS: u64 = 300; const NON_CLI_APPROVAL_POLL_INTERVAL_MS: u64 = 250; const MISSING_TOOL_CALL_RETRY_PROMPT: &str = "Internal correction: your last reply implied a follow-up action or claimed action completion, but no valid tool call was emitted. If a tool is needed, emit it now using the required ... format. If no tool is needed, provide the complete final answer now and do not defer action."; +const TOOL_UNAVAILABLE_RETRY_PROMPT_PREFIX: &str = "Internal correction: your prior reply claimed required tools were unavailable. Use only the runtime-allowed tools listed below. If file changes are requested and `file_write`/`file_edit` are listed, call them directly."; /// Detect completion claims that imply state-changing work already happened /// without an accompanying tool call. @@ -158,6 +159,26 @@ static SIDE_EFFECT_ACTION_OBJECT_REGEX: LazyLock = LazyLock::new(|| { .unwrap() }); +/// Detect responses that incorrectly claim file tooling is unavailable even +/// when runtime policy allows file tools in this turn. +static TOOL_UNAVAILABLE_CLAIM_REGEX: LazyLock = LazyLock::new(|| { + Regex::new( + r"(?ix) + \b( + i\s+(?:do\s+not|don't)\s+have\s+access| + i\s+(?:cannot|can't)\s+(?:access|use|perform|create|edit|write)| + i\s+am\s+unable\s+to| + no\s+(?:tool|tools|function|functions)\s+(?:available|access) + )\b + [^.!?\n]{0,220} + \b( + tool|tools|function|functions|file|file_write|file_edit| + create|write|edit|delete + )\b", + ) + .unwrap() +}); + #[derive(Debug, Clone)] pub(crate) struct NonCliApprovalPrompt { pub request_id: String, @@ -432,6 +453,31 @@ fn looks_like_unverified_action_completion_without_tool_call(text: &str) -> bool && SIDE_EFFECT_ACTION_OBJECT_REGEX.is_match(trimmed) } +fn looks_like_tool_unavailability_claim(text: &str, tool_specs: &[crate::tools::ToolSpec]) -> bool { + let trimmed = text.trim(); + if trimmed.is_empty() || !TOOL_UNAVAILABLE_CLAIM_REGEX.is_match(trimmed) { + return false; + } + + tool_specs + .iter() + .any(|spec| matches!(spec.name.as_str(), "file_write" | "file_edit")) +} + +fn build_tool_unavailable_retry_prompt(tool_specs: &[crate::tools::ToolSpec]) -> String { + const MAX_TOOLS_IN_PROMPT: usize = 24; + let tool_list = tool_specs + .iter() + .map(|spec| spec.name.as_str()) + .take(MAX_TOOLS_IN_PROMPT) + .collect::>() + .join(", "); + + format!( + "{TOOL_UNAVAILABLE_RETRY_PROMPT_PREFIX}\nRuntime tools: {tool_list}\nEmit the correct tool call now if tool use is required. Otherwise provide the final answer without claiming missing tools." + ) +} + #[derive(Debug)] pub(crate) struct ToolLoopCancelled; @@ -921,7 +967,10 @@ pub(crate) async fn run_tool_call_loop( if tool_calls.is_empty() { let completion_claim_signal = looks_like_unverified_action_completion_without_tool_call(&display_text); - let missing_tool_call_signal = parse_issue_detected || completion_claim_signal; + let tool_unavailable_signal = + looks_like_tool_unavailability_claim(&display_text, &tool_specs); + let missing_tool_call_signal = + parse_issue_detected || completion_claim_signal || tool_unavailable_signal; let missing_tool_call_followthrough = !missing_tool_call_retry_used && iteration + 1 < max_iterations && !tool_specs.is_empty() @@ -929,9 +978,15 @@ pub(crate) async fn run_tool_call_loop( if missing_tool_call_followthrough { missing_tool_call_retry_used = true; - missing_tool_call_retry_prompt = Some(MISSING_TOOL_CALL_RETRY_PROMPT.to_string()); + missing_tool_call_retry_prompt = Some(if tool_unavailable_signal { + build_tool_unavailable_retry_prompt(&tool_specs) + } else { + MISSING_TOOL_CALL_RETRY_PROMPT.to_string() + }); let retry_reason = if parse_issue_detected { "parse_issue_detected" + } else if tool_unavailable_signal { + "tool_unavailable_claim_detected" } else { "completion_claim_text_detected" }; @@ -978,7 +1033,13 @@ pub(crate) async fn run_tool_call_loop( ), }), ); - anyhow::bail!("Model repeatedly deferred action without emitting a tool call"); + if tool_unavailable_signal && !parse_issue_detected && !completion_claim_signal { + tracing::warn!( + "Model still claims missing tools after corrective retry; returning text response." + ); + } else { + anyhow::bail!("Model repeatedly deferred action without emitting a tool call"); + } } runtime_trace::record_event( @@ -1519,6 +1580,23 @@ pub(crate) fn build_shell_policy_instructions(autonomy: &crate::config::Autonomy instructions } +fn build_runtime_tool_availability_notice(tools_registry: &[Box]) -> String { + const MAX_LISTED_TOOLS: usize = 40; + let names = tools_registry + .iter() + .map(|tool| tool.name()) + .take(MAX_LISTED_TOOLS) + .collect::>() + .join(", "); + + format!( + "\n## Runtime Tool Availability (Authoritative)\n\n\ + Use only these runtime-available tools for this turn.\n\ + Tools: {names}\n\ + Do not claim tools are unavailable when they are listed here.\n" + ) +} + // ── CLI Entrypoint ─────────────────────────────────────────────────────── // Wires up all subsystems (observer, runtime, security, memory, tools, // provider, hardware RAG, peripherals) and enters either single-shot or @@ -1784,6 +1862,7 @@ pub async fn run( system_prompt.push_str(&build_tool_instructions(&tools_registry)); } system_prompt.push_str(&build_shell_policy_instructions(&config.autonomy)); + system_prompt.push_str(&build_runtime_tool_availability_notice(&tools_registry)); // ── Approval manager (supervised mode) ─────────────────────── let approval_manager = if interactive { @@ -2180,6 +2259,7 @@ pub async fn process_message(config: Config, message: &str) -> Result { system_prompt.push_str(&build_tool_instructions(&tools_registry)); } system_prompt.push_str(&build_shell_policy_instructions(&config.autonomy)); + system_prompt.push_str(&build_runtime_tool_availability_notice(&tools_registry)); let mem_context = build_context(mem.as_ref(), message, config.memory.min_relevance_score).await; let rag_limit = if config.agent.compact_context { 2 } else { 5 }; @@ -3232,6 +3312,94 @@ mod tests { ); } + #[tokio::test] + async fn run_tool_call_loop_retries_when_model_claims_missing_file_tools() { + let provider = ScriptedProvider::from_text_responses(vec![ + "I don't have access to a file creation tool in my current set of available functions.", + r#" +{"name":"file_write","arguments":{"value":"retry"}} +"#, + "done after file tool", + ]); + + let invocations = Arc::new(AtomicUsize::new(0)); + let tools_registry: Vec> = vec![Box::new(CountingTool::new( + "file_write", + Arc::clone(&invocations), + ))]; + + let mut history = vec![ + ChatMessage::system("test-system"), + ChatMessage::user("create a test file"), + ]; + let observer = NoopObserver; + + let result = run_tool_call_loop( + &provider, + &mut history, + &tools_registry, + &observer, + "mock-provider", + "mock-model", + 0.0, + true, + None, + "cli", + &crate::config::MultimodalConfig::default(), + 5, + None, + None, + None, + &[], + ) + .await + .expect("loop should retry once when model wrongly claims file tools are unavailable"); + + assert_eq!(result, "done after file tool"); + assert_eq!(invocations.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn run_tool_call_loop_allows_text_only_planning_without_tool_call() { + let provider = ScriptedProvider::from_text_responses(vec![ + "We were previously discussing gmail integration. Goal 1 is done. Our next task is Goal 2 — Gmail API via OAuth. Here is the implementation plan before any tool actions.", + ]); + + let tools_registry: Vec> = vec![Box::new(CountingTool::new( + "count_tool", + Arc::new(AtomicUsize::new(0)), + ))]; + + let mut history = vec![ + ChatMessage::system("test-system"), + ChatMessage::user("we finished goal one, what is next"), + ]; + let observer = NoopObserver; + + let result = run_tool_call_loop( + &provider, + &mut history, + &tools_registry, + &observer, + "mock-provider", + "mock-model", + 0.0, + true, + None, + "cli", + &crate::config::MultimodalConfig::default(), + 5, + None, + None, + None, + &[], + ) + .await + .expect("planning-only text should be returned without forced tool-call rejection"); + + assert!(result.contains("implementation plan")); + } + #[tokio::test] async fn run_tool_call_loop_native_mode_preserves_fallback_tool_call_ids() { let provider = ScriptedProvider::from_text_responses(vec![ @@ -3307,6 +3475,34 @@ mod tests { assert!(!looks_like_unverified_action_completion_without_tool_call( "I have a suggestion for the plan if you want me to proceed." )); + assert!(!looks_like_unverified_action_completion_without_tool_call( + "We were previously discussing gmail integration. Goal 1 is done. Our next task is Goal 2 — Gmail API via OAuth." + )); + } + + #[test] + fn looks_like_tool_unavailability_claim_detects_false_missing_tool_replies() { + let tools = vec![ + crate::tools::ToolSpec { + name: "file_write".to_string(), + description: "Write file".to_string(), + parameters: serde_json::json!({"type":"object"}), + }, + crate::tools::ToolSpec { + name: "file_edit".to_string(), + description: "Edit file".to_string(), + parameters: serde_json::json!({"type":"object"}), + }, + ]; + + assert!(looks_like_tool_unavailability_claim( + "I don't have access to a file creation tool in my current set of available functions.", + &tools + )); + assert!(!looks_like_tool_unavailability_claim( + "I can create that file now.", + &tools + )); } #[test] diff --git a/src/channels/mod.rs b/src/channels/mod.rs index a2350b5c2..99ad804ac 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -1015,6 +1015,18 @@ fn build_runtime_tool_visibility_prompt( ); } + prompt.push_str( + "- Do not claim tools are unavailable when they are listed above; call the appropriate tool directly.\n", + ); + if specs + .iter() + .any(|spec| matches!(spec.name.as_str(), "file_write" | "file_edit")) + { + prompt.push_str( + "- File changes are supported in this turn (`file_write`/`file_edit`) when requested and policy permits.\n", + ); + } + if native_tools { prompt.push_str( "Tool calling for this turn uses native provider function-calling. \ @@ -6014,14 +6026,22 @@ BTC is currently around $65,000 based on latest tool output."# assert!(non_native.contains("Excluded by runtime policy: mock_price")); assert!(non_native.contains("`mock_echo`")); assert!(!non_native.contains("**mock_price**:")); + assert!(non_native.contains("Do not claim tools are unavailable")); assert!(non_native.contains("## Tool Use Protocol")); let native = build_runtime_tool_visibility_prompt(&tools, &excluded, true); assert!(native.contains("Runtime Tool Availability (Authoritative)")); + assert!(native.contains("Do not claim tools are unavailable")); assert!(native.contains("native provider function-calling")); assert!(!native.contains("## Tool Use Protocol")); } + fn autonomy_with_mock_price_auto_approve() -> crate::config::AutonomyConfig { + let mut autonomy = crate::config::AutonomyConfig::default(); + autonomy.auto_approve.push("mock_price".to_string()); + autonomy + } + #[tokio::test] async fn process_channel_message_injects_runtime_tool_visibility_prompt() { let channel_impl = Arc::new(RecordingChannel::default()); @@ -6064,7 +6084,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), }); @@ -6139,7 +6159,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), multimodal: crate::config::MultimodalConfig::default(), hooks: None, @@ -6203,7 +6223,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), multimodal: crate::config::MultimodalConfig::default(), hooks: None, @@ -6279,7 +6299,7 @@ BTC is currently around $65,000 based on latest tool output."# interrupt_on_new_message: false, non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), multimodal: crate::config::MultimodalConfig::default(), hooks: None, @@ -6356,7 +6376,7 @@ BTC is currently around $65,000 based on latest tool output."# interrupt_on_new_message: false, non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), multimodal: crate::config::MultimodalConfig::default(), hooks: None, @@ -6429,7 +6449,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), }); @@ -6493,7 +6513,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), }); @@ -6566,7 +6586,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), }); @@ -7837,7 +7857,7 @@ BTC is currently around $65,000 based on latest tool output."# query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), + &autonomy_with_mock_price_auto_approve(), )), }); @@ -8239,6 +8259,8 @@ BTC is currently around $65,000 based on latest tool output."# async fn process_channel_message_respects_configured_max_tool_iterations_above_default() { let channel_impl = Arc::new(RecordingChannel::default()); let channel: Arc = channel_impl.clone(); + let mut autonomy_cfg = autonomy_with_mock_price_auto_approve(); + autonomy_cfg.level = crate::security::AutonomyLevel::Full; let mut channels_by_name = HashMap::new(); channels_by_name.insert(channel.name().to_string(), channel); @@ -8266,16 +8288,14 @@ BTC is currently around $65,000 based on latest tool output."# reliability: Arc::new(crate::config::ReliabilityConfig::default()), provider_runtime_options: providers::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), - message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, + message_timeout_secs: 5, interrupt_on_new_message: false, multimodal: crate::config::MultimodalConfig::default(), hooks: None, non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())), query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), - approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), - )), + approval_manager: Arc::new(ApprovalManager::from_config(&autonomy_cfg)), }); process_channel_message( @@ -8304,6 +8324,8 @@ BTC is currently around $65,000 based on latest tool output."# async fn process_channel_message_reports_configured_max_tool_iterations_limit() { let channel_impl = Arc::new(RecordingChannel::default()); let channel: Arc = channel_impl.clone(); + let mut autonomy_cfg = autonomy_with_mock_price_auto_approve(); + autonomy_cfg.level = crate::security::AutonomyLevel::Full; let mut channels_by_name = HashMap::new(); channels_by_name.insert(channel.name().to_string(), channel); @@ -8331,16 +8353,14 @@ BTC is currently around $65,000 based on latest tool output."# reliability: Arc::new(crate::config::ReliabilityConfig::default()), provider_runtime_options: providers::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), - message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, + message_timeout_secs: 5, interrupt_on_new_message: false, multimodal: crate::config::MultimodalConfig::default(), hooks: None, non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())), query_classification: crate::config::QueryClassificationConfig::default(), model_routes: Vec::new(), - approval_manager: Arc::new(ApprovalManager::from_config( - &crate::config::AutonomyConfig::default(), - )), + approval_manager: Arc::new(ApprovalManager::from_config(&autonomy_cfg)), }); process_channel_message( diff --git a/src/config/schema.rs b/src/config/schema.rs index 512f68136..d4e90a91e 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -5526,6 +5526,22 @@ fn read_codex_openai_api_key() -> Option { .map(ToString::to_string) } +fn normalize_top_level_table_aliases(raw_toml: &mut toml::Value) { + let Some(root) = raw_toml.as_table_mut() else { + return; + }; + + if root.contains_key("Gateway") { + if root.contains_key("gateway") { + let _ = root.remove("Gateway"); + tracing::warn!("Legacy table [Gateway] ignored because [gateway] is already present."); + } else if let Some(value) = root.remove("Gateway") { + root.insert("gateway".to_string(), value); + tracing::warn!("Legacy table [Gateway] mapped to [gateway]."); + } + } +} + impl Config { pub async fn load_or_init() -> Result { let (default_zeroclaw_dir, default_workspace_dir) = default_config_and_workspace_dirs()?; @@ -5563,12 +5579,18 @@ impl Config { let contents = fs::read_to_string(&config_path) .await .context("Failed to read config file")?; + let mut raw_toml: toml::Value = + toml::from_str(&contents).context("Failed to parse config file")?; + normalize_top_level_table_aliases(&mut raw_toml); + let normalized_contents = + toml::to_string(&raw_toml).context("Failed to normalize config file")?; // Track ignored/unknown config keys to warn users about silent misconfigurations // (e.g., using [providers.ollama] which doesn't exist instead of top-level api_url) let mut ignored_paths: Vec = Vec::new(); let mut config: Config = serde_ignored::deserialize( - toml::de::Deserializer::parse(&contents).context("Failed to parse config file")?, + toml::de::Deserializer::parse(&normalized_contents) + .context("Failed to parse config file")?, |path| { ignored_paths.push(path.to_string()); }, @@ -8435,6 +8457,25 @@ default_temperature = 0.7 ); } + #[test] + async fn checklist_gateway_backward_compat_accepts_legacy_gateway_table_alias() { + let mut raw: toml::Value = toml::from_str( + r#" +default_temperature = 0.7 +[Gateway] +require_pairing = false +"#, + ) + .unwrap(); + + normalize_top_level_table_aliases(&mut raw); + let parsed: Config = raw.try_into().unwrap(); + assert!( + !parsed.gateway.require_pairing, + "Legacy [Gateway] alias should map to [gateway]" + ); + } + #[test] async fn checklist_autonomy_default_is_workspace_scoped() { let a = AutonomyConfig::default();