fix(channels): harden tool-loop and gateway config regressions

This commit is contained in:
argenis de la rosa 2026-03-05 05:27:51 -05:00
parent bd2beb3e16
commit d85cbce76a
3 changed files with 278 additions and 21 deletions

View File

@ -132,6 +132,7 @@ const AUTO_CRON_DELIVERY_CHANNELS: &[&str] = &["telegram", "discord", "slack", "
const NON_CLI_APPROVAL_WAIT_TIMEOUT_SECS: u64 = 300;
const NON_CLI_APPROVAL_POLL_INTERVAL_MS: u64 = 250;
const MISSING_TOOL_CALL_RETRY_PROMPT: &str = "Internal correction: your last reply implied a follow-up action or claimed action completion, but no valid tool call was emitted. If a tool is needed, emit it now using the required <tool_call>...</tool_call> format. If no tool is needed, provide the complete final answer now and do not defer action.";
const TOOL_UNAVAILABLE_RETRY_PROMPT_PREFIX: &str = "Internal correction: your prior reply claimed required tools were unavailable. Use only the runtime-allowed tools listed below. If file changes are requested and `file_write`/`file_edit` are listed, call them directly.";
/// Detect completion claims that imply state-changing work already happened
/// without an accompanying tool call.
@ -158,6 +159,26 @@ static SIDE_EFFECT_ACTION_OBJECT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
.unwrap()
});
/// Detect responses that incorrectly claim file tooling is unavailable even
/// when runtime policy allows file tools in this turn.
static TOOL_UNAVAILABLE_CLAIM_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?ix)
\b(
i\s+(?:do\s+not|don't)\s+have\s+access|
i\s+(?:cannot|can't)\s+(?:access|use|perform|create|edit|write)|
i\s+am\s+unable\s+to|
no\s+(?:tool|tools|function|functions)\s+(?:available|access)
)\b
[^.!?\n]{0,220}
\b(
tool|tools|function|functions|file|file_write|file_edit|
create|write|edit|delete
)\b",
)
.unwrap()
});
#[derive(Debug, Clone)]
pub(crate) struct NonCliApprovalPrompt {
pub request_id: String,
@ -432,6 +453,31 @@ fn looks_like_unverified_action_completion_without_tool_call(text: &str) -> bool
&& SIDE_EFFECT_ACTION_OBJECT_REGEX.is_match(trimmed)
}
fn looks_like_tool_unavailability_claim(text: &str, tool_specs: &[crate::tools::ToolSpec]) -> bool {
let trimmed = text.trim();
if trimmed.is_empty() || !TOOL_UNAVAILABLE_CLAIM_REGEX.is_match(trimmed) {
return false;
}
tool_specs
.iter()
.any(|spec| matches!(spec.name.as_str(), "file_write" | "file_edit"))
}
fn build_tool_unavailable_retry_prompt(tool_specs: &[crate::tools::ToolSpec]) -> String {
const MAX_TOOLS_IN_PROMPT: usize = 24;
let tool_list = tool_specs
.iter()
.map(|spec| spec.name.as_str())
.take(MAX_TOOLS_IN_PROMPT)
.collect::<Vec<_>>()
.join(", ");
format!(
"{TOOL_UNAVAILABLE_RETRY_PROMPT_PREFIX}\nRuntime tools: {tool_list}\nEmit the correct tool call now if tool use is required. Otherwise provide the final answer without claiming missing tools."
)
}
#[derive(Debug)]
pub(crate) struct ToolLoopCancelled;
@ -921,7 +967,10 @@ pub(crate) async fn run_tool_call_loop(
if tool_calls.is_empty() {
let completion_claim_signal =
looks_like_unverified_action_completion_without_tool_call(&display_text);
let missing_tool_call_signal = parse_issue_detected || completion_claim_signal;
let tool_unavailable_signal =
looks_like_tool_unavailability_claim(&display_text, &tool_specs);
let missing_tool_call_signal =
parse_issue_detected || completion_claim_signal || tool_unavailable_signal;
let missing_tool_call_followthrough = !missing_tool_call_retry_used
&& iteration + 1 < max_iterations
&& !tool_specs.is_empty()
@ -929,9 +978,15 @@ pub(crate) async fn run_tool_call_loop(
if missing_tool_call_followthrough {
missing_tool_call_retry_used = true;
missing_tool_call_retry_prompt = Some(MISSING_TOOL_CALL_RETRY_PROMPT.to_string());
missing_tool_call_retry_prompt = Some(if tool_unavailable_signal {
build_tool_unavailable_retry_prompt(&tool_specs)
} else {
MISSING_TOOL_CALL_RETRY_PROMPT.to_string()
});
let retry_reason = if parse_issue_detected {
"parse_issue_detected"
} else if tool_unavailable_signal {
"tool_unavailable_claim_detected"
} else {
"completion_claim_text_detected"
};
@ -978,7 +1033,13 @@ pub(crate) async fn run_tool_call_loop(
),
}),
);
anyhow::bail!("Model repeatedly deferred action without emitting a tool call");
if tool_unavailable_signal && !parse_issue_detected && !completion_claim_signal {
tracing::warn!(
"Model still claims missing tools after corrective retry; returning text response."
);
} else {
anyhow::bail!("Model repeatedly deferred action without emitting a tool call");
}
}
runtime_trace::record_event(
@ -1519,6 +1580,23 @@ pub(crate) fn build_shell_policy_instructions(autonomy: &crate::config::Autonomy
instructions
}
fn build_runtime_tool_availability_notice(tools_registry: &[Box<dyn Tool>]) -> String {
const MAX_LISTED_TOOLS: usize = 40;
let names = tools_registry
.iter()
.map(|tool| tool.name())
.take(MAX_LISTED_TOOLS)
.collect::<Vec<_>>()
.join(", ");
format!(
"\n## Runtime Tool Availability (Authoritative)\n\n\
Use only these runtime-available tools for this turn.\n\
Tools: {names}\n\
Do not claim tools are unavailable when they are listed here.\n"
)
}
// ── CLI Entrypoint ───────────────────────────────────────────────────────
// Wires up all subsystems (observer, runtime, security, memory, tools,
// provider, hardware RAG, peripherals) and enters either single-shot or
@ -1784,6 +1862,7 @@ pub async fn run(
system_prompt.push_str(&build_tool_instructions(&tools_registry));
}
system_prompt.push_str(&build_shell_policy_instructions(&config.autonomy));
system_prompt.push_str(&build_runtime_tool_availability_notice(&tools_registry));
// ── Approval manager (supervised mode) ───────────────────────
let approval_manager = if interactive {
@ -2180,6 +2259,7 @@ pub async fn process_message(config: Config, message: &str) -> Result<String> {
system_prompt.push_str(&build_tool_instructions(&tools_registry));
}
system_prompt.push_str(&build_shell_policy_instructions(&config.autonomy));
system_prompt.push_str(&build_runtime_tool_availability_notice(&tools_registry));
let mem_context = build_context(mem.as_ref(), message, config.memory.min_relevance_score).await;
let rag_limit = if config.agent.compact_context { 2 } else { 5 };
@ -3232,6 +3312,94 @@ mod tests {
);
}
#[tokio::test]
async fn run_tool_call_loop_retries_when_model_claims_missing_file_tools() {
let provider = ScriptedProvider::from_text_responses(vec![
"I don't have access to a file creation tool in my current set of available functions.",
r#"<tool_call>
{"name":"file_write","arguments":{"value":"retry"}}
</tool_call>"#,
"done after file tool",
]);
let invocations = Arc::new(AtomicUsize::new(0));
let tools_registry: Vec<Box<dyn Tool>> = vec![Box::new(CountingTool::new(
"file_write",
Arc::clone(&invocations),
))];
let mut history = vec![
ChatMessage::system("test-system"),
ChatMessage::user("create a test file"),
];
let observer = NoopObserver;
let result = run_tool_call_loop(
&provider,
&mut history,
&tools_registry,
&observer,
"mock-provider",
"mock-model",
0.0,
true,
None,
"cli",
&crate::config::MultimodalConfig::default(),
5,
None,
None,
None,
&[],
)
.await
.expect("loop should retry once when model wrongly claims file tools are unavailable");
assert_eq!(result, "done after file tool");
assert_eq!(invocations.load(Ordering::SeqCst), 1);
}
#[tokio::test]
async fn run_tool_call_loop_allows_text_only_planning_without_tool_call() {
let provider = ScriptedProvider::from_text_responses(vec![
"We were previously discussing gmail integration. Goal 1 is done. Our next task is Goal 2 — Gmail API via OAuth. Here is the implementation plan before any tool actions.",
]);
let tools_registry: Vec<Box<dyn Tool>> = vec![Box::new(CountingTool::new(
"count_tool",
Arc::new(AtomicUsize::new(0)),
))];
let mut history = vec![
ChatMessage::system("test-system"),
ChatMessage::user("we finished goal one, what is next"),
];
let observer = NoopObserver;
let result = run_tool_call_loop(
&provider,
&mut history,
&tools_registry,
&observer,
"mock-provider",
"mock-model",
0.0,
true,
None,
"cli",
&crate::config::MultimodalConfig::default(),
5,
None,
None,
None,
&[],
)
.await
.expect("planning-only text should be returned without forced tool-call rejection");
assert!(result.contains("implementation plan"));
}
#[tokio::test]
async fn run_tool_call_loop_native_mode_preserves_fallback_tool_call_ids() {
let provider = ScriptedProvider::from_text_responses(vec![
@ -3307,6 +3475,34 @@ mod tests {
assert!(!looks_like_unverified_action_completion_without_tool_call(
"I have a suggestion for the plan if you want me to proceed."
));
assert!(!looks_like_unverified_action_completion_without_tool_call(
"We were previously discussing gmail integration. Goal 1 is done. Our next task is Goal 2 — Gmail API via OAuth."
));
}
#[test]
fn looks_like_tool_unavailability_claim_detects_false_missing_tool_replies() {
let tools = vec![
crate::tools::ToolSpec {
name: "file_write".to_string(),
description: "Write file".to_string(),
parameters: serde_json::json!({"type":"object"}),
},
crate::tools::ToolSpec {
name: "file_edit".to_string(),
description: "Edit file".to_string(),
parameters: serde_json::json!({"type":"object"}),
},
];
assert!(looks_like_tool_unavailability_claim(
"I don't have access to a file creation tool in my current set of available functions.",
&tools
));
assert!(!looks_like_tool_unavailability_claim(
"I can create that file now.",
&tools
));
}
#[test]

View File

@ -1015,6 +1015,18 @@ fn build_runtime_tool_visibility_prompt(
);
}
prompt.push_str(
"- Do not claim tools are unavailable when they are listed above; call the appropriate tool directly.\n",
);
if specs
.iter()
.any(|spec| matches!(spec.name.as_str(), "file_write" | "file_edit"))
{
prompt.push_str(
"- File changes are supported in this turn (`file_write`/`file_edit`) when requested and policy permits.\n",
);
}
if native_tools {
prompt.push_str(
"Tool calling for this turn uses native provider function-calling. \
@ -6014,14 +6026,22 @@ BTC is currently around $65,000 based on latest tool output."#
assert!(non_native.contains("Excluded by runtime policy: mock_price"));
assert!(non_native.contains("`mock_echo`"));
assert!(!non_native.contains("**mock_price**:"));
assert!(non_native.contains("Do not claim tools are unavailable"));
assert!(non_native.contains("## Tool Use Protocol"));
let native = build_runtime_tool_visibility_prompt(&tools, &excluded, true);
assert!(native.contains("Runtime Tool Availability (Authoritative)"));
assert!(native.contains("Do not claim tools are unavailable"));
assert!(native.contains("native provider function-calling"));
assert!(!native.contains("## Tool Use Protocol"));
}
fn autonomy_with_mock_price_auto_approve() -> crate::config::AutonomyConfig {
let mut autonomy = crate::config::AutonomyConfig::default();
autonomy.auto_approve.push("mock_price".to_string());
autonomy
}
#[tokio::test]
async fn process_channel_message_injects_runtime_tool_visibility_prompt() {
let channel_impl = Arc::new(RecordingChannel::default());
@ -6064,7 +6084,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
});
@ -6139,7 +6159,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
multimodal: crate::config::MultimodalConfig::default(),
hooks: None,
@ -6203,7 +6223,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
multimodal: crate::config::MultimodalConfig::default(),
hooks: None,
@ -6279,7 +6299,7 @@ BTC is currently around $65,000 based on latest tool output."#
interrupt_on_new_message: false,
non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
multimodal: crate::config::MultimodalConfig::default(),
hooks: None,
@ -6356,7 +6376,7 @@ BTC is currently around $65,000 based on latest tool output."#
interrupt_on_new_message: false,
non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
multimodal: crate::config::MultimodalConfig::default(),
hooks: None,
@ -6429,7 +6449,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
});
@ -6493,7 +6513,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
});
@ -6566,7 +6586,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
});
@ -7837,7 +7857,7 @@ BTC is currently around $65,000 based on latest tool output."#
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
&autonomy_with_mock_price_auto_approve(),
)),
});
@ -8239,6 +8259,8 @@ BTC is currently around $65,000 based on latest tool output."#
async fn process_channel_message_respects_configured_max_tool_iterations_above_default() {
let channel_impl = Arc::new(RecordingChannel::default());
let channel: Arc<dyn Channel> = channel_impl.clone();
let mut autonomy_cfg = autonomy_with_mock_price_auto_approve();
autonomy_cfg.level = crate::security::AutonomyLevel::Full;
let mut channels_by_name = HashMap::new();
channels_by_name.insert(channel.name().to_string(), channel);
@ -8266,16 +8288,14 @@ BTC is currently around $65,000 based on latest tool output."#
reliability: Arc::new(crate::config::ReliabilityConfig::default()),
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
message_timeout_secs: 5,
interrupt_on_new_message: false,
multimodal: crate::config::MultimodalConfig::default(),
hooks: None,
non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())),
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
)),
approval_manager: Arc::new(ApprovalManager::from_config(&autonomy_cfg)),
});
process_channel_message(
@ -8304,6 +8324,8 @@ BTC is currently around $65,000 based on latest tool output."#
async fn process_channel_message_reports_configured_max_tool_iterations_limit() {
let channel_impl = Arc::new(RecordingChannel::default());
let channel: Arc<dyn Channel> = channel_impl.clone();
let mut autonomy_cfg = autonomy_with_mock_price_auto_approve();
autonomy_cfg.level = crate::security::AutonomyLevel::Full;
let mut channels_by_name = HashMap::new();
channels_by_name.insert(channel.name().to_string(), channel);
@ -8331,16 +8353,14 @@ BTC is currently around $65,000 based on latest tool output."#
reliability: Arc::new(crate::config::ReliabilityConfig::default()),
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
message_timeout_secs: 5,
interrupt_on_new_message: false,
multimodal: crate::config::MultimodalConfig::default(),
hooks: None,
non_cli_excluded_tools: Arc::new(Mutex::new(Vec::new())),
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
approval_manager: Arc::new(ApprovalManager::from_config(
&crate::config::AutonomyConfig::default(),
)),
approval_manager: Arc::new(ApprovalManager::from_config(&autonomy_cfg)),
});
process_channel_message(

View File

@ -5526,6 +5526,22 @@ fn read_codex_openai_api_key() -> Option<String> {
.map(ToString::to_string)
}
fn normalize_top_level_table_aliases(raw_toml: &mut toml::Value) {
let Some(root) = raw_toml.as_table_mut() else {
return;
};
if root.contains_key("Gateway") {
if root.contains_key("gateway") {
let _ = root.remove("Gateway");
tracing::warn!("Legacy table [Gateway] ignored because [gateway] is already present.");
} else if let Some(value) = root.remove("Gateway") {
root.insert("gateway".to_string(), value);
tracing::warn!("Legacy table [Gateway] mapped to [gateway].");
}
}
}
impl Config {
pub async fn load_or_init() -> Result<Self> {
let (default_zeroclaw_dir, default_workspace_dir) = default_config_and_workspace_dirs()?;
@ -5563,12 +5579,18 @@ impl Config {
let contents = fs::read_to_string(&config_path)
.await
.context("Failed to read config file")?;
let mut raw_toml: toml::Value =
toml::from_str(&contents).context("Failed to parse config file")?;
normalize_top_level_table_aliases(&mut raw_toml);
let normalized_contents =
toml::to_string(&raw_toml).context("Failed to normalize config file")?;
// Track ignored/unknown config keys to warn users about silent misconfigurations
// (e.g., using [providers.ollama] which doesn't exist instead of top-level api_url)
let mut ignored_paths: Vec<String> = Vec::new();
let mut config: Config = serde_ignored::deserialize(
toml::de::Deserializer::parse(&contents).context("Failed to parse config file")?,
toml::de::Deserializer::parse(&normalized_contents)
.context("Failed to parse config file")?,
|path| {
ignored_paths.push(path.to_string());
},
@ -8435,6 +8457,25 @@ default_temperature = 0.7
);
}
#[test]
async fn checklist_gateway_backward_compat_accepts_legacy_gateway_table_alias() {
let mut raw: toml::Value = toml::from_str(
r#"
default_temperature = 0.7
[Gateway]
require_pairing = false
"#,
)
.unwrap();
normalize_top_level_table_aliases(&mut raw);
let parsed: Config = raw.try_into().unwrap();
assert!(
!parsed.gateway.require_pairing,
"Legacy [Gateway] alias should map to [gateway]"
);
}
#[test]
async fn checklist_autonomy_default_is_workspace_scoped() {
let a = AutonomyConfig::default();