From 96e2a324d1da860e3c5ccd93be339641c4393483 Mon Sep 17 00:00:00 2001 From: Argenis Date: Thu, 19 Mar 2026 08:48:38 -0400 Subject: [PATCH] fix: make channel system prompt respect autonomy.level = full (#3952) (#3970) When autonomy.level is set to "full", the channel/web system prompt no longer includes instructions telling the model to ask for permission before executing tools. Previously these safety lines were hardcoded regardless of autonomy config, causing the LLM to simulate approval dialogs in channel and web-interface modes even though the ApprovalManager correctly allowed execution. The fix adds an autonomy_level parameter to build_system_prompt_with_mode and, when the level is Full, omits the three approval-related safety lines: "Do not run destructive commands without asking", "Do not bypass oversight or approval mechanisms", and "When in doubt, ask before acting externally". Core safety rules (no data exfiltration, prefer trash) are always included. --- src/agent/loop_.rs | 3 ++ src/channels/mod.rs | 84 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 5442f05c5..566db1cc7 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -3466,6 +3466,7 @@ pub async fn run( bootstrap_max_chars, native_tools, config.skills.prompt_injection_mode, + config.autonomy.level, ); // Append structured tool-use instructions with schemas (only for non-native providers) @@ -4100,6 +4101,7 @@ pub async fn process_message( bootstrap_max_chars, native_tools, config.skills.prompt_injection_mode, + config.autonomy.level, ); if !native_tools { system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs))); @@ -6672,6 +6674,7 @@ Let me check the result."#; None, // no bootstrap_max_chars true, // native_tools crate::config::SkillsPromptInjectionMode::Full, + crate::security::AutonomyLevel::default(), ); // Must contain zero XML protocol artifacts diff --git a/src/channels/mod.rs b/src/channels/mod.rs index e34117a08..316bfcf9d 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ 
-98,7 +98,7 @@ use crate::observability::traits::{ObserverEvent, ObserverMetric}; use crate::observability::{self, runtime_trace, Observer}; use crate::providers::{self, ChatMessage, Provider}; use crate::runtime; -use crate::security::SecurityPolicy; +use crate::security::{AutonomyLevel, SecurityPolicy}; use crate::tools::{self, Tool}; use crate::util::truncate_with_ellipsis; use anyhow::{Context, Result}; @@ -2785,6 +2785,7 @@ pub fn build_system_prompt( bootstrap_max_chars, false, crate::config::SkillsPromptInjectionMode::Full, + AutonomyLevel::default(), ) } @@ -2797,6 +2798,7 @@ pub fn build_system_prompt_with_mode( bootstrap_max_chars: Option, native_tools: bool, skills_prompt_mode: crate::config::SkillsPromptInjectionMode, + autonomy_level: AutonomyLevel, ) -> String { use std::fmt::Write; let mut prompt = String::with_capacity(8192); @@ -2862,13 +2864,18 @@ pub fn build_system_prompt_with_mode( // ── 2. Safety ─────────────────────────────────────────────── prompt.push_str("## Safety\n\n"); - prompt.push_str( - "- Do not exfiltrate private data.\n\ - - Do not run destructive commands without asking.\n\ - - Do not bypass oversight or approval mechanisms.\n\ - - Prefer `trash` over `rm` (recoverable beats gone forever).\n\ - - When in doubt, ask before acting externally.\n\n", - ); + prompt.push_str("- Do not exfiltrate private data.\n"); + if autonomy_level != AutonomyLevel::Full { + prompt.push_str( + "- Do not run destructive commands without asking.\n\ + - Do not bypass oversight or approval mechanisms.\n", + ); + } + prompt.push_str("- Prefer `trash` over `rm` (recoverable beats gone forever).\n"); + if autonomy_level != AutonomyLevel::Full { + prompt.push_str("- When in doubt, ask before acting externally.\n"); + } + prompt.push('\n'); // ── 3. 
Skills (full or compact, based on config) ───────────── if !skills.is_empty() { @@ -4026,6 +4033,7 @@ pub async fn start_channels(config: Config) -> Result<()> { bootstrap_max_chars, native_tools, config.skills.prompt_injection_mode, + config.autonomy.level, ); if !native_tools { system_prompt.push_str(&build_tool_instructions( @@ -6956,6 +6964,7 @@ BTC is currently around $65,000 based on latest tool output."# None, false, crate::config::SkillsPromptInjectionMode::Compact, + AutonomyLevel::default(), ); assert!(prompt.contains(""), "missing skills XML"); @@ -7078,6 +7087,65 @@ BTC is currently around $65,000 based on latest tool output."# assert!(prompt.contains(&format!("Working directory: `{}`", ws.path().display()))); } + #[test] + fn full_autonomy_omits_approval_instructions() { + let ws = make_workspace(); + let prompt = build_system_prompt_with_mode( + ws.path(), + "model", + &[], + &[], + None, + None, + false, + crate::config::SkillsPromptInjectionMode::Full, + AutonomyLevel::Full, + ); + + assert!( + !prompt.contains("without asking"), + "full autonomy prompt must not tell the model to ask before acting" + ); + assert!( + !prompt.contains("ask before acting externally"), + "full autonomy prompt must not contain ask-before-acting instruction" + ); + // Core safety rules should still be present + assert!( + prompt.contains("Do not exfiltrate private data"), + "data exfiltration guard must remain" + ); + assert!( + prompt.contains("Prefer `trash` over `rm`"), + "trash-over-rm hint must remain" + ); + } + + #[test] + fn supervised_autonomy_includes_approval_instructions() { + let ws = make_workspace(); + let prompt = build_system_prompt_with_mode( + ws.path(), + "model", + &[], + &[], + None, + None, + false, + crate::config::SkillsPromptInjectionMode::Full, + AutonomyLevel::Supervised, + ); + + assert!( + prompt.contains("without asking"), + "supervised prompt must include ask-before-acting instruction" + ); + assert!( + prompt.contains("ask before acting 
externally"), + "supervised prompt must include ask-before-acting instruction" + ); + } + #[test] fn channel_notify_observer_truncates_utf8_arguments_safely() { let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::();