From d1f7c8a116fe5f69ab7cfc0b2e3b48fccdbb42b0 Mon Sep 17 00:00:00 2001 From: Argenis Date: Sat, 21 Mar 2026 19:40:21 -0400 Subject: [PATCH] fix(agent): add system prompt budgeting for small-context models (#4185) For models with small context windows (e.g. glm-4.5-air ~8K tokens), the system prompt alone can exceed the limit. This adds: - max_system_prompt_chars config option (default 0 = unlimited) - compact_context now also compacts the system prompt: skips the Channel Capabilities section and shows only tool names - Truncation with marker when prompt exceeds the budget Users can set `max_system_prompt_chars = 8000` in [agent] config to cap the system prompt for small-context models. Closes #4124 --- src/agent/loop_.rs | 6 +++-- src/channels/mod.rs | 61 ++++++++++++++++++++++++++++++++++---------- src/config/schema.rs | 11 ++++++++ 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index cb6ec9a43..cdcde97b8 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -3833,7 +3833,8 @@ pub async fn run( Some(&config.autonomy), native_tools, config.skills.prompt_injection_mode, - config.autonomy.level, + config.agent.compact_context, + config.agent.max_system_prompt_chars, ); // Append structured tool-use instructions with schemas (only for non-native providers) @@ -4494,7 +4495,8 @@ pub async fn process_message( Some(&config.autonomy), native_tools, config.skills.prompt_injection_mode, - config.autonomy.level, + config.agent.compact_context, + config.agent.max_system_prompt_chars, ); if !native_tools { system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs))); diff --git a/src/channels/mod.rs b/src/channels/mod.rs index df671b08b..b0b874812 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -3128,9 +3128,12 @@ pub fn build_system_prompt_with_mode( Some(&autonomy_cfg), native_tools, skills_prompt_mode, + false, + 0, ) } +#[allow(clippy::too_many_arguments)] pub fn build_system_prompt_with_mode_and_autonomy( workspace_dir: &std::path::Path, model_name: &str, @@ -3141,6 +3144,8 @@ pub fn build_system_prompt_with_mode_and_autonomy( autonomy_config: Option<&crate::config::AutonomyConfig>, native_tools: bool, skills_prompt_mode: crate::config::SkillsPromptInjectionMode, + compact_context: bool, + max_system_prompt_chars: usize, ) -> String { use std::fmt::Write; let mut prompt = String::with_capacity(8192); @@ -3167,11 +3172,19 @@ pub fn build_system_prompt_with_mode_and_autonomy( // ── 1. Tooling ────────────────────────────────────────────── if !tools.is_empty() { prompt.push_str("## Tools\n\n"); - prompt.push_str("You have access to the following tools:\n\n"); - for (name, desc) in tools { - let _ = writeln!(prompt, "- **{name}**: {desc}"); + if compact_context { + // Compact mode: tool names only, no descriptions/schemas + prompt.push_str("Available tools: "); + let names: Vec<&str> = tools.iter().map(|(name, _)| *name).collect(); + prompt.push_str(&names.join(", ")); + prompt.push_str("\n\n"); + } else { + prompt.push_str("You have access to the following tools:\n\n"); + for (name, desc) in tools { + let _ = writeln!(prompt, "- **{name}**: {desc}"); + } + prompt.push('\n'); } - prompt.push('\n'); } // ── 1b. Hardware (when gpio/arduino tools present) ─────────── @@ -3311,11 +3324,13 @@ pub fn build_system_prompt_with_mode_and_autonomy( std::env::consts::OS, ); - // ── 8. Channel Capabilities ───────────────────────────────────── - prompt.push_str("## Channel Capabilities\n\n"); - prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n"); - prompt.push_str("- You do NOT need to ask permission to respond — just respond directly.\n"); - prompt.push_str(match autonomy_config.map(|cfg| cfg.level) { + // ── 8. Channel Capabilities (skipped in compact_context mode) ── + if !compact_context { + prompt.push_str("## Channel Capabilities\n\n"); + prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n"); + prompt + .push_str("- You do NOT need to ask permission to respond — just respond directly.\n"); + prompt.push_str(match autonomy_config.map(|cfg| cfg.level) { Some(crate::security::AutonomyLevel::Full) => { "- If the runtime policy already allows a tool, use it directly; do not ask the user for extra approval.\n\ - Never pretend you are waiting for a human approval click or confirmation when the runtime policy already permits the action.\n\ @@ -3329,10 +3344,23 @@ pub fn build_system_prompt_with_mode_and_autonomy( - If there is no approval path for this channel or the runtime blocks an action, explain that restriction directly instead of simulating an approval flow.\n" } }); - prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n"); - prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n"); - prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n"); - prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n"); + prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n"); + prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n"); + prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n"); + prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n"); + } // end if !compact_context (Channel Capabilities) + + // ── 9. Truncation (max_system_prompt_chars budget) ────────── + if max_system_prompt_chars > 0 && prompt.len() > max_system_prompt_chars { + // Truncate on a char boundary, keeping the top portion (identity + safety). + let mut end = max_system_prompt_chars; + // Ensure we don't split a multi-byte UTF-8 character. + while !prompt.is_char_boundary(end) && end > 0 { + end -= 1; + } + prompt.truncate(end); + prompt.push_str("\n\n[System prompt truncated to fit context budget]\n"); + } if prompt.is_empty() { "You are ZeroClaw, a fast and efficient AI assistant built in Rust. Be helpful, concise, and direct." @@ -4447,7 +4475,8 @@ pub async fn start_channels(config: Config) -> Result<()> { Some(&config.autonomy), native_tools, config.skills.prompt_injection_mode, - config.autonomy.level, + config.agent.compact_context, + config.agent.max_system_prompt_chars, ); if !native_tools { system_prompt.push_str(&build_tool_instructions( @@ -7776,6 +7805,8 @@ BTC is currently around $65,000 based on latest tool output."# Some(&config), false, crate::config::SkillsPromptInjectionMode::Full, + false, + 0, ); assert!( @@ -7805,6 +7836,8 @@ BTC is currently around $65,000 based on latest tool output."# Some(&config), false, crate::config::SkillsPromptInjectionMode::Full, + false, + 0, ); assert!( diff --git a/src/config/schema.rs b/src/config/schema.rs index dc781c5e9..450d16d8d 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -1248,6 +1248,12 @@ pub struct AgentConfig { /// Default: `[]` (no filtering — all tools included). #[serde(default)] pub tool_filter_groups: Vec, + /// Maximum characters for the assembled system prompt. When `> 0`, the prompt + /// is truncated to this limit after assembly (keeping the top portion which + /// contains identity and safety instructions). `0` means unlimited. + /// Useful for small-context models (e.g. glm-4.5-air ~8K tokens → set to 8000). + #[serde(default = "default_max_system_prompt_chars")] + pub max_system_prompt_chars: usize, } fn default_agent_max_tool_iterations() -> usize { @@ -1266,6 +1272,10 @@ fn default_agent_tool_dispatcher() -> String { "auto".into() } +fn default_max_system_prompt_chars() -> usize { + 0 +} + impl Default for AgentConfig { fn default() -> Self { Self { @@ -1277,6 +1287,7 @@ impl Default for AgentConfig { tool_dispatcher: default_agent_tool_dispatcher(), tool_call_dedup_exempt: Vec::new(), tool_filter_groups: Vec::new(), + max_system_prompt_chars: default_max_system_prompt_chars(), } } }