fix(agent): add system prompt budgeting for small-context models (#4185)

For models with small context windows (e.g. glm-4.5-air, ~8K tokens), the system prompt alone can exceed the limit. This adds:

- `max_system_prompt_chars` config option (default `0` = unlimited)
- `compact_context` now also compacts the system prompt: it skips the Channel Capabilities section and shows only tool names
- Truncation with a marker when the prompt exceeds the budget

Users can set `max_system_prompt_chars = 8000` in the `[agent]` config to cap the system prompt for small-context models.

Closes #4124
2026-03-21 19:40:21 -04:00 · 2026-03-21 19:40:21 -04:00 · d1f7c8a116
commit d1f7c8a116
parent 031416f065
3 changed files with 62 additions and 16 deletions
--- a/src/agent/loop_.rs
+++ b/src/agent/loop_.rs
@ -3833,7 +3833,8 @@ pub async fn run(
        Some(&config.autonomy),
        native_tools,
        config.skills.prompt_injection_mode,
-        config.autonomy.level,
+        config.agent.compact_context,
+        config.agent.max_system_prompt_chars,
    );

    // Append structured tool-use instructions with schemas (only for non-native providers)
@ -4494,7 +4495,8 @@ pub async fn process_message(
        Some(&config.autonomy),
        native_tools,
        config.skills.prompt_injection_mode,
-        config.autonomy.level,
+        config.agent.compact_context,
+        config.agent.max_system_prompt_chars,
    );
    if !native_tools {
        system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs)));
--- a/src/channels/mod.rs
+++ b/src/channels/mod.rs
@ -3128,9 +3128,12 @@ pub fn build_system_prompt_with_mode(
        Some(&autonomy_cfg),
        native_tools,
        skills_prompt_mode,
+        false,
+        0,
    )
 }

+#[allow(clippy::too_many_arguments)]
 pub fn build_system_prompt_with_mode_and_autonomy(
    workspace_dir: &std::path::Path,
    model_name: &str,
@ -3141,6 +3144,8 @@ pub fn build_system_prompt_with_mode_and_autonomy(
    autonomy_config: Option<&crate::config::AutonomyConfig>,
    native_tools: bool,
    skills_prompt_mode: crate::config::SkillsPromptInjectionMode,
+    compact_context: bool,
+    max_system_prompt_chars: usize,
 ) -> String {
    use std::fmt::Write;
    let mut prompt = String::with_capacity(8192);
@ -3167,11 +3172,19 @@ pub fn build_system_prompt_with_mode_and_autonomy(
    // ── 1. Tooling ──────────────────────────────────────────────
    if !tools.is_empty() {
        prompt.push_str("## Tools\n\n");
-        prompt.push_str("You have access to the following tools:\n\n");
-        for (name, desc) in tools {
-            let _ = writeln!(prompt, "- **{name}**: {desc}");
+        if compact_context {
+            // Compact mode: tool names only, no descriptions/schemas
+            prompt.push_str("Available tools: ");
+            let names: Vec<&str> = tools.iter().map(|(name, _)| *name).collect();
+            prompt.push_str(&names.join(", "));
+            prompt.push_str("\n\n");
+        } else {
+            prompt.push_str("You have access to the following tools:\n\n");
+            for (name, desc) in tools {
+                let _ = writeln!(prompt, "- **{name}**: {desc}");
+            }
+            prompt.push('\n');
        }
-        prompt.push('\n');
    }

    // ── 1b. Hardware (when gpio/arduino tools present) ───────────
@ -3311,11 +3324,13 @@ pub fn build_system_prompt_with_mode_and_autonomy(
        std::env::consts::OS,
    );

-    // ── 8. Channel Capabilities ─────────────────────────────────────
-    prompt.push_str("## Channel Capabilities\n\n");
-    prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
-    prompt.push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
-    prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
+    // ── 8. Channel Capabilities (skipped in compact_context mode) ──
+    if !compact_context {
+        prompt.push_str("## Channel Capabilities\n\n");
+        prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
+        prompt
+            .push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
+        prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
        Some(crate::security::AutonomyLevel::Full) => {
            "- If the runtime policy already allows a tool, use it directly; do not ask the user for extra approval.\n\
             - Never pretend you are waiting for a human approval click or confirmation when the runtime policy already permits the action.\n\
@ -3329,10 +3344,23 @@ pub fn build_system_prompt_with_mode_and_autonomy(
             - If there is no approval path for this channel or the runtime blocks an action, explain that restriction directly instead of simulating an approval flow.\n"
        }
    });
-    prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
-    prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
-    prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
-    prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
+        prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
+        prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
+        prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
+        prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
+    } // end if !compact_context (Channel Capabilities)
+
+    // ── 9. Truncation (max_system_prompt_chars budget) ──────────
+    if max_system_prompt_chars > 0 && prompt.len() > max_system_prompt_chars {
+        // Truncate on a char boundary, keeping the top portion (identity + safety).
+        let mut end = max_system_prompt_chars;
+        // Ensure we don't split a multi-byte UTF-8 character.
+        while !prompt.is_char_boundary(end) && end > 0 {
+            end -= 1;
+        }
+        prompt.truncate(end);
+        prompt.push_str("\n\n[System prompt truncated to fit context budget]\n");
+    }

    if prompt.is_empty() {
        "You are ZeroClaw, a fast and efficient AI assistant built in Rust. Be helpful, concise, and direct."
@ -4447,7 +4475,8 @@ pub async fn start_channels(config: Config) -> Result<()> {
        Some(&config.autonomy),
        native_tools,
        config.skills.prompt_injection_mode,
-        config.autonomy.level,
+        config.agent.compact_context,
+        config.agent.max_system_prompt_chars,
    );
    if !native_tools {
        system_prompt.push_str(&build_tool_instructions(
@ -7776,6 +7805,8 @@ BTC is currently around $65,000 based on latest tool output."#
            Some(&config),
            false,
            crate::config::SkillsPromptInjectionMode::Full,
+            false,
+            0,
        );

        assert!(
@ -7805,6 +7836,8 @@ BTC is currently around $65,000 based on latest tool output."#
            Some(&config),
            false,
            crate::config::SkillsPromptInjectionMode::Full,
+            false,
+            0,
        );

        assert!(
--- a/src/config/schema.rs
+++ b/src/config/schema.rs
@ -1248,6 +1248,12 @@ pub struct AgentConfig {
    /// Default: `[]` (no filtering — all tools included).
    #[serde(default)]
    pub tool_filter_groups: Vec<ToolFilterGroup>,
+    /// Maximum system prompt size in UTF-8 bytes (`String::len`), not characters. When `> 0`,
+    /// the assembled prompt is cut to this limit (keeping the top portion, which contains
+    /// identity and safety instructions) before a short truncation marker is appended. `0` means
+    /// unlimited. Useful for small-context models (e.g. glm-4.5-air ~8K tokens → set to 8000).
+    #[serde(default = "default_max_system_prompt_chars")]
+    pub max_system_prompt_chars: usize,
 }

 fn default_agent_max_tool_iterations() -> usize {
@ -1266,6 +1272,10 @@ fn default_agent_tool_dispatcher() -> String {
    "auto".into()
 }

+fn default_max_system_prompt_chars() -> usize {
+    0
+}
+
 impl Default for AgentConfig {
    fn default() -> Self {
        Self {
@ -1277,6 +1287,7 @@ impl Default for AgentConfig {
            tool_dispatcher: default_agent_tool_dispatcher(),
            tool_call_dedup_exempt: Vec::new(),
            tool_filter_groups: Vec::new(),
+            max_system_prompt_chars: default_max_system_prompt_chars(),
        }
    }
 }