fix(agent): add system prompt budgeting for small-context models (#4185)

For models with small context windows (e.g. glm-4.5-air ~8K tokens),
the system prompt alone can exceed the limit. This adds:

- max_system_prompt_chars config option (default 0 = unlimited)
- compact_context now also compacts the system prompt: skips the
  Channel Capabilities section and shows only tool names
- Truncation with marker when prompt exceeds the budget

Users can set `max_system_prompt_chars = 8000` in [agent] config
to cap the system prompt for small-context models.

Closes #4124
This commit is contained in:
Argenis 2026-03-21 19:40:21 -04:00 committed by Roman Tataurov
parent 031416f065
commit d1f7c8a116
No known key found for this signature in database
GPG Key ID: 70A51EF3185C334B
3 changed files with 62 additions and 16 deletions

View File

@ -3833,7 +3833,8 @@ pub async fn run(
Some(&config.autonomy),
native_tools,
config.skills.prompt_injection_mode,
config.autonomy.level,
config.agent.compact_context,
config.agent.max_system_prompt_chars,
);
// Append structured tool-use instructions with schemas (only for non-native providers)
@ -4494,7 +4495,8 @@ pub async fn process_message(
Some(&config.autonomy),
native_tools,
config.skills.prompt_injection_mode,
config.autonomy.level,
config.agent.compact_context,
config.agent.max_system_prompt_chars,
);
if !native_tools {
system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs)));

View File

@ -3128,9 +3128,12 @@ pub fn build_system_prompt_with_mode(
Some(&autonomy_cfg),
native_tools,
skills_prompt_mode,
false,
0,
)
}
#[allow(clippy::too_many_arguments)]
pub fn build_system_prompt_with_mode_and_autonomy(
workspace_dir: &std::path::Path,
model_name: &str,
@ -3141,6 +3144,8 @@ pub fn build_system_prompt_with_mode_and_autonomy(
autonomy_config: Option<&crate::config::AutonomyConfig>,
native_tools: bool,
skills_prompt_mode: crate::config::SkillsPromptInjectionMode,
compact_context: bool,
max_system_prompt_chars: usize,
) -> String {
use std::fmt::Write;
let mut prompt = String::with_capacity(8192);
@ -3167,11 +3172,19 @@ pub fn build_system_prompt_with_mode_and_autonomy(
// ── 1. Tooling ──────────────────────────────────────────────
if !tools.is_empty() {
prompt.push_str("## Tools\n\n");
prompt.push_str("You have access to the following tools:\n\n");
for (name, desc) in tools {
let _ = writeln!(prompt, "- **{name}**: {desc}");
if compact_context {
// Compact mode: tool names only, no descriptions/schemas
prompt.push_str("Available tools: ");
let names: Vec<&str> = tools.iter().map(|(name, _)| *name).collect();
prompt.push_str(&names.join(", "));
prompt.push_str("\n\n");
} else {
prompt.push_str("You have access to the following tools:\n\n");
for (name, desc) in tools {
let _ = writeln!(prompt, "- **{name}**: {desc}");
}
prompt.push('\n');
}
prompt.push('\n');
}
// ── 1b. Hardware (when gpio/arduino tools present) ───────────
@ -3311,11 +3324,13 @@ pub fn build_system_prompt_with_mode_and_autonomy(
std::env::consts::OS,
);
// ── 8. Channel Capabilities ─────────────────────────────────────
prompt.push_str("## Channel Capabilities\n\n");
prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
prompt.push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
// ── 8. Channel Capabilities (skipped in compact_context mode) ──
if !compact_context {
prompt.push_str("## Channel Capabilities\n\n");
prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
prompt
.push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
Some(crate::security::AutonomyLevel::Full) => {
"- If the runtime policy already allows a tool, use it directly; do not ask the user for extra approval.\n\
- Never pretend you are waiting for a human approval click or confirmation when the runtime policy already permits the action.\n\
@ -3329,10 +3344,23 @@ pub fn build_system_prompt_with_mode_and_autonomy(
- If there is no approval path for this channel or the runtime blocks an action, explain that restriction directly instead of simulating an approval flow.\n"
}
});
prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
} // end if !compact_context (Channel Capabilities)
// ── 9. Truncation (max_system_prompt_chars budget) ──────────
if max_system_prompt_chars > 0 && prompt.len() > max_system_prompt_chars {
// Truncate on a char boundary, keeping the top portion (identity + safety).
let mut end = max_system_prompt_chars;
// Ensure we don't split a multi-byte UTF-8 character.
while !prompt.is_char_boundary(end) && end > 0 {
end -= 1;
}
prompt.truncate(end);
prompt.push_str("\n\n[System prompt truncated to fit context budget]\n");
}
if prompt.is_empty() {
"You are ZeroClaw, a fast and efficient AI assistant built in Rust. Be helpful, concise, and direct."
@ -4447,7 +4475,8 @@ pub async fn start_channels(config: Config) -> Result<()> {
Some(&config.autonomy),
native_tools,
config.skills.prompt_injection_mode,
config.autonomy.level,
config.agent.compact_context,
config.agent.max_system_prompt_chars,
);
if !native_tools {
system_prompt.push_str(&build_tool_instructions(
@ -7776,6 +7805,8 @@ BTC is currently around $65,000 based on latest tool output."#
Some(&config),
false,
crate::config::SkillsPromptInjectionMode::Full,
false,
0,
);
assert!(
@ -7805,6 +7836,8 @@ BTC is currently around $65,000 based on latest tool output."#
Some(&config),
false,
crate::config::SkillsPromptInjectionMode::Full,
false,
0,
);
assert!(

View File

@ -1248,6 +1248,12 @@ pub struct AgentConfig {
/// Default: `[]` (no filtering — all tools included).
#[serde(default)]
pub tool_filter_groups: Vec<ToolFilterGroup>,
/// Maximum characters for the assembled system prompt. When `> 0`, the prompt
/// is truncated to this limit after assembly (keeping the top portion which
/// contains identity and safety instructions). `0` means unlimited.
/// Useful for small-context models (e.g. glm-4.5-air ~8K tokens → set to 8000).
#[serde(default = "default_max_system_prompt_chars")]
pub max_system_prompt_chars: usize,
}
fn default_agent_max_tool_iterations() -> usize {
@ -1266,6 +1272,10 @@ fn default_agent_tool_dispatcher() -> String {
"auto".into()
}
fn default_max_system_prompt_chars() -> usize {
0
}
impl Default for AgentConfig {
fn default() -> Self {
Self {
@ -1277,6 +1287,7 @@ impl Default for AgentConfig {
tool_dispatcher: default_agent_tool_dispatcher(),
tool_call_dedup_exempt: Vec::new(),
tool_filter_groups: Vec::new(),
max_system_prompt_chars: default_max_system_prompt_chars(),
}
}
}