fix(agent): add system prompt budgeting for small-context models (#4185)
For models with small context windows (e.g. glm-4.5-air, ~8K tokens), the system prompt alone can exceed the limit. This adds:
- A `max_system_prompt_chars` config option (default 0 = unlimited).
- `compact_context` now also compacts the system prompt: it skips the Channel Capabilities section and shows only tool names.
- Truncation with a marker when the prompt exceeds the budget.

Users can set `max_system_prompt_chars = 8000` in the `[agent]` config to cap the system prompt for small-context models.

Closes #4124
This commit is contained in:
parent
031416f065
commit
d1f7c8a116
@ -3833,7 +3833,8 @@ pub async fn run(
|
||||
Some(&config.autonomy),
|
||||
native_tools,
|
||||
config.skills.prompt_injection_mode,
|
||||
config.autonomy.level,
|
||||
config.agent.compact_context,
|
||||
config.agent.max_system_prompt_chars,
|
||||
);
|
||||
|
||||
// Append structured tool-use instructions with schemas (only for non-native providers)
|
||||
@ -4494,7 +4495,8 @@ pub async fn process_message(
|
||||
Some(&config.autonomy),
|
||||
native_tools,
|
||||
config.skills.prompt_injection_mode,
|
||||
config.autonomy.level,
|
||||
config.agent.compact_context,
|
||||
config.agent.max_system_prompt_chars,
|
||||
);
|
||||
if !native_tools {
|
||||
system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs)));
|
||||
|
||||
@ -3128,9 +3128,12 @@ pub fn build_system_prompt_with_mode(
|
||||
Some(&autonomy_cfg),
|
||||
native_tools,
|
||||
skills_prompt_mode,
|
||||
false,
|
||||
0,
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn build_system_prompt_with_mode_and_autonomy(
|
||||
workspace_dir: &std::path::Path,
|
||||
model_name: &str,
|
||||
@ -3141,6 +3144,8 @@ pub fn build_system_prompt_with_mode_and_autonomy(
|
||||
autonomy_config: Option<&crate::config::AutonomyConfig>,
|
||||
native_tools: bool,
|
||||
skills_prompt_mode: crate::config::SkillsPromptInjectionMode,
|
||||
compact_context: bool,
|
||||
max_system_prompt_chars: usize,
|
||||
) -> String {
|
||||
use std::fmt::Write;
|
||||
let mut prompt = String::with_capacity(8192);
|
||||
@ -3167,11 +3172,19 @@ pub fn build_system_prompt_with_mode_and_autonomy(
|
||||
// ── 1. Tooling ──────────────────────────────────────────────
|
||||
if !tools.is_empty() {
|
||||
prompt.push_str("## Tools\n\n");
|
||||
prompt.push_str("You have access to the following tools:\n\n");
|
||||
for (name, desc) in tools {
|
||||
let _ = writeln!(prompt, "- **{name}**: {desc}");
|
||||
if compact_context {
|
||||
// Compact mode: tool names only, no descriptions/schemas
|
||||
prompt.push_str("Available tools: ");
|
||||
let names: Vec<&str> = tools.iter().map(|(name, _)| *name).collect();
|
||||
prompt.push_str(&names.join(", "));
|
||||
prompt.push_str("\n\n");
|
||||
} else {
|
||||
prompt.push_str("You have access to the following tools:\n\n");
|
||||
for (name, desc) in tools {
|
||||
let _ = writeln!(prompt, "- **{name}**: {desc}");
|
||||
}
|
||||
prompt.push('\n');
|
||||
}
|
||||
prompt.push('\n');
|
||||
}
|
||||
|
||||
// ── 1b. Hardware (when gpio/arduino tools present) ───────────
|
||||
@ -3311,11 +3324,13 @@ pub fn build_system_prompt_with_mode_and_autonomy(
|
||||
std::env::consts::OS,
|
||||
);
|
||||
|
||||
// ── 8. Channel Capabilities ─────────────────────────────────────
|
||||
prompt.push_str("## Channel Capabilities\n\n");
|
||||
prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
|
||||
prompt.push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
|
||||
prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
|
||||
// ── 8. Channel Capabilities (skipped in compact_context mode) ──
|
||||
if !compact_context {
|
||||
prompt.push_str("## Channel Capabilities\n\n");
|
||||
prompt.push_str("- You are running as a messaging bot. Your response is automatically sent back to the user's channel.\n");
|
||||
prompt
|
||||
.push_str("- You do NOT need to ask permission to respond — just respond directly.\n");
|
||||
prompt.push_str(match autonomy_config.map(|cfg| cfg.level) {
|
||||
Some(crate::security::AutonomyLevel::Full) => {
|
||||
"- If the runtime policy already allows a tool, use it directly; do not ask the user for extra approval.\n\
|
||||
- Never pretend you are waiting for a human approval click or confirmation when the runtime policy already permits the action.\n\
|
||||
@ -3329,10 +3344,23 @@ pub fn build_system_prompt_with_mode_and_autonomy(
|
||||
- If there is no approval path for this channel or the runtime blocks an action, explain that restriction directly instead of simulating an approval flow.\n"
|
||||
}
|
||||
});
|
||||
prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
|
||||
prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
|
||||
prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
|
||||
prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
|
||||
prompt.push_str("- NEVER repeat, describe, or echo credentials, tokens, API keys, or secrets in your responses.\n");
|
||||
prompt.push_str("- If a tool output contains credentials, they have already been redacted — do not mention them.\n");
|
||||
prompt.push_str("- When a user sends a voice note, it is automatically transcribed to text. Your text reply is automatically converted to a voice note and sent back. Do NOT attempt to generate audio yourself — TTS is handled by the channel.\n");
|
||||
prompt.push_str("- NEVER narrate or describe your tool usage. Do NOT say 'Let me fetch...', 'I will use...', 'Searching...', or similar. Give the FINAL ANSWER only — no intermediate steps, no tool mentions, no progress updates.\n\n");
|
||||
} // end if !compact_context (Channel Capabilities)
|
||||
|
||||
// ── 9. Truncation (max_system_prompt_chars budget) ──────────
|
||||
if max_system_prompt_chars > 0 && prompt.len() > max_system_prompt_chars {
|
||||
// Truncate on a char boundary, keeping the top portion (identity + safety).
|
||||
let mut end = max_system_prompt_chars;
|
||||
// Ensure we don't split a multi-byte UTF-8 character.
|
||||
while !prompt.is_char_boundary(end) && end > 0 {
|
||||
end -= 1;
|
||||
}
|
||||
prompt.truncate(end);
|
||||
prompt.push_str("\n\n[System prompt truncated to fit context budget]\n");
|
||||
}
|
||||
|
||||
if prompt.is_empty() {
|
||||
"You are ZeroClaw, a fast and efficient AI assistant built in Rust. Be helpful, concise, and direct."
|
||||
@ -4447,7 +4475,8 @@ pub async fn start_channels(config: Config) -> Result<()> {
|
||||
Some(&config.autonomy),
|
||||
native_tools,
|
||||
config.skills.prompt_injection_mode,
|
||||
config.autonomy.level,
|
||||
config.agent.compact_context,
|
||||
config.agent.max_system_prompt_chars,
|
||||
);
|
||||
if !native_tools {
|
||||
system_prompt.push_str(&build_tool_instructions(
|
||||
@ -7776,6 +7805,8 @@ BTC is currently around $65,000 based on latest tool output."#
|
||||
Some(&config),
|
||||
false,
|
||||
crate::config::SkillsPromptInjectionMode::Full,
|
||||
false,
|
||||
0,
|
||||
);
|
||||
|
||||
assert!(
|
||||
@ -7805,6 +7836,8 @@ BTC is currently around $65,000 based on latest tool output."#
|
||||
Some(&config),
|
||||
false,
|
||||
crate::config::SkillsPromptInjectionMode::Full,
|
||||
false,
|
||||
0,
|
||||
);
|
||||
|
||||
assert!(
|
||||
|
||||
@ -1248,6 +1248,12 @@ pub struct AgentConfig {
|
||||
/// Default: `[]` (no filtering — all tools included).
|
||||
#[serde(default)]
|
||||
pub tool_filter_groups: Vec<ToolFilterGroup>,
|
||||
/// Maximum length of the assembled system prompt, measured in UTF-8 bytes (not Unicode characters). When `> 0`, the prompt
|
||||
/// is truncated to this limit after assembly (keeping the top portion which
|
||||
/// contains identity and safety instructions); a short truncation marker is then appended, so the result can slightly exceed the limit. `0` means unlimited.
|
||||
/// Useful for small-context models (e.g. glm-4.5-air ~8K tokens → set to 8000).
|
||||
#[serde(default = "default_max_system_prompt_chars")]
|
||||
pub max_system_prompt_chars: usize,
|
||||
}
|
||||
|
||||
fn default_agent_max_tool_iterations() -> usize {
|
||||
@ -1266,6 +1272,10 @@ fn default_agent_tool_dispatcher() -> String {
|
||||
"auto".into()
|
||||
}
|
||||
|
||||
/// Serde default for `AgentConfig::max_system_prompt_chars`; also used by `AgentConfig::default()`.
/// Returns `0`, which the field documentation defines as "unlimited" (no truncation).
fn default_max_system_prompt_chars() -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
impl Default for AgentConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@ -1277,6 +1287,7 @@ impl Default for AgentConfig {
|
||||
tool_dispatcher: default_agent_tool_dispatcher(),
|
||||
tool_call_dedup_exempt: Vec::new(),
|
||||
tool_filter_groups: Vec::new(),
|
||||
max_system_prompt_chars: default_max_system_prompt_chars(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user