Compare commits

...

2 Commits

Author SHA1 Message Date
argenis de la rosa
0982f74aee style: fix rustfmt alignment in test assertions 2026-03-14 07:07:02 -04:00
argenis de la rosa
db58d8f473 fix: proactively truncate channel history before sending to model
Channel sessions accumulate conversation history up to 50 turns with no
token-budget check before the LLM call.  On models with smaller context
windows (e.g. GLM 131K) this causes "context window exceeded" errors
after extended conversations.  The existing compaction logic only runs
*after* the error, wasting a round-trip.

Add `truncate_history_to_token_budget` which estimates token count using
a conservative 4-chars-per-token heuristic and drops the oldest
non-system turns until the history fits within a 100K token budget.  The
function is called right after building the outbound history, before any
provider call.

Closes #3460
2026-03-14 06:55:22 -04:00

View File

@ -189,6 +189,13 @@ const MEMORY_CONTEXT_ENTRY_MAX_CHARS: usize = 800;
/// Cap on the total characters of memory context injected into a prompt.
/// NOTE(review): inferred from the name — confirm against the code that builds
/// the memory context.
const MEMORY_CONTEXT_MAX_CHARS: usize = 4_000;
/// Number of most-recent messages kept when sender history is compacted —
/// presumably consumed by `compact_sender_history`; verify at the call site.
const CHANNEL_HISTORY_COMPACT_KEEP_MESSAGES: usize = 12;
/// Per-message character cap applied to compacted history content.
/// NOTE(review): inferred from the name; confirm against the compaction logic.
const CHANNEL_HISTORY_COMPACT_CONTENT_CHARS: usize = 600;
/// Conservative token budget for channel history. Before sending to the model,
/// the history is proactively truncated to fit within this limit using a
/// character-based estimate (~4 chars per token). This prevents context-window
/// overflow errors that would otherwise require a round-trip to the API.
const MAX_HISTORY_ESTIMATED_TOKENS: usize = 100_000;
/// Approximate characters per token for the conservative estimator.
const CHARS_PER_TOKEN_ESTIMATE: usize = 4;
/// Guardrail for hook-modified outbound channel content.
const CHANNEL_HOOK_MAX_OUTBOUND_CHARS: usize = 20_000;
@ -919,6 +926,52 @@ fn compact_sender_history(ctx: &ChannelRuntimeContext, sender_key: &str) -> bool
true
}
/// Estimate the total token count of a message list using a conservative
/// character-based heuristic (~4 characters per token).
fn estimate_history_tokens(messages: &[ChatMessage]) -> usize {
    let mut total = 0usize;
    for message in messages {
        // Round each message up independently so short messages still cost
        // at least one token in the estimate.
        total += message.content.len().div_ceil(CHARS_PER_TOKEN_ESTIMATE);
    }
    total
}
/// Proactively truncate conversation history so the estimated token count
/// stays within `MAX_HISTORY_ESTIMATED_TOKENS`. The system message (first
/// element) and the most recent user message (last element) are always
/// preserved; the oldest non-system turns are dropped first.
///
/// Returns `true` if any messages were removed.
fn truncate_history_to_token_budget(history: &mut Vec<ChatMessage>) -> bool {
    // Nothing droppable: only the system message and/or the latest turn.
    if history.len() <= 2 {
        return false;
    }
    let mut estimated = estimate_history_tokens(history);
    if estimated <= MAX_HISTORY_ESTIMATED_TOKENS {
        return false;
    }
    // Walk the droppable middle range (index 1 .. len-1) from oldest to
    // newest and find how many turns must go. Computing the split point first
    // lets us remove them with a single `drain`, avoiding the O(n^2) cost of
    // calling `Vec::remove(1)` once per dropped turn.
    let mut drop_end = 1usize;
    while estimated > MAX_HISTORY_ESTIMATED_TOKENS && drop_end < history.len() - 1 {
        let dropped_tokens = history[drop_end]
            .content
            .len()
            .div_ceil(CHARS_PER_TOKEN_ESTIMATE);
        estimated = estimated.saturating_sub(dropped_tokens);
        drop_end += 1;
    }
    let removed = drop_end - 1;
    if removed == 0 {
        // Unreachable given the guards above, but cheap insurance against
        // future changes to the budget checks.
        return false;
    }
    history.drain(1..drop_end);
    tracing::info!(
        removed_turns = removed,
        estimated_tokens = estimated,
        remaining_turns = history.len(),
        "Proactively truncated channel history to fit token budget"
    );
    true
}
fn append_sender_turn(ctx: &ChannelRuntimeContext, sender_key: &str, turn: ChatMessage) {
let mut histories = ctx
.conversation_histories
@ -1814,6 +1867,12 @@ async fn process_channel_message(
build_channel_system_prompt(ctx.system_prompt.as_str(), &msg.channel, &msg.reply_target);
let mut history = vec![ChatMessage::system(system_prompt)];
history.extend(prior_turns);
// Proactively truncate history to avoid context-window overflow errors.
// This drops the oldest non-system turns until the estimated token count
// is within the budget, preventing a wasted round-trip to the API.
truncate_history_to_token_budget(&mut history);
let use_streaming = target_channel
.as_ref()
.is_some_and(|ch| ch.supports_draft_updates());
@ -4020,6 +4079,59 @@ mod tests {
}));
}
#[test]
fn estimate_history_tokens_basic() {
    // 400 chars / 4 = 100 tokens; 800 chars / 4 = 200 tokens.
    let history = [
        ChatMessage::system("a".repeat(400)),
        ChatMessage::user("b".repeat(800)),
    ];
    let total = estimate_history_tokens(&history);
    assert_eq!(total, 300);
}
#[test]
fn truncate_history_within_budget_is_noop() {
    // A tiny two-message history is far under budget — nothing should change.
    let mut history = vec![ChatMessage::system("sys"), ChatMessage::user("short message")];
    let changed = truncate_history_to_token_budget(&mut history);
    assert!(!changed);
    assert_eq!(history.len(), 2);
}
#[test]
fn truncate_history_drops_oldest_non_system_turns() {
    // Four ~25K-token turns (100K chars each) push the estimate past the
    // 100K-token budget, forcing the oldest middle turns to be dropped.
    let filler = "x".repeat(100_000);
    let mut history = vec![ChatMessage::system("system prompt")];
    for i in 0..4 {
        let turn = if i % 2 == 0 {
            ChatMessage::user(filler.clone())
        } else {
            ChatMessage::assistant(filler.clone())
        };
        history.push(turn);
    }
    history.push(ChatMessage::user("latest question"));
    assert!(truncate_history_to_token_budget(&mut history));
    // The system prompt and the newest user turn must survive truncation.
    assert_eq!(history[0].role, "system");
    assert_eq!(history.last().unwrap().content, "latest question");
    // And what remains now fits inside the budget.
    assert!(estimate_history_tokens(&history) <= MAX_HISTORY_ESTIMATED_TOKENS);
}
#[test]
fn truncate_history_preserves_system_and_last_user() {
    // With only two messages there is no droppable middle, so even a
    // grossly over-budget history must be left untouched and the function
    // must report that nothing was removed.
    let huge = "y".repeat(500_000);
    let mut history = vec![ChatMessage::system(huge.clone()), ChatMessage::user(huge)];
    let truncated = truncate_history_to_token_budget(&mut history);
    assert!(!truncated);
    assert_eq!(history.len(), 2);
}
#[test]
fn append_sender_turn_stores_single_turn_per_call() {
let sender = "telegram_u2".to_string();