diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 3f8feda24..ccfd0f09e 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -2773,6 +2773,7 @@ pub async fn run( &model_name, config.agent.max_history_messages, effective_hooks, + Some(mem.as_ref()), ) .await { diff --git a/src/agent/loop_/context.rs b/src/agent/loop_/context.rs index bb7f127e8..668ea0d18 100644 --- a/src/agent/loop_/context.rs +++ b/src/agent/loop_/context.rs @@ -1,13 +1,29 @@ -use crate::memory::{self, decay, Memory}; +use crate::memory::{self, decay, Memory, MemoryCategory}; use std::fmt::Write; /// Default half-life (days) for time decay in context building. const CONTEXT_DECAY_HALF_LIFE_DAYS: f64 = 7.0; +/// Score boost applied to `Core` category memories so durable facts and +/// preferences surface even when keyword/semantic similarity is moderate. +const CORE_CATEGORY_SCORE_BOOST: f64 = 0.3; + +/// Maximum number of memory entries included in the context preamble. +const CONTEXT_ENTRY_LIMIT: usize = 5; + +/// Over-fetch factor: retrieve more candidates than the output limit so +/// that Core boost and re-ranking can select the best subset. +const RECALL_OVER_FETCH_FACTOR: usize = 2; + /// Build context preamble by searching memory for relevant entries. /// Entries with a hybrid score below `min_relevance_score` are dropped to /// prevent unrelated memories from bleeding into the conversation. +/// /// Core memories are exempt from time decay (evergreen). +/// +/// `Core` category memories receive a score boost so that durable facts, +/// preferences, and project rules are more likely to appear in context +/// even when semantic similarity to the current message is moderate. 
pub(super) async fn build_context( mem: &dyn Memory, user_msg: &str, @@ -16,32 +32,41 @@ pub(super) async fn build_context( ) -> String { let mut context = String::new(); - // Pull relevant memories for this message - if let Ok(mut entries) = mem.recall(user_msg, 5, session_id).await { - // Apply time decay: older non-Core memories score lower + // Over-fetch so Core-boosted entries can compete fairly after re-ranking. + let fetch_limit = CONTEXT_ENTRY_LIMIT * RECALL_OVER_FETCH_FACTOR; + if let Ok(mut entries) = mem.recall(user_msg, fetch_limit, session_id).await { + // Apply time decay: older non-Core memories score lower. decay::apply_time_decay(&mut entries, CONTEXT_DECAY_HALF_LIFE_DAYS); - let relevant: Vec<_> = entries + // Apply Core category boost and filter by minimum relevance. + let mut scored: Vec<_> = entries .iter() - .filter(|e| match e.score { - Some(score) => score >= min_relevance_score, - None => true, + .filter(|e| !memory::is_assistant_autosave_key(&e.key)) + .filter_map(|e| { + let base = e.score.unwrap_or(min_relevance_score); + let boosted = if e.category == MemoryCategory::Core { + (base + CORE_CATEGORY_SCORE_BOOST).min(1.0) + } else { + base + }; + if boosted >= min_relevance_score { + Some((e, boosted)) + } else { + None + } }) .collect(); - if !relevant.is_empty() { + // Sort by boosted score descending, then truncate to output limit. 
+ scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + scored.truncate(CONTEXT_ENTRY_LIMIT); + + if !scored.is_empty() { context.push_str("[Memory context]\n"); - for entry in &relevant { - if memory::is_assistant_autosave_key(&entry.key) { - continue; - } + for (entry, _) in &scored { let _ = writeln!(context, "- {}: {}", entry.key, entry.content); } - if context == "[Memory context]\n" { - context.clear(); - } else { - context.push('\n'); - } + context.push('\n'); } } @@ -87,3 +112,135 @@ pub(super) fn build_hardware_context( context.push('\n'); context } + +#[cfg(test)] +mod tests { + use super::*; + use crate::memory::{Memory, MemoryCategory, MemoryEntry}; + use async_trait::async_trait; + use std::sync::Arc; + + struct MockMemory { + entries: Arc<Vec<MemoryEntry>>, + } + + #[async_trait] + impl Memory for MockMemory { + async fn store( + &self, + _key: &str, + _content: &str, + _category: MemoryCategory, + _session_id: Option<&str>, + ) -> anyhow::Result<()> { + Ok(()) + } + + async fn recall( + &self, + _query: &str, + _limit: usize, + _session_id: Option<&str>, + ) -> anyhow::Result<Vec<MemoryEntry>> { + Ok(self.entries.as_ref().clone()) + } + + async fn get(&self, _key: &str) -> anyhow::Result<Option<MemoryEntry>> { + Ok(None) + } + + async fn list( + &self, + _category: Option<&MemoryCategory>, + _session_id: Option<&str>, + ) -> anyhow::Result<Vec<MemoryEntry>> { + Ok(vec![]) + } + + async fn forget(&self, _key: &str) -> anyhow::Result<bool> { + Ok(true) + } + + async fn count(&self) -> anyhow::Result<usize> { + Ok(self.entries.len()) + } + + async fn health_check(&self) -> bool { + true + } + + fn name(&self) -> &str { + "mock-memory" + } + } + + #[tokio::test] + async fn build_context_promotes_core_entries_with_score_boost() { + let memory = MockMemory { + entries: Arc::new(vec![ + MemoryEntry { + id: "1".into(), + key: "conv_note".into(), + content: "small talk".into(), + category: MemoryCategory::Conversation, + timestamp: "now".into(), + session_id: None, + score: Some(0.6), + }, + MemoryEntry { + id: 
"2".into(), + key: "core_rule".into(), + content: "always provide tests".into(), + category: MemoryCategory::Core, + timestamp: "now".into(), + session_id: None, + score: Some(0.2), + }, + MemoryEntry { + id: "3".into(), + key: "conv_low".into(), + content: "irrelevant".into(), + category: MemoryCategory::Conversation, + timestamp: "now".into(), + session_id: None, + score: Some(0.1), + }, + ]), + }; + + let context = build_context(&memory, "test query", 0.4, None).await; + assert!( + context.contains("core_rule"), + "expected core boost to include core_rule" + ); + assert!( + !context.contains("conv_low"), + "low-score non-core should be filtered" + ); + } + + #[tokio::test] + async fn build_context_keeps_output_limit_at_five_entries() { + let entries = (0..8) + .map(|idx| MemoryEntry { + id: idx.to_string(), + key: format!("k{idx}"), + content: format!("v{idx}"), + category: MemoryCategory::Conversation, + timestamp: "now".into(), + session_id: None, + score: Some(0.9 - (idx as f64 * 0.01)), + }) + .collect::<Vec<_>>(); + let memory = MockMemory { + entries: Arc::new(entries), + }; + + let context = build_context(&memory, "limit", 0.0, None).await; + let listed = context + .lines() + .filter(|line| line.starts_with("- ")) + .count(); + assert_eq!(listed, 5, "context output limit should remain 5 entries"); + } +} diff --git a/src/agent/loop_/history.rs b/src/agent/loop_/history.rs index 8e228b4d6..eb6bc7423 100644 --- a/src/agent/loop_/history.rs +++ b/src/agent/loop_/history.rs @@ -1,3 +1,4 @@ +use crate::memory::{Memory, MemoryCategory}; use crate::providers::{ChatMessage, Provider}; use crate::util::truncate_with_ellipsis; use anyhow::Result; @@ -12,6 +13,9 @@ const COMPACTION_MAX_SOURCE_CHARS: usize = 12_000; /// Max characters retained in stored compaction summary. const COMPACTION_MAX_SUMMARY_CHARS: usize = 2_000; +/// Safety cap for durable facts extracted during pre-compaction flush. 
+const COMPACTION_MAX_FLUSH_FACTS: usize = 8; + /// Trim conversation history to prevent unbounded growth. /// Preserves the system prompt (first message if role=system) and the most recent messages. pub(super) fn trim_history(history: &mut Vec<ChatMessage>, max_history: usize) { @@ -67,6 +71,7 @@ pub(super) async fn auto_compact_history( model: &str, max_history: usize, hooks: Option<&crate::hooks::HookRunner>, + memory: Option<&dyn Memory>, ) -> Result<bool> { let has_system = history.first().map_or(false, |m| m.role == "system"); let non_system_count = if has_system { @@ -105,6 +110,13 @@ pub(super) async fn auto_compact_history( }; let transcript = build_compaction_transcript(&to_compact); + // ── Pre-compaction memory flush ────────────────────────────────── + // Before discarding old messages, ask the LLM to extract durable + // facts and store them as Core memories so they survive compaction. + if let Some(mem) = memory { + flush_durable_facts(provider, model, &transcript, mem).await; + } + let summarizer_system = "You are a conversation compaction engine. Summarize older chat history into concise context for future turns. Preserve: user preferences, commitments, decisions, unresolved tasks, key facts. Omit: filler, repeated chit-chat, verbose tool logs. Output plain text bullet points only."; let summarizer_user = format!( @@ -137,6 +149,86 @@ pub(super) async fn auto_compact_history( Ok(true) } +/// Extract durable facts from a conversation transcript and store them as +/// `Core` memories. Called before compaction discards old messages. +/// +/// Best-effort: failures are logged but never block compaction. +async fn flush_durable_facts( + provider: &dyn Provider, + model: &str, + transcript: &str, + memory: &dyn Memory, +) { + const FLUSH_SYSTEM: &str = "\ +You extract durable facts from a conversation that is about to be compacted. 
\ +Output ONLY facts worth remembering long-term — user preferences, project decisions, \ +technical constraints, commitments, or important discoveries. \ +Output one fact per line, prefixed with a short key in brackets. \ +Example:\n\ +[preferred_language] User prefers Rust over Go\n\ +[db_choice] Project uses PostgreSQL 16\n\ +If there are no durable facts, output exactly: NONE"; + + let flush_user = format!( + "Extract durable facts from this conversation (max 8 facts):\n\n{}", + transcript + ); + + let response = match provider + .chat_with_system(Some(FLUSH_SYSTEM), &flush_user, model, 0.2) + .await + { + Ok(r) => r, + Err(e) => { + tracing::warn!("Pre-compaction memory flush failed: {e}"); + return; + } + }; + + if response.trim().eq_ignore_ascii_case("NONE") || response.trim().is_empty() { + return; + } + + let mut stored = 0usize; + for line in response.lines() { + if stored >= COMPACTION_MAX_FLUSH_FACTS { + break; + } + let line = line.trim(); + if line.is_empty() { + continue; + } + // Parse "[key] content" format + if let Some((key, content)) = parse_fact_line(line) { + let prefixed_key = format!("compaction_fact_{key}"); + if let Err(e) = memory + .store(&prefixed_key, content, MemoryCategory::Core, None) + .await + { + tracing::warn!("Failed to store compaction fact '{prefixed_key}': {e}"); + } else { + stored += 1; + } + } + } + if stored > 0 { + tracing::info!("Pre-compaction flush: stored {stored} durable fact(s) to Core memory"); + } +} + +/// Parse a `[key] content` line from the fact extraction output. 
+fn parse_fact_line(line: &str) -> Option<(&str, &str)> { + let line = line.trim_start_matches(|c: char| c == '-' || c.is_whitespace()); + let rest = line.strip_prefix('[')?; + let close = rest.find(']')?; + let key = rest[..close].trim(); + let content = rest[close + 1..].trim(); + if key.is_empty() || content.is_empty() { + return None; + } + Some((key, content)) +} + #[cfg(test)] mod tests { use super::*; @@ -213,10 +305,16 @@ mod tests { // previously cut right before the tool result (index 2). assert_eq!(history.len(), 22); - let compacted = - auto_compact_history(&mut history, &StaticSummaryProvider, "test-model", 21, None) - .await - .expect("compaction should succeed"); + let compacted = auto_compact_history( + &mut history, + &StaticSummaryProvider, + "test-model", + 21, + None, + None, + ) + .await + .expect("compaction should succeed"); assert!(compacted); assert_eq!(history[0].role, "assistant"); @@ -229,4 +327,297 @@ mod tests { "first retained message must not be an orphan tool result" ); } + + #[test] + fn parse_fact_line_extracts_key_and_content() { + assert_eq!( + parse_fact_line("[preferred_language] User prefers Rust over Go"), + Some(("preferred_language", "User prefers Rust over Go")) + ); + } + + #[test] + fn parse_fact_line_handles_leading_dash() { + assert_eq!( + parse_fact_line("- [db_choice] Project uses PostgreSQL 16"), + Some(("db_choice", "Project uses PostgreSQL 16")) + ); + } + + #[test] + fn parse_fact_line_rejects_empty_key_or_content() { + assert_eq!(parse_fact_line("[] some content"), None); + assert_eq!(parse_fact_line("[key]"), None); + assert_eq!(parse_fact_line("[key] "), None); + } + + #[test] + fn parse_fact_line_rejects_malformed_input() { + assert_eq!(parse_fact_line("no brackets here"), None); + assert_eq!(parse_fact_line(""), None); + assert_eq!(parse_fact_line("[unclosed bracket"), None); + } + + #[tokio::test] + async fn auto_compact_with_memory_stores_durable_facts() { + use crate::memory::{MemoryCategory, 
MemoryEntry}; + use std::sync::{Arc, Mutex}; + + struct FactCapture { + stored: Mutex<Vec<(String, String)>>, + } + + #[async_trait] + impl Memory for FactCapture { + async fn store( + &self, + key: &str, + content: &str, + _category: MemoryCategory, + _session_id: Option<&str>, + ) -> anyhow::Result<()> { + self.stored + .lock() + .unwrap() + .push((key.to_string(), content.to_string())); + Ok(()) + } + async fn recall( + &self, + _q: &str, + _l: usize, + _s: Option<&str>, + ) -> anyhow::Result<Vec<MemoryEntry>> { + Ok(vec![]) + } + async fn get(&self, _k: &str) -> anyhow::Result<Option<MemoryEntry>> { + Ok(None) + } + async fn list( + &self, + _c: Option<&MemoryCategory>, + _s: Option<&str>, + ) -> anyhow::Result<Vec<MemoryEntry>> { + Ok(vec![]) + } + async fn forget(&self, _k: &str) -> anyhow::Result<bool> { + Ok(true) + } + async fn count(&self) -> anyhow::Result<usize> { + Ok(0) + } + async fn health_check(&self) -> bool { + true + } + fn name(&self) -> &str { + "fact-capture" + } + } + + /// Provider that returns facts for the first call (flush) and summary for the second (compaction). 
+ struct FlushThenSummaryProvider { + call_count: Mutex<usize>, + } + + #[async_trait] + impl Provider for FlushThenSummaryProvider { + async fn chat_with_system( + &self, + _system_prompt: Option<&str>, + _message: &str, + _model: &str, + _temperature: f64, + ) -> anyhow::Result<String> { + let mut count = self.call_count.lock().unwrap(); + *count += 1; + if *count == 1 { + // flush_durable_facts call + Ok("[lang] User prefers Rust\n[db] PostgreSQL 16".to_string()) + } else { + // summarizer call + Ok("- summarized context".to_string()) + } + } + + async fn chat( + &self, + _request: ChatRequest<'_>, + _model: &str, + _temperature: f64, + ) -> anyhow::Result<ChatResponse> { + Ok(ChatResponse { + text: Some("- summarized context".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + }) + } + } + + let mem = Arc::new(FactCapture { + stored: Mutex::new(Vec::new()), + }); + let provider = FlushThenSummaryProvider { + call_count: Mutex::new(0), + }; + + let mut history: Vec<ChatMessage> = Vec::new(); + for i in 0..25 { + history.push(ChatMessage::user(format!("msg-{i}"))); + } + + let compacted = auto_compact_history( + &mut history, + &provider, + "test-model", + 21, + None, + Some(mem.as_ref()), + ) + .await + .expect("compaction should succeed"); + + assert!(compacted); + + let stored = mem.stored.lock().unwrap(); + assert_eq!(stored.len(), 2, "should store 2 durable facts"); + assert_eq!(stored[0].0, "compaction_fact_lang"); + assert_eq!(stored[0].1, "User prefers Rust"); + assert_eq!(stored[1].0, "compaction_fact_db"); + assert_eq!(stored[1].1, "PostgreSQL 16"); + } + + #[tokio::test] + async fn auto_compact_with_memory_caps_fact_flush_at_eight_entries() { + use crate::memory::{MemoryCategory, MemoryEntry}; + use std::sync::{Arc, Mutex}; + + struct FactCapture { + stored: Mutex<Vec<(String, String)>>, + } + + #[async_trait] + impl Memory for FactCapture { + async fn store( + &self, + key: &str, + content: &str, + _category: MemoryCategory, + _session_id: Option<&str>, + ) 
-> anyhow::Result<()> { + self.stored + .lock() + .expect("fact capture lock") + .push((key.to_string(), content.to_string())); + Ok(()) + } + + async fn recall( + &self, + _q: &str, + _l: usize, + _s: Option<&str>, + ) -> anyhow::Result<Vec<MemoryEntry>> { + Ok(vec![]) + } + + async fn get(&self, _k: &str) -> anyhow::Result<Option<MemoryEntry>> { + Ok(None) + } + + async fn list( + &self, + _c: Option<&MemoryCategory>, + _s: Option<&str>, + ) -> anyhow::Result<Vec<MemoryEntry>> { + Ok(vec![]) + } + + async fn forget(&self, _k: &str) -> anyhow::Result<bool> { + Ok(true) + } + + async fn count(&self) -> anyhow::Result<usize> { + Ok(0) + } + + async fn health_check(&self) -> bool { + true + } + + fn name(&self) -> &str { + "fact-capture-cap" + } + } + + struct FlushManyFactsProvider { + call_count: Mutex<usize>, + } + + #[async_trait] + impl Provider for FlushManyFactsProvider { + async fn chat_with_system( + &self, + _system_prompt: Option<&str>, + _message: &str, + _model: &str, + _temperature: f64, + ) -> anyhow::Result<String> { + let mut count = self.call_count.lock().expect("provider lock"); + *count += 1; + if *count == 1 { + let lines = (0..12) + .map(|idx| format!("[k{idx}] fact-{idx}")) + .collect::<Vec<_>>() + .join("\n"); + Ok(lines) + } else { + Ok("- summarized context".to_string()) + } + } + + async fn chat( + &self, + _request: ChatRequest<'_>, + _model: &str, + _temperature: f64, + ) -> anyhow::Result<ChatResponse> { + Ok(ChatResponse { + text: Some("- summarized context".to_string()), + tool_calls: Vec::new(), + usage: None, + reasoning_content: None, + quota_metadata: None, + }) + } + } + + let mem = Arc::new(FactCapture { + stored: Mutex::new(Vec::new()), + }); + let provider = FlushManyFactsProvider { + call_count: Mutex::new(0), + }; + let mut history = (0..30) + .map(|idx| ChatMessage::user(format!("msg-{idx}"))) + .collect::<Vec<_>>(); + + let compacted = auto_compact_history( + &mut history, + &provider, + "test-model", + 21, + None, + Some(mem.as_ref()), + ) + .await + .expect("compaction should succeed"); + assert!(compacted); + + let stored = 
mem.stored.lock().expect("fact capture lock"); + assert_eq!(stored.len(), COMPACTION_MAX_FLUSH_FACTS); + assert_eq!(stored[0].0, "compaction_fact_k0"); + assert_eq!(stored[7].0, "compaction_fact_k7"); + } } diff --git a/src/agent/memory_loader.rs b/src/agent/memory_loader.rs index 783650d64..a2aa85be2 100644 --- a/src/agent/memory_loader.rs +++ b/src/agent/memory_loader.rs @@ -1,10 +1,18 @@ -use crate::memory::{self, decay, Memory}; +use crate::memory::{self, decay, Memory, MemoryCategory}; use async_trait::async_trait; use std::fmt::Write; /// Default half-life (days) for time decay in memory loading. const LOADER_DECAY_HALF_LIFE_DAYS: f64 = 7.0; +/// Score boost applied to `Core` category memories so durable facts and +/// preferences surface even when keyword/semantic similarity is moderate. +const CORE_CATEGORY_SCORE_BOOST: f64 = 0.3; + +/// Over-fetch factor: retrieve more candidates than the output limit so +/// that Core boost and re-ranking can select the best subset. +const RECALL_OVER_FETCH_FACTOR: usize = 2; + #[async_trait] pub trait MemoryLoader: Send + Sync { async fn load_context(&self, memory: &dyn Memory, user_message: &str) @@ -41,32 +49,47 @@ impl MemoryLoader for DefaultMemoryLoader { memory: &dyn Memory, user_message: &str, ) -> anyhow::Result<String> { - let mut entries = memory.recall(user_message, self.limit, None).await?; + // Over-fetch so Core-boosted entries can compete fairly after re-ranking. + let fetch_limit = self.limit * RECALL_OVER_FETCH_FACTOR; + let mut entries = memory.recall(user_message, fetch_limit, None).await?; if entries.is_empty() { return Ok(String::new()); } - // Apply time decay: older non-Core memories score lower + // Apply time decay: older non-Core memories score lower. 
decay::apply_time_decay(&mut entries, LOADER_DECAY_HALF_LIFE_DAYS); - let mut context = String::from("[Memory context]\n"); - for entry in entries { - if memory::is_assistant_autosave_key(&entry.key) { - continue; - } - if let Some(score) = entry.score { - if score < self.min_relevance_score { - continue; + // Apply Core category boost and filter by minimum relevance. + let mut scored: Vec<_> = entries + .iter() + .filter(|e| !memory::is_assistant_autosave_key(&e.key)) + .filter_map(|e| { + let base = e.score.unwrap_or(self.min_relevance_score); + let boosted = if e.category == MemoryCategory::Core { + (base + CORE_CATEGORY_SCORE_BOOST).min(1.0) + } else { + base + }; + if boosted >= self.min_relevance_score { + Some((e, boosted)) + } else { + None } - } - let _ = writeln!(context, "- {}: {}", entry.key, entry.content); - } + }) + .collect(); - // If all entries were below threshold, return empty - if context == "[Memory context]\n" { + // Sort by boosted score descending, then truncate to output limit. 
+ scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + scored.truncate(self.limit); + + if scored.is_empty() { return Ok(String::new()); } + let mut context = String::from("[Memory context]\n"); + for (entry, _) in &scored { + let _ = writeln!(context, "- {}: {}", entry.key, entry.content); + } context.push('\n'); Ok(context) } @@ -233,4 +256,93 @@ mod tests { assert!(!context.contains("assistant_resp_legacy")); assert!(!context.contains("fabricated detail")); } + + #[tokio::test] + async fn core_category_boost_promotes_low_score_core_entry() { + let loader = DefaultMemoryLoader::new(2, 0.4); + let memory = MockMemoryWithEntries { + entries: Arc::new(vec![ + MemoryEntry { + id: "1".into(), + key: "chat_detail".into(), + content: "talked about weather".into(), + category: MemoryCategory::Conversation, + timestamp: "now".into(), + session_id: None, + score: Some(0.6), + }, + MemoryEntry { + id: "2".into(), + key: "project_rule".into(), + content: "always use async/await".into(), + category: MemoryCategory::Core, + timestamp: "now".into(), + session_id: None, + // Below threshold without boost (0.25 < 0.4), + // but above with +0.3 boost (0.55 >= 0.4). 
+ score: Some(0.25), + }, + MemoryEntry { + id: "3".into(), + key: "low_conv".into(), + content: "irrelevant chatter".into(), + category: MemoryCategory::Conversation, + timestamp: "now".into(), + session_id: None, + score: Some(0.2), + }, + ]), + }; + + let context = loader.load_context(&memory, "code style").await.unwrap(); + // Core entry should survive thanks to boost + assert!( + context.contains("project_rule"), + "Core entry should be promoted by boost: {context}" + ); + // Low-score Conversation entry should be filtered out + assert!( + !context.contains("low_conv"), + "Low-score non-Core entry should be filtered: {context}" + ); + } + + #[tokio::test] + async fn core_boost_reranks_above_conversation() { + let loader = DefaultMemoryLoader::new(1, 0.0); + let memory = MockMemoryWithEntries { + entries: Arc::new(vec![ + MemoryEntry { + id: "1".into(), + key: "conv_high".into(), + content: "recent conversation".into(), + category: MemoryCategory::Conversation, + timestamp: "now".into(), + session_id: None, + score: Some(0.6), + }, + MemoryEntry { + id: "2".into(), + key: "core_pref".into(), + content: "user prefers Rust".into(), + category: MemoryCategory::Core, + timestamp: "now".into(), + session_id: None, + // 0.5 + 0.3 boost = 0.8 > 0.6 + score: Some(0.5), + }, + ]), + }; + + let context = loader.load_context(&memory, "language").await.unwrap(); + // With limit=1 and Core boost, Core entry (0.8) should win over Conversation (0.6) + assert!( + context.contains("core_pref"), + "Boosted Core should rank above Conversation: {context}" + ); + assert!( + !context.contains("conv_high"), + "Conversation should be truncated when limit=1: {context}" + ); + } } diff --git a/src/channels/mod.rs b/src/channels/mod.rs index 26c33210f..cc715810d 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -251,6 +251,14 @@ struct ChannelRuntimeDefaults { api_url: Option, reliability: crate::config::ReliabilityConfig, cost: crate::config::CostConfig, + auto_save_memory: 
bool, + max_tool_iterations: usize, + min_relevance_score: f64, + message_timeout_secs: u64, + interrupt_on_new_message: bool, + multimodal: crate::config::MultimodalConfig, + query_classification: crate::config::QueryClassificationConfig, + model_routes: Vec<crate::config::ModelRouteConfig>, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1048,6 +1056,14 @@ fn resolved_default_model(config: &Config) -> String { } fn runtime_defaults_from_config(config: &Config) -> ChannelRuntimeDefaults { + let message_timeout_secs = + effective_channel_message_timeout_secs(config.channels_config.message_timeout_secs); + let interrupt_on_new_message = config + .channels_config + .telegram + .as_ref() + .is_some_and(|tg| tg.interrupt_on_new_message); + ChannelRuntimeDefaults { default_provider: resolved_default_provider(config), model: resolved_default_model(config), @@ -1056,6 +1072,14 @@ fn runtime_defaults_from_config(config: &Config) -> ChannelRuntimeDefaults { api_url: config.api_url.clone(), reliability: config.reliability.clone(), cost: config.cost.clone(), + auto_save_memory: config.memory.auto_save, + max_tool_iterations: config.agent.max_tool_iterations, + min_relevance_score: config.memory.min_relevance_score, + message_timeout_secs, + interrupt_on_new_message, + multimodal: config.multimodal.clone(), + query_classification: config.query_classification.clone(), + model_routes: config.model_routes.clone(), } } @@ -1102,6 +1126,14 @@ fn runtime_defaults_snapshot(ctx: &ChannelRuntimeContext) -> ChannelRuntimeDefau api_url: ctx.api_url.clone(), reliability: (*ctx.reliability).clone(), cost: crate::config::CostConfig::default(), + auto_save_memory: ctx.auto_save_memory, + max_tool_iterations: ctx.max_tool_iterations, + min_relevance_score: ctx.min_relevance_score, + message_timeout_secs: ctx.message_timeout_secs, + interrupt_on_new_message: ctx.interrupt_on_new_message, + multimodal: ctx.multimodal.clone(), + query_classification: ctx.query_classification.clone(), + model_routes: ctx.model_routes.clone(), 
} } @@ -1722,14 +1754,14 @@ fn get_route_selection(ctx: &ChannelRuntimeContext, sender_key: &str) -> Channel /// Classify a user message and return the appropriate route selection with logging. /// Returns None if classification is disabled or no rules match. fn classify_message_route( - ctx: &ChannelRuntimeContext, + query_classification: &crate::config::QueryClassificationConfig, + model_routes: &[crate::config::ModelRouteConfig], message: &str, ) -> Option { - let decision = - crate::agent::classifier::classify_with_decision(&ctx.query_classification, message)?; + let decision = crate::agent::classifier::classify_with_decision(query_classification, message)?; // Find the matching model route - let route = ctx.model_routes.iter().find(|r| r.hint == decision.hint)?; + let route = model_routes.iter().find(|r| r.hint == decision.hint)?; tracing::info!( target: "query_classification", @@ -1956,9 +1988,9 @@ async fn get_or_create_provider( let provider = create_resilient_provider_nonblocking( provider_name, - ctx.api_key.clone(), + defaults.api_key.clone(), api_url.map(ToString::to_string), - ctx.reliability.as_ref().clone(), + defaults.reliability.clone(), ctx.provider_runtime_options.clone(), ) .await?; @@ -3446,10 +3478,14 @@ or tune thresholds in config.", } } } - // Try classification first, fall back to sender/default route - let route = classify_message_route(ctx.as_ref(), &msg.content) - .unwrap_or_else(|| get_route_selection(ctx.as_ref(), &history_key)); let runtime_defaults = runtime_defaults_snapshot(ctx.as_ref()); + // Try classification first, fall back to sender/default route. 
+ let route = classify_message_route( + &runtime_defaults.query_classification, + &runtime_defaults.model_routes, + &msg.content, + ) + .unwrap_or_else(|| get_route_selection(ctx.as_ref(), &history_key)); let active_provider = match get_or_create_provider(ctx.as_ref(), &route.provider).await { Ok(provider) => provider, Err(err) => { @@ -3469,7 +3505,9 @@ or tune thresholds in config.", return; } }; - if ctx.auto_save_memory && msg.content.chars().count() >= AUTOSAVE_MIN_MESSAGE_CHARS { + if runtime_defaults.auto_save_memory + && msg.content.chars().count() >= AUTOSAVE_MIN_MESSAGE_CHARS + { let autosave_key = conversation_memory_key(&msg); let _ = ctx .memory @@ -3532,7 +3570,7 @@ or tune thresholds in config.", let memory_context = build_memory_context( ctx.memory.as_ref(), &msg.content, - ctx.min_relevance_score, + runtime_defaults.min_relevance_score, Some(&history_key), ) .await; @@ -3686,8 +3724,10 @@ or tune thresholds in config.", Cancelled, } - let timeout_budget_secs = - channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations); + let timeout_budget_secs = channel_message_timeout_budget_secs( + runtime_defaults.message_timeout_secs, + runtime_defaults.max_tool_iterations, + ); let cost_enforcement_context = crate::agent::loop_::create_cost_enforcement_context( &runtime_defaults.cost, ctx.workspace_dir.as_path(), @@ -3751,8 +3791,8 @@ or tune thresholds in config.", Some(ctx.approval_manager.as_ref()), msg.channel.as_str(), non_cli_approval_context, - &ctx.multimodal, - ctx.max_tool_iterations, + &runtime_defaults.multimodal, + runtime_defaults.max_tool_iterations, Some(cancellation_token.clone()), delta_tx, ctx.hooks.as_deref(), @@ -3931,7 +3971,7 @@ or tune thresholds in config.", &history_key, ChatMessage::assistant(&history_response), ); - if ctx.auto_save_memory + if runtime_defaults.auto_save_memory && delivered_response.chars().count() >= AUTOSAVE_MIN_MESSAGE_CHARS { let assistant_key = assistant_memory_key(&msg); @@ 
-4044,7 +4084,7 @@ or tune thresholds in config.", } } } else if is_tool_iteration_limit_error(&e) { - let limit = ctx.max_tool_iterations.max(1); + let limit = runtime_defaults.max_tool_iterations.max(1); let pause_text = format!( "⚠️ Reached tool-iteration limit ({limit}) for this turn. Context and progress were preserved. Reply \"continue\" to resume, or increase `agent.max_tool_iterations`." ); @@ -4140,7 +4180,9 @@ or tune thresholds in config.", LlmExecutionResult::Completed(Err(_)) => { let timeout_msg = format!( "LLM response timed out after {}s (base={}s, max_tool_iterations={})", - timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations + timeout_budget_secs, + runtime_defaults.message_timeout_secs, + runtime_defaults.max_tool_iterations ); runtime_trace::record_event( "channel_message_timeout", @@ -4221,8 +4263,9 @@ async fn run_message_dispatch_loop( let task_sequence = Arc::clone(&task_sequence); workers.spawn(async move { let _permit = permit; + let runtime_defaults = runtime_defaults_snapshot(worker_ctx.as_ref()); let interrupt_enabled = - worker_ctx.interrupt_on_new_message && msg.channel == "telegram"; + runtime_defaults.interrupt_on_new_message && msg.channel == "telegram"; let sender_scope_key = interruption_scope_key(&msg); let cancellation_token = CancellationToken::new(); let completion = Arc::new(InFlightTaskCompletion::new()); @@ -9503,6 +9546,14 @@ BTC is currently around $65,000 based on latest tool output."# api_url: None, reliability: crate::config::ReliabilityConfig::default(), cost: crate::config::CostConfig::default(), + auto_save_memory: false, + max_tool_iterations: 5, + min_relevance_score: 0.0, + message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, + interrupt_on_new_message: false, + multimodal: crate::config::MultimodalConfig::default(), + query_classification: crate::config::QueryClassificationConfig::default(), + model_routes: Vec::new(), }, perplexity_filter: crate::config::PerplexityFilterConfig::default(), 
outbound_leak_guard: crate::config::OutboundLeakGuardConfig::default(), @@ -9685,6 +9736,13 @@ BTC is currently around $65,000 based on latest tool output."# cfg.default_provider = Some("ollama".to_string()); cfg.default_model = Some("llama3.2".to_string()); cfg.api_key = Some("http://127.0.0.1:11434".to_string()); + cfg.memory.auto_save = false; + cfg.memory.min_relevance_score = 0.15; + cfg.agent.max_tool_iterations = 5; + cfg.channels_config.message_timeout_secs = 45; + cfg.multimodal.allow_remote_fetch = false; + cfg.query_classification.enabled = false; + cfg.model_routes = vec![]; cfg.autonomy.non_cli_natural_language_approval_mode = crate::config::NonCliNaturalLanguageApprovalMode::Direct; cfg.autonomy.non_cli_excluded_tools = vec!["shell".to_string()]; @@ -9751,6 +9809,14 @@ BTC is currently around $65,000 based on latest tool output."# runtime_outbound_leak_guard_snapshot(runtime_ctx.as_ref()).action, crate::config::OutboundLeakGuardAction::Redact ); + let defaults = runtime_defaults_snapshot(runtime_ctx.as_ref()); + assert!(!defaults.auto_save_memory); + assert_eq!(defaults.min_relevance_score, 0.15); + assert_eq!(defaults.max_tool_iterations, 5); + assert_eq!(defaults.message_timeout_secs, 45); + assert!(!defaults.multimodal.allow_remote_fetch); + assert!(!defaults.query_classification.enabled); + assert!(defaults.model_routes.is_empty()); cfg.autonomy.non_cli_natural_language_approval_mode = crate::config::NonCliNaturalLanguageApprovalMode::Disabled; @@ -9766,6 +9832,28 @@ BTC is currently around $65,000 based on latest tool output."# cfg.security.perplexity_filter.perplexity_threshold = 12.5; cfg.security.outbound_leak_guard.action = crate::config::OutboundLeakGuardAction::Block; cfg.security.outbound_leak_guard.sensitivity = 0.92; + cfg.memory.auto_save = true; + cfg.memory.min_relevance_score = 0.65; + cfg.agent.max_tool_iterations = 11; + cfg.channels_config.message_timeout_secs = 120; + cfg.multimodal.allow_remote_fetch = true; + 
cfg.query_classification.enabled = true; + cfg.query_classification.rules = vec![crate::config::ClassificationRule { + hint: "reasoning".to_string(), + keywords: vec!["analyze".to_string()], + patterns: vec!["deep".to_string()], + min_length: None, + max_length: None, + priority: 10, + }]; + cfg.model_routes = vec![crate::config::ModelRouteConfig { + hint: "reasoning".to_string(), + provider: "openrouter".to_string(), + model: "openai/gpt-5.2".to_string(), + max_tokens: Some(512), + api_key: None, + transport: None, + }]; cfg.save().await.expect("save updated config"); maybe_apply_runtime_config_update(runtime_ctx.as_ref()) @@ -9797,6 +9885,15 @@ BTC is currently around $65,000 based on latest tool output."# crate::config::OutboundLeakGuardAction::Block ); assert_eq!(leak_guard_cfg.sensitivity, 0.92); + let defaults = runtime_defaults_snapshot(runtime_ctx.as_ref()); + assert!(defaults.auto_save_memory); + assert_eq!(defaults.min_relevance_score, 0.65); + assert_eq!(defaults.max_tool_iterations, 11); + assert_eq!(defaults.message_timeout_secs, 120); + assert!(defaults.multimodal.allow_remote_fetch); + assert!(defaults.query_classification.enabled); + assert_eq!(defaults.query_classification.rules.len(), 1); + assert_eq!(defaults.model_routes.len(), 1); let mut store = runtime_config_store() .lock()