Merge branch 'main' into feat/feishu-doc-tool

This commit is contained in:
Chum Yin 2026-03-02 04:29:04 +08:00 committed by GitHub
commit d8f6820a4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 815 additions and 57 deletions

View File

@@ -2773,6 +2773,7 @@ pub async fn run(
&model_name,
config.agent.max_history_messages,
effective_hooks,
Some(mem.as_ref()),
)
.await
{

View File

@@ -1,13 +1,29 @@
use crate::memory::{self, decay, Memory};
use crate::memory::{self, decay, Memory, MemoryCategory};
use std::fmt::Write;
/// Default half-life (days) for time decay in context building.
const CONTEXT_DECAY_HALF_LIFE_DAYS: f64 = 7.0;
/// Score boost applied to `Core` category memories so durable facts and
/// preferences surface even when keyword/semantic similarity is moderate.
const CORE_CATEGORY_SCORE_BOOST: f64 = 0.3;
/// Maximum number of memory entries included in the context preamble.
const CONTEXT_ENTRY_LIMIT: usize = 5;
/// Over-fetch factor: retrieve more candidates than the output limit so
/// that Core boost and re-ranking can select the best subset.
const RECALL_OVER_FETCH_FACTOR: usize = 2;
/// Build context preamble by searching memory for relevant entries.
/// Entries with a hybrid score below `min_relevance_score` are dropped to
/// prevent unrelated memories from bleeding into the conversation.
///
/// Core memories are exempt from time decay (evergreen).
///
/// `Core` category memories receive a score boost so that durable facts,
/// preferences, and project rules are more likely to appear in context
/// even when semantic similarity to the current message is moderate.
pub(super) async fn build_context(
mem: &dyn Memory,
user_msg: &str,
@ -16,32 +32,41 @@ pub(super) async fn build_context(
) -> String {
let mut context = String::new();
// Pull relevant memories for this message
if let Ok(mut entries) = mem.recall(user_msg, 5, session_id).await {
// Apply time decay: older non-Core memories score lower
// Over-fetch so Core-boosted entries can compete fairly after re-ranking.
let fetch_limit = CONTEXT_ENTRY_LIMIT * RECALL_OVER_FETCH_FACTOR;
if let Ok(mut entries) = mem.recall(user_msg, fetch_limit, session_id).await {
// Apply time decay: older non-Core memories score lower.
decay::apply_time_decay(&mut entries, CONTEXT_DECAY_HALF_LIFE_DAYS);
let relevant: Vec<_> = entries
// Apply Core category boost and filter by minimum relevance.
let mut scored: Vec<_> = entries
.iter()
.filter(|e| match e.score {
Some(score) => score >= min_relevance_score,
None => true,
.filter(|e| !memory::is_assistant_autosave_key(&e.key))
.filter_map(|e| {
let base = e.score.unwrap_or(min_relevance_score);
let boosted = if e.category == MemoryCategory::Core {
(base + CORE_CATEGORY_SCORE_BOOST).min(1.0)
} else {
base
};
if boosted >= min_relevance_score {
Some((e, boosted))
} else {
None
}
})
.collect();
if !relevant.is_empty() {
// Sort by boosted score descending, then truncate to output limit.
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.truncate(CONTEXT_ENTRY_LIMIT);
if !scored.is_empty() {
context.push_str("[Memory context]\n");
for entry in &relevant {
if memory::is_assistant_autosave_key(&entry.key) {
continue;
}
for (entry, _) in &scored {
let _ = writeln!(context, "- {}: {}", entry.key, entry.content);
}
if context == "[Memory context]\n" {
context.clear();
} else {
context.push('\n');
}
context.push('\n');
}
}
@@ -87,3 +112,135 @@ pub(super) fn build_hardware_context(
context.push('\n');
context
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory::{Memory, MemoryCategory, MemoryEntry};
    use async_trait::async_trait;
    use std::sync::Arc;

    /// Memory stub whose `recall` always returns a fixed set of entries,
    /// regardless of query, limit, or session.
    struct MockMemory {
        entries: Arc<Vec<MemoryEntry>>,
    }

    #[async_trait]
    impl Memory for MockMemory {
        async fn store(
            &self,
            _key: &str,
            _content: &str,
            _category: MemoryCategory,
            _session_id: Option<&str>,
        ) -> anyhow::Result<()> {
            Ok(())
        }
        async fn recall(
            &self,
            _query: &str,
            _limit: usize,
            _session_id: Option<&str>,
        ) -> anyhow::Result<Vec<MemoryEntry>> {
            Ok(self.entries.as_ref().clone())
        }
        async fn get(&self, _key: &str) -> anyhow::Result<Option<MemoryEntry>> {
            Ok(None)
        }
        async fn list(
            &self,
            _category: Option<&MemoryCategory>,
            _session_id: Option<&str>,
        ) -> anyhow::Result<Vec<MemoryEntry>> {
            Ok(vec![])
        }
        async fn forget(&self, _key: &str) -> anyhow::Result<bool> {
            Ok(true)
        }
        async fn count(&self) -> anyhow::Result<usize> {
            Ok(self.entries.len())
        }
        async fn health_check(&self) -> bool {
            true
        }
        fn name(&self) -> &str {
            "mock-memory"
        }
    }

    /// Shorthand constructor for a scored test entry with a fixed timestamp
    /// and no session binding.
    fn entry(
        id: &str,
        key: &str,
        content: &str,
        category: MemoryCategory,
        score: f64,
    ) -> MemoryEntry {
        MemoryEntry {
            id: id.into(),
            key: key.into(),
            content: content.into(),
            category,
            timestamp: "now".into(),
            session_id: None,
            score: Some(score),
        }
    }

    #[tokio::test]
    async fn build_context_promotes_core_entries_with_score_boost() {
        // A Core entry below threshold (0.2 < 0.4) should survive via the
        // +0.3 boost, while a low-score Conversation entry stays filtered.
        let memory = MockMemory {
            entries: Arc::new(vec![
                entry("1", "conv_note", "small talk", MemoryCategory::Conversation, 0.6),
                entry("2", "core_rule", "always provide tests", MemoryCategory::Core, 0.2),
                entry("3", "conv_low", "irrelevant", MemoryCategory::Conversation, 0.1),
            ]),
        };
        let context = build_context(&memory, "test query", 0.4, None).await;
        assert!(
            context.contains("core_rule"),
            "expected core boost to include core_rule"
        );
        assert!(
            !context.contains("conv_low"),
            "low-score non-core should be filtered"
        );
    }

    #[tokio::test]
    async fn build_context_keeps_output_limit_at_five_entries() {
        // Eight candidates above threshold; only the top five may be listed.
        let mut fixtures = Vec::with_capacity(8);
        for idx in 0..8 {
            fixtures.push(entry(
                &idx.to_string(),
                &format!("k{idx}"),
                &format!("v{idx}"),
                MemoryCategory::Conversation,
                0.9 - (idx as f64 * 0.01),
            ));
        }
        let memory = MockMemory {
            entries: Arc::new(fixtures),
        };
        let context = build_context(&memory, "limit", 0.0, None).await;
        let listed = context
            .lines()
            .filter(|line| line.starts_with("- "))
            .count();
        assert_eq!(listed, 5, "context output limit should remain 5 entries");
    }
}

View File

@ -1,3 +1,4 @@
use crate::memory::{Memory, MemoryCategory};
use crate::providers::{ChatMessage, Provider};
use crate::util::truncate_with_ellipsis;
use anyhow::Result;
@ -12,6 +13,9 @@ const COMPACTION_MAX_SOURCE_CHARS: usize = 12_000;
/// Max characters retained in stored compaction summary.
const COMPACTION_MAX_SUMMARY_CHARS: usize = 2_000;
/// Safety cap for durable facts extracted during pre-compaction flush.
const COMPACTION_MAX_FLUSH_FACTS: usize = 8;
/// Trim conversation history to prevent unbounded growth.
/// Preserves the system prompt (first message if role=system) and the most recent messages.
pub(super) fn trim_history(history: &mut Vec<ChatMessage>, max_history: usize) {
@ -67,6 +71,7 @@ pub(super) async fn auto_compact_history(
model: &str,
max_history: usize,
hooks: Option<&crate::hooks::HookRunner>,
memory: Option<&dyn Memory>,
) -> Result<bool> {
let has_system = history.first().map_or(false, |m| m.role == "system");
let non_system_count = if has_system {
@ -105,6 +110,13 @@ pub(super) async fn auto_compact_history(
};
let transcript = build_compaction_transcript(&to_compact);
// ── Pre-compaction memory flush ──────────────────────────────────
// Before discarding old messages, ask the LLM to extract durable
// facts and store them as Core memories so they survive compaction.
if let Some(mem) = memory {
flush_durable_facts(provider, model, &transcript, mem).await;
}
let summarizer_system = "You are a conversation compaction engine. Summarize older chat history into concise context for future turns. Preserve: user preferences, commitments, decisions, unresolved tasks, key facts. Omit: filler, repeated chit-chat, verbose tool logs. Output plain text bullet points only.";
let summarizer_user = format!(
@ -137,6 +149,86 @@ pub(super) async fn auto_compact_history(
Ok(true)
}
/// Extract durable facts from a conversation transcript and store them as
/// `Core` memories. Called before compaction discards old messages.
///
/// Best-effort: failures are logged but never block compaction.
///
/// The extraction prompt asks the LLM for one `[key] content` line per
/// fact; at most [`COMPACTION_MAX_FLUSH_FACTS`] parsed facts are persisted,
/// each under a `compaction_fact_`-prefixed key so flushed facts remain
/// identifiable later.
async fn flush_durable_facts(
    provider: &dyn Provider,
    model: &str,
    transcript: &str,
    memory: &dyn Memory,
) {
    const FLUSH_SYSTEM: &str = "\
You extract durable facts from a conversation that is about to be compacted. \
Output ONLY facts worth remembering long-term: user preferences, project decisions, \
technical constraints, commitments, or important discoveries. \
Output one fact per line, prefixed with a short key in brackets. \
Example:\n\
[preferred_language] User prefers Rust over Go\n\
[db_choice] Project uses PostgreSQL 16\n\
If there are no durable facts, output exactly: NONE";
    // Keep the cap stated in the prompt in sync with the enforcement
    // constant used in the storage loop below.
    let flush_user = format!(
        "Extract durable facts from this conversation (max {COMPACTION_MAX_FLUSH_FACTS} facts):\n\n{transcript}"
    );
    let response = match provider
        .chat_with_system(Some(FLUSH_SYSTEM), &flush_user, model, 0.2)
        .await
    {
        Ok(r) => r,
        Err(e) => {
            // Best-effort: log and let compaction proceed without a flush.
            tracing::warn!("Pre-compaction memory flush failed: {e}");
            return;
        }
    };
    let trimmed = response.trim();
    if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("NONE") {
        return;
    }
    let mut stored = 0usize;
    for line in response.lines() {
        // Hard cap: never persist more than COMPACTION_MAX_FLUSH_FACTS facts.
        if stored >= COMPACTION_MAX_FLUSH_FACTS {
            break;
        }
        let line = line.trim();
        if line.is_empty() {
            continue;
        }
        // Parse "[key] content" format; malformed lines are skipped.
        if let Some((key, content)) = parse_fact_line(line) {
            let prefixed_key = format!("compaction_fact_{key}");
            if let Err(e) = memory
                .store(&prefixed_key, content, MemoryCategory::Core, None)
                .await
            {
                tracing::warn!("Failed to store compaction fact '{prefixed_key}': {e}");
            } else {
                stored += 1;
            }
        }
    }
    if stored > 0 {
        tracing::info!("Pre-compaction flush: stored {stored} durable fact(s) to Core memory");
    }
}
/// Parse a `[key] content` line from the fact extraction output.
///
/// Tolerates common list-bullet prefixes (`-` and `*`) and surrounding
/// whitespace, since models frequently emit facts as bulleted lists.
/// Returns `None` for malformed lines (no `[…]` key, unclosed bracket, or
/// empty key/content) so callers can simply skip them.
fn parse_fact_line(line: &str) -> Option<(&str, &str)> {
    // Strip leading bullets/whitespace; `*` added alongside `-` because LLM
    // list output uses both interchangeably.
    let line = line.trim_start_matches(|c: char| matches!(c, '-' | '*') || c.is_whitespace());
    let rest = line.strip_prefix('[')?;
    // First `]` closes the key; everything after it is the fact content.
    let close = rest.find(']')?;
    let key = rest[..close].trim();
    let content = rest[close + 1..].trim();
    if key.is_empty() || content.is_empty() {
        return None;
    }
    Some((key, content))
}
#[cfg(test)]
mod tests {
use super::*;
@ -213,10 +305,16 @@ mod tests {
// previously cut right before the tool result (index 2).
assert_eq!(history.len(), 22);
let compacted =
auto_compact_history(&mut history, &StaticSummaryProvider, "test-model", 21, None)
.await
.expect("compaction should succeed");
let compacted = auto_compact_history(
&mut history,
&StaticSummaryProvider,
"test-model",
21,
None,
None,
)
.await
.expect("compaction should succeed");
assert!(compacted);
assert_eq!(history[0].role, "assistant");
@ -229,4 +327,297 @@ mod tests {
"first retained message must not be an orphan tool result"
);
}
#[test]
fn parse_fact_line_extracts_key_and_content() {
    // A well-formed "[key] content" line splits into its two parts.
    let parsed = parse_fact_line("[preferred_language] User prefers Rust over Go");
    assert_eq!(parsed, Some(("preferred_language", "User prefers Rust over Go")));
}
#[test]
fn parse_fact_line_handles_leading_dash() {
    // A bulleted line ("- [key] content") parses the same as a bare one.
    let parsed = parse_fact_line("- [db_choice] Project uses PostgreSQL 16");
    assert_eq!(parsed, Some(("db_choice", "Project uses PostgreSQL 16")));
}
#[test]
fn parse_fact_line_rejects_empty_key_or_content() {
    // Brackets present but key or content missing -> rejected.
    for malformed in ["[] some content", "[key]", "[key] "] {
        assert_eq!(parse_fact_line(malformed), None);
    }
}
#[test]
fn parse_fact_line_rejects_malformed_input() {
    // No bracketed key at all, empty input, or an unclosed bracket -> rejected.
    for input in ["no brackets here", "", "[unclosed bracket"] {
        assert_eq!(parse_fact_line(input), None);
    }
}
#[tokio::test]
async fn auto_compact_with_memory_stores_durable_facts() {
    use crate::memory::{MemoryCategory, MemoryEntry};
    use std::sync::{Arc, Mutex};
    /// Memory stub that records every `(key, content)` pair passed to `store`.
    struct FactCapture {
        stored: Mutex<Vec<(String, String)>>,
    }
    #[async_trait]
    impl Memory for FactCapture {
        async fn store(
            &self,
            key: &str,
            content: &str,
            _category: MemoryCategory,
            _session_id: Option<&str>,
        ) -> anyhow::Result<()> {
            self.stored
                .lock()
                .unwrap()
                .push((key.to_string(), content.to_string()));
            Ok(())
        }
        // Remaining trait methods are inert defaults; only `store` matters here.
        async fn recall(
            &self,
            _q: &str,
            _l: usize,
            _s: Option<&str>,
        ) -> anyhow::Result<Vec<MemoryEntry>> {
            Ok(vec![])
        }
        async fn get(&self, _k: &str) -> anyhow::Result<Option<MemoryEntry>> {
            Ok(None)
        }
        async fn list(
            &self,
            _c: Option<&MemoryCategory>,
            _s: Option<&str>,
        ) -> anyhow::Result<Vec<MemoryEntry>> {
            Ok(vec![])
        }
        async fn forget(&self, _k: &str) -> anyhow::Result<bool> {
            Ok(true)
        }
        async fn count(&self) -> anyhow::Result<usize> {
            Ok(0)
        }
        async fn health_check(&self) -> bool {
            true
        }
        fn name(&self) -> &str {
            "fact-capture"
        }
    }
    /// Provider that returns facts for the first call (flush) and summary for the second (compaction).
    struct FlushThenSummaryProvider {
        call_count: Mutex<usize>,
    }
    #[async_trait]
    impl Provider for FlushThenSummaryProvider {
        async fn chat_with_system(
            &self,
            _system_prompt: Option<&str>,
            _message: &str,
            _model: &str,
            _temperature: f64,
        ) -> anyhow::Result<String> {
            // Ordering matters: the flush happens before the summarizer call,
            // so the first response is facts and the second is the summary.
            let mut count = self.call_count.lock().unwrap();
            *count += 1;
            if *count == 1 {
                // flush_durable_facts call
                Ok("[lang] User prefers Rust\n[db] PostgreSQL 16".to_string())
            } else {
                // summarizer call
                Ok("- summarized context".to_string())
            }
        }
        async fn chat(
            &self,
            _request: ChatRequest<'_>,
            _model: &str,
            _temperature: f64,
        ) -> anyhow::Result<ChatResponse> {
            Ok(ChatResponse {
                text: Some("- summarized context".to_string()),
                tool_calls: Vec::new(),
                usage: None,
                reasoning_content: None,
                quota_metadata: None,
            })
        }
    }
    let mem = Arc::new(FactCapture {
        stored: Mutex::new(Vec::new()),
    });
    let provider = FlushThenSummaryProvider {
        call_count: Mutex::new(0),
    };
    // 25 messages against max_history = 21 forces a compaction pass.
    let mut history: Vec<ChatMessage> = Vec::new();
    for i in 0..25 {
        history.push(ChatMessage::user(format!("msg-{i}")));
    }
    let compacted = auto_compact_history(
        &mut history,
        &provider,
        "test-model",
        21,
        None,
        Some(mem.as_ref()),
    )
    .await
    .expect("compaction should succeed");
    assert!(compacted);
    // Both `[key] content` lines from the flush response must be persisted
    // under the `compaction_fact_` key prefix, in response order.
    let stored = mem.stored.lock().unwrap();
    assert_eq!(stored.len(), 2, "should store 2 durable facts");
    assert_eq!(stored[0].0, "compaction_fact_lang");
    assert_eq!(stored[0].1, "User prefers Rust");
    assert_eq!(stored[1].0, "compaction_fact_db");
    assert_eq!(stored[1].1, "PostgreSQL 16");
}
#[tokio::test]
async fn auto_compact_with_memory_caps_fact_flush_at_eight_entries() {
    use crate::memory::{MemoryCategory, MemoryEntry};
    use std::sync::{Arc, Mutex};
    /// Memory stub that records every `(key, content)` pair passed to `store`.
    struct FactCapture {
        stored: Mutex<Vec<(String, String)>>,
    }
    #[async_trait]
    impl Memory for FactCapture {
        async fn store(
            &self,
            key: &str,
            content: &str,
            _category: MemoryCategory,
            _session_id: Option<&str>,
        ) -> anyhow::Result<()> {
            self.stored
                .lock()
                .expect("fact capture lock")
                .push((key.to_string(), content.to_string()));
            Ok(())
        }
        // Remaining trait methods are inert defaults; only `store` matters here.
        async fn recall(
            &self,
            _q: &str,
            _l: usize,
            _s: Option<&str>,
        ) -> anyhow::Result<Vec<MemoryEntry>> {
            Ok(vec![])
        }
        async fn get(&self, _k: &str) -> anyhow::Result<Option<MemoryEntry>> {
            Ok(None)
        }
        async fn list(
            &self,
            _c: Option<&MemoryCategory>,
            _s: Option<&str>,
        ) -> anyhow::Result<Vec<MemoryEntry>> {
            Ok(vec![])
        }
        async fn forget(&self, _k: &str) -> anyhow::Result<bool> {
            Ok(true)
        }
        async fn count(&self) -> anyhow::Result<usize> {
            Ok(0)
        }
        async fn health_check(&self) -> bool {
            true
        }
        fn name(&self) -> &str {
            "fact-capture-cap"
        }
    }
    /// Provider whose first (flush) response offers 12 facts — more than the
    /// cap — and whose later responses return a plain summary.
    struct FlushManyFactsProvider {
        call_count: Mutex<usize>,
    }
    #[async_trait]
    impl Provider for FlushManyFactsProvider {
        async fn chat_with_system(
            &self,
            _system_prompt: Option<&str>,
            _message: &str,
            _model: &str,
            _temperature: f64,
        ) -> anyhow::Result<String> {
            let mut count = self.call_count.lock().expect("provider lock");
            *count += 1;
            if *count == 1 {
                // 12 well-formed "[kN] fact-N" lines for the flush call.
                let lines = (0..12)
                    .map(|idx| format!("[k{idx}] fact-{idx}"))
                    .collect::<Vec<_>>()
                    .join("\n");
                Ok(lines)
            } else {
                Ok("- summarized context".to_string())
            }
        }
        async fn chat(
            &self,
            _request: ChatRequest<'_>,
            _model: &str,
            _temperature: f64,
        ) -> anyhow::Result<ChatResponse> {
            Ok(ChatResponse {
                text: Some("- summarized context".to_string()),
                tool_calls: Vec::new(),
                usage: None,
                reasoning_content: None,
                quota_metadata: None,
            })
        }
    }
    let mem = Arc::new(FactCapture {
        stored: Mutex::new(Vec::new()),
    });
    let provider = FlushManyFactsProvider {
        call_count: Mutex::new(0),
    };
    // 30 messages against max_history = 21 forces a compaction pass.
    let mut history = (0..30)
        .map(|idx| ChatMessage::user(format!("msg-{idx}")))
        .collect::<Vec<_>>();
    let compacted = auto_compact_history(
        &mut history,
        &provider,
        "test-model",
        21,
        None,
        Some(mem.as_ref()),
    )
    .await
    .expect("compaction should succeed");
    assert!(compacted);
    // Only the first COMPACTION_MAX_FLUSH_FACTS (8) facts may be stored,
    // in the order they appeared in the flush response.
    let stored = mem.stored.lock().expect("fact capture lock");
    assert_eq!(stored.len(), COMPACTION_MAX_FLUSH_FACTS);
    assert_eq!(stored[0].0, "compaction_fact_k0");
    assert_eq!(stored[7].0, "compaction_fact_k7");
}
}

View File

@ -1,10 +1,18 @@
use crate::memory::{self, decay, Memory};
use crate::memory::{self, decay, Memory, MemoryCategory};
use async_trait::async_trait;
use std::fmt::Write;
/// Default half-life (days) for time decay in memory loading.
const LOADER_DECAY_HALF_LIFE_DAYS: f64 = 7.0;
/// Score boost applied to `Core` category memories so durable facts and
/// preferences surface even when keyword/semantic similarity is moderate.
const CORE_CATEGORY_SCORE_BOOST: f64 = 0.3;
/// Over-fetch factor: retrieve more candidates than the output limit so
/// that Core boost and re-ranking can select the best subset.
const RECALL_OVER_FETCH_FACTOR: usize = 2;
#[async_trait]
pub trait MemoryLoader: Send + Sync {
async fn load_context(&self, memory: &dyn Memory, user_message: &str)
@ -41,32 +49,47 @@ impl MemoryLoader for DefaultMemoryLoader {
memory: &dyn Memory,
user_message: &str,
) -> anyhow::Result<String> {
let mut entries = memory.recall(user_message, self.limit, None).await?;
// Over-fetch so Core-boosted entries can compete fairly after re-ranking.
let fetch_limit = self.limit * RECALL_OVER_FETCH_FACTOR;
let mut entries = memory.recall(user_message, fetch_limit, None).await?;
if entries.is_empty() {
return Ok(String::new());
}
// Apply time decay: older non-Core memories score lower
// Apply time decay: older non-Core memories score lower.
decay::apply_time_decay(&mut entries, LOADER_DECAY_HALF_LIFE_DAYS);
let mut context = String::from("[Memory context]\n");
for entry in entries {
if memory::is_assistant_autosave_key(&entry.key) {
continue;
}
if let Some(score) = entry.score {
if score < self.min_relevance_score {
continue;
// Apply Core category boost and filter by minimum relevance.
let mut scored: Vec<_> = entries
.iter()
.filter(|e| !memory::is_assistant_autosave_key(&e.key))
.filter_map(|e| {
let base = e.score.unwrap_or(self.min_relevance_score);
let boosted = if e.category == MemoryCategory::Core {
(base + CORE_CATEGORY_SCORE_BOOST).min(1.0)
} else {
base
};
if boosted >= self.min_relevance_score {
Some((e, boosted))
} else {
None
}
}
let _ = writeln!(context, "- {}: {}", entry.key, entry.content);
}
})
.collect();
// If all entries were below threshold, return empty
if context == "[Memory context]\n" {
// Sort by boosted score descending, then truncate to output limit.
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.truncate(self.limit);
if scored.is_empty() {
return Ok(String::new());
}
let mut context = String::from("[Memory context]\n");
for (entry, _) in &scored {
let _ = writeln!(context, "- {}: {}", entry.key, entry.content);
}
context.push('\n');
Ok(context)
}
@ -233,4 +256,93 @@ mod tests {
assert!(!context.contains("assistant_resp_legacy"));
assert!(!context.contains("fabricated detail"));
}
#[tokio::test]
async fn core_category_boost_promotes_low_score_core_entry() {
    let loader = DefaultMemoryLoader::new(2, 0.4);
    // High-scoring Conversation entry: passes the 0.4 threshold on its own.
    let chat = MemoryEntry {
        id: "1".into(),
        key: "chat_detail".into(),
        content: "talked about weather".into(),
        category: MemoryCategory::Conversation,
        timestamp: "now".into(),
        session_id: None,
        score: Some(0.6),
    };
    // Core entry below threshold without boost (0.25 < 0.4),
    // but above it with the +0.3 boost (0.55 >= 0.4).
    let rule = MemoryEntry {
        id: "2".into(),
        key: "project_rule".into(),
        content: "always use async/await".into(),
        category: MemoryCategory::Core,
        timestamp: "now".into(),
        session_id: None,
        score: Some(0.25),
    };
    // Low-score Conversation entry: must be dropped by the filter.
    let noise = MemoryEntry {
        id: "3".into(),
        key: "low_conv".into(),
        content: "irrelevant chatter".into(),
        category: MemoryCategory::Conversation,
        timestamp: "now".into(),
        session_id: None,
        score: Some(0.2),
    };
    let memory = MockMemoryWithEntries {
        entries: Arc::new(vec![chat, rule, noise]),
    };
    let context = loader.load_context(&memory, "code style").await.unwrap();
    // The boosted Core entry must survive the relevance filter.
    assert!(
        context.contains("project_rule"),
        "Core entry should be promoted by boost: {context}"
    );
    // The low-score Conversation entry must not leak into the context.
    assert!(
        !context.contains("low_conv"),
        "Low-score non-Core entry should be filtered: {context}"
    );
}
#[tokio::test]
async fn core_boost_reranks_above_conversation() {
    let loader = DefaultMemoryLoader::new(1, 0.0);
    // Conversation entry with the higher raw score (0.6).
    let conversation = MemoryEntry {
        id: "1".into(),
        key: "conv_high".into(),
        content: "recent conversation".into(),
        category: MemoryCategory::Conversation,
        timestamp: "now".into(),
        session_id: None,
        score: Some(0.6),
    };
    // Core entry: 0.5 + 0.3 boost = 0.8, which outranks 0.6 above.
    let preference = MemoryEntry {
        id: "2".into(),
        key: "core_pref".into(),
        content: "user prefers Rust".into(),
        category: MemoryCategory::Core,
        timestamp: "now".into(),
        session_id: None,
        score: Some(0.5),
    };
    let memory = MockMemoryWithEntries {
        entries: Arc::new(vec![conversation, preference]),
    };
    let context = loader.load_context(&memory, "language").await.unwrap();
    // With limit=1, the boosted Core entry (0.8) must win the single slot.
    assert!(
        context.contains("core_pref"),
        "Boosted Core should rank above Conversation: {context}"
    );
    assert!(
        !context.contains("conv_high"),
        "Conversation should be truncated when limit=1: {context}"
    );
}
}

View File

@ -251,6 +251,14 @@ struct ChannelRuntimeDefaults {
api_url: Option<String>,
reliability: crate::config::ReliabilityConfig,
cost: crate::config::CostConfig,
auto_save_memory: bool,
max_tool_iterations: usize,
min_relevance_score: f64,
message_timeout_secs: u64,
interrupt_on_new_message: bool,
multimodal: crate::config::MultimodalConfig,
query_classification: crate::config::QueryClassificationConfig,
model_routes: Vec<crate::config::ModelRouteConfig>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -1048,6 +1056,14 @@ fn resolved_default_model(config: &Config) -> String {
}
fn runtime_defaults_from_config(config: &Config) -> ChannelRuntimeDefaults {
let message_timeout_secs =
effective_channel_message_timeout_secs(config.channels_config.message_timeout_secs);
let interrupt_on_new_message = config
.channels_config
.telegram
.as_ref()
.is_some_and(|tg| tg.interrupt_on_new_message);
ChannelRuntimeDefaults {
default_provider: resolved_default_provider(config),
model: resolved_default_model(config),
@ -1056,6 +1072,14 @@ fn runtime_defaults_from_config(config: &Config) -> ChannelRuntimeDefaults {
api_url: config.api_url.clone(),
reliability: config.reliability.clone(),
cost: config.cost.clone(),
auto_save_memory: config.memory.auto_save,
max_tool_iterations: config.agent.max_tool_iterations,
min_relevance_score: config.memory.min_relevance_score,
message_timeout_secs,
interrupt_on_new_message,
multimodal: config.multimodal.clone(),
query_classification: config.query_classification.clone(),
model_routes: config.model_routes.clone(),
}
}
@ -1102,6 +1126,14 @@ fn runtime_defaults_snapshot(ctx: &ChannelRuntimeContext) -> ChannelRuntimeDefau
api_url: ctx.api_url.clone(),
reliability: (*ctx.reliability).clone(),
cost: crate::config::CostConfig::default(),
auto_save_memory: ctx.auto_save_memory,
max_tool_iterations: ctx.max_tool_iterations,
min_relevance_score: ctx.min_relevance_score,
message_timeout_secs: ctx.message_timeout_secs,
interrupt_on_new_message: ctx.interrupt_on_new_message,
multimodal: ctx.multimodal.clone(),
query_classification: ctx.query_classification.clone(),
model_routes: ctx.model_routes.clone(),
}
}
@ -1722,14 +1754,14 @@ fn get_route_selection(ctx: &ChannelRuntimeContext, sender_key: &str) -> Channel
/// Classify a user message and return the appropriate route selection with logging.
/// Returns None if classification is disabled or no rules match.
fn classify_message_route(
ctx: &ChannelRuntimeContext,
query_classification: &crate::config::QueryClassificationConfig,
model_routes: &[crate::config::ModelRouteConfig],
message: &str,
) -> Option<ChannelRouteSelection> {
let decision =
crate::agent::classifier::classify_with_decision(&ctx.query_classification, message)?;
let decision = crate::agent::classifier::classify_with_decision(query_classification, message)?;
// Find the matching model route
let route = ctx.model_routes.iter().find(|r| r.hint == decision.hint)?;
let route = model_routes.iter().find(|r| r.hint == decision.hint)?;
tracing::info!(
target: "query_classification",
@ -1956,9 +1988,9 @@ async fn get_or_create_provider(
let provider = create_resilient_provider_nonblocking(
provider_name,
ctx.api_key.clone(),
defaults.api_key.clone(),
api_url.map(ToString::to_string),
ctx.reliability.as_ref().clone(),
defaults.reliability.clone(),
ctx.provider_runtime_options.clone(),
)
.await?;
@ -3446,10 +3478,14 @@ or tune thresholds in config.",
}
}
}
// Try classification first, fall back to sender/default route
let route = classify_message_route(ctx.as_ref(), &msg.content)
.unwrap_or_else(|| get_route_selection(ctx.as_ref(), &history_key));
let runtime_defaults = runtime_defaults_snapshot(ctx.as_ref());
// Try classification first, fall back to sender/default route.
let route = classify_message_route(
&runtime_defaults.query_classification,
&runtime_defaults.model_routes,
&msg.content,
)
.unwrap_or_else(|| get_route_selection(ctx.as_ref(), &history_key));
let active_provider = match get_or_create_provider(ctx.as_ref(), &route.provider).await {
Ok(provider) => provider,
Err(err) => {
@ -3469,7 +3505,9 @@ or tune thresholds in config.",
return;
}
};
if ctx.auto_save_memory && msg.content.chars().count() >= AUTOSAVE_MIN_MESSAGE_CHARS {
if runtime_defaults.auto_save_memory
&& msg.content.chars().count() >= AUTOSAVE_MIN_MESSAGE_CHARS
{
let autosave_key = conversation_memory_key(&msg);
let _ = ctx
.memory
@ -3532,7 +3570,7 @@ or tune thresholds in config.",
let memory_context = build_memory_context(
ctx.memory.as_ref(),
&msg.content,
ctx.min_relevance_score,
runtime_defaults.min_relevance_score,
Some(&history_key),
)
.await;
@ -3686,8 +3724,10 @@ or tune thresholds in config.",
Cancelled,
}
let timeout_budget_secs =
channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations);
let timeout_budget_secs = channel_message_timeout_budget_secs(
runtime_defaults.message_timeout_secs,
runtime_defaults.max_tool_iterations,
);
let cost_enforcement_context = crate::agent::loop_::create_cost_enforcement_context(
&runtime_defaults.cost,
ctx.workspace_dir.as_path(),
@ -3751,8 +3791,8 @@ or tune thresholds in config.",
Some(ctx.approval_manager.as_ref()),
msg.channel.as_str(),
non_cli_approval_context,
&ctx.multimodal,
ctx.max_tool_iterations,
&runtime_defaults.multimodal,
runtime_defaults.max_tool_iterations,
Some(cancellation_token.clone()),
delta_tx,
ctx.hooks.as_deref(),
@ -3931,7 +3971,7 @@ or tune thresholds in config.",
&history_key,
ChatMessage::assistant(&history_response),
);
if ctx.auto_save_memory
if runtime_defaults.auto_save_memory
&& delivered_response.chars().count() >= AUTOSAVE_MIN_MESSAGE_CHARS
{
let assistant_key = assistant_memory_key(&msg);
@ -4044,7 +4084,7 @@ or tune thresholds in config.",
}
}
} else if is_tool_iteration_limit_error(&e) {
let limit = ctx.max_tool_iterations.max(1);
let limit = runtime_defaults.max_tool_iterations.max(1);
let pause_text = format!(
"⚠️ Reached tool-iteration limit ({limit}) for this turn. Context and progress were preserved. Reply \"continue\" to resume, or increase `agent.max_tool_iterations`."
);
@ -4140,7 +4180,9 @@ or tune thresholds in config.",
LlmExecutionResult::Completed(Err(_)) => {
let timeout_msg = format!(
"LLM response timed out after {}s (base={}s, max_tool_iterations={})",
timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations
timeout_budget_secs,
runtime_defaults.message_timeout_secs,
runtime_defaults.max_tool_iterations
);
runtime_trace::record_event(
"channel_message_timeout",
@ -4221,8 +4263,9 @@ async fn run_message_dispatch_loop(
let task_sequence = Arc::clone(&task_sequence);
workers.spawn(async move {
let _permit = permit;
let runtime_defaults = runtime_defaults_snapshot(worker_ctx.as_ref());
let interrupt_enabled =
worker_ctx.interrupt_on_new_message && msg.channel == "telegram";
runtime_defaults.interrupt_on_new_message && msg.channel == "telegram";
let sender_scope_key = interruption_scope_key(&msg);
let cancellation_token = CancellationToken::new();
let completion = Arc::new(InFlightTaskCompletion::new());
@ -9503,6 +9546,14 @@ BTC is currently around $65,000 based on latest tool output."#
api_url: None,
reliability: crate::config::ReliabilityConfig::default(),
cost: crate::config::CostConfig::default(),
auto_save_memory: false,
max_tool_iterations: 5,
min_relevance_score: 0.0,
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
interrupt_on_new_message: false,
multimodal: crate::config::MultimodalConfig::default(),
query_classification: crate::config::QueryClassificationConfig::default(),
model_routes: Vec::new(),
},
perplexity_filter: crate::config::PerplexityFilterConfig::default(),
outbound_leak_guard: crate::config::OutboundLeakGuardConfig::default(),
@ -9685,6 +9736,13 @@ BTC is currently around $65,000 based on latest tool output."#
cfg.default_provider = Some("ollama".to_string());
cfg.default_model = Some("llama3.2".to_string());
cfg.api_key = Some("http://127.0.0.1:11434".to_string());
cfg.memory.auto_save = false;
cfg.memory.min_relevance_score = 0.15;
cfg.agent.max_tool_iterations = 5;
cfg.channels_config.message_timeout_secs = 45;
cfg.multimodal.allow_remote_fetch = false;
cfg.query_classification.enabled = false;
cfg.model_routes = vec![];
cfg.autonomy.non_cli_natural_language_approval_mode =
crate::config::NonCliNaturalLanguageApprovalMode::Direct;
cfg.autonomy.non_cli_excluded_tools = vec!["shell".to_string()];
@ -9751,6 +9809,14 @@ BTC is currently around $65,000 based on latest tool output."#
runtime_outbound_leak_guard_snapshot(runtime_ctx.as_ref()).action,
crate::config::OutboundLeakGuardAction::Redact
);
let defaults = runtime_defaults_snapshot(runtime_ctx.as_ref());
assert!(!defaults.auto_save_memory);
assert_eq!(defaults.min_relevance_score, 0.15);
assert_eq!(defaults.max_tool_iterations, 5);
assert_eq!(defaults.message_timeout_secs, 45);
assert!(!defaults.multimodal.allow_remote_fetch);
assert!(!defaults.query_classification.enabled);
assert!(defaults.model_routes.is_empty());
cfg.autonomy.non_cli_natural_language_approval_mode =
crate::config::NonCliNaturalLanguageApprovalMode::Disabled;
@ -9766,6 +9832,28 @@ BTC is currently around $65,000 based on latest tool output."#
cfg.security.perplexity_filter.perplexity_threshold = 12.5;
cfg.security.outbound_leak_guard.action = crate::config::OutboundLeakGuardAction::Block;
cfg.security.outbound_leak_guard.sensitivity = 0.92;
cfg.memory.auto_save = true;
cfg.memory.min_relevance_score = 0.65;
cfg.agent.max_tool_iterations = 11;
cfg.channels_config.message_timeout_secs = 120;
cfg.multimodal.allow_remote_fetch = true;
cfg.query_classification.enabled = true;
cfg.query_classification.rules = vec![crate::config::ClassificationRule {
hint: "reasoning".to_string(),
keywords: vec!["analyze".to_string()],
patterns: vec!["deep".to_string()],
min_length: None,
max_length: None,
priority: 10,
}];
cfg.model_routes = vec![crate::config::ModelRouteConfig {
hint: "reasoning".to_string(),
provider: "openrouter".to_string(),
model: "openai/gpt-5.2".to_string(),
max_tokens: Some(512),
api_key: None,
transport: None,
}];
cfg.save().await.expect("save updated config");
maybe_apply_runtime_config_update(runtime_ctx.as_ref())
@ -9797,6 +9885,15 @@ BTC is currently around $65,000 based on latest tool output."#
crate::config::OutboundLeakGuardAction::Block
);
assert_eq!(leak_guard_cfg.sensitivity, 0.92);
let defaults = runtime_defaults_snapshot(runtime_ctx.as_ref());
assert!(defaults.auto_save_memory);
assert_eq!(defaults.min_relevance_score, 0.65);
assert_eq!(defaults.max_tool_iterations, 11);
assert_eq!(defaults.message_timeout_secs, 120);
assert!(defaults.multimodal.allow_remote_fetch);
assert!(defaults.query_classification.enabled);
assert_eq!(defaults.query_classification.rules.len(), 1);
assert_eq!(defaults.model_routes.len(), 1);
let mut store = runtime_config_store()
.lock()