- Expand communication style presets (professional, expressive, custom)
- Enrich SOUL.md with human-like tone and emoji-awareness guidance
- Add crash recovery and sub-task scoping guidance to AGENTS.md scaffold
- Add 'Use when / Don't use when' guidance to TOOLS.md and runtime prompts
- Implement memory hygiene system with configurable archiving and retention
- Add MemoryConfig options: hygiene_enabled, archive_after_days, purge_after_days, conversation_retention_days
- Archive old daily memory and session files to archive subdirectories
- Purge old archives and prune stale SQLite conversation rows
- Add comprehensive tests for new features
230 lines
7.1 KiB
Rust
230 lines
7.1 KiB
Rust
use super::Provider;
|
|
use async_trait::async_trait;
|
|
use std::time::Duration;
|
|
|
|
/// Provider wrapper with retry + fallback behavior.
|
|
pub struct ReliableProvider {
|
|
providers: Vec<(String, Box<dyn Provider>)>,
|
|
max_retries: u32,
|
|
base_backoff_ms: u64,
|
|
}
|
|
|
|
impl ReliableProvider {
|
|
pub fn new(
|
|
providers: Vec<(String, Box<dyn Provider>)>,
|
|
max_retries: u32,
|
|
base_backoff_ms: u64,
|
|
) -> Self {
|
|
Self {
|
|
providers,
|
|
max_retries,
|
|
base_backoff_ms: base_backoff_ms.max(50),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for ReliableProvider {
|
|
async fn chat_with_system(
|
|
&self,
|
|
system_prompt: Option<&str>,
|
|
message: &str,
|
|
model: &str,
|
|
temperature: f64,
|
|
) -> anyhow::Result<String> {
|
|
let mut failures = Vec::new();
|
|
|
|
for (provider_name, provider) in &self.providers {
|
|
let mut backoff_ms = self.base_backoff_ms;
|
|
|
|
for attempt in 0..=self.max_retries {
|
|
match provider
|
|
.chat_with_system(system_prompt, message, model, temperature)
|
|
.await
|
|
{
|
|
Ok(resp) => {
|
|
if attempt > 0 {
|
|
tracing::info!(
|
|
provider = provider_name,
|
|
attempt,
|
|
"Provider recovered after retries"
|
|
);
|
|
}
|
|
return Ok(resp);
|
|
}
|
|
Err(e) => {
|
|
failures.push(format!(
|
|
"{provider_name} attempt {}/{}: {e}",
|
|
attempt + 1,
|
|
self.max_retries + 1
|
|
));
|
|
|
|
if attempt < self.max_retries {
|
|
tracing::warn!(
|
|
provider = provider_name,
|
|
attempt = attempt + 1,
|
|
max_retries = self.max_retries,
|
|
"Provider call failed, retrying"
|
|
);
|
|
tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
|
|
backoff_ms = (backoff_ms.saturating_mul(2)).min(10_000);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
tracing::warn!(provider = provider_name, "Switching to fallback provider");
|
|
}
|
|
|
|
anyhow::bail!("All providers failed. Attempts:\n{}", failures.join("\n"))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    /// Scripted provider: fails its first `fail_until_attempt` calls with
    /// `error`, then answers every later call with `response`.
    struct MockProvider {
        /// Shared counter of how many times the provider was called.
        calls: Arc<AtomicUsize>,
        fail_until_attempt: usize,
        response: &'static str,
        error: &'static str,
    }

    #[async_trait]
    impl Provider for MockProvider {
        async fn chat_with_system(
            &self,
            _system_prompt: Option<&str>,
            _message: &str,
            _model: &str,
            _temperature: f64,
        ) -> anyhow::Result<String> {
            // 1-based index of the current call.
            let call_number = self.calls.fetch_add(1, Ordering::SeqCst) + 1;
            if call_number > self.fail_until_attempt {
                return Ok(self.response.to_string());
            }
            anyhow::bail!(self.error)
        }
    }

    /// Boxes a `MockProvider` that records its calls in `calls`.
    fn mock(
        calls: &Arc<AtomicUsize>,
        fail_until_attempt: usize,
        response: &'static str,
        error: &'static str,
    ) -> Box<dyn Provider> {
        Box::new(MockProvider {
            calls: Arc::clone(calls),
            fail_until_attempt,
            response,
            error,
        })
    }

    #[tokio::test]
    async fn succeeds_without_retry() {
        let calls = Arc::new(AtomicUsize::new(0));
        let chain = vec![(String::from("primary"), mock(&calls, 0, "ok", "boom"))];
        let provider = ReliableProvider::new(chain, 2, 1);

        let reply = provider.chat("hello", "test", 0.0).await.unwrap();

        assert_eq!(reply, "ok");
        assert_eq!(calls.load(Ordering::SeqCst), 1);
    }

    #[tokio::test]
    async fn retries_then_recovers() {
        let calls = Arc::new(AtomicUsize::new(0));
        let chain = vec![(
            String::from("primary"),
            mock(&calls, 1, "recovered", "temporary"),
        )];
        let provider = ReliableProvider::new(chain, 2, 1);

        let reply = provider.chat("hello", "test", 0.0).await.unwrap();

        assert_eq!(reply, "recovered");
        assert_eq!(calls.load(Ordering::SeqCst), 2);
    }

    #[tokio::test]
    async fn falls_back_after_retries_exhausted() {
        let primary_calls = Arc::new(AtomicUsize::new(0));
        let fallback_calls = Arc::new(AtomicUsize::new(0));
        let chain = vec![
            (
                String::from("primary"),
                mock(&primary_calls, usize::MAX, "never", "primary down"),
            ),
            (
                String::from("fallback"),
                mock(&fallback_calls, 0, "from fallback", "fallback down"),
            ),
        ];
        let provider = ReliableProvider::new(chain, 1, 1);

        let reply = provider.chat("hello", "test", 0.0).await.unwrap();

        assert_eq!(reply, "from fallback");
        // One initial attempt plus one retry on the primary, then one fallback call.
        assert_eq!(primary_calls.load(Ordering::SeqCst), 2);
        assert_eq!(fallback_calls.load(Ordering::SeqCst), 1);
    }

    #[tokio::test]
    async fn returns_aggregated_error_when_all_providers_fail() {
        let chain = vec![
            (
                String::from("p1"),
                mock(&Arc::new(AtomicUsize::new(0)), usize::MAX, "never", "p1 error"),
            ),
            (
                String::from("p2"),
                mock(&Arc::new(AtomicUsize::new(0)), usize::MAX, "never", "p2 error"),
            ),
        ];
        let provider = ReliableProvider::new(chain, 0, 1);

        let err = provider
            .chat("hello", "test", 0.0)
            .await
            .expect_err("all providers should fail");
        let msg = err.to_string();

        for expected in ["All providers failed", "p1 attempt 1/1", "p2 attempt 1/1"] {
            assert!(msg.contains(expected));
        }
    }
}
|