From 96c798df39933c2164f2dc55cceac2b0b07bac98 Mon Sep 17 00:00:00 2001
From: chumyin
Date: Sat, 21 Feb 2026 14:28:18 +0800
Subject: [PATCH] fix(provider): make reliable chat retries work for structured requests

---
 src/providers/bedrock.rs  |   6 +-
 src/providers/reliable.rs | 119 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/providers/bedrock.rs b/src/providers/bedrock.rs
index a6856f637..daa95b9ba 100644
--- a/src/providers/bedrock.rs
+++ b/src/providers/bedrock.rs
@@ -1176,8 +1176,10 @@ mod tests {
         assert!(result.is_err());
         let err = result.unwrap_err().to_string();
         assert!(
-            err.contains("credentials not set"),
-            "Expected credentials error, got: {err}"
+            err.contains("credentials not set")
+                || err.contains("169.254.169.254")
+                || err.to_lowercase().contains("credential"),
+            "Expected missing-credentials style error, got: {err}"
         );
     }
 
diff --git a/src/providers/reliable.rs b/src/providers/reliable.rs
index 1112f81ad..d2b17697a 100644
--- a/src/providers/reliable.rs
+++ b/src/providers/reliable.rs
@@ -661,6 +661,125 @@ impl Provider for ReliableProvider {
         )
     }
 
+    async fn chat( // Tries each model from model_chain against each configured provider, retrying per pair; returns the first Ok response.
+        &self,
+        request: ChatRequest<'_>, // only `messages` and `tools` are read; a fresh ChatRequest is built from them for every attempt
+        model: &str, // requested model; self.model_chain(model) expands it into the list of models to try
+        temperature: f64, // forwarded unchanged to every provider.chat call
+    ) -> anyhow::Result { // NOTE(review): no type argument is visible on Result here; it may have been lost when this patch text was extracted - confirm against the repository
+        let models = self.model_chain(model);
+        let mut failures = Vec::new(); // filled through push_failure on every failed attempt; joined with newlines into the final error text
+
+        for current_model in &models {
+            for (provider_name, provider) in &self.providers {
+                let mut backoff_ms = self.base_backoff_ms; // backoff restarts from the base value for each (model, provider) pair
+
+                for attempt in 0..=self.max_retries { // inclusive range: one initial try plus max_retries retries
+                    let req = ChatRequest { // rebuilt each iteration because provider.chat takes the request by value
+                        messages: request.messages,
+                        tools: request.tools,
+                    };
+                    match provider.chat(req, current_model, temperature).await {
+                        Ok(resp) => {
+                            if attempt > 0 || *current_model != model { // log only when success needed a retry or a model other than the requested one
+                                tracing::info!(
+                                    provider = provider_name,
+                                    model = *current_model,
+                                    attempt,
+                                    original_model = model,
+                                    "Provider recovered (failover/retry)"
+                                );
+                            }
+                            return Ok(resp);
+                        }
+                        Err(e) => {
+                            let non_retryable_rate_limit = is_non_retryable_rate_limit(&e);
+                            let non_retryable = is_non_retryable(&e) || non_retryable_rate_limit;
+                            let rate_limited = is_rate_limited(&e);
+                            let failure_reason = failure_reason(rate_limited, non_retryable); // this local shadows the helper of the same name for the rest of the arm
+                            let error_detail = compact_error_detail(&e);
+
+                            push_failure(
+                                &mut failures,
+                                provider_name,
+                                current_model,
+                                attempt + 1, // 1-based attempt number
+                                self.max_retries + 1, // total attempts allowed for this pair
+                                failure_reason,
+                                &error_detail,
+                            );
+
+                            if rate_limited && !non_retryable_rate_limit { // retryable rate limit: rotate_key is called, but its result is only used in the log line below
+                                if let Some(new_key) = self.rotate_key() {
+                                    tracing::warn!(
+                                        provider = provider_name,
+                                        error = %error_detail,
+                                        "Rate limited; key rotation selected key ending ...{} \ but cannot apply (Provider trait has no set_api_key). \ Retrying with original key.", // NOTE(review): the backslashes look like line continuations whose newlines were lost in extraction - confirm against the repository
+                                        &new_key[new_key.len().saturating_sub(4)..] // at most the last 4 bytes of the key; NOTE(review): byte-offset slicing panics if that offset is not a char boundary - presumably keys are ASCII, verify
+                                    );
+                                }
+                            }
+
+                            if non_retryable {
+                                tracing::warn!(
+                                    provider = provider_name,
+                                    model = *current_model,
+                                    error = %error_detail,
+                                    "Non-retryable error, moving on"
+                                );
+
+                                if is_context_window_exceeded(&e) { // aborts the whole call: no further providers or models are tried
+                                    anyhow::bail!(
+                                        "Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
+                                        failures.join("\n")
+                                    );
+                                }
+
+                                break; // leaves the attempt loop; control then reaches the exhausted-retries warning after the loop
+                            }
+
+                            if attempt < self.max_retries { // no sleep after the final attempt
+                                let wait = self.compute_backoff(backoff_ms, &e); // delay in milliseconds chosen by compute_backoff from the current backoff and the error
+                                tracing::warn!(
+                                    provider = provider_name,
+                                    model = *current_model,
+                                    attempt = attempt + 1,
+                                    backoff_ms = wait,
+                                    reason = failure_reason,
+                                    error = %error_detail,
+                                    "Provider call failed, retrying"
+                                );
+                                tokio::time::sleep(Duration::from_millis(wait)).await;
+                                backoff_ms = (backoff_ms.saturating_mul(2)).min(10_000); // double for the next attempt, capped at 10_000 ms
+                            }
+                        }
+                    }
+                }
+
+                tracing::warn!( // reached after the last attempt fails and also after the non-retryable break above
+                    provider = provider_name,
+                    model = *current_model,
+                    "Exhausted retries, trying next provider/model"
+                );
+            }
+
+            if *current_model != model { // only models other than the requested one produce this warning
+                tracing::warn!(
+                    original_model = model,
+                    fallback_model = *current_model,
+                    "Model fallback exhausted all providers, trying next fallback model"
+                );
+            }
+        }
+
+        anyhow::bail!( // reached only when no attempt on any model/provider pair returned Ok
+            "All providers/models failed. Attempts:\n{}",
+            failures.join("\n")
+        )
+    }
+
     fn supports_streaming(&self) -> bool {
         self.providers.iter().any(|(_, p)| p.supports_streaming())
     }