From af8e6cf846ba2d269516b13d785237236f40b9cd Mon Sep 17 00:00:00 2001 From: argenis de la rosa Date: Mon, 2 Mar 2026 13:32:32 -0500 Subject: [PATCH 1/5] fix(daemon): handle sigterm shutdown signal Wait for either SIGINT or SIGTERM on Unix so daemon mode behaves correctly under container and process-manager termination flows. Record signal-specific shutdown reasons and add unit tests for shutdown signal labeling. Refs #2529 (cherry picked from commit 7bdf8eb60993a1c1efe74cd091ffcc06a1fb7fee) --- src/daemon/mod.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index 76f02e762..c998a5417 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -8,6 +8,40 @@ use tokio::time::Duration; const STATUS_FLUSH_SECONDS: u64 = 5; +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum ShutdownSignal { + CtrlC, + SigTerm, +} + +fn shutdown_reason(signal: ShutdownSignal) -> &'static str { + match signal { + ShutdownSignal::CtrlC => "shutdown requested (SIGINT)", + ShutdownSignal::SigTerm => "shutdown requested (SIGTERM)", + } +} + +async fn wait_for_shutdown_signal() -> Result { + #[cfg(unix)] + { + use tokio::signal::unix::{signal, SignalKind}; + + let mut sigterm = signal(SignalKind::terminate())?; + tokio::select! { + ctrl_c = tokio::signal::ctrl_c() => { + ctrl_c?; + Ok(ShutdownSignal::CtrlC) + } + _ = sigterm.recv() => Ok(ShutdownSignal::SigTerm), + } + } + #[cfg(not(unix))] + { + tokio::signal::ctrl_c().await?; + Ok(ShutdownSignal::CtrlC) + } +} + pub async fn run(config: Config, host: String, port: u16) -> Result<()> { let initial_backoff = config.reliability.channel_initial_backoff_secs.max(1); let max_backoff = config @@ -90,10 +124,10 @@ pub async fn run(config: Config, host: String, port: u16) -> Result<()> { println!("🧠 ZeroClaw daemon started"); println!(" Gateway: http://{host}:{port}"); println!(" Components: gateway, channels, heartbeat, scheduler"); - println!(" Ctrl+C to stop"); + println!(" Ctrl+C or SIGTERM to stop"); - tokio::signal::ctrl_c().await?; - crate::health::mark_component_error("daemon", "shutdown requested"); + let signal = wait_for_shutdown_signal().await?; + crate::health::mark_component_error("daemon", shutdown_reason(signal)); for handle in &handles { handle.abort(); @@ -350,6 +384,22 @@ mod tests { assert_eq!(path, tmp.path().join("daemon_state.json")); } + #[test] + fn shutdown_reason_for_ctrl_c_mentions_sigint() { + assert_eq!( + shutdown_reason(ShutdownSignal::CtrlC), + "shutdown requested (SIGINT)" + ); + } + + #[test] + fn shutdown_reason_for_sigterm_mentions_sigterm() { + assert_eq!( + shutdown_reason(ShutdownSignal::SigTerm), + "shutdown requested (SIGTERM)" + ); + } + #[tokio::test] async fn supervisor_marks_error_and_restart_on_failure() { let handle = spawn_component_supervisor("daemon-test-fail", 1, 1, || async { From b171704b7208bf3538c4df9c895ca68f0213efb9 Mon Sep 17 00:00:00 2001 From: argenis de la rosa Date: Wed, 4 Mar 2026 05:05:10 -0500 Subject: [PATCH 2/5] fix(daemon): add shutdown grace window and signal hint parity (cherry picked from commit 61cc0aad34d70c73a0f27ed04542a907fd4482b2) --- src/daemon/mod.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index c998a5417..bfa2d27ac 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -7,6 +7,7 @@ use tokio::task::JoinHandle; use tokio::time::Duration; const STATUS_FLUSH_SECONDS: u64 = 5; +const SHUTDOWN_GRACE_SECONDS: u64 = 5; #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum ShutdownSignal { @@ -21,6 +22,16 @@ fn shutdown_reason(signal: ShutdownSignal) -> &'static str { } } +#[cfg(unix)] +fn shutdown_hint() -> &'static str { + "Ctrl+C or SIGTERM to stop" +} + +#[cfg(not(unix))] +fn shutdown_hint() -> &'static str { + "Ctrl+C to stop" +} + async fn wait_for_shutdown_signal() -> Result { #[cfg(unix)] { @@ -32,7 +43,10 @@ async fn wait_for_shutdown_signal() -> Result { ctrl_c?; Ok(ShutdownSignal::CtrlC) } - _ = sigterm.recv() => Ok(ShutdownSignal::SigTerm), + sigterm_result = sigterm.recv() => match sigterm_result { + Some(()) => Ok(ShutdownSignal::SigTerm), + None => bail!("SIGTERM signal stream unexpectedly closed"), + }, } } #[cfg(not(unix))] @@ -124,19 +138,40 @@ pub async fn run(config: Config, host: String, port: u16) -> Result<()> { println!("🧠 ZeroClaw daemon started"); println!(" Gateway: http://{host}:{port}"); println!(" Components: gateway, channels, heartbeat, scheduler"); - println!(" Ctrl+C or SIGTERM to stop"); + println!(" {}", shutdown_hint()); let signal = wait_for_shutdown_signal().await?; crate::health::mark_component_error("daemon", shutdown_reason(signal)); + let aborted = + shutdown_handles_with_grace(handles, Duration::from_secs(SHUTDOWN_GRACE_SECONDS)).await; + if aborted > 0 { + tracing::warn!( + aborted, + grace_seconds = SHUTDOWN_GRACE_SECONDS, + "Forced shutdown for daemon tasks that exceeded graceful drain window" + ); + } + Ok(()) +} + +async fn shutdown_handles_with_grace(handles: Vec>, grace: Duration) -> usize { + let deadline = tokio::time::Instant::now() + grace; + while !handles.iter().all(JoinHandle::is_finished) && tokio::time::Instant::now() < deadline { + tokio::time::sleep(Duration::from_millis(50)).await; + } + + let mut aborted = 0usize; for handle in &handles { - handle.abort(); + if !handle.is_finished() { + handle.abort(); + aborted += 1; + } } for handle in handles { let _ = handle.await; } - - Ok(()) + aborted } pub fn state_file_path(config: &Config) -> PathBuf { @@ -400,6 +435,38 @@ mod tests { ); } + #[test] + fn shutdown_hint_matches_platform_signal_support() { + #[cfg(unix)] + assert_eq!(shutdown_hint(), "Ctrl+C or SIGTERM to stop"); + + #[cfg(not(unix))] + assert_eq!(shutdown_hint(), "Ctrl+C to stop"); + } + + #[tokio::test] + async fn graceful_shutdown_waits_for_completed_handles_without_abort() { + let finished = tokio::spawn(async {}); + let aborted = shutdown_handles_with_grace(vec![finished], Duration::from_millis(20)).await; + assert_eq!(aborted, 0); + } + + #[tokio::test] + async fn graceful_shutdown_aborts_stuck_handles_after_timeout() { + let never_finishes = tokio::spawn(async { + tokio::time::sleep(Duration::from_secs(30)).await; + }); + let started = tokio::time::Instant::now(); + let aborted = + shutdown_handles_with_grace(vec![never_finishes], Duration::from_millis(20)).await; + + assert_eq!(aborted, 1); + assert!( + started.elapsed() < Duration::from_secs(2), + "shutdown should not block indefinitely" + ); + } + #[tokio::test] async fn supervisor_marks_error_and_restart_on_failure() { let handle = spawn_component_supervisor("daemon-test-fail", 1, 1, || async { From 6518210953124e608e244e81c4d6a0970fa59bcc Mon Sep 17 00:00:00 2001 From: argenis de la rosa Date: Mon, 2 Mar 2026 13:32:28 -0500 Subject: [PATCH 3/5] fix(channels): use routed provider for channel startup Initialize channel runtime providers through routed provider construction so model_routes, hint defaults, and route-scoped credentials are honored. Add a regression test that verifies start_channels succeeds when global provider credentials are absent but route-level config is present. Refs #2537 (cherry picked from commit ec9bc3fefcb4ddab9de5a0e7ea488b84e4bc5734) --- src/channels/mod.rs | 65 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/src/channels/mod.rs b/src/channels/mod.rs index fb343e34a..cc8866df7 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -1693,6 +1693,31 @@ async fn create_resilient_provider_nonblocking( .context("failed to join provider initialization task")? } +async fn create_routed_provider_nonblocking( + provider_name: &str, + api_key: Option, + api_url: Option, + reliability: crate::config::ReliabilityConfig, + model_routes: Vec, + default_model: String, + provider_runtime_options: providers::ProviderRuntimeOptions, +) -> anyhow::Result> { + let provider_name = provider_name.to_string(); + tokio::task::spawn_blocking(move || { + providers::create_routed_provider_with_options( + &provider_name, + api_key.as_deref(), + api_url.as_deref(), + &reliability, + &model_routes, + &default_model, + &provider_runtime_options, + ) + }) + .await + .context("failed to join routed provider initialization task")? +} + fn build_models_help_response(current: &ChannelRouteSelection, workspace_dir: &Path) -> String { let mut response = String::new(); let _ = writeln!( @@ -4561,6 +4586,7 @@ pub async fn doctor_channels(config: Config) -> Result<()> { #[allow(clippy::too_many_lines)] pub async fn start_channels(config: Config) -> Result<()> { let provider_name = resolved_default_provider(&config); + let model = resolved_default_model(&config); let provider_runtime_options = providers::ProviderRuntimeOptions { auth_profile_override: None, provider_api_url: config.api_url.clone(), @@ -4573,11 +4599,13 @@ pub async fn start_channels(config: Config) -> Result<()> { model_support_vision: config.model_support_vision, }; let provider: Arc = Arc::from( - create_resilient_provider_nonblocking( + create_routed_provider_nonblocking( &provider_name, config.api_key.clone(), config.api_url.clone(), config.reliability.clone(), + config.model_routes.clone(), + model.clone(), provider_runtime_options.clone(), ) .await?, @@ -4611,7 +4639,6 @@ pub async fn start_channels(config: Config) -> Result<()> { &config.autonomy, &config.workspace_dir, )); - let model = resolved_default_model(&config); let temperature = config.default_temperature; let mem: Arc = Arc::from(memory::create_memory_with_storage( &config.memory, @@ -8067,6 +8094,40 @@ BTC is currently around $65,000 based on latest tool output."# store.remove(&config_path); } + #[tokio::test] + async fn start_channels_uses_model_routes_when_global_provider_key_is_missing() { + let temp = tempfile::TempDir::new().expect("temp dir"); + let workspace_dir = temp.path().join("workspace"); + std::fs::create_dir_all(&workspace_dir).expect("workspace dir"); + + let mut cfg = Config::default(); + cfg.workspace_dir = workspace_dir; + cfg.config_path = temp.path().join("config.toml"); + cfg.default_provider = None; + cfg.api_key = None; + cfg.default_model = Some("hint:fast".to_string()); + cfg.model_routes = vec![crate::config::ModelRouteConfig { + hint: "fast".to_string(), + provider: "openai-codex".to_string(), + model: "gpt-5.3-codex".to_string(), + max_tokens: Some(512), + api_key: Some("route-specific-key".to_string()), + transport: Some("sse".to_string()), + }]; + + let config_path = cfg.config_path.clone(); + let result = start_channels(cfg).await; + assert!( + result.is_ok(), + "start_channels should support routed providers without global credentials: {result:?}" + ); + + let mut store = runtime_config_store() + .lock() + .unwrap_or_else(|e| e.into_inner()); + store.remove(&config_path); + } + #[tokio::test] async fn process_channel_message_respects_configured_max_tool_iterations_above_default() { let channel_impl = Arc::new(RecordingChannel::default()); From 995f06a8bbfd7ce66d44e29c5533be4e292ee867 Mon Sep 17 00:00:00 2001 From: argenis de la rosa Date: Tue, 3 Mar 2026 15:52:24 -0500 Subject: [PATCH 4/5] test(channels): ensure runtime config cleanup before assert (cherry picked from commit 7e888d0a402a1033a680e131676ef63710218c0d) --- src/channels/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/channels/mod.rs b/src/channels/mod.rs index cc8866df7..8694594c4 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -8117,15 +8117,15 @@ BTC is currently around $65,000 based on latest tool output."# let config_path = cfg.config_path.clone(); let result = start_channels(cfg).await; - assert!( - result.is_ok(), - "start_channels should support routed providers without global credentials: {result:?}" - ); - let mut store = runtime_config_store() .lock() .unwrap_or_else(|e| e.into_inner()); store.remove(&config_path); + + assert!( + result.is_ok(), + "start_channels should support routed providers without global credentials: {result:?}" + ); } #[tokio::test] From c6aff6b4c5449bda6ec2de96173b354f01653a21 Mon Sep 17 00:00:00 2001 From: argenis de la rosa Date: Wed, 4 Mar 2026 06:58:20 -0500 Subject: [PATCH 5/5] fix(backport): align #2567 changes with dev schema --- src/channels/mod.rs | 1 - src/daemon/mod.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/channels/mod.rs b/src/channels/mod.rs index 8694594c4..447bc53c7 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -8112,7 +8112,6 @@ BTC is currently around $65,000 based on latest tool output."# model: "gpt-5.3-codex".to_string(), max_tokens: Some(512), api_key: Some("route-specific-key".to_string()), - transport: Some("sse".to_string()), }]; let config_path = cfg.config_path.clone(); diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index bfa2d27ac..48793221a 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -45,7 +45,7 @@ async fn wait_for_shutdown_signal() -> Result { } sigterm_result = sigterm.recv() => match sigterm_result { Some(()) => Ok(ShutdownSignal::SigTerm), - None => bail!("SIGTERM signal stream unexpectedly closed"), + None => anyhow::bail!("SIGTERM signal stream unexpectedly closed"), }, } }