From c293561be23f3b7901426ccd8d25a37b4b83fd41 Mon Sep 17 00:00:00 2001 From: Chummy Date: Wed, 25 Feb 2026 02:18:16 +0000 Subject: [PATCH] supersede: file-replay changes from #1639 Automated conflict recovery via changed-file replay on latest dev. --- README.md | 8 +- docs/config-reference.md | 81 ++++---- src/agent/agent.rs | 70 ++++++- src/agent/mod.rs | 1 + src/agent/research.rs | 362 ++++++++++++++++++++++++++++++++++ src/config/mod.rs | 18 +- src/config/schema.rs | 330 +++++++++++-------------------- src/onboard/wizard.rs | 4 +- tests/agent_e2e.rs | 411 ++++++++++++++++++++++++++++++++++++++- 9 files changed, 1001 insertions(+), 284 deletions(-) create mode 100644 src/agent/research.rs diff --git a/README.md b/README.md index 0cc91bcb5..92b230fa3 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ Use this board for important notices (breaking changes, security advisories, mai - πŸ’° **Cost-Efficient Deployment:** Designed for low-cost boards and small cloud instances without heavyweight runtime dependencies. - ⚑ **Fast Cold Starts:** Single-binary Rust runtime keeps command and daemon startup near-instant for daily operations. - 🌍 **Portable Architecture:** One binary-first workflow across ARM, x86, and RISC-V with swappable providers/channels/tools. +- πŸ” **Research Phase:** Proactive information gathering through tools before response generation β€” reduces hallucinations by fact-checking first. ### Why teams pick ZeroClaw @@ -241,7 +242,6 @@ cd zeroclaw ./bootstrap.sh --onboard --api-key "sk-..." --provider openrouter [--model "openrouter/auto"] # Optional: run bootstrap + onboarding fully in Docker-compatible mode -# See docs/docker-setup.md for full Docker guide ./bootstrap.sh --docker # Optional: force Podman as container CLI @@ -1023,12 +1023,6 @@ You can also override at runtime with `ZEROCLAW_OPEN_SKILLS_ENABLED`, `ZEROCLAW_ Skill installs are now gated by a built-in static security audit. 
`zeroclaw skills install ` blocks symlinks, script-like files, unsafe markdown link patterns, and high-risk shell payload snippets before accepting a skill. You can run `zeroclaw skills audit ` to validate a local directory or an installed skill manually. -### Ecosystem Projects - -Community-built projects that extend ZeroClaw UX and operations: - -- **ZeroClaw Views**: Full-stack dashboard companion (Vue 3 frontend + Rust BFF) covering chat, agents, memory browsing, config editing, and workflow integrations. Repository: - ## Development ```bash diff --git a/docs/config-reference.md b/docs/config-reference.md index 1bed5a3f0..f18a09bcc 100644 --- a/docs/config-reference.md +++ b/docs/config-reference.md @@ -178,6 +178,47 @@ model = "qwen2.5-coder:32b" temperature = 0.2 ``` +## `[research]` + +Research phase allows the agent to gather information through tools before generating the main response. + +| Key | Default | Purpose | +|---|---|---| +| `enabled` | `false` | Enable research phase | +| `trigger` | `never` | Research trigger strategy: `never`, `always`, `keywords`, `length`, `question` | +| `keywords` | `["find", "search", "check", "investigate"]` | Keywords that trigger research (when trigger = `keywords`) | +| `min_message_length` | `50` | Minimum message length to trigger research (when trigger = `length`) | +| `max_iterations` | `5` | Maximum tool calls during research phase | +| `show_progress` | `true` | Show research progress to user | + +Notes: + +- Research phase is **disabled by default** (`trigger = never`). +- When enabled, the agent first gathers facts through tools (grep, file_read, shell, memory search), then responds using the collected context. +- Research runs before the main agent turn and does not count toward `agent.max_tool_iterations`. 
+- Trigger strategies: + - `never` — research disabled (default) + - `always` — research on every user message + - `keywords` — research when message contains any keyword from the list + - `length` — research when message length is at least `min_message_length` + - `question` — research when message contains '?' + +Example: + +```toml +[research] +enabled = true +trigger = "keywords" +keywords = ["find", "show", "check", "how many"] +max_iterations = 3 +show_progress = true +``` + +The agent will research the codebase before responding to queries like: +- "Find all TODO in src/" +- "Show contents of main.rs" +- "How many files in the project?" + ## `[runtime]` | Key | Default | Purpose | @@ -317,46 +358,6 @@ Notes: - Use exact domain or subdomain matching (e.g. `"api.example.com"`, `"example.com"`), or `"*"` to allow any public domain. - Local/private targets are still blocked even when `"*"` is configured. -## `[web_fetch]` - -| Key | Default | Purpose | -|---|---|---| -| `enabled` | `false` | Enable `web_fetch` tool for page retrieval | -| `provider` | `fast_html2md` | Fetch provider: `fast_html2md`, `nanohtml2text`, or `firecrawl` | -| `api_key` | unset | Provider API key (required for `provider = "firecrawl"`) | -| `api_url` | unset | Optional provider API base URL override (self-hosted endpoints) | -| `allowed_domains` | `["*"]` | Allowed domains for web fetch (exact/subdomain match) | -| `blocked_domains` | `[]` | Blocklist that overrides allowlist | -| `max_response_size` | `500000` | Maximum output payload length in bytes | -| `timeout_secs` | `30` | Request timeout in seconds | - -Notes: - -- URL policy is deny-by-default when `allowed_domains` is empty. -- Local/private addresses are blocked even when allowlist contains `"*"`. -- Redirect responses return the validated redirect target URL string instead of destination page content. -- `fast_html2md` is the markdown-preserving default provider. 
-- `nanohtml2text` requires Cargo feature `web-fetch-plaintext`. -- `firecrawl` requires Cargo feature `firecrawl`. - -## `[web_search]` - -| Key | Default | Purpose | -|---|---|---| -| `enabled` | `false` | Enable `web_search_tool` | -| `provider` | `duckduckgo` | Search provider: `duckduckgo`, `brave`, or `firecrawl` | -| `api_key` | unset | Generic provider API key (`firecrawl`, optional fallback for `brave`) | -| `api_url` | unset | Optional provider API base URL override (self-hosted Firecrawl) | -| `brave_api_key` | unset | Brave Search API key (used when `provider = "brave"`) | -| `max_results` | `5` | Result count (clamped to 1..10) | -| `timeout_secs` | `15` | Request timeout in seconds | - -Notes: - -- `provider = "duckduckgo"` needs no API key. -- `provider = "brave"` uses `brave_api_key` first, then falls back to `api_key`. -- `provider = "firecrawl"` requires `api_key` and Cargo feature `firecrawl`. - ## `[gateway]` | Key | Default | Purpose | diff --git a/src/agent/agent.rs b/src/agent/agent.rs index 563211e96..984f6f434 100644 --- a/src/agent/agent.rs +++ b/src/agent/agent.rs @@ -3,7 +3,8 @@ use crate::agent::dispatcher::{ }; use crate::agent::memory_loader::{DefaultMemoryLoader, MemoryLoader}; use crate::agent::prompt::{PromptContext, SystemPromptBuilder}; -use crate::config::Config; +use crate::agent::research; +use crate::config::{Config, ResearchPhaseConfig}; use crate::memory::{self, Memory, MemoryCategory}; use crate::observability::{self, Observer, ObserverEvent}; use crate::providers::{self, ChatMessage, ChatRequest, ConversationMessage, Provider}; @@ -37,6 +38,7 @@ pub struct Agent { classification_config: crate::config::QueryClassificationConfig, available_hints: Vec, route_model_by_hint: HashMap, + research_config: ResearchPhaseConfig, } pub struct AgentBuilder { @@ -58,6 +60,7 @@ pub struct AgentBuilder { classification_config: Option, available_hints: Option>, route_model_by_hint: Option>, + research_config: Option, } impl AgentBuilder { 
@@ -81,6 +84,7 @@ impl AgentBuilder { classification_config: None, available_hints: None, route_model_by_hint: None, + research_config: None, } } @@ -180,6 +184,11 @@ impl AgentBuilder { self } + pub fn research_config(mut self, research_config: ResearchPhaseConfig) -> Self { + self.research_config = Some(research_config); + self + } + pub fn build(self) -> Result { let tools = self .tools @@ -223,6 +232,7 @@ impl AgentBuilder { classification_config: self.classification_config.unwrap_or_default(), available_hints: self.available_hints.unwrap_or_default(), route_model_by_hint: self.route_model_by_hint.unwrap_or_default(), + research_config: self.research_config.unwrap_or_default(), }) } } @@ -342,6 +352,7 @@ impl Agent { )) .skills_prompt_mode(config.skills.prompt_injection_mode) .auto_save(config.memory.auto_save) + .research_config(config.research.clone()) .build() } @@ -486,11 +497,60 @@ impl Agent { .await .unwrap_or_default(); - let now = chrono::Local::now().format("%Y-%m-%d %H:%M:%S %Z"); - let enriched = if context.is_empty() { - format!("[{now}] {user_message}") + // ── Research Phase ────────────────────────────────────────────── + // If enabled and triggered, run a focused research turn to gather + // information before the main response. 
+ let research_context = if research::should_trigger(&self.research_config, user_message) { + if self.research_config.show_progress { + println!("[Research] Gathering information..."); + } + + match research::run_research_phase( + &self.research_config, + self.provider.as_ref(), + &self.tools, + user_message, + &self.model_name, + self.temperature, + self.observer.clone(), + ) + .await + { + Ok(result) => { + if self.research_config.show_progress { + println!( + "[Research] Complete: {} tool calls, {} chars context", + result.tool_call_count, + result.context.len() + ); + for summary in &result.tool_summaries { + println!(" - {}: {}", summary.tool_name, summary.result_preview); + } + } + if result.context.is_empty() { + None + } else { + Some(result.context) + } + } + Err(e) => { + tracing::warn!("Research phase failed: {}", e); + None + } + } } else { - format!("{context}[{now}] {user_message}") + None + }; + + let now = chrono::Local::now().format("%Y-%m-%d %H:%M:%S %Z"); + let stamped_user_message = format!("[{now}] {user_message}"); + let enriched = match (&context, &research_context) { + (c, Some(r)) if !c.is_empty() => { + format!("{c}\n\n{r}\n\n{stamped_user_message}") + } + (_, Some(r)) => format!("{r}\n\n{stamped_user_message}"), + (c, None) if !c.is_empty() => format!("{c}{stamped_user_message}"), + _ => stamped_user_message, }; self.history diff --git a/src/agent/mod.rs b/src/agent/mod.rs index 3d33bb49e..4b77f929d 100644 --- a/src/agent/mod.rs +++ b/src/agent/mod.rs @@ -5,6 +5,7 @@ pub mod dispatcher; pub mod loop_; pub mod memory_loader; pub mod prompt; +pub mod research; #[cfg(test)] mod tests; diff --git a/src/agent/research.rs b/src/agent/research.rs new file mode 100644 index 000000000..a4db729dd --- /dev/null +++ b/src/agent/research.rs @@ -0,0 +1,362 @@ +//! Research phase β€” proactive information gathering before main response. +//! +//! When enabled, the agent runs a focused "research turn" using available tools +//! 
to gather context before generating its main response. This creates a
+//! "thinking" phase where the agent explores the codebase, searches memory,
+//! or fetches external data.
+//!
+//! Supports both:
+//! - Native tool calling (OpenAI, Anthropic, Bedrock, etc.)
+//! - Prompt-guided tool calling (Gemini and other providers without native support)
+
+use crate::agent::dispatcher::{ToolDispatcher, XmlToolDispatcher};
+use crate::config::{ResearchPhaseConfig, ResearchTrigger};
+use crate::observability::Observer;
+use crate::providers::traits::build_tool_instructions_text;
+use crate::providers::{ChatMessage, ChatRequest, ChatResponse, Provider, ToolCall};
+use crate::tools::{Tool, ToolResult, ToolSpec};
+use anyhow::Result;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+/// Result of the research phase.
+#[derive(Debug, Clone)]
+pub struct ResearchResult {
+    /// Collected context from research (formatted for injection into main prompt).
+    pub context: String,
+    /// Number of tool calls made during research.
+    pub tool_call_count: usize,
+    /// Duration of the research phase.
+    pub duration: Duration,
+    /// Summary of tools called and their results.
+    pub tool_summaries: Vec<ToolSummary>,
+}
+
+/// Summary of a single tool call during research.
+#[derive(Debug, Clone)]
+pub struct ToolSummary {
+    /// Name of the tool the model asked for.
+    pub tool_name: String,
+    /// Shortened preview of the raw argument string passed to the tool.
+    pub arguments_preview: String,
+    /// Shortened preview of the tool's output.
+    pub result_preview: String,
+    /// Whether the tool reported success.
+    pub success: bool,
+}
+
+/// Check if research phase should be triggered for this message.
+///
+/// Returns `false` whenever `config.enabled` is `false`; otherwise the decision
+/// follows `config.trigger`:
+/// - `Never` / `Always`: unconditional.
+/// - `Keywords`: case-insensitive substring match against any non-blank keyword.
+/// - `Length`: the message has at least `min_message_length` characters
+///   (Unicode scalar values, not bytes, so non-ASCII text is not over-counted).
+/// - `Question`: the message contains `?`.
+pub fn should_trigger(config: &ResearchPhaseConfig, message: &str) -> bool {
+    if !config.enabled {
+        return false;
+    }
+
+    match config.trigger {
+        ResearchTrigger::Never => false,
+        ResearchTrigger::Always => true,
+        ResearchTrigger::Keywords => {
+            let message_lower = message.to_lowercase();
+            config
+                .keywords
+                .iter()
+                // A blank keyword is a substring of every message; ignore it so a
+                // stray "" in the config does not silently turn into `Always`.
+                .filter(|kw| !kw.trim().is_empty())
+                .any(|kw| message_lower.contains(&kw.to_lowercase()))
+        }
+        ResearchTrigger::Length => message.chars().count() >= config.min_message_length,
+        ResearchTrigger::Question => message.contains('?'),
+    }
+}
+
+/// Default system prompt for research phase.
+const RESEARCH_SYSTEM_PROMPT: &str = r#"You are in RESEARCH MODE. Your task is to gather information that will help answer the user's question.
+
+RULES:
+1. Use tools to search, read files, check status, or fetch data
+2. Focus on gathering FACTS, not answering yet
+3. Be efficient — only gather what's needed
+4. After gathering enough info, respond with a summary starting with "[RESEARCH COMPLETE]"
+
+DO NOT:
+- Answer the user's question directly
+- Make changes to files
+- Execute destructive commands
+
+When you have enough information, summarize what you found in this format:
+[RESEARCH COMPLETE]
+- Finding 1: ...
+- Finding 2: ...
+- Finding 3: ...
+"#;
+
+/// Run the research phase.
+///
+/// This executes a focused LLM + tools loop to gather information before
+/// the main response. The collected context is returned for injection
+/// into the main conversation.
+///
+/// The loop performs at most `config.max_iterations` model round-trips; every
+/// tool call requested in a round is executed before the next round starts.
+/// It stops early when the model replies with `[RESEARCH COMPLETE]` or requests
+/// no tools. If tools ran but no summary text was produced (for example the
+/// iteration budget was exhausted), the recorded tool previews are returned as
+/// the context so the gathered findings are not thrown away.
+///
+/// `_observer` is accepted for interface stability and is not used here.
+///
+/// # Errors
+///
+/// Returns an error if `provider.chat` fails. Individual tool failures do not
+/// abort the phase; they are fed back to the model as tool results.
+pub async fn run_research_phase(
+    config: &ResearchPhaseConfig,
+    provider: &dyn Provider,
+    tools: &[Box<dyn Tool>],
+    user_message: &str,
+    model: &str,
+    temperature: f64,
+    _observer: Arc<dyn Observer>,
+) -> Result<ResearchResult> {
+    let start = Instant::now();
+    let mut tool_summaries = Vec::new();
+    let mut collected_context = String::new();
+
+    let uses_native_tools = provider.supports_native_tools();
+
+    // Build tool specs for native OR prompt-guided tool calling
+    let tool_specs: Vec<ToolSpec> = tools
+        .iter()
+        .map(|t| ToolSpec {
+            name: t.name().to_string(),
+            description: t.description().to_string(),
+            parameters: t.parameters_schema(),
+        })
+        .collect();
+
+    // Build system prompt
+    // For prompt-guided providers, include tool instructions in system prompt
+    let base_prompt = if config.system_prompt_prefix.is_empty() {
+        RESEARCH_SYSTEM_PROMPT.to_string()
+    } else {
+        format!(
+            "{}\n\n{}",
+            config.system_prompt_prefix, RESEARCH_SYSTEM_PROMPT
+        )
+    };
+
+    let system_prompt = if uses_native_tools {
+        base_prompt
+    } else {
+        // Prompt-guided: append tool instructions
+        format!(
+            "{}\n\n{}",
+            base_prompt,
+            build_tool_instructions_text(&tool_specs)
+        )
+    };
+
+    // Conversation history for research phase
+    let mut messages = vec![ChatMessage::user(format!(
+        "Research the following question to gather relevant information:\n\n{}",
+        user_message
+    ))];
+
+    // Research loop (1-based so log lines and generated call ids start at 1)
+    for iteration in 1..=config.max_iterations {
+        // Log research iteration if showing progress
+        if config.show_progress {
+            tracing::info!(iteration, "Research phase iteration");
+        }
+
+        // Build messages with system prompt as first message
+        let mut full_messages = vec![ChatMessage::system(&system_prompt)];
+        full_messages.extend(messages.iter().cloned());
+
+        // Call LLM
+        let request = ChatRequest {
+            messages: &full_messages,
+            tools: if uses_native_tools {
+                Some(&tool_specs)
+            } else {
+                None // Prompt-guided: tools are in system prompt
+            },
+        };
+
+        let response: ChatResponse = provider.chat(request, model, temperature).await?;
+
+        // Check if research is complete: keep everything from the marker onwards
+        if let Some(text) = response.text.as_deref() {
+            if let Some(idx) = text.find("[RESEARCH COMPLETE]") {
+                collected_context = text[idx..].to_string();
+                break;
+            }
+        }
+
+        // Parse tool calls: native OR from XML in response text
+        let tool_calls: Vec<ToolCall> = if uses_native_tools {
+            response.tool_calls.clone()
+        } else {
+            // Parse XML tags from response text using XmlToolDispatcher
+            let dispatcher = XmlToolDispatcher;
+            let (_, parsed) = dispatcher.parse_response(&response);
+            parsed
+                .into_iter()
+                .enumerate()
+                .map(|(i, p)| ToolCall {
+                    id: p
+                        .tool_call_id
+                        .unwrap_or_else(|| format!("tc_{}_{}", iteration, i)),
+                    name: p.name,
+                    arguments: serde_json::to_string(&p.arguments).unwrap_or_default(),
+                })
+                .collect()
+        };
+
+        // If no tool calls, we're done
+        if tool_calls.is_empty() {
+            if let Some(text) = response.text {
+                collected_context = text;
+            }
+            break;
+        }
+
+        // Execute tool calls
+        for tool_call in &tool_calls {
+            let tool_result = execute_tool_call(tools, tool_call).await;
+
+            let summary = ToolSummary {
+                tool_name: tool_call.name.clone(),
+                arguments_preview: truncate(&tool_call.arguments, 100),
+                result_preview: truncate(&tool_result.output, 200),
+                success: tool_result.success,
+            };
+
+            if config.show_progress {
+                tracing::info!(
+                    tool = %summary.tool_name,
+                    success = summary.success,
+                    "Research tool call"
+                );
+            }
+
+            tool_summaries.push(summary);
+
+            // Add tool result to conversation
+            messages.push(ChatMessage::assistant(format!(
+                "Called tool `{}` with arguments: {}",
+                tool_call.name, tool_call.arguments
+            )));
+            messages.push(ChatMessage::user(format!(
+                "Tool result:\n{}",
+                tool_result.output
+            )));
+        }
+    }
+
+    // The model can run out of iterations (or stop with an empty reply) without
+    // ever writing a summary. An empty context makes the caller drop the whole
+    // research phase, so fall back to the previews recorded for each tool call.
+    if collected_context.trim().is_empty() && !tool_summaries.is_empty() {
+        collected_context = summarize_tool_findings(&tool_summaries);
+    }
+
+    let duration = start.elapsed();
+
+    Ok(ResearchResult {
+        context: collected_context,
+        tool_call_count: tool_summaries.len(),
+        duration,
+        tool_summaries,
+    })
+}
+
+/// Build a last-resort research context from the recorded tool-call previews.
+fn summarize_tool_findings(summaries: &[ToolSummary]) -> String {
+    let mut context = String::from(
+        "[RESEARCH INCOMPLETE]\nNo final summary was produced. Tool results gathered so far:\n",
+    );
+    for summary in summaries {
+        let status = if summary.success { "ok" } else { "failed" };
+        context.push_str(&format!(
+            "- {} ({}) with {}: {}\n",
+            summary.tool_name, status, summary.arguments_preview, summary.result_preview
+        ));
+    }
+    context
+}
+
+/// 
Execute a single tool call.
+///
+/// Looks the tool up by name in `tools` and runs it with the JSON-decoded
+/// arguments. This never fails: an unknown tool name or an error returned by
+/// the tool is converted into a `ToolResult` with `success: false`, and
+/// arguments that are not valid JSON fall back to an empty JSON object.
+async fn execute_tool_call(tools: &[Box<dyn Tool>], tool_call: &ToolCall) -> ToolResult {
+    // Find the tool
+    let tool = tools.iter().find(|t| t.name() == tool_call.name);
+
+    match tool {
+        Some(t) => {
+            // Parse arguments
+            let args: serde_json::Value = serde_json::from_str(&tool_call.arguments)
+                .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
+
+            // Execute
+            match t.execute(args).await {
+                Ok(result) => result,
+                Err(e) => ToolResult {
+                    success: false,
+                    output: format!("Error: {}", e),
+                    error: Some(e.to_string()),
+                },
+            }
+        }
+        None => ToolResult {
+            success: false,
+            output: format!("Unknown tool: {}", tool_call.name),
+            error: Some(format!("Unknown tool: {}", tool_call.name)),
+        },
+    }
+}
+
+/// Truncate string with ellipsis.
+///
+/// Strings of at most `max_len` bytes are returned unchanged. Longer strings are
+/// cut so that the kept prefix plus `...` fits in `max_len` bytes (for
+/// `max_len >= 3`); the cut is moved back to the nearest UTF-8 character
+/// boundary so multi-byte text (e.g. Cyrillic tool output) cannot cause a panic.
+fn truncate(s: &str, max_len: usize) -> String {
+    if s.len() <= max_len {
+        return s.to_string();
+    }
+
+    // Reserve three bytes for the ellipsis, then back up to a char boundary.
+    // Index 0 is always a boundary, so the loop terminates.
+    let mut end = max_len.saturating_sub(3);
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    format!("{}...", &s[..end])
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn truncate_respects_char_boundaries() {
+        assert_eq!(truncate("hello", 10), "hello");
+        assert_eq!(truncate("hello world", 8), "hello...");
+        // 'é' is two bytes; a cut at byte 3 would land inside the second one.
+        assert_eq!(truncate("ééééé", 6), "é...");
+    }
+
+    #[test]
+    fn should_trigger_never() {
+        let config = ResearchPhaseConfig {
+            enabled: true,
+            trigger: ResearchTrigger::Never,
+            ..Default::default()
+        };
+        assert!(!should_trigger(&config, "find something"));
+    }
+
+    #[test]
+    fn should_trigger_always() {
+        let config = ResearchPhaseConfig {
+            enabled: true,
+            trigger: ResearchTrigger::Always,
+            ..Default::default()
+        };
+        assert!(should_trigger(&config, "hello"));
+    }
+
+    #[test]
+    fn should_trigger_keywords() {
+        let config = ResearchPhaseConfig {
+            enabled: true,
+            trigger: ResearchTrigger::Keywords,
+            keywords: vec!["find".into(), "search".into()],
+            ..Default::default()
+        };
+        assert!(should_trigger(&config, "please find the file"));
+        assert!(should_trigger(&config, "SEARCH for errors"));
+        assert!(!should_trigger(&config, "hello world"));
+    }
+
+    #[test]
+    fn should_trigger_length() {
+        let config = ResearchPhaseConfig {
+            enabled: true,
+            trigger: ResearchTrigger::Length,
+            min_message_length: 20,
+            ..Default::default()
+        };
+        
assert!(!should_trigger(&config, "short")); + assert!(should_trigger( + &config, + "this is a longer message that exceeds the minimum" + )); + } + + #[test] + fn should_trigger_question() { + let config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Question, + ..Default::default() + }; + assert!(should_trigger(&config, "what is this?")); + assert!(!should_trigger(&config, "do this now")); + } + + #[test] + fn disabled_never_triggers() { + let config = ResearchPhaseConfig { + enabled: false, + trigger: ResearchTrigger::Always, + ..Default::default() + }; + assert!(!should_trigger(&config, "anything")); + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index cad5ecf31..d8b9ad70f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5,15 +5,15 @@ pub mod traits; pub use schema::{ apply_runtime_proxy_to_builder, build_runtime_proxy_client, build_runtime_proxy_client_with_timeouts, runtime_proxy_config, set_runtime_proxy_config, - AgentConfig, AgentsIpcConfig, AuditConfig, AutonomyConfig, BrowserComputerUseConfig, - BrowserConfig, BuiltinHooksConfig, ChannelsConfig, ClassificationRule, ComposioConfig, Config, - CostConfig, CronConfig, DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, - EmbeddingRouteConfig, EstopConfig, FeishuConfig, GatewayConfig, HardwareConfig, - HardwareTransport, HeartbeatConfig, HooksConfig, HttpRequestConfig, IMessageConfig, - IdentityConfig, LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, MultimodalConfig, - NextcloudTalkConfig, ObservabilityConfig, OtpConfig, OtpMethod, PeripheralBoardConfig, - PeripheralsConfig, ProxyConfig, ProxyScope, QdrantConfig, QueryClassificationConfig, - ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, + AgentConfig, AuditConfig, AutonomyConfig, BrowserComputerUseConfig, BrowserConfig, + BuiltinHooksConfig, ChannelsConfig, ClassificationRule, ComposioConfig, Config, CostConfig, + CronConfig, DelegateAgentConfig, DiscordConfig, 
DockerRuntimeConfig, EmbeddingRouteConfig, + EstopConfig, FeishuConfig, GatewayConfig, HardwareConfig, HardwareTransport, HeartbeatConfig, + HooksConfig, HttpRequestConfig, IMessageConfig, IdentityConfig, LarkConfig, MatrixConfig, + MemoryConfig, ModelRouteConfig, MultimodalConfig, NextcloudTalkConfig, ObservabilityConfig, + OtpConfig, OtpMethod, PeripheralBoardConfig, PeripheralsConfig, ProxyConfig, ProxyScope, + QdrantConfig, QueryClassificationConfig, ReliabilityConfig, ResearchPhaseConfig, + ResearchTrigger, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig, SkillsConfig, SkillsPromptInjectionMode, SlackConfig, StorageConfig, StorageProviderConfig, StorageProviderSection, StreamMode, TelegramConfig, TranscriptionConfig, TunnelConfig, WebFetchConfig, WebSearchConfig, diff --git a/src/config/schema.rs b/src/config/schema.rs index f68a7a00b..229c349b0 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -129,6 +129,10 @@ pub struct Config { #[serde(default)] pub runtime: RuntimeConfig, + /// Research phase configuration (`[research]`). Proactive information gathering. + #[serde(default)] + pub research: ResearchPhaseConfig, + /// Reliability settings: retries, fallback providers, backoff (`[reliability]`). #[serde(default)] pub reliability: ReliabilityConfig, @@ -244,10 +248,6 @@ pub struct Config { /// Voice transcription configuration (Whisper API via Groq). #[serde(default)] pub transcription: TranscriptionConfig, - - /// Inter-process agent communication (`[agents_ipc]`). - #[serde(default)] - pub agents_ipc: AgentsIpcConfig, } /// Named provider profile definition compatible with Codex app-server style config. 
@@ -423,44 +423,6 @@ impl Default for TranscriptionConfig { } } -// ── Agents IPC ────────────────────────────────────────────────── - -fn default_agents_ipc_db_path() -> String { - "~/.zeroclaw/agents.db".into() -} - -fn default_agents_ipc_staleness_secs() -> u64 { - 300 -} - -/// Inter-process agent communication configuration (`[agents_ipc]` section). -/// -/// When enabled, registers 5 IPC tools that let independent ZeroClaw processes -/// on the same host discover each other and exchange messages via a shared -/// SQLite database. Disabled by default (zero overhead when off). -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct AgentsIpcConfig { - /// Enable inter-process agent communication tools. - #[serde(default)] - pub enabled: bool, - /// Path to shared SQLite database (all agents on this host share one file). - #[serde(default = "default_agents_ipc_db_path")] - pub db_path: String, - /// Agents not seen within this window are considered offline (seconds). - #[serde(default = "default_agents_ipc_staleness_secs")] - pub staleness_secs: u64, -} - -impl Default for AgentsIpcConfig { - fn default() -> Self { - Self { - enabled: false, - db_path: default_agents_ipc_db_path(), - staleness_secs: default_agents_ipc_staleness_secs(), - } - } -} - /// Agent orchestration configuration (`[agent]` section). 
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct AgentConfig { @@ -1151,15 +1113,6 @@ pub struct WebFetchConfig { /// Enable `web_fetch` tool for fetching web page content #[serde(default)] pub enabled: bool, - /// Provider: "fast_html2md", "nanohtml2text", or "firecrawl" - #[serde(default = "default_web_fetch_provider")] - pub provider: String, - /// Optional provider API key (required for provider = "firecrawl") - #[serde(default)] - pub api_key: Option, - /// Optional provider API URL override (for self-hosted providers) - #[serde(default)] - pub api_url: Option, /// Allowed domains for web fetch (exact or subdomain match; `["*"]` = all public hosts) #[serde(default)] pub allowed_domains: Vec, @@ -1178,10 +1131,6 @@ fn default_web_fetch_max_response_size() -> usize { 500_000 // 500KB } -fn default_web_fetch_provider() -> String { - "fast_html2md".into() -} - fn default_web_fetch_timeout_secs() -> u64 { 30 } @@ -1190,9 +1139,6 @@ impl Default for WebFetchConfig { fn default() -> Self { Self { enabled: false, - provider: default_web_fetch_provider(), - api_key: None, - api_url: None, allowed_domains: vec!["*".into()], blocked_domains: vec![], max_response_size: default_web_fetch_max_response_size(), @@ -1212,12 +1158,6 @@ pub struct WebSearchConfig { /// Search provider: "duckduckgo" (free, no API key) or "brave" (requires API key) #[serde(default = "default_web_search_provider")] pub provider: String, - /// Generic provider API key (used by firecrawl and as fallback for brave) - #[serde(default)] - pub api_key: Option, - /// Optional provider API URL override (for self-hosted providers) - #[serde(default)] - pub api_url: Option, /// Brave Search API key (required if provider is "brave") #[serde(default)] pub brave_api_key: Option, @@ -1246,8 +1186,6 @@ impl Default for WebSearchConfig { Self { enabled: false, provider: default_web_search_provider(), - api_key: None, - api_url: None, brave_api_key: None, max_results: 
default_web_search_max_results(),
             timeout_secs: default_web_search_timeout_secs(),
@@ -2305,6 +2243,109 @@ impl Default for RuntimeConfig {
     }
 }
+
+// ── Research Phase ───────────────────────────────────────────────
+
+/// Research phase trigger mode.
+///
+/// Serialized in lowercase (`"never"`, `"always"`, `"keywords"`, `"length"`,
+/// `"question"`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
+#[serde(rename_all = "lowercase")]
+pub enum ResearchTrigger {
+    /// Never trigger research phase.
+    #[default]
+    Never,
+    /// Always trigger research phase before responding.
+    Always,
+    /// Trigger when message contains configured keywords.
+    Keywords,
+    /// Trigger when the message length reaches `min_message_length`.
+    Length,
+    /// Trigger when message contains a question mark.
+    Question,
+}
+
+/// Research phase configuration (`[research]` section).
+///
+/// When enabled, the agent proactively gathers information using tools
+/// before generating its main response. This creates a "thinking" phase
+/// where the agent explores the codebase, searches memory, or fetches
+/// external data to inform its answer.
+///
+/// Both switches must be set for research to run: `enabled = true` and a
+/// `trigger` other than the default `"never"`.
+///
+/// ```toml
+/// [research]
+/// enabled = true
+/// trigger = "keywords"
+/// keywords = ["find", "search", "check", "investigate"]
+/// max_iterations = 5
+/// show_progress = true
+/// ```
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct ResearchPhaseConfig {
+    /// Enable the research phase.
+    #[serde(default)]
+    pub enabled: bool,
+
+    /// When to trigger research phase.
+    #[serde(default)]
+    pub trigger: ResearchTrigger,
+
+    /// Keywords that trigger research phase (when `trigger = "keywords"`).
+    #[serde(default = "default_research_keywords")]
+    pub keywords: Vec<String>,
+
+    /// Minimum message length to trigger research (when `trigger = "length"`).
+    #[serde(default = "default_research_min_length")]
+    pub min_message_length: usize,
+
+    /// Maximum tool call iterations during research phase.
+    #[serde(default = "default_research_max_iterations")]
+    pub max_iterations: usize,
+
+    /// Show detailed progress during research (tool calls, results).
+    #[serde(default = "default_true")]
+    pub show_progress: bool,
+
+    /// Custom system prompt prefix for research phase.
+    /// If empty, uses default research instructions.
+    #[serde(default)]
+    pub system_prompt_prefix: String,
+}
+
+/// Default trigger keywords: English verbs plus their Russian equivalents.
+fn default_research_keywords() -> Vec<String> {
+    vec![
+        "find".into(),
+        "search".into(),
+        "check".into(),
+        "investigate".into(),
+        "look".into(),
+        "research".into(),
+        "найди".into(),
+        "проверь".into(),
+        "исследуй".into(),
+        "поищи".into(),
+    ]
+}
+
+/// Default for `min_message_length`.
+fn default_research_min_length() -> usize {
+    50
+}
+
+/// Default for `max_iterations`.
+fn default_research_max_iterations() -> usize {
+    5
+}
+
+impl Default for ResearchPhaseConfig {
+    /// Research is off by default: `enabled = false` and `trigger = never`.
+    fn default() -> Self {
+        Self {
+            enabled: false,
+            trigger: ResearchTrigger::default(),
+            keywords: default_research_keywords(),
+            min_message_length: default_research_min_length(),
+            max_iterations: default_research_max_iterations(),
+            show_progress: true,
+            system_prompt_prefix: String::new(),
+        }
+    }
+}
+
 // ── Reliability / supervision ────────────────────────────────────
 
 /// Reliability and supervision configuration (`[reliability]` section).
@@ -3750,6 +3791,7 @@ impl Default for Config { autonomy: AutonomyConfig::default(), security: SecurityConfig::default(), runtime: RuntimeConfig::default(), + research: ResearchPhaseConfig::default(), reliability: ReliabilityConfig::default(), scheduler: SchedulerConfig::default(), agent: AgentConfig::default(), @@ -3779,7 +3821,6 @@ impl Default for Config { hardware: HardwareConfig::default(), query_classification: QueryClassificationConfig::default(), transcription: TranscriptionConfig::default(), - agents_ipc: AgentsIpcConfig::default(), } } } @@ -4229,16 +4270,6 @@ impl Config { "config.browser.computer_use.api_key", )?; - decrypt_optional_secret( - &store, - &mut config.web_fetch.api_key, - "config.web_fetch.api_key", - )?; - decrypt_optional_secret( - &store, - &mut config.web_search.api_key, - "config.web_search.api_key", - )?; decrypt_optional_secret( &store, &mut config.web_search.brave_api_key, @@ -4450,29 +4481,6 @@ impl Config { anyhow::bail!("scheduler.max_tasks must be greater than 0"); } - // Web tools - let web_fetch_provider = self.web_fetch.provider.trim().to_lowercase(); - if !web_fetch_provider.is_empty() - && !matches!( - web_fetch_provider.as_str(), - "fast_html2md" | "nanohtml2text" | "firecrawl" - ) - { - anyhow::bail!( - "web_fetch.provider must be one of: fast_html2md, nanohtml2text, firecrawl" - ); - } - - let web_search_provider = self.web_search.provider.trim().to_lowercase(); - if !web_search_provider.is_empty() - && !matches!( - web_search_provider.as_str(), - "duckduckgo" | "ddg" | "brave" | "firecrawl" - ) - { - anyhow::bail!("web_search.provider must be one of: duckduckgo, brave, firecrawl"); - } - // Model routes for (i, route) in self.model_routes.iter().enumerate() { if route.hint.trim().is_empty() { @@ -4740,36 +4748,6 @@ impl Config { self.web_search.enabled = enabled == "1" || enabled.eq_ignore_ascii_case("true"); } - // Web fetch provider: ZEROCLAW_WEB_FETCH_PROVIDER or WEB_FETCH_PROVIDER - if let Ok(provider) = 
std::env::var("ZEROCLAW_WEB_FETCH_PROVIDER") - .or_else(|_| std::env::var("WEB_FETCH_PROVIDER")) - { - let provider = provider.trim(); - if !provider.is_empty() { - self.web_fetch.provider = provider.to_string(); - } - } - - // Web fetch provider API key: ZEROCLAW_WEB_FETCH_API_KEY or WEB_FETCH_API_KEY - if let Ok(api_key) = std::env::var("ZEROCLAW_WEB_FETCH_API_KEY") - .or_else(|_| std::env::var("WEB_FETCH_API_KEY")) - { - let api_key = api_key.trim(); - if !api_key.is_empty() { - self.web_fetch.api_key = Some(api_key.to_string()); - } - } - - // Web fetch provider API URL: ZEROCLAW_WEB_FETCH_API_URL or WEB_FETCH_API_URL - if let Ok(api_url) = std::env::var("ZEROCLAW_WEB_FETCH_API_URL") - .or_else(|_| std::env::var("WEB_FETCH_API_URL")) - { - let api_url = api_url.trim(); - if !api_url.is_empty() { - self.web_fetch.api_url = Some(api_url.to_string()); - } - } - // Web search provider: ZEROCLAW_WEB_SEARCH_PROVIDER or WEB_SEARCH_PROVIDER if let Ok(provider) = std::env::var("ZEROCLAW_WEB_SEARCH_PROVIDER") .or_else(|_| std::env::var("WEB_SEARCH_PROVIDER")) @@ -4780,26 +4758,6 @@ impl Config { } } - // Web search provider API key: ZEROCLAW_WEB_SEARCH_API_KEY or WEB_SEARCH_API_KEY - if let Ok(api_key) = std::env::var("ZEROCLAW_WEB_SEARCH_API_KEY") - .or_else(|_| std::env::var("WEB_SEARCH_API_KEY")) - { - let api_key = api_key.trim(); - if !api_key.is_empty() { - self.web_search.api_key = Some(api_key.to_string()); - } - } - - // Web search provider API URL: ZEROCLAW_WEB_SEARCH_API_URL or WEB_SEARCH_API_URL - if let Ok(api_url) = std::env::var("ZEROCLAW_WEB_SEARCH_API_URL") - .or_else(|_| std::env::var("WEB_SEARCH_API_URL")) - { - let api_url = api_url.trim(); - if !api_url.is_empty() { - self.web_search.api_url = Some(api_url.to_string()); - } - } - // Brave API key: ZEROCLAW_BRAVE_API_KEY or BRAVE_API_KEY if let Ok(api_key) = std::env::var("ZEROCLAW_BRAVE_API_KEY").or_else(|_| std::env::var("BRAVE_API_KEY")) @@ -4948,16 +4906,6 @@ impl Config { 
"config.browser.computer_use.api_key", )?; - encrypt_optional_secret( - &store, - &mut config_to_save.web_fetch.api_key, - "config.web_fetch.api_key", - )?; - encrypt_optional_secret( - &store, - &mut config_to_save.web_search.api_key, - "config.web_search.api_key", - )?; encrypt_optional_secret( &store, &mut config_to_save.web_search.brave_api_key, @@ -5355,6 +5303,7 @@ default_temperature = 0.7 kind: "docker".into(), ..RuntimeConfig::default() }, + research: ResearchPhaseConfig::default(), reliability: ReliabilityConfig::default(), scheduler: SchedulerConfig::default(), skills: SkillsConfig::default(), @@ -5420,7 +5369,6 @@ default_temperature = 0.7 hooks: HooksConfig::default(), hardware: HardwareConfig::default(), transcription: TranscriptionConfig::default(), - agents_ipc: AgentsIpcConfig::default(), }; let toml_str = toml::to_string_pretty(&config).unwrap(); @@ -5575,6 +5523,7 @@ tool_dispatcher = "xml" autonomy: AutonomyConfig::default(), security: SecurityConfig::default(), runtime: RuntimeConfig::default(), + research: ResearchPhaseConfig::default(), reliability: ReliabilityConfig::default(), scheduler: SchedulerConfig::default(), skills: SkillsConfig::default(), @@ -5604,7 +5553,6 @@ tool_dispatcher = "xml" hooks: HooksConfig::default(), hardware: HardwareConfig::default(), transcription: TranscriptionConfig::default(), - agents_ipc: AgentsIpcConfig::default(), }; config.save().await.unwrap(); @@ -5639,8 +5587,6 @@ tool_dispatcher = "xml" config.api_key = Some("root-credential".into()); config.composio.api_key = Some("composio-credential".into()); config.browser.computer_use.api_key = Some("browser-credential".into()); - config.web_fetch.api_key = Some("web-fetch-credential".into()); - config.web_search.api_key = Some("web-search-credential".into()); config.web_search.brave_api_key = Some("brave-credential".into()); config.storage.provider.config.db_url = Some("postgres://user:pw@host/db".into()); @@ -5689,24 +5635,6 @@ tool_dispatcher = "xml" 
"browser-credential" ); - let web_fetch_encrypted = stored.web_fetch.api_key.as_deref().unwrap(); - assert!(crate::security::SecretStore::is_encrypted( - web_fetch_encrypted - )); - assert_eq!( - store.decrypt(web_fetch_encrypted).unwrap(), - "web-fetch-credential" - ); - - let web_search_generic_encrypted = stored.web_search.api_key.as_deref().unwrap(); - assert!(crate::security::SecretStore::is_encrypted( - web_search_generic_encrypted - )); - assert_eq!( - store.decrypt(web_search_generic_encrypted).unwrap(), - "web-search-credential" - ); - let web_search_encrypted = stored.web_search.brave_api_key.as_deref().unwrap(); assert!(crate::security::SecretStore::is_encrypted( web_search_encrypted @@ -7417,8 +7345,6 @@ default_model = "legacy-model" std::env::set_var("WEB_SEARCH_ENABLED", "false"); std::env::set_var("WEB_SEARCH_PROVIDER", "brave"); - std::env::set_var("WEB_SEARCH_API_KEY", "web-search-api-key"); - std::env::set_var("WEB_SEARCH_API_URL", "https://search.example.com/v1"); std::env::set_var("WEB_SEARCH_MAX_RESULTS", "7"); std::env::set_var("WEB_SEARCH_TIMEOUT_SECS", "20"); std::env::set_var("BRAVE_API_KEY", "brave-test-key"); @@ -7427,14 +7353,6 @@ default_model = "legacy-model" assert!(!config.web_search.enabled); assert_eq!(config.web_search.provider, "brave"); - assert_eq!( - config.web_search.api_key.as_deref(), - Some("web-search-api-key") - ); - assert_eq!( - config.web_search.api_url.as_deref(), - Some("https://search.example.com/v1") - ); assert_eq!(config.web_search.max_results, 7); assert_eq!(config.web_search.timeout_secs, 20); assert_eq!( @@ -7444,39 +7362,11 @@ default_model = "legacy-model" std::env::remove_var("WEB_SEARCH_ENABLED"); std::env::remove_var("WEB_SEARCH_PROVIDER"); - std::env::remove_var("WEB_SEARCH_API_KEY"); - std::env::remove_var("WEB_SEARCH_API_URL"); std::env::remove_var("WEB_SEARCH_MAX_RESULTS"); std::env::remove_var("WEB_SEARCH_TIMEOUT_SECS"); std::env::remove_var("BRAVE_API_KEY"); } - #[test] - async fn 
env_override_web_fetch_provider_config() { - let _env_guard = env_override_lock().await; - let mut config = Config::default(); - - std::env::set_var("WEB_FETCH_PROVIDER", "firecrawl"); - std::env::set_var("WEB_FETCH_API_KEY", "web-fetch-api-key"); - std::env::set_var("WEB_FETCH_API_URL", "https://firecrawl.example.com/v1"); - - config.apply_env_overrides(); - - assert_eq!(config.web_fetch.provider, "firecrawl"); - assert_eq!( - config.web_fetch.api_key.as_deref(), - Some("web-fetch-api-key") - ); - assert_eq!( - config.web_fetch.api_url.as_deref(), - Some("https://firecrawl.example.com/v1") - ); - - std::env::remove_var("WEB_FETCH_PROVIDER"); - std::env::remove_var("WEB_FETCH_API_KEY"); - std::env::remove_var("WEB_FETCH_API_URL"); - } - #[test] async fn env_override_web_search_invalid_values_ignored() { let _env_guard = env_override_lock().await; diff --git a/src/onboard/wizard.rs b/src/onboard/wizard.rs index e5b8a330f..f2af33cae 100644 --- a/src/onboard/wizard.rs +++ b/src/onboard/wizard.rs @@ -145,6 +145,7 @@ pub async fn run_wizard(force: bool) -> Result { autonomy: AutonomyConfig::default(), security: crate::config::SecurityConfig::default(), runtime: RuntimeConfig::default(), + research: crate::config::ResearchPhaseConfig::default(), reliability: crate::config::ReliabilityConfig::default(), scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), @@ -174,7 +175,6 @@ pub async fn run_wizard(force: bool) -> Result { hardware: hardware_config, query_classification: crate::config::QueryClassificationConfig::default(), transcription: crate::config::TranscriptionConfig::default(), - agents_ipc: crate::config::AgentsIpcConfig::default(), }; println!( @@ -498,6 +498,7 @@ async fn run_quick_setup_with_home( autonomy: AutonomyConfig::default(), security: crate::config::SecurityConfig::default(), runtime: RuntimeConfig::default(), + research: crate::config::ResearchPhaseConfig::default(), reliability: 
crate::config::ReliabilityConfig::default(), scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), @@ -527,7 +528,6 @@ async fn run_quick_setup_with_home( hardware: crate::config::HardwareConfig::default(), query_classification: crate::config::QueryClassificationConfig::default(), transcription: crate::config::TranscriptionConfig::default(), - agents_ipc: crate::config::AgentsIpcConfig::default(), }; config.save().await?; diff --git a/tests/agent_e2e.rs b/tests/agent_e2e.rs index a681d4181..dfa18a378 100644 --- a/tests/agent_e2e.rs +++ b/tests/agent_e2e.rs @@ -669,7 +669,7 @@ async fn e2e_empty_memory_context_passthrough() { /// Requires valid OAuth credentials in `~/.zeroclaw/`. /// Run manually: `cargo test e2e_live_openai_codex_multi_turn -- --ignored` #[tokio::test] -#[ignore] +#[ignore = "requires live OpenAI Codex API key"] async fn e2e_live_openai_codex_multi_turn() { use zeroclaw::providers::openai_codex::OpenAiCodexProvider; use zeroclaw::providers::traits::Provider; @@ -706,3 +706,412 @@ async fn e2e_live_openai_codex_multi_turn() { "Model should recall 'zephyr' from history, got: {r2}", ); } + +// ═════════════════════════════════════════════════════════════════════════════ +// Live integration test β€” Research Phase with real provider +// ═════════════════════════════════════════════════════════════════════════════ + +/// Tests the research phase module with a real LLM provider. +/// Verifies that: +/// 1. should_trigger correctly identifies research-worthy messages +/// 2. run_research_phase executes tool calls and gathers context +/// +/// Requires valid credentials in `~/.zeroclaw/`. 
+/// Run manually: `cargo test e2e_live_research_phase -- --ignored --nocapture` +#[tokio::test] +#[ignore = "requires live provider API key"] +async fn e2e_live_research_phase() { + use std::sync::Arc; + use zeroclaw::agent::research::{run_research_phase, should_trigger}; + use zeroclaw::config::{ResearchPhaseConfig, ResearchTrigger}; + use zeroclaw::observability::NoopObserver; + use zeroclaw::providers::openai_codex::OpenAiCodexProvider; + use zeroclaw::providers::traits::Provider; + use zeroclaw::tools::{Tool, ToolResult}; + + // ── Test should_trigger ── + let config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Keywords, + keywords: vec!["find".into(), "search".into(), "check".into()], + min_message_length: 20, + max_iterations: 3, + show_progress: true, + system_prompt_prefix: String::new(), + }; + + assert!( + should_trigger(&config, "find the main function"), + "Should trigger on 'find' keyword" + ); + assert!( + should_trigger(&config, "please search for errors"), + "Should trigger on 'search' keyword" + ); + assert!( + !should_trigger(&config, "hello world"), + "Should NOT trigger without keywords" + ); + + // ── Test with Always trigger ── + let always_config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Always, + ..config.clone() + }; + assert!( + should_trigger(&always_config, "any message"), + "Always trigger should match any message" + ); + + // ── Test research phase with live provider ── + // Create a simple echo tool for testing + struct EchoTool; + + #[async_trait::async_trait] + impl Tool for EchoTool { + fn name(&self) -> &str { + "echo" + } + fn description(&self) -> &str { + "Echoes the input message back. Use for testing." 
+ } + fn parameters_schema(&self) -> serde_json::Value { + serde_json::json!({ + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Message to echo" + } + }, + "required": ["message"] + }) + } + async fn execute(&self, args: serde_json::Value) -> anyhow::Result { + let msg = args + .get("message") + .and_then(|v| v.as_str()) + .unwrap_or("(empty)"); + Ok(ToolResult { + success: true, + output: format!("Echo: {}", msg), + error: None, + }) + } + } + + let provider = OpenAiCodexProvider::new(&ProviderRuntimeOptions::default(), None) + .expect("OpenAI Codex provider should initialize for research test"); + let tools: Vec> = vec![Box::new(EchoTool)]; + let observer: Arc = Arc::new(NoopObserver); + + let research_config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Always, + max_iterations: 2, + show_progress: true, + ..Default::default() + }; + + println!("\n=== Starting Research Phase Test ===\n"); + + let result = run_research_phase( + &research_config, + &provider, + &tools, + "Use the echo tool to say 'research works'", + "gpt-5.3-codex", + 0.7, + observer, + ) + .await; + + match result { + Ok(research_result) => { + println!("Research completed successfully!"); + println!(" Duration: {:?}", research_result.duration); + println!(" Tool calls: {}", research_result.tool_call_count); + println!(" Context length: {} chars", research_result.context.len()); + + for summary in &research_result.tool_summaries { + println!( + " - Tool: {} | Success: {} | Args: {}", + summary.tool_name, summary.success, summary.arguments_preview + ); + } + + // The model should have called the echo tool at least once + // OR provided a research complete summary + assert!( + research_result.tool_call_count > 0 || !research_result.context.is_empty(), + "Research should produce tool calls or context" + ); + } + Err(e) => { + // Network/API errors are expected if credentials aren't configured + println!("Research phase error (may be 
expected): {}", e); + } + } + + println!("\n=== Research Phase Test Complete ===\n"); +} + +// ═════════════════════════════════════════════════════════════════════════════ +// Full Agent integration test β€” Research Phase in Agent.turn() +// ═════════════════════════════════════════════════════════════════════════════ + +/// Validates that the Agent correctly integrates research phase: +/// 1. Research phase is triggered based on config +/// 2. Research context is prepended to user message +/// 3. Provider receives enriched message +/// +/// This test uses mocks to verify the integration without external dependencies. +#[tokio::test] +async fn e2e_agent_research_phase_integration() { + use zeroclaw::config::{ResearchPhaseConfig, ResearchTrigger}; + + // Create a recording provider to capture what the agent sends + let (provider, recorded) = RecordingProvider::new(vec![ + text_response("I'll research that for you"), + text_response("Based on my research, here's the answer"), + ]); + + // Build agent with research config enabled (Keywords trigger) + let research_config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Keywords, + keywords: vec!["search".into(), "find".into(), "look".into()], + min_message_length: 10, + max_iterations: 2, + show_progress: false, + system_prompt_prefix: String::new(), + }; + + let mut agent = Agent::builder() + .provider(Box::new(provider)) + .tools(vec![Box::new(EchoTool)]) + .memory(make_memory()) + .observer(make_observer()) + .tool_dispatcher(Box::new(NativeToolDispatcher)) + .workspace_dir(std::env::temp_dir()) + .research_config(research_config) + .build() + .unwrap(); + + // This message should NOT trigger research (no keywords) + let response1 = agent.turn("hello there").await.unwrap(); + assert!(!response1.is_empty()); + + // Verify first message was sent without research enrichment + { + let requests = recorded.lock().unwrap(); + assert_eq!(requests.len(), 1); + let user_msg = requests[0].iter().find(|m| 
m.role == "user").unwrap(); + // Should be plain message without research prefix + assert!( + !user_msg.content.contains("[Research"), + "Message without keywords should not have research context" + ); + } +} + +/// Validates that Always trigger activates research on every message. +#[tokio::test] +async fn e2e_agent_research_always_trigger() { + use zeroclaw::config::{ResearchPhaseConfig, ResearchTrigger}; + + let (provider, recorded) = RecordingProvider::new(vec![ + // Research phase response + text_response("Research complete"), + // Main response + text_response("Here's your answer with research context"), + ]); + + let research_config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Always, + keywords: vec![], + min_message_length: 0, + max_iterations: 1, + show_progress: false, + system_prompt_prefix: String::new(), + }; + + let mut agent = Agent::builder() + .provider(Box::new(provider)) + .tools(vec![]) + .memory(make_memory()) + .observer(make_observer()) + .tool_dispatcher(Box::new(NativeToolDispatcher)) + .workspace_dir(std::env::temp_dir()) + .research_config(research_config) + .build() + .unwrap(); + + let response = agent.turn("any message").await.unwrap(); + assert!(!response.is_empty()); + + // With Always trigger, research should have been attempted + let requests = recorded.lock().unwrap(); + // At minimum 1 request (main turn), possibly 2 if research phase ran + assert!( + !requests.is_empty(), + "Provider should have received at least one request" + ); +} + +/// Validates that research phase works with prompt-guided providers (non-native tools). +/// The provider returns XML tool calls in text, which should be parsed and executed. +#[tokio::test] +async fn e2e_agent_research_prompt_guided() { + use zeroclaw::config::{ResearchPhaseConfig, ResearchTrigger}; + use zeroclaw::providers::traits::ProviderCapabilities; + + /// Mock provider that does NOT support native tools (like Gemini). 
+ /// Returns XML tool calls in text that should be parsed by research phase. + struct PromptGuidedProvider { + responses: Mutex>, + } + + impl PromptGuidedProvider { + fn new(responses: Vec) -> Self { + Self { + responses: Mutex::new(responses), + } + } + } + + #[async_trait] + impl Provider for PromptGuidedProvider { + fn capabilities(&self) -> ProviderCapabilities { + ProviderCapabilities { + native_tool_calling: false, // Key difference! + vision: false, + } + } + + async fn chat_with_system( + &self, + _system_prompt: Option<&str>, + _message: &str, + _model: &str, + _temperature: f64, + ) -> Result { + Ok("fallback".into()) + } + + async fn chat( + &self, + _request: ChatRequest<'_>, + _model: &str, + _temperature: f64, + ) -> Result { + let mut guard = self.responses.lock().unwrap(); + if guard.is_empty() { + return Ok(ChatResponse { + text: Some("done".into()), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }); + } + Ok(guard.remove(0)) + } + } + + // Response 1: Research phase returns XML tool call + let research_response = ChatResponse { + text: Some( + r#"I'll use the echo tool to test. + +{"name": "echo", "arguments": {"message": "research test"}} +"# + .to_string(), + ), + tool_calls: vec![], // Empty! 
Tool call is in text + usage: None, + reasoning_content: None, + }; + + // Response 2: Research complete + let research_complete = ChatResponse { + text: Some("[RESEARCH COMPLETE]\n- Found: echo works".to_string()), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }; + + // Response 3: Main turn response + let main_response = text_response("Based on research, here's the answer"); + + let provider = + PromptGuidedProvider::new(vec![research_response, research_complete, main_response]); + + let research_config = ResearchPhaseConfig { + enabled: true, + trigger: ResearchTrigger::Always, + keywords: vec![], + min_message_length: 0, + max_iterations: 3, + show_progress: false, + system_prompt_prefix: String::new(), + }; + + let mut agent = Agent::builder() + .provider(Box::new(provider)) + .tools(vec![Box::new(EchoTool)]) + .memory(make_memory()) + .observer(make_observer()) + .tool_dispatcher(Box::new(NativeToolDispatcher)) + .workspace_dir(std::env::temp_dir()) + .research_config(research_config) + .build() + .unwrap(); + + let response = agent.turn("test prompt-guided research").await.unwrap(); + assert!( + !response.is_empty(), + "Should get response after prompt-guided research" + ); +} + +/// Validates that disabled research phase skips research entirely. 
+#[tokio::test] +async fn e2e_agent_research_disabled() { + use zeroclaw::config::{ResearchPhaseConfig, ResearchTrigger}; + + let (provider, recorded) = RecordingProvider::new(vec![text_response("Direct response")]); + + let research_config = ResearchPhaseConfig { + enabled: false, // Disabled + trigger: ResearchTrigger::Always, + keywords: vec![], + min_message_length: 0, + max_iterations: 5, + show_progress: true, + system_prompt_prefix: String::new(), + }; + + let mut agent = Agent::builder() + .provider(Box::new(provider)) + .tools(vec![Box::new(EchoTool)]) + .memory(make_memory()) + .observer(make_observer()) + .tool_dispatcher(Box::new(NativeToolDispatcher)) + .workspace_dir(std::env::temp_dir()) + .research_config(research_config) + .build() + .unwrap(); + + let response = agent.turn("find something").await.unwrap(); + assert_eq!(response, "Direct response"); + + // Only 1 request should be made (main turn, no research) + let requests = recorded.lock().unwrap(); + assert_eq!( + requests.len(), + 1, + "Disabled research should result in only 1 provider call" + ); +}