From b4a2afb04ab174be97a1aee670fbd3b9b3c9c02e Mon Sep 17 00:00:00 2001 From: Argenis Date: Fri, 20 Mar 2026 01:59:43 -0400 Subject: [PATCH] feat(tools): add text browser tool for headless environments (#4031) * feat(tools): add text browser tool for headless environments (#3879) * fix(tools): remove redundant match arm in text_browser clippy lint * ci: trigger fresh workflow run --- src/config/mod.rs | 4 +- src/config/schema.rs | 40 ++++ src/onboard/wizard.rs | 2 + src/tools/mod.rs | 11 + src/tools/text_browser.rs | 409 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 464 insertions(+), 2 deletions(-) create mode 100644 src/tools/text_browser.rs diff --git a/src/config/mod.rs b/src/config/mod.rs index 27fb51632..2d02bf0d0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -25,8 +25,8 @@ pub use schema::{ SchedulerConfig, SecretsConfig, SecurityConfig, SecurityOpsConfig, SkillCreationConfig, SkillsConfig, SkillsPromptInjectionMode, SlackConfig, StorageConfig, StorageProviderConfig, StorageProviderSection, StreamMode, SwarmConfig, SwarmStrategy, TelegramConfig, - ToolFilterGroup, ToolFilterGroupMode, TranscriptionConfig, TtsConfig, TunnelConfig, - WebFetchConfig, WebSearchConfig, WebhookConfig, WorkspaceConfig, + TextBrowserConfig, ToolFilterGroup, ToolFilterGroupMode, TranscriptionConfig, TtsConfig, + TunnelConfig, WebFetchConfig, WebSearchConfig, WebhookConfig, WorkspaceConfig, }; pub fn name_and_presence(channel: Option<&T>) -> (&'static str, bool) { diff --git a/src/config/schema.rs b/src/config/schema.rs index 1719ac5a0..d37e586d1 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -261,6 +261,10 @@ pub struct Config { #[serde(default)] pub web_fetch: WebFetchConfig, + /// Text browser tool configuration (`[text_browser]`). + #[serde(default)] + pub text_browser: TextBrowserConfig, + /// Web search tool configuration (`[web_search]`). #[serde(default)] pub web_search: WebSearchConfig, @@ -2089,6 +2093,39 @@ impl Default for WebFetchConfig { } } +// ── Text browser ───────────────────────────────────────────────── + +/// Text browser tool configuration (`[text_browser]` section). +/// +/// Uses text-based browsers (lynx, links, w3m) to render web pages as plain +/// text. Designed for headless/SSH environments without graphical browsers. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct TextBrowserConfig { + /// Enable `text_browser` tool + #[serde(default)] + pub enabled: bool, + /// Preferred text browser ("lynx", "links", or "w3m"). If unset, auto-detects. + #[serde(default)] + pub preferred_browser: Option, + /// Request timeout in seconds (default: 30) + #[serde(default = "default_text_browser_timeout_secs")] + pub timeout_secs: u64, +} + +fn default_text_browser_timeout_secs() -> u64 { + 30 +} + +impl Default for TextBrowserConfig { + fn default() -> Self { + Self { + enabled: false, + preferred_browser: None, + timeout_secs: default_text_browser_timeout_secs(), + } + } +} + // ── Web search ─────────────────────────────────────────────────── /// Web search tool configuration (`[web_search]` section). @@ -6185,6 +6222,7 @@ impl Default for Config { http_request: HttpRequestConfig::default(), multimodal: MultimodalConfig::default(), web_fetch: WebFetchConfig::default(), + text_browser: TextBrowserConfig::default(), web_search: WebSearchConfig::default(), project_intel: ProjectIntelConfig::default(), google_workspace: GoogleWorkspaceConfig::default(), @@ -8902,6 +8940,7 @@ default_temperature = 0.7 http_request: HttpRequestConfig::default(), multimodal: MultimodalConfig::default(), web_fetch: WebFetchConfig::default(), + text_browser: TextBrowserConfig::default(), web_search: WebSearchConfig::default(), project_intel: ProjectIntelConfig::default(), google_workspace: GoogleWorkspaceConfig::default(), @@ -9238,6 +9277,7 @@ tool_dispatcher = "xml" http_request: HttpRequestConfig::default(), multimodal: MultimodalConfig::default(), web_fetch: WebFetchConfig::default(), + text_browser: TextBrowserConfig::default(), web_search: WebSearchConfig::default(), project_intel: ProjectIntelConfig::default(), google_workspace: GoogleWorkspaceConfig::default(), diff --git a/src/onboard/wizard.rs b/src/onboard/wizard.rs index 7ec287d3d..56f8cce24 100644 --- a/src/onboard/wizard.rs +++ b/src/onboard/wizard.rs @@ -171,6 +171,7 @@ pub async fn run_wizard(force: bool) -> Result { http_request: crate::config::HttpRequestConfig::default(), multimodal: crate::config::MultimodalConfig::default(), web_fetch: crate::config::WebFetchConfig::default(), + text_browser: crate::config::TextBrowserConfig::default(), web_search: crate::config::WebSearchConfig::default(), project_intel: crate::config::ProjectIntelConfig::default(), google_workspace: crate::config::GoogleWorkspaceConfig::default(), @@ -589,6 +590,7 @@ async fn run_quick_setup_with_home( http_request: crate::config::HttpRequestConfig::default(), multimodal: crate::config::MultimodalConfig::default(), web_fetch: crate::config::WebFetchConfig::default(), + text_browser: crate::config::TextBrowserConfig::default(), web_search: crate::config::WebSearchConfig::default(), project_intel: crate::config::ProjectIntelConfig::default(), google_workspace: crate::config::GoogleWorkspaceConfig::default(), diff --git a/src/tools/mod.rs b/src/tools/mod.rs index eb5780355..49ffb6e91 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -76,6 +76,7 @@ pub mod screenshot; pub mod security_ops; pub mod shell; pub mod swarm; +pub mod text_browser; pub mod tool_search; pub mod traits; pub mod web_fetch; @@ -141,6 +142,7 @@ pub use screenshot::ScreenshotTool; pub use security_ops::SecurityOpsTool; pub use shell::ShellTool; pub use swarm::SwarmTool; +pub use text_browser::TextBrowserTool; pub use tool_search::ToolSearchTool; pub use traits::Tool; #[allow(unused_imports)] @@ -400,6 +402,15 @@ pub fn all_tools_with_runtime( ))); } + // Text browser tool (headless text-based browser rendering) + if root_config.text_browser.enabled { + tool_arcs.push(Arc::new(TextBrowserTool::new( + security.clone(), + root_config.text_browser.preferred_browser.clone(), + root_config.text_browser.timeout_secs, + ))); + } + // Web search tool (enabled by default for GLM and other models) if root_config.web_search.enabled { tool_arcs.push(Arc::new(WebSearchTool::new_with_config( diff --git a/src/tools/text_browser.rs b/src/tools/text_browser.rs new file mode 100644 index 000000000..a9bafa7b1 --- /dev/null +++ b/src/tools/text_browser.rs @@ -0,0 +1,409 @@ +use super::traits::{Tool, ToolResult}; +use crate::security::SecurityPolicy; +use async_trait::async_trait; +use serde_json::json; +use std::sync::Arc; +use std::time::Duration; + +/// Text browser tool: renders web pages as plain text using text-based browsers +/// (lynx, links, w3m). Ideal for headless/SSH environments where graphical +/// browsers are unavailable. +pub struct TextBrowserTool { + security: Arc, + preferred_browser: Option, + timeout_secs: u64, + max_response_size: usize, +} + +/// The text browsers we support, in order of auto-detection preference. +const SUPPORTED_BROWSERS: &[&str] = &["lynx", "links", "w3m"]; + +impl TextBrowserTool { + pub fn new( + security: Arc, + preferred_browser: Option, + timeout_secs: u64, + ) -> Self { + Self { + security, + preferred_browser, + timeout_secs, + max_response_size: 500_000, // 500KB, consistent with web_fetch + } + } + + fn validate_url(url: &str) -> anyhow::Result { + let url = url.trim(); + + if url.is_empty() { + anyhow::bail!("URL cannot be empty"); + } + + if url.chars().any(char::is_whitespace) { + anyhow::bail!("URL cannot contain whitespace"); + } + + if !url.starts_with("http://") && !url.starts_with("https://") { + anyhow::bail!("Only http:// and https:// URLs are allowed"); + } + + Ok(url.to_string()) + } + + fn truncate_response(&self, text: &str) -> String { + if text.len() > self.max_response_size { + let mut truncated = text + .chars() + .take(self.max_response_size) + .collect::(); + truncated.push_str("\n\n... [Response truncated due to size limit] ..."); + truncated + } else { + text.to_string() + } + } + + /// Detect which text browser is available on the system. + async fn detect_browser() -> Option { + for browser in SUPPORTED_BROWSERS { + if let Ok(output) = tokio::process::Command::new("which") + .arg(browser) + .output() + .await + { + if output.status.success() { + return Some((*browser).to_string()); + } + } + } + None + } + + /// Resolve which browser to use: prefer configured, then auto-detect. + async fn resolve_browser(&self, requested: Option<&str>) -> anyhow::Result { + // If the caller explicitly requested a browser via the tool parameter, use it. + if let Some(browser) = requested { + let browser = browser.trim().to_lowercase(); + if !SUPPORTED_BROWSERS.contains(&browser.as_str()) { + anyhow::bail!( + "Unsupported text browser '{browser}'. Supported: {}", + SUPPORTED_BROWSERS.join(", ") + ); + } + // Verify it's installed + let installed = tokio::process::Command::new("which") + .arg(&browser) + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false); + if !installed { + anyhow::bail!("Requested text browser '{browser}' is not installed"); + } + return Ok(browser); + } + + // If a preferred browser is set in config, try it first. + if let Some(ref preferred) = self.preferred_browser { + let preferred = preferred.trim().to_lowercase(); + if SUPPORTED_BROWSERS.contains(&preferred.as_str()) { + let installed = tokio::process::Command::new("which") + .arg(&preferred) + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false); + if installed { + return Ok(preferred); + } + tracing::warn!( + "Configured preferred text browser '{preferred}' is not installed, falling back to auto-detect" + ); + } + } + + // Auto-detect + Self::detect_browser().await.ok_or_else(|| { + anyhow::anyhow!( + "No text browser found. Install one of: {}", + SUPPORTED_BROWSERS.join(", ") + ) + }) + } + + /// Build the command arguments for the selected browser with `-dump` flag. + fn build_dump_args(_browser: &str, url: &str) -> Vec { + // All supported browsers (lynx, links, w3m) use the same `-dump` flag + vec!["-dump".to_string(), url.to_string()] + } +} + +#[async_trait] +impl Tool for TextBrowserTool { + fn name(&self) -> &str { + "text_browser" + } + + fn description(&self) -> &str { + "Render a web page as plain text using a text-based browser (lynx, links, or w3m). \ + Ideal for headless/SSH environments without a graphical browser. \ + Auto-detects available browser or uses a configured preference." + } + + fn parameters_schema(&self) -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "The HTTP or HTTPS URL to render as plain text" + }, + "browser": { + "type": "string", + "description": "Text browser to use: \"lynx\", \"links\", or \"w3m\". If omitted, auto-detects an available browser.", + "enum": ["lynx", "links", "w3m"] + } + }, + "required": ["url"] + }) + } + + async fn execute(&self, args: serde_json::Value) -> anyhow::Result { + let url = args + .get("url") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'url' parameter"))?; + + if !self.security.can_act() { + return Ok(ToolResult { + success: false, + output: String::new(), + error: Some("Action blocked: autonomy is read-only".into()), + }); + } + + if !self.security.record_action() { + return Ok(ToolResult { + success: false, + output: String::new(), + error: Some("Action blocked: rate limit exceeded".into()), + }); + } + + let url = match Self::validate_url(url) { + Ok(v) => v, + Err(e) => { + return Ok(ToolResult { + success: false, + output: String::new(), + error: Some(e.to_string()), + }) + } + }; + + let requested_browser = args.get("browser").and_then(|v| v.as_str()); + + let browser = match self.resolve_browser(requested_browser).await { + Ok(b) => b, + Err(e) => { + return Ok(ToolResult { + success: false, + output: String::new(), + error: Some(e.to_string()), + }) + } + }; + + let dump_args = Self::build_dump_args(&browser, &url); + + let timeout = Duration::from_secs(if self.timeout_secs == 0 { + tracing::warn!("text_browser: timeout_secs is 0, using safe default of 30s"); + 30 + } else { + self.timeout_secs + }); + + let result = tokio::time::timeout( + timeout, + tokio::process::Command::new(&browser) + .args(&dump_args) + .output(), + ) + .await; + + match result { + Ok(Ok(output)) => { + if output.status.success() { + let text = String::from_utf8_lossy(&output.stdout).into_owned(); + let text = self.truncate_response(&text); + Ok(ToolResult { + success: true, + output: text, + error: None, + }) + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + Ok(ToolResult { + success: false, + output: String::new(), + error: Some(format!( + "{browser} exited with status {}: {}", + output.status, + stderr.trim() + )), + }) + } + } + Ok(Err(e)) => Ok(ToolResult { + success: false, + output: String::new(), + error: Some(format!("Failed to execute {browser}: {e}")), + }), + Err(_) => Ok(ToolResult { + success: false, + output: String::new(), + error: Some(format!( + "{browser} timed out after {} seconds", + timeout.as_secs() + )), + }), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::security::{AutonomyLevel, SecurityPolicy}; + + fn test_tool() -> TextBrowserTool { + let security = Arc::new(SecurityPolicy { + autonomy: AutonomyLevel::Supervised, + ..SecurityPolicy::default() + }); + TextBrowserTool::new(security, None, 30) + } + + #[test] + fn name_is_text_browser() { + let tool = test_tool(); + assert_eq!(tool.name(), "text_browser"); + } + + #[test] + fn parameters_schema_requires_url() { + let tool = test_tool(); + let schema = tool.parameters_schema(); + assert!(schema["properties"]["url"].is_object()); + let required = schema["required"].as_array().unwrap(); + assert!(required.iter().any(|v| v.as_str() == Some("url"))); + } + + #[test] + fn parameters_schema_has_optional_browser() { + let tool = test_tool(); + let schema = tool.parameters_schema(); + assert!(schema["properties"]["browser"].is_object()); + let required = schema["required"].as_array().unwrap(); + assert!(!required.iter().any(|v| v.as_str() == Some("browser"))); + } + + #[test] + fn validate_url_accepts_http() { + let got = TextBrowserTool::validate_url("http://example.com/page").unwrap(); + assert_eq!(got, "http://example.com/page"); + } + + #[test] + fn validate_url_accepts_https() { + let got = TextBrowserTool::validate_url("https://example.com/page").unwrap(); + assert_eq!(got, "https://example.com/page"); + } + + #[test] + fn validate_url_rejects_empty() { + let err = TextBrowserTool::validate_url("").unwrap_err().to_string(); + assert!(err.contains("empty")); + } + + #[test] + fn validate_url_rejects_ftp() { + let err = TextBrowserTool::validate_url("ftp://example.com") + .unwrap_err() + .to_string(); + assert!(err.contains("http://") || err.contains("https://")); + } + + #[test] + fn validate_url_rejects_whitespace() { + let err = TextBrowserTool::validate_url("https://example.com/hello world") + .unwrap_err() + .to_string(); + assert!(err.contains("whitespace")); + } + + #[test] + fn truncate_within_limit() { + let tool = test_tool(); + let text = "hello world"; + assert_eq!(tool.truncate_response(text), "hello world"); + } + + #[test] + fn truncate_over_limit() { + let security = Arc::new(SecurityPolicy::default()); + let mut tool = TextBrowserTool::new(security, None, 30); + tool.max_response_size = 10; + let text = "hello world this is long"; + let truncated = tool.truncate_response(text); + assert!(truncated.contains("[Response truncated")); + } + + #[test] + fn build_dump_args_lynx() { + let args = TextBrowserTool::build_dump_args("lynx", "https://example.com"); + assert_eq!(args, vec!["-dump", "https://example.com"]); + } + + #[test] + fn build_dump_args_links() { + let args = TextBrowserTool::build_dump_args("links", "https://example.com"); + assert_eq!(args, vec!["-dump", "https://example.com"]); + } + + #[test] + fn build_dump_args_w3m() { + let args = TextBrowserTool::build_dump_args("w3m", "https://example.com"); + assert_eq!(args, vec!["-dump", "https://example.com"]); + } + + #[tokio::test] + async fn blocks_readonly_mode() { + let security = Arc::new(SecurityPolicy { + autonomy: AutonomyLevel::ReadOnly, + ..SecurityPolicy::default() + }); + let tool = TextBrowserTool::new(security, None, 30); + let result = tool + .execute(json!({"url": "https://example.com"})) + .await + .unwrap(); + assert!(!result.success); + assert!(result.error.unwrap().contains("read-only")); + } + + #[tokio::test] + async fn blocks_rate_limited() { + let security = Arc::new(SecurityPolicy { + max_actions_per_hour: 0, + ..SecurityPolicy::default() + }); + let tool = TextBrowserTool::new(security, None, 30); + let result = tool + .execute(json!({"url": "https://example.com"})) + .await + .unwrap(); + assert!(!result.success); + assert!(result.error.unwrap().contains("rate limit")); + } +}