zeroclaw/src/tools/file_read.rs
Argenis a606f308f5
fix(security): respect allowed_roots in tool-level path pre-checks (#3434)
When workspace_only=true and allowed_roots is configured, several tools
(file_read, content_search, glob_search) rejected absolute paths before
the allowed_roots allowlist was consulted. Additionally, tilde paths
(~/...) passed is_path_allowed but were then incorrectly joined with
workspace_dir as literal relative paths.

Changes:
- Add SecurityPolicy::resolve_tool_path() to properly expand tilde
  paths and handle absolute vs relative path resolution for tools
- Add SecurityPolicy::is_under_allowed_root() for tool pre-checks to
  consult the allowed_roots allowlist before rejecting absolute paths
- Update file_read to use resolve_tool_path instead of workspace_dir.join
- Update content_search and glob_search absolute-path pre-checks to
  allow paths under allowed_roots
- Add tests covering workspace_only + allowed_roots scenarios

Closes #3082
2026-03-13 16:15:30 -04:00

1084 lines
39 KiB
Rust

use super::traits::{Tool, ToolResult};
use crate::security::SecurityPolicy;
use async_trait::async_trait;
use serde_json::json;
use std::sync::Arc;
/// Largest file size, in bytes, that `file_read` accepts (10 MiB). Files whose
/// metadata reports a greater length are rejected with a "File too large" error.
const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024;
/// Read file contents with path sandboxing
///
/// Every request is checked against the shared [`SecurityPolicy`] (rate limit,
/// path allow-list, and a second check on the canonicalized path) before any
/// file content is returned.
pub struct FileReadTool {
/// Policy consulted for rate limiting, path validation and path resolution.
security: Arc<SecurityPolicy>,
}
impl FileReadTool {
/// Creates a `file_read` tool governed by the given shared security policy.
pub fn new(security: Arc<SecurityPolicy>) -> Self {
Self { security }
}
}
#[async_trait]
impl Tool for FileReadTool {
    /// Stable identifier under which the tool is registered.
    fn name(&self) -> &str {
        "file_read"
    }

    /// Human/model-facing summary of what the tool does.
    fn description(&self) -> &str {
        "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion."
    }

    /// JSON schema of the accepted arguments: a required `path` plus the
    /// optional 1-based `offset` and the optional line `limit`.
    fn parameters_schema(&self) -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file. Relative paths resolve from workspace; outside paths require policy allowlist."
                },
                "offset": {
                    "type": "integer",
                    "description": "Starting line number (1-based, default: 1)"
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of lines to return (default: all)"
                }
            },
            "required": ["path"]
        })
    }

    /// Reads the file named by `args["path"]` and returns its content.
    ///
    /// UTF-8 files are returned as numbered lines, optionally windowed by
    /// `offset` / `limit`. Non-UTF-8 files go through PDF text extraction
    /// and, failing that, lossy UTF-8 conversion; `offset` / `limit` are not
    /// applied on that path.
    ///
    /// # Errors
    ///
    /// Returns `Err` when the `path` argument is missing or not a string, or
    /// when reading the file's bytes fails. Policy rejections, rate limiting,
    /// unresolved paths and oversized files are reported as
    /// `Ok(ToolResult { success: false, .. })` instead.
    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
        let path = args
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;

        if self.security.is_rate_limited() {
            return Ok(read_failure(
                "Rate limit exceeded: too many actions in the last hour",
            ));
        }

        // Security check: validate the requested path against the policy
        // before touching the filesystem.
        if !self.security.is_path_allowed(path) {
            return Ok(read_failure(format!(
                "Path not allowed by security policy: {path}"
            )));
        }

        // Record action BEFORE canonicalization so that every non-trivially-rejected
        // request consumes rate limit budget. This prevents attackers from probing
        // path existence (via canonicalize errors) without rate limit cost.
        if !self.security.record_action() {
            return Ok(read_failure(
                "Rate limit exceeded: action budget exhausted",
            ));
        }

        let full_path = self.security.resolve_tool_path(path);

        // Resolve path before reading to block symlink escapes.
        let resolved_path = match tokio::fs::canonicalize(&full_path).await {
            Ok(p) => p,
            Err(e) => {
                return Ok(read_failure(format!("Failed to resolve file path: {e}")));
            }
        };

        if !self.security.is_resolved_path_allowed(&resolved_path) {
            return Ok(read_failure(
                self.security
                    .resolved_path_violation_message(&resolved_path),
            ));
        }

        // Check file size AFTER canonicalization to prevent TOCTOU symlink bypass
        match tokio::fs::metadata(&resolved_path).await {
            Ok(meta) if meta.len() > MAX_FILE_SIZE_BYTES => {
                return Ok(file_too_large(meta.len()));
            }
            Ok(_) => {}
            Err(e) => {
                return Ok(read_failure(format!("Failed to read file metadata: {e}")));
            }
        }

        // Read the raw bytes exactly once and decode in memory. Reading via
        // `read_to_string` and then re-reading with `read` on failure doubled
        // the I/O for binary files, let the file change between the two reads,
        // and treated every I/O error as "not valid UTF-8".
        let bytes = tokio::fs::read(&resolved_path)
            .await
            .map_err(|e| anyhow::anyhow!("Failed to read file: {e}"))?;

        // The metadata length can be stale (file grew after the check) or not
        // reflect the real content size, so enforce the cap on what was
        // actually read as well.
        let bytes_read = u64::try_from(bytes.len()).unwrap_or(u64::MAX);
        if bytes_read > MAX_FILE_SIZE_BYTES {
            return Ok(file_too_large(bytes_read));
        }

        match String::from_utf8(bytes) {
            Ok(contents) => {
                let offset = args.get("offset").and_then(|v| v.as_u64());
                let limit = args.get("limit").and_then(|v| v.as_u64());
                Ok(read_success(render_numbered_lines(
                    &contents, offset, limit,
                )))
            }
            Err(not_utf8) => {
                // Not valid UTF-8 — recover the bytes and try to extract text
                let bytes = not_utf8.into_bytes();
                if let Some(text) = try_extract_pdf_text(&bytes) {
                    return Ok(read_success(text));
                }
                // Lossy fallback — replaces invalid bytes with U+FFFD
                Ok(read_success(String::from_utf8_lossy(&bytes).into_owned()))
            }
        }
    }
}

/// Builds a successful `ToolResult` carrying `output`.
fn read_success(output: String) -> ToolResult {
    ToolResult {
        success: true,
        output,
        error: None,
    }
}

/// Builds a failed `ToolResult` with an empty output and the given message.
fn read_failure(message: impl Into<String>) -> ToolResult {
    ToolResult {
        success: false,
        output: String::new(),
        error: Some(message.into()),
    }
}

/// Builds the failure reported when a file of `len` bytes exceeds the size cap.
fn file_too_large(len: u64) -> ToolResult {
    read_failure(format!(
        "File too large: {len} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)"
    ))
}

/// Renders `contents` as `N: line` rows followed by a summary footer.
///
/// `offset` is the 1-based first line (values below 1 are treated as 1) and
/// `limit` caps the number of lines; `None` means "from the start" and
/// "to the end" respectively. An empty file yields an empty string, and a
/// window that selects nothing yields a `[No lines in range, ...]` notice.
fn render_numbered_lines(contents: &str, offset: Option<u64>, limit: Option<u64>) -> String {
    let lines: Vec<&str> = contents.lines().collect();
    let total = lines.len();
    if total == 0 {
        return String::new();
    }

    // Convert the 1-based offset to a 0-based index, saturating on platforms
    // where `usize` is narrower than `u64`, then clamp to the file length.
    let start = offset
        .map(|v| {
            usize::try_from(v.max(1))
                .unwrap_or(usize::MAX)
                .saturating_sub(1)
        })
        .unwrap_or(0)
        .min(total);
    let end = match limit {
        Some(l) => {
            let limit = usize::try_from(l).unwrap_or(usize::MAX);
            start.saturating_add(limit).min(total)
        }
        None => total,
    };

    if start >= end {
        return format!("[No lines in range, file has {total} lines]");
    }

    let numbered = lines[start..end]
        .iter()
        .enumerate()
        .map(|(i, line)| format!("{}: {}", start + i + 1, line))
        .collect::<Vec<_>>()
        .join("\n");
    let summary = if start > 0 || end < total {
        format!("\n[Lines {}-{} of {total}]", start + 1, end)
    } else {
        format!("\n[{total} lines total]")
    };
    format!("{numbered}{summary}")
}
/// Attempts to pull plain text out of an in-memory PDF.
///
/// Yields `None` when the data does not begin with the `%PDF-` magic, when
/// `pdf_extract` fails, or when the extracted text is only whitespace.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|extracted| !extracted.trim().is_empty())
}

/// Stub used when the `rag-pdf` feature is disabled: never extracts anything.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
    None
}
#[cfg(test)]
mod tests {
use super::*;
use crate::security::{AutonomyLevel, SecurityPolicy};
/// Builds a supervised policy rooted at `workspace`; every other setting
/// comes from `SecurityPolicy::default()`.
fn test_security(workspace: std::path::PathBuf) -> Arc<SecurityPolicy> {
    let policy = SecurityPolicy {
        workspace_dir: workspace,
        autonomy: AutonomyLevel::Supervised,
        ..SecurityPolicy::default()
    };
    Arc::new(policy)
}
/// Builds a policy rooted at `workspace` with an explicit autonomy level and
/// hourly action budget; remaining settings use the defaults.
fn test_security_with(
    workspace: std::path::PathBuf,
    autonomy: AutonomyLevel,
    max_actions_per_hour: u32,
) -> Arc<SecurityPolicy> {
    let policy = SecurityPolicy {
        max_actions_per_hour,
        workspace_dir: workspace,
        autonomy,
        ..SecurityPolicy::default()
    };
    Arc::new(policy)
}
/// The tool must report the identifier `file_read`.
#[test]
fn file_read_name() {
    let policy = test_security(std::env::temp_dir());
    let reader = FileReadTool::new(policy);
    assert_eq!(reader.name(), "file_read");
}
/// The schema must describe `path`, `offset` and `limit`, with only `path`
/// listed as required.
#[test]
fn file_read_schema_has_path() {
    let tool = FileReadTool::new(test_security(std::env::temp_dir()));
    let schema = tool.parameters_schema();

    assert!(schema["properties"]["path"].is_object());
    assert!(schema["properties"]["offset"].is_object());
    assert!(schema["properties"]["limit"].is_object());

    let required = schema["required"].as_array().unwrap();
    assert!(required.contains(&json!("path")));
    // offset and limit are optional — check both, not just offset
    assert!(!required.contains(&json!("offset")));
    assert!(!required.contains(&json!("limit")));
}
/// A plain text file inside the workspace is returned with line numbers and
/// a total-lines footer.
#[tokio::test]
async fn file_read_existing_file() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    let target = workspace.join("test.txt");
    tokio::fs::write(&target, "hello world").await.unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "test.txt"}))
        .await
        .unwrap();

    assert!(outcome.success);
    assert!(outcome.output.contains("1: hello world"));
    assert!(outcome.output.contains("[1 lines total]"));
    assert!(outcome.error.is_none());

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Reading a file that does not exist fails at path resolution.
#[tokio::test]
async fn file_read_nonexistent_file() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_missing");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "nope.txt"}))
        .await
        .unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap();
    assert!(message.contains("Failed to resolve"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// `..` traversal out of the workspace is rejected by the policy.
#[tokio::test]
async fn file_read_blocks_path_traversal() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_traversal");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let request = json!({"path": "../../../etc/passwd"});
    let outcome = reader.execute(request).await.unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap();
    assert!(message.contains("not allowed"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// An absolute path outside the workspace is rejected under the default policy.
#[tokio::test]
async fn file_read_blocks_absolute_path() {
    let policy = test_security(std::env::temp_dir());
    let reader = FileReadTool::new(policy);

    let outcome = reader
        .execute(json!({"path": "/etc/passwd"}))
        .await
        .unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap();
    assert!(message.contains("not allowed"));
}
/// With an hourly budget of zero, even a valid read is refused with a
/// rate-limit error.
#[tokio::test]
async fn file_read_blocks_when_rate_limited() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_rate_limited");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::write(workspace.join("test.txt"), "hello world")
        .await
        .unwrap();

    let exhausted = test_security_with(workspace.clone(), AutonomyLevel::Supervised, 0);
    let reader = FileReadTool::new(exhausted);
    let outcome = reader
        .execute(json!({"path": "test.txt"}))
        .await
        .unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap_or("");
    assert!(message.contains("Rate limit exceeded"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Reads remain available when the policy runs at `AutonomyLevel::ReadOnly`.
#[tokio::test]
async fn file_read_allows_readonly_mode() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_readonly");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::write(workspace.join("test.txt"), "readonly ok")
        .await
        .unwrap();

    let read_only = test_security_with(workspace.clone(), AutonomyLevel::ReadOnly, 20);
    let reader = FileReadTool::new(read_only);
    let outcome = reader
        .execute(json!({"path": "test.txt"}))
        .await
        .unwrap();

    assert!(outcome.success);
    assert!(outcome.output.contains("1: readonly ok"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Omitting the `path` argument is a hard error (`Err`), not a failed result.
#[tokio::test]
async fn file_read_missing_path_param() {
    let policy = test_security(std::env::temp_dir());
    let reader = FileReadTool::new(policy);
    let outcome = reader.execute(json!({})).await;
    assert!(outcome.is_err());
}
/// An empty file reads successfully and produces empty output.
#[tokio::test]
async fn file_read_empty_file() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_empty");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::write(workspace.join("empty.txt"), "")
        .await
        .unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "empty.txt"}))
        .await
        .unwrap();

    assert!(outcome.success);
    assert!(outcome.output.is_empty());

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Relative paths with several components resolve inside the workspace.
#[tokio::test]
async fn file_read_nested_path() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_nested");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    let nested_dir = workspace.join("sub/dir");
    tokio::fs::create_dir_all(&nested_dir).await.unwrap();
    let nested_file = workspace.join("sub/dir/deep.txt");
    tokio::fs::write(&nested_file, "deep content").await.unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let request = json!({"path": "sub/dir/deep.txt"});
    let outcome = reader.execute(request).await.unwrap();

    assert!(outcome.success);
    assert!(outcome.output.contains("1: deep content"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// A symlink inside the workspace that points outside it must be refused
/// once the path has been canonicalized.
#[cfg(unix)]
#[tokio::test]
async fn file_read_blocks_symlink_escape() {
    use std::os::unix::fs::symlink;

    let root = std::env::temp_dir().join("zeroclaw_test_file_read_symlink_escape");
    let workspace = root.join("workspace");
    let outside = root.join("outside");
    let _ = tokio::fs::remove_dir_all(&root).await;
    for dir in [&workspace, &outside] {
        tokio::fs::create_dir_all(dir).await.unwrap();
    }

    let secret = outside.join("secret.txt");
    tokio::fs::write(&secret, "outside workspace").await.unwrap();
    symlink(&secret, workspace.join("escape.txt")).unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "escape.txt"}))
        .await
        .unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap_or("");
    assert!(message.contains("escapes workspace"));

    let _ = tokio::fs::remove_dir_all(&root).await;
}
/// With `workspace_only` off and no forbidden paths, an absolute path outside
/// the workspace can be read.
#[tokio::test]
async fn file_read_outside_workspace_allowed_when_workspace_only_disabled() {
    let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_roots_hint");
    let workspace = root.join("workspace");
    let outside = root.join("outside");
    let outside_file = outside.join("notes.txt");
    let _ = tokio::fs::remove_dir_all(&root).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::create_dir_all(&outside).await.unwrap();
    tokio::fs::write(&outside_file, "outside").await.unwrap();

    let permissive = SecurityPolicy {
        autonomy: AutonomyLevel::Supervised,
        workspace_dir: workspace,
        workspace_only: false,
        forbidden_paths: vec![],
        ..SecurityPolicy::default()
    };
    let reader = FileReadTool::new(Arc::new(permissive));

    let absolute = outside_file.to_string_lossy().to_string();
    let outcome = reader
        .execute(json!({"path": absolute}))
        .await
        .unwrap();

    assert!(outcome.success);
    assert!(outcome.error.is_none());
    assert!(outcome.output.contains("outside"));

    let _ = tokio::fs::remove_dir_all(&root).await;
}
/// Failed lookups still spend the action budget, so missing paths cannot be
/// probed for free.
#[tokio::test]
async fn file_read_nonexistent_consumes_rate_limit_budget() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_probe");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    // Budget of exactly two actions
    let policy = test_security_with(workspace.clone(), AutonomyLevel::Supervised, 2);
    let reader = FileReadTool::new(policy);

    // The first two probes fail on resolution yet each one costs an action
    for missing in ["nope1.txt", "nope2.txt"] {
        let probe = reader.execute(json!({"path": missing})).await.unwrap();
        assert!(!probe.success);
        assert!(probe.error.as_ref().unwrap().contains("Failed to resolve"));
    }

    // The third probe is refused by the rate limiter before any lookup
    let third = reader
        .execute(json!({"path": "nope3.txt"}))
        .await
        .unwrap();
    assert!(!third.success);
    assert!(
        third.error.as_ref().unwrap().contains("Rate limit"),
        "Expected rate limit error, got: {:?}",
        third.error
    );

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Exercises the line window: offset+limit, offset only, limit only, and an
/// unrestricted read, checking both the numbered rows and the footer.
#[tokio::test]
async fn file_read_with_offset_and_limit() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_offset");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::write(workspace.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
        .await
        .unwrap();
    let reader = FileReadTool::new(test_security(workspace.clone()));

    // Offset and limit together: lines 2-3
    let middle = reader
        .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
        .await
        .unwrap();
    assert!(middle.success);
    assert!(middle.output.contains("2: bbb"));
    assert!(middle.output.contains("3: ccc"));
    assert!(!middle.output.contains("1: aaa"));
    assert!(!middle.output.contains("4: ddd"));
    assert!(middle.output.contains("[Lines 2-3 of 5]"));

    // Offset only: line 4 through the end
    let tail = reader
        .execute(json!({"path": "lines.txt", "offset": 4}))
        .await
        .unwrap();
    assert!(tail.success);
    assert!(tail.output.contains("4: ddd"));
    assert!(tail.output.contains("5: eee"));
    assert!(tail.output.contains("[Lines 4-5 of 5]"));

    // Limit only: the first two lines
    let head = reader
        .execute(json!({"path": "lines.txt", "limit": 2}))
        .await
        .unwrap();
    assert!(head.success);
    assert!(head.output.contains("1: aaa"));
    assert!(head.output.contains("2: bbb"));
    assert!(!head.output.contains("3: ccc"));
    assert!(head.output.contains("[Lines 1-2 of 5]"));

    // Neither offset nor limit: the whole file
    let everything = reader
        .execute(json!({"path": "lines.txt"}))
        .await
        .unwrap();
    assert!(everything.success);
    assert!(everything.output.contains("1: aaa"));
    assert!(everything.output.contains("5: eee"));
    assert!(everything.output.contains("[5 lines total]"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// An offset past the last line succeeds with a "no lines in range" notice.
#[tokio::test]
async fn file_read_offset_beyond_end() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_offset_end");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::write(workspace.join("short.txt"), "one\ntwo")
        .await
        .unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let request = json!({"path": "short.txt", "offset": 100});
    let outcome = reader.execute(request).await.unwrap();

    assert!(outcome.success);
    let notice = "[No lines in range, file has 2 lines]";
    assert!(outcome.output.contains(notice));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// A file one byte over the 10 MB cap is refused with "File too large".
#[tokio::test]
async fn file_read_rejects_oversized_file() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_large");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    // One byte more than 10 MB
    let oversized = vec![b'x'; 10 * 1024 * 1024 + 1];
    tokio::fs::write(workspace.join("huge.bin"), &oversized)
        .await
        .unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "huge.bin"}))
        .await
        .unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap();
    assert!(message.contains("File too large"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Reading the PDF fixture must surface its text ("Hello") in the output.
// NOTE(review): with the `rag-pdf` feature off, `try_extract_pdf_text` always
// returns `None`; confirm whether this test is expected to pass in that build.
#[tokio::test]
async fn file_read_extracts_pdf_text() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_pdf");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let fixture = manifest_dir.join("tests/fixtures/test_document.pdf");
    tokio::fs::copy(&fixture, workspace.join("report.pdf"))
        .await
        .expect("copy PDF fixture");

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "report.pdf"}))
        .await
        .unwrap();

    assert!(
        outcome.success,
        "PDF read must succeed, error: {:?}",
        outcome.error
    );
    assert!(
        outcome.output.contains("Hello"),
        "extracted text must contain 'Hello', got: {}",
        outcome.output
    );

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// A non-UTF-8, non-PDF file is returned via lossy conversion: invalid bytes
/// become U+FFFD while valid ASCII survives.
#[tokio::test]
async fn file_read_lossy_reads_binary_file() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_lossy");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    // Invalid UTF-8 that also lacks the PDF magic
    let payload: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
    tokio::fs::write(workspace.join("data.bin"), &payload)
        .await
        .unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let outcome = reader
        .execute(json!({"path": "data.bin"}))
        .await
        .unwrap();

    assert!(
        outcome.success,
        "lossy read must succeed, error: {:?}",
        outcome.error
    );
    assert!(
        outcome.output.contains('\u{FFFD}'),
        "lossy output must contain replacement character, got: {:?}",
        outcome.output
    );
    assert!(
        outcome.output.contains("hi"),
        "lossy output must preserve valid ASCII, got: {:?}",
        outcome.output
    );

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
// ── E2E: full agent pipeline with real FileReadTool + PDF extraction ──
/// Shared fixtures for the end-to-end tests: a scripted provider that records
/// what it is sent, plus memory and observer factories.
mod e2e_helpers {
use crate::config::MemoryConfig;
use crate::memory::{self, Memory};
use crate::observability::{NoopObserver, Observer};
use crate::providers::{ChatMessage, ChatRequest, ChatResponse, Provider};
use std::sync::{Arc, Mutex};
/// Message lists captured from each `chat` call, shared with the test body.
pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
/// Provider double that replays canned responses in order and records the
/// messages of every `chat` request it receives.
pub struct RecordingProvider {
// Remaining scripted responses; the front element is served next.
responses: Mutex<Vec<ChatResponse>>,
// Log of the messages passed to each `chat` call, in call order.
pub requests: SharedRequests,
}
impl RecordingProvider {
/// Returns the provider together with a handle to its request log, so the
/// log stays reachable after the provider is moved into the agent.
pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
let provider = Self {
responses: Mutex::new(responses),
requests: requests.clone(),
};
(provider, requests)
}
}
#[async_trait::async_trait]
impl Provider for RecordingProvider {
/// Ignores all arguments and always answers `"fallback"`.
async fn chat_with_system(
&self,
_system_prompt: Option<&str>,
_message: &str,
_model: &str,
_temperature: f64,
) -> anyhow::Result<String> {
Ok("fallback".into())
}
/// Records the request's messages, then serves the next scripted response.
async fn chat(
&self,
request: ChatRequest<'_>,
_model: &str,
_temperature: f64,
) -> anyhow::Result<ChatResponse> {
self.requests
.lock()
.unwrap()
.push(request.messages.to_vec());
let mut guard = self.responses.lock().unwrap();
// Script exhausted: answer with a plain "done" and no tool calls.
if guard.is_empty() {
return Ok(ChatResponse {
text: Some("done".into()),
tool_calls: vec![],
usage: None,
reasoning_content: None,
});
}
// Serve responses in the order they were scripted.
Ok(guard.remove(0))
}
}
/// Creates a memory instance configured with the `"none"` backend, using the
/// system temp directory as its location argument.
pub fn make_memory() -> Arc<dyn Memory> {
let cfg = MemoryConfig {
backend: "none".into(),
..MemoryConfig::default()
};
Arc::from(memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
}
/// Creates an observer backed by `NoopObserver`.
pub fn make_observer() -> Arc<dyn Observer> {
Arc::from(NoopObserver {})
}
}
/// End-to-end test: scripted provider calls `file_read` on a real PDF
/// fixture, the tool extracts text via pdf-extract, and the extracted
/// content reaches the provider in the tool result message.
// NOTE(review): with the `rag-pdf` feature off, `try_extract_pdf_text` always
// returns `None`; confirm whether this test is expected to pass in that build.
#[tokio::test]
async fn e2e_agent_file_read_pdf_extraction() {
use crate::agent::agent::Agent;
use crate::agent::dispatcher::NativeToolDispatcher;
use crate::providers::{ChatResponse, Provider, ToolCall};
use e2e_helpers::*;
// ── Set up workspace with PDF fixture ──
let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_pdf");
let _ = tokio::fs::remove_dir_all(&workspace).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/test_document.pdf");
tokio::fs::copy(&fixture, workspace.join("report.pdf"))
.await
.expect("copy PDF fixture");
// ── Build real FileReadTool ──
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace.clone(),
..SecurityPolicy::default()
});
let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
// ── Script provider: call file_read → then answer ──
// `recorded` is the shared request log used for the assertions below.
let (provider, recorded) = RecordingProvider::new(vec![
// Turn 1 response: provider asks to read the PDF
ChatResponse {
text: Some(String::new()),
tool_calls: vec![ToolCall {
id: "tc1".into(),
name: "file_read".into(),
arguments: r#"{"path": "report.pdf"}"#.into(),
}],
usage: None,
reasoning_content: None,
},
// Turn 1 continued: provider sees tool result and answers
ChatResponse {
text: Some("The PDF contains a greeting: Hello PDF".into()),
tool_calls: vec![],
usage: None,
reasoning_content: None,
},
]);
let mut agent = Agent::builder()
.provider(Box::new(provider) as Box<dyn Provider>)
.tools(vec![file_read_tool])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(NativeToolDispatcher))
.workspace_dir(workspace.clone())
.build()
.unwrap();
// ── Execute ──
let response = agent
.turn("Read report.pdf and tell me what it says")
.await
.unwrap();
// ── Verify final response ──
assert!(
response.contains("Hello PDF"),
"agent response must contain PDF content, got: {response}",
);
// ── Verify provider received extracted PDF text in tool result ──
// Scoped block so the mutex guard is released before cleanup.
{
let all_requests = recorded.lock().unwrap();
assert!(
all_requests.len() >= 2,
"expected at least 2 provider requests (initial + after tool), got {}",
all_requests.len(),
);
// Index 1 is the request sent after the tool call was executed.
let second_request = &all_requests[1];
let tool_result_msg = second_request
.iter()
.find(|m| m.role == "tool")
.expect("second request must contain a tool result message");
assert!(
tool_result_msg.content.contains("Hello"),
"tool result must contain extracted PDF text 'Hello', got: {}",
tool_result_msg.content,
);
}
let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// End-to-end test: agent calls `file_read` on a binary file, gets
/// lossy UTF-8 output with replacement characters in the tool result.
#[tokio::test]
async fn e2e_agent_file_read_lossy_binary() {
use crate::agent::agent::Agent;
use crate::agent::dispatcher::NativeToolDispatcher;
use crate::providers::{ChatResponse, Provider, ToolCall};
use e2e_helpers::*;
// ── Set up workspace with binary file ──
let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_lossy");
let _ = tokio::fs::remove_dir_all(&workspace).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
// Invalid UTF-8 bytes surrounding the ASCII word "valid"; no PDF magic.
let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
tokio::fs::write(workspace.join("data.bin"), &binary_data)
.await
.unwrap();
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace.clone(),
..SecurityPolicy::default()
});
let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
// Script: first response requests `file_read` on data.bin, second is the
// final answer given after the tool result.
let (provider, recorded) = RecordingProvider::new(vec![
ChatResponse {
text: Some(String::new()),
tool_calls: vec![ToolCall {
id: "tc1".into(),
name: "file_read".into(),
arguments: r#"{"path": "data.bin"}"#.into(),
}],
usage: None,
reasoning_content: None,
},
ChatResponse {
text: Some("The file appears to be binary data.".into()),
tool_calls: vec![],
usage: None,
reasoning_content: None,
},
]);
let mut agent = Agent::builder()
.provider(Box::new(provider) as Box<dyn Provider>)
.tools(vec![file_read_tool])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(NativeToolDispatcher))
.workspace_dir(workspace.clone())
.build()
.unwrap();
let response = agent.turn("Read data.bin").await.unwrap();
assert!(
response.contains("binary"),
"agent response must mention binary, got: {response}",
);
// Verify tool result contains lossy output with replacement chars
// Scoped block so the mutex guard is released before cleanup.
{
let all_requests = recorded.lock().unwrap();
assert!(
all_requests.len() >= 2,
"expected at least 2 provider requests, got {}",
all_requests.len(),
);
// Index 1 is the request sent after the tool call was executed.
let tool_result_msg = all_requests[1]
.iter()
.find(|m| m.role == "tool")
.expect("second request must contain a tool result message");
assert!(
tool_result_msg.content.contains("valid"),
"tool result must preserve valid ASCII from binary file, got: {}",
tool_result_msg.content,
);
assert!(
tool_result_msg.content.contains('\u{FFFD}'),
"tool result must contain replacement character for invalid bytes, got: {}",
tool_result_msg.content,
);
}
let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Live e2e: real OpenAI Codex provider + real FileReadTool + PDF fixture.
/// Verifies the model receives extracted PDF text and responds meaningfully.
///
/// Requires valid OAuth credentials in `~/.zeroclaw/`.
/// Run: `cargo test --lib -- tools::file_read::tests::e2e_live_file_read_pdf --ignored --nocapture`
// Marked `#[ignore]`, so it only runs when explicitly requested as shown above.
#[tokio::test]
#[ignore = "requires valid OpenAI Codex OAuth credentials"]
async fn e2e_live_file_read_pdf() {
use crate::agent::agent::Agent;
use crate::agent::dispatcher::XmlToolDispatcher;
use crate::providers::openai_codex::OpenAiCodexProvider;
use crate::providers::{Provider, ProviderRuntimeOptions};
use e2e_helpers::*;
// ── Set up workspace with PDF fixture ──
let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_live_file_read_pdf");
let _ = tokio::fs::remove_dir_all(&workspace).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/test_document.pdf");
tokio::fs::copy(&fixture, workspace.join("report.pdf"))
.await
.expect("copy PDF fixture");
// ── Build real FileReadTool ──
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace.clone(),
..SecurityPolicy::default()
});
let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
// ── Real provider (OpenAI Codex uses XML tool dispatch) ──
let provider = OpenAiCodexProvider::new(&ProviderRuntimeOptions::default(), None)
.expect("provider should initialize");
let mut agent = Agent::builder()
.provider(Box::new(provider) as Box<dyn Provider>)
.tools(vec![file_read_tool])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(XmlToolDispatcher))
.workspace_dir(workspace.clone())
.model_name("gpt-5.3-codex".to_string())
.build()
.unwrap();
// ── Execute ──
let response = agent
.turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
.await
.unwrap();
// Printed for manual inspection; visible when run with `--nocapture`.
eprintln!("=== Live e2e response ===\n{response}\n=========================");
// ── Verify model saw the actual PDF content ("Hello PDF") ──
// Compared in lowercase so the model's capitalization does not matter.
let lower = response.to_lowercase();
assert!(
lower.contains("hello"),
"model response must reference extracted PDF text 'Hello PDF', got: {response}",
);
let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// A path containing an embedded NUL byte is rejected by the policy.
#[tokio::test]
async fn file_read_blocks_null_byte_in_path() {
    let workspace = std::env::temp_dir().join("zeroclaw_test_file_read_null_byte");
    let _ = tokio::fs::remove_dir_all(&workspace).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();

    let reader = FileReadTool::new(test_security(workspace.clone()));
    let request = json!({"path": "test\0evil.txt"});
    let outcome = reader.execute(request).await.unwrap();

    assert!(!outcome.success);
    let message = outcome.error.as_deref().unwrap();
    assert!(message.contains("not allowed"));

    let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// With `workspace_only` on, absolute paths under a configured allowed root
/// are readable while paths outside both workspace and allowed roots are not.
#[tokio::test]
async fn file_read_allowed_root_with_workspace_only() {
    let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_root");
    let workspace = root.join("workspace");
    let allowed = root.join("allowed_dir");
    let _ = tokio::fs::remove_dir_all(&root).await;
    tokio::fs::create_dir_all(&workspace).await.unwrap();
    tokio::fs::create_dir_all(&allowed).await.unwrap();
    let allowed_file = allowed.join("data.txt");
    tokio::fs::write(&allowed_file, "allowed content")
        .await
        .unwrap();

    let policy = SecurityPolicy {
        autonomy: AutonomyLevel::Supervised,
        workspace_dir: workspace.clone(),
        workspace_only: true,
        allowed_roots: vec![allowed.clone()],
        ..SecurityPolicy::default()
    };
    let reader = FileReadTool::new(Arc::new(policy));

    // An absolute path below the allowed root is accepted
    let abs_path = allowed_file.to_string_lossy().to_string();
    let permitted = reader.execute(json!({"path": &abs_path})).await.unwrap();
    assert!(
        permitted.success,
        "file_read with allowed_root path should succeed, error: {:?}",
        permitted.error
    );
    assert!(permitted.output.contains("allowed content"));

    // A path under neither the workspace nor an allowed root is still refused
    let outside = root.join("outside");
    tokio::fs::create_dir_all(&outside).await.unwrap();
    let secret_file = outside.join("secret.txt");
    tokio::fs::write(&secret_file, "secret").await.unwrap();
    let outside_path = secret_file.to_string_lossy().to_string();
    let denied = reader
        .execute(json!({"path": &outside_path}))
        .await
        .unwrap();
    assert!(!denied.success);

    let _ = tokio::fs::remove_dir_all(&root).await;
}
}