zeroclaw/src/tools/file_read.rs
ZeroClaw Bot 8c0be20422 feat(providers): add quota_metadata to ChatResponse across all providers
Wire QuotaMetadata into ChatResponse for all provider implementations,
enabling quota tracking data to flow from API responses through the
agent loop to quota monitoring tools.

Depends on: circuit breaker (#1842) + quota monitoring (#1904)

Made-with: Cursor
2026-02-28 11:45:02 -05:00

1234 lines
44 KiB
Rust

use super::traits::{Tool, ToolResult};
use crate::security::file_link_guard::has_multiple_hard_links;
use crate::security::sensitive_paths::is_sensitive_file_path;
use crate::security::SecurityPolicy;
use async_trait::async_trait;
use serde_json::json;
use std::path::Path;
use std::sync::Arc;
const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024;
fn sensitive_file_block_message(path: &str) -> String {
format!(
"Reading sensitive file '{path}' is blocked by policy. \
Set [autonomy].allow_sensitive_file_reads = true only when strictly necessary."
)
}
fn hard_link_block_message(path: &Path) -> String {
format!(
"Reading multiply-linked file '{}' is blocked by policy \
(potential hard-link escape).",
path.display()
)
}
/// Read file contents with path sandboxing
pub struct FileReadTool {
security: Arc<SecurityPolicy>,
}
impl FileReadTool {
pub fn new(security: Arc<SecurityPolicy>) -> Self {
Self { security }
}
}
#[async_trait]
impl Tool for FileReadTool {
fn name(&self) -> &str {
"file_read"
}
fn description(&self) -> &str {
"Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion. Sensitive files (for example .env and key material) are blocked by default."
}
fn parameters_schema(&self) -> serde_json::Value {
json!({
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Path to the file. Relative paths resolve from workspace; outside paths require policy allowlist."
},
"offset": {
"type": "integer",
"description": "Starting line number (1-based, default: 1)"
},
"limit": {
"type": "integer",
"description": "Maximum number of lines to return (default: all)"
}
},
"required": ["path"]
})
}
async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
let path = args
.get("path")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;
if self.security.is_rate_limited() {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some("Rate limit exceeded: too many actions in the last hour".into()),
});
}
// Security check: validate path is within workspace
if !self.security.is_path_allowed(path) {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Path not allowed by security policy: {path}")),
});
}
if !self.security.allow_sensitive_file_reads && is_sensitive_file_path(Path::new(path)) {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(sensitive_file_block_message(path)),
});
}
// Record action BEFORE canonicalization so that every non-trivially-rejected
// request consumes rate limit budget. This prevents attackers from probing
// path existence (via canonicalize errors) without rate limit cost.
if !self.security.record_action() {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some("Rate limit exceeded: action budget exhausted".into()),
});
}
let full_path = self.security.workspace_dir.join(path);
// Resolve path before reading to block symlink escapes.
let resolved_path = match tokio::fs::canonicalize(&full_path).await {
Ok(p) => p,
Err(e) => {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Failed to resolve file path: {e}")),
});
}
};
if !self.security.is_resolved_path_allowed(&resolved_path) {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(
self.security
.resolved_path_violation_message(&resolved_path),
),
});
}
if !self.security.allow_sensitive_file_reads && is_sensitive_file_path(&resolved_path) {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(sensitive_file_block_message(
&resolved_path.display().to_string(),
)),
});
}
// Check file size AFTER canonicalization to prevent TOCTOU symlink bypass
match tokio::fs::metadata(&resolved_path).await {
Ok(meta) => {
if has_multiple_hard_links(&meta) {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(hard_link_block_message(&resolved_path)),
});
}
if meta.len() > MAX_FILE_SIZE_BYTES {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!(
"File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
meta.len()
)),
});
}
}
Err(e) => {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Failed to read file metadata: {e}")),
});
}
}
match tokio::fs::read_to_string(&resolved_path).await {
Ok(contents) => {
let lines: Vec<&str> = contents.lines().collect();
let total = lines.len();
if total == 0 {
return Ok(ToolResult {
success: true,
output: String::new(),
error: None,
});
}
let offset = args
.get("offset")
.and_then(|v| v.as_u64())
.map(|v| {
usize::try_from(v.max(1))
.unwrap_or(usize::MAX)
.saturating_sub(1)
})
.unwrap_or(0);
let start = offset.min(total);
let end = match args.get("limit").and_then(|v| v.as_u64()) {
Some(l) => {
let limit = usize::try_from(l).unwrap_or(usize::MAX);
(start.saturating_add(limit)).min(total)
}
None => total,
};
if start >= end {
return Ok(ToolResult {
success: true,
output: format!("[No lines in range, file has {total} lines]"),
error: None,
});
}
let numbered: String = lines[start..end]
.iter()
.enumerate()
.map(|(i, line)| format!("{}: {}", start + i + 1, line))
.collect::<Vec<_>>()
.join("\n");
let partial = start > 0 || end < total;
let summary = if partial {
format!("\n[Lines {}-{} of {total}]", start + 1, end)
} else {
format!("\n[{total} lines total]")
};
Ok(ToolResult {
success: true,
output: format!("{numbered}{summary}"),
error: None,
})
}
Err(_) => {
// Not valid UTF-8 — read raw bytes and try to extract text
let bytes = tokio::fs::read(&resolved_path)
.await
.map_err(|e| anyhow::anyhow!("Failed to read file: {e}"))?;
if let Some(text) = try_extract_pdf_text(&bytes) {
return Ok(ToolResult {
success: true,
output: text,
error: None,
});
}
// Lossy fallback — replaces invalid bytes with U+FFFD
let lossy = String::from_utf8_lossy(&bytes).into_owned();
Ok(ToolResult {
success: true,
output: lossy,
error: None,
})
}
}
}
}
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
if bytes.len() < 5 || &bytes[..5] != b"%PDF-" {
return None;
}
let text = pdf_extract::extract_text_from_mem(bytes).ok()?;
if text.trim().is_empty() {
return None;
}
Some(text)
}
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
None
}
#[cfg(test)]
mod tests {
use super::*;
use crate::security::{AutonomyLevel, SecurityPolicy};
fn test_security(workspace: std::path::PathBuf) -> Arc<SecurityPolicy> {
Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace,
..SecurityPolicy::default()
})
}
fn test_security_with(
workspace: std::path::PathBuf,
autonomy: AutonomyLevel,
max_actions_per_hour: u32,
) -> Arc<SecurityPolicy> {
Arc::new(SecurityPolicy {
autonomy,
workspace_dir: workspace,
max_actions_per_hour,
..SecurityPolicy::default()
})
}
#[test]
fn file_read_name() {
let tool = FileReadTool::new(test_security(std::env::temp_dir()));
assert_eq!(tool.name(), "file_read");
}
#[test]
fn file_read_schema_has_path() {
let tool = FileReadTool::new(test_security(std::env::temp_dir()));
let schema = tool.parameters_schema();
assert!(schema["properties"]["path"].is_object());
assert!(schema["properties"]["offset"].is_object());
assert!(schema["properties"]["limit"].is_object());
assert!(schema["required"]
.as_array()
.unwrap()
.contains(&json!("path")));
// offset and limit are optional
assert!(!schema["required"]
.as_array()
.unwrap()
.contains(&json!("offset")));
}
#[tokio::test]
async fn file_read_existing_file() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join("test.txt"), "hello world")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
assert!(result.success);
assert!(result.output.contains("1: hello world"));
assert!(result.output.contains("[1 lines total]"));
assert!(result.error.is_none());
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_nonexistent_file() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_missing");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": "nope.txt"})).await.unwrap();
assert!(!result.success);
assert!(result.error.as_ref().unwrap().contains("Failed to resolve"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_blocks_path_traversal() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_traversal");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool
.execute(json!({"path": "../../../etc/passwd"}))
.await
.unwrap();
assert!(!result.success);
assert!(result.error.as_ref().unwrap().contains("not allowed"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_blocks_absolute_path() {
let tool = FileReadTool::new(test_security(std::env::temp_dir()));
let result = tool.execute(json!({"path": "/etc/passwd"})).await.unwrap();
assert!(!result.success);
assert!(result.error.as_ref().unwrap().contains("not allowed"));
}
#[tokio::test]
async fn file_read_blocks_sensitive_env_file_by_default() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_sensitive_env");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join(".env"), "API_KEY=plaintext-secret")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": ".env"})).await.unwrap();
assert!(!result.success);
assert!(result
.error
.as_deref()
.unwrap_or("")
.contains("sensitive file"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_blocks_sensitive_dotenv_variant_by_default() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_sensitive_env_variant");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join(".env.production"), "API_KEY=plaintext-secret")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool
.execute(json!({"path": ".env.production"}))
.await
.unwrap();
assert!(!result.success);
assert!(result
.error
.as_deref()
.unwrap_or("")
.contains("sensitive file"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_blocks_sensitive_directory_credentials_by_default() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_sensitive_aws");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(dir.join(".aws")).await.unwrap();
tokio::fs::write(dir.join(".aws/credentials"), "aws_access_key_id=abc")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool
.execute(json!({"path": ".aws/credentials"}))
.await
.unwrap();
assert!(!result.success);
assert!(result
.error
.as_deref()
.unwrap_or("")
.contains("sensitive file"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_allows_sensitive_file_when_policy_enabled() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_sensitive_allowed");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join(".env"), "SAFE=value")
.await
.unwrap();
let policy = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: dir.clone(),
allow_sensitive_file_reads: true,
..SecurityPolicy::default()
});
let tool = FileReadTool::new(policy);
let result = tool.execute(json!({"path": ".env"})).await.unwrap();
assert!(result.success);
assert!(result.output.contains("1: SAFE=value"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_allows_sensitive_nested_path_when_policy_enabled() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_sensitive_nested_allowed");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(dir.join(".aws")).await.unwrap();
tokio::fs::write(dir.join(".aws/credentials"), "aws_access_key_id=allowed")
.await
.unwrap();
let policy = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: dir.clone(),
allow_sensitive_file_reads: true,
..SecurityPolicy::default()
});
let tool = FileReadTool::new(policy);
let result = tool
.execute(json!({"path": ".aws/credentials"}))
.await
.unwrap();
assert!(result.success);
assert!(result.output.contains("1: aws_access_key_id=allowed"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_blocks_when_rate_limited() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_rate_limited");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join("test.txt"), "hello world")
.await
.unwrap();
let tool = FileReadTool::new(test_security_with(
dir.clone(),
AutonomyLevel::Supervised,
0,
));
let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
assert!(!result.success);
assert!(result
.error
.as_deref()
.unwrap_or("")
.contains("Rate limit exceeded"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_allows_readonly_mode() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_readonly");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join("test.txt"), "readonly ok")
.await
.unwrap();
let tool = FileReadTool::new(test_security_with(dir.clone(), AutonomyLevel::ReadOnly, 20));
let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
assert!(result.success);
assert!(result.output.contains("1: readonly ok"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_missing_path_param() {
let tool = FileReadTool::new(test_security(std::env::temp_dir()));
let result = tool.execute(json!({})).await;
assert!(result.is_err());
}
#[tokio::test]
async fn file_read_empty_file() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_empty");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join("empty.txt"), "").await.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": "empty.txt"})).await.unwrap();
assert!(result.success);
assert_eq!(result.output, "");
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_nested_path() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_nested");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(dir.join("sub/dir"))
.await
.unwrap();
tokio::fs::write(dir.join("sub/dir/deep.txt"), "deep content")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool
.execute(json!({"path": "sub/dir/deep.txt"}))
.await
.unwrap();
assert!(result.success);
assert!(result.output.contains("1: deep content"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[cfg(unix)]
#[tokio::test]
async fn file_read_blocks_symlink_escape() {
use std::os::unix::fs::symlink;
let root = std::env::temp_dir().join("zeroclaw_test_file_read_symlink_escape");
let workspace = root.join("workspace");
let outside = root.join("outside");
let _ = tokio::fs::remove_dir_all(&root).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
tokio::fs::create_dir_all(&outside).await.unwrap();
tokio::fs::write(outside.join("secret.txt"), "outside workspace")
.await
.unwrap();
symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();
let tool = FileReadTool::new(test_security(workspace.clone()));
let result = tool.execute(json!({"path": "escape.txt"})).await.unwrap();
assert!(!result.success);
assert!(result
.error
.as_deref()
.unwrap_or("")
.contains("escapes workspace"));
let _ = tokio::fs::remove_dir_all(&root).await;
}
#[cfg(unix)]
#[tokio::test]
async fn file_read_blocks_hardlink_escape() {
let root = std::env::temp_dir().join("zeroclaw_test_file_read_hardlink_escape");
let workspace = root.join("workspace");
let outside = root.join("outside");
let _ = tokio::fs::remove_dir_all(&root).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
tokio::fs::create_dir_all(&outside).await.unwrap();
tokio::fs::write(outside.join("secret.txt"), "outside workspace")
.await
.unwrap();
std::fs::hard_link(outside.join("secret.txt"), workspace.join("alias.txt")).unwrap();
let tool = FileReadTool::new(test_security(workspace.clone()));
let result = tool.execute(json!({"path": "alias.txt"})).await.unwrap();
assert!(!result.success);
assert!(result
.error
.as_deref()
.unwrap_or("")
.contains("hard-link escape"));
let _ = tokio::fs::remove_dir_all(&root).await;
}
#[tokio::test]
async fn file_read_outside_workspace_allowed_when_workspace_only_disabled() {
let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_roots_hint");
let workspace = root.join("workspace");
let outside = root.join("outside");
let outside_file = outside.join("notes.txt");
let _ = tokio::fs::remove_dir_all(&root).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
tokio::fs::create_dir_all(&outside).await.unwrap();
tokio::fs::write(&outside_file, "outside").await.unwrap();
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace,
workspace_only: false,
forbidden_paths: vec![],
..SecurityPolicy::default()
});
let tool = FileReadTool::new(security);
let result = tool
.execute(json!({"path": outside_file.to_string_lossy().to_string()}))
.await
.unwrap();
assert!(result.success);
assert!(result.error.is_none());
assert!(result.output.contains("outside"));
let _ = tokio::fs::remove_dir_all(&root).await;
}
#[tokio::test]
async fn file_read_nonexistent_consumes_rate_limit_budget() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_probe");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
// Allow only 2 actions total
let tool = FileReadTool::new(test_security_with(
dir.clone(),
AutonomyLevel::Supervised,
2,
));
// Both reads fail (file doesn't exist) but should consume budget
let r1 = tool.execute(json!({"path": "nope1.txt"})).await.unwrap();
assert!(!r1.success);
assert!(r1.error.as_ref().unwrap().contains("Failed to resolve"));
let r2 = tool.execute(json!({"path": "nope2.txt"})).await.unwrap();
assert!(!r2.success);
assert!(r2.error.as_ref().unwrap().contains("Failed to resolve"));
// Third attempt should be rate limited even though file doesn't exist
let r3 = tool.execute(json!({"path": "nope3.txt"})).await.unwrap();
assert!(!r3.success);
assert!(
r3.error.as_ref().unwrap().contains("Rate limit"),
"Expected rate limit error, got: {:?}",
r3.error
);
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_with_offset_and_limit() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
// Read lines 2-3
let result = tool
.execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
.await
.unwrap();
assert!(result.success);
assert!(result.output.contains("2: bbb"));
assert!(result.output.contains("3: ccc"));
assert!(!result.output.contains("1: aaa"));
assert!(!result.output.contains("4: ddd"));
assert!(result.output.contains("[Lines 2-3 of 5]"));
// Read from offset 4 to end
let result = tool
.execute(json!({"path": "lines.txt", "offset": 4}))
.await
.unwrap();
assert!(result.success);
assert!(result.output.contains("4: ddd"));
assert!(result.output.contains("5: eee"));
assert!(result.output.contains("[Lines 4-5 of 5]"));
// Limit only (first 2 lines)
let result = tool
.execute(json!({"path": "lines.txt", "limit": 2}))
.await
.unwrap();
assert!(result.success);
assert!(result.output.contains("1: aaa"));
assert!(result.output.contains("2: bbb"));
assert!(!result.output.contains("3: ccc"));
assert!(result.output.contains("[Lines 1-2 of 5]"));
// Full read (no offset/limit) shows all lines
let result = tool.execute(json!({"path": "lines.txt"})).await.unwrap();
assert!(result.success);
assert!(result.output.contains("1: aaa"));
assert!(result.output.contains("5: eee"));
assert!(result.output.contains("[5 lines total]"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_offset_beyond_end() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset_end");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
tokio::fs::write(dir.join("short.txt"), "one\ntwo")
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool
.execute(json!({"path": "short.txt", "offset": 100}))
.await
.unwrap();
assert!(result.success);
assert!(result
.output
.contains("[No lines in range, file has 2 lines]"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
#[tokio::test]
async fn file_read_rejects_oversized_file() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_large");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
// Create a file just over 10 MB
let big = vec![b'x'; 10 * 1024 * 1024 + 1];
tokio::fs::write(dir.join("huge.bin"), &big).await.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": "huge.bin"})).await.unwrap();
assert!(!result.success);
assert!(result.error.as_ref().unwrap().contains("File too large"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
/// PDF files should be readable via pdf-extract text extraction.
#[tokio::test]
async fn file_read_extracts_pdf_text() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_pdf");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/test_document.pdf");
tokio::fs::copy(&fixture, dir.join("report.pdf"))
.await
.expect("copy PDF fixture");
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": "report.pdf"})).await.unwrap();
assert!(
result.success,
"PDF read must succeed, error: {:?}",
result.error
);
assert!(
result.output.contains("Hello"),
"extracted text must contain 'Hello', got: {}",
result.output
);
let _ = tokio::fs::remove_dir_all(&dir).await;
}
/// Non-UTF-8 binary files should be read with lossy conversion.
#[tokio::test]
async fn file_read_lossy_reads_binary_file() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_lossy");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
// Write bytes that are not valid UTF-8 and not a PDF
let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
tokio::fs::write(dir.join("data.bin"), &binary_data)
.await
.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool.execute(json!({"path": "data.bin"})).await.unwrap();
assert!(
result.success,
"lossy read must succeed, error: {:?}",
result.error
);
assert!(
result.output.contains('\u{FFFD}'),
"lossy output must contain replacement character, got: {:?}",
result.output
);
assert!(
result.output.contains("hi"),
"lossy output must preserve valid ASCII, got: {:?}",
result.output
);
let _ = tokio::fs::remove_dir_all(&dir).await;
}
// ── E2E: full agent pipeline with real FileReadTool + PDF extraction ──
mod e2e_helpers {
use crate::config::MemoryConfig;
use crate::memory::{self, Memory};
use crate::observability::{NoopObserver, Observer};
use crate::providers::{ChatMessage, ChatRequest, ChatResponse, Provider};
use std::sync::{Arc, Mutex};
pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
pub struct RecordingProvider {
responses: Mutex<Vec<ChatResponse>>,
pub requests: SharedRequests,
}
impl RecordingProvider {
pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
let provider = Self {
responses: Mutex::new(responses),
requests: requests.clone(),
};
(provider, requests)
}
}
#[async_trait::async_trait]
impl Provider for RecordingProvider {
async fn chat_with_system(
&self,
_system_prompt: Option<&str>,
_message: &str,
_model: &str,
_temperature: f64,
) -> anyhow::Result<String> {
Ok("fallback".into())
}
async fn chat(
&self,
request: ChatRequest<'_>,
_model: &str,
_temperature: f64,
) -> anyhow::Result<ChatResponse> {
self.requests
.lock()
.unwrap()
.push(request.messages.to_vec());
let mut guard = self.responses.lock().unwrap();
if guard.is_empty() {
return Ok(ChatResponse {
text: Some("done".into()),
tool_calls: vec![],
usage: None,
reasoning_content: None,
quota_metadata: None,
});
}
Ok(guard.remove(0))
}
}
pub fn make_memory() -> Arc<dyn Memory> {
let cfg = MemoryConfig {
backend: "none".into(),
..MemoryConfig::default()
};
Arc::from(memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
}
pub fn make_observer() -> Arc<dyn Observer> {
Arc::from(NoopObserver {})
}
}
/// End-to-end test: scripted provider calls `file_read` on a real PDF
/// fixture, the tool extracts text via pdf-extract, and the extracted
/// content reaches the provider in the tool result message.
#[tokio::test]
async fn e2e_agent_file_read_pdf_extraction() {
use crate::agent::agent::Agent;
use crate::agent::dispatcher::NativeToolDispatcher;
use crate::providers::{ChatResponse, Provider, ToolCall};
use e2e_helpers::*;
// ── Set up workspace with PDF fixture ──
let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_pdf");
let _ = tokio::fs::remove_dir_all(&workspace).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/test_document.pdf");
tokio::fs::copy(&fixture, workspace.join("report.pdf"))
.await
.expect("copy PDF fixture");
// ── Build real FileReadTool ──
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace.clone(),
..SecurityPolicy::default()
});
let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
// ── Script provider: call file_read → then answer ──
let (provider, recorded) = RecordingProvider::new(vec![
// Turn 1 response: provider asks to read the PDF
ChatResponse {
text: Some(String::new()),
tool_calls: vec![ToolCall {
id: "tc1".into(),
name: "file_read".into(),
arguments: r#"{"path": "report.pdf"}"#.into(),
}],
usage: None,
reasoning_content: None,
quota_metadata: None,
},
// Turn 1 continued: provider sees tool result and answers
ChatResponse {
text: Some("The PDF contains a greeting: Hello PDF".into()),
tool_calls: vec![],
usage: None,
reasoning_content: None,
quota_metadata: None,
},
]);
let mut agent = Agent::builder()
.provider(Box::new(provider) as Box<dyn Provider>)
.tools(vec![file_read_tool])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(NativeToolDispatcher))
.workspace_dir(workspace.clone())
.build()
.unwrap();
// ── Execute ──
let response = agent
.turn("Read report.pdf and tell me what it says")
.await
.unwrap();
// ── Verify final response ──
assert!(
response.contains("Hello PDF"),
"agent response must contain PDF content, got: {response}",
);
// ── Verify provider received extracted PDF text in tool result ──
{
let all_requests = recorded.lock().unwrap();
assert!(
all_requests.len() >= 2,
"expected at least 2 provider requests (initial + after tool), got {}",
all_requests.len(),
);
let second_request = &all_requests[1];
let tool_result_msg = second_request
.iter()
.find(|m| m.role == "tool")
.expect("second request must contain a tool result message");
assert!(
tool_result_msg.content.contains("Hello"),
"tool result must contain extracted PDF text 'Hello', got: {}",
tool_result_msg.content,
);
}
let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// End-to-end test: agent calls `file_read` on a binary file, gets
/// lossy UTF-8 output with replacement characters in the tool result.
#[tokio::test]
async fn e2e_agent_file_read_lossy_binary() {
use crate::agent::agent::Agent;
use crate::agent::dispatcher::NativeToolDispatcher;
use crate::providers::{ChatResponse, Provider, ToolCall};
use e2e_helpers::*;
// ── Set up workspace with binary file ──
let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_lossy");
let _ = tokio::fs::remove_dir_all(&workspace).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
tokio::fs::write(workspace.join("data.bin"), &binary_data)
.await
.unwrap();
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace.clone(),
..SecurityPolicy::default()
});
let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
let (provider, recorded) = RecordingProvider::new(vec![
ChatResponse {
text: Some(String::new()),
tool_calls: vec![ToolCall {
id: "tc1".into(),
name: "file_read".into(),
arguments: r#"{"path": "data.bin"}"#.into(),
}],
usage: None,
reasoning_content: None,
quota_metadata: None,
},
ChatResponse {
text: Some("The file appears to be binary data.".into()),
tool_calls: vec![],
usage: None,
reasoning_content: None,
quota_metadata: None,
},
]);
let mut agent = Agent::builder()
.provider(Box::new(provider) as Box<dyn Provider>)
.tools(vec![file_read_tool])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(NativeToolDispatcher))
.workspace_dir(workspace.clone())
.build()
.unwrap();
let response = agent.turn("Read data.bin").await.unwrap();
assert!(
response.contains("binary"),
"agent response must mention binary, got: {response}",
);
// Verify tool result contains lossy output with replacement chars
{
let all_requests = recorded.lock().unwrap();
assert!(
all_requests.len() >= 2,
"expected at least 2 provider requests, got {}",
all_requests.len(),
);
let tool_result_msg = all_requests[1]
.iter()
.find(|m| m.role == "tool")
.expect("second request must contain a tool result message");
assert!(
tool_result_msg.content.contains("valid"),
"tool result must preserve valid ASCII from binary file, got: {}",
tool_result_msg.content,
);
assert!(
tool_result_msg.content.contains('\u{FFFD}'),
"tool result must contain replacement character for invalid bytes, got: {}",
tool_result_msg.content,
);
}
let _ = tokio::fs::remove_dir_all(&workspace).await;
}
/// Live e2e: real OpenAI Codex provider + real FileReadTool + PDF fixture.
/// Verifies the model receives extracted PDF text and responds meaningfully.
///
/// Requires valid OAuth credentials in `~/.zeroclaw/`.
/// Run: `cargo test --lib -- tools::file_read::tests::e2e_live_file_read_pdf --ignored --nocapture`
#[tokio::test]
#[ignore = "requires valid OpenAI Codex OAuth credentials"]
async fn e2e_live_file_read_pdf() {
use crate::agent::agent::Agent;
use crate::agent::dispatcher::XmlToolDispatcher;
use crate::providers::openai_codex::OpenAiCodexProvider;
use crate::providers::{Provider, ProviderRuntimeOptions};
use e2e_helpers::*;
// ── Set up workspace with PDF fixture ──
let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_live_file_read_pdf");
let _ = tokio::fs::remove_dir_all(&workspace).await;
tokio::fs::create_dir_all(&workspace).await.unwrap();
let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/test_document.pdf");
tokio::fs::copy(&fixture, workspace.join("report.pdf"))
.await
.expect("copy PDF fixture");
// ── Build real FileReadTool ──
let security = Arc::new(SecurityPolicy {
autonomy: AutonomyLevel::Supervised,
workspace_dir: workspace.clone(),
..SecurityPolicy::default()
});
let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
// ── Real provider (OpenAI Codex uses XML tool dispatch) ──
let provider = OpenAiCodexProvider::new(&ProviderRuntimeOptions::default(), None)
.expect("provider should initialize");
let mut agent = Agent::builder()
.provider(Box::new(provider) as Box<dyn Provider>)
.tools(vec![file_read_tool])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(XmlToolDispatcher))
.workspace_dir(workspace.clone())
.model_name("gpt-5.3-codex".to_string())
.build()
.unwrap();
// ── Execute ──
let response = agent
.turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
.await
.unwrap();
eprintln!("=== Live e2e response ===\n{response}\n=========================");
// ── Verify model saw the actual PDF content ("Hello PDF") ──
let lower = response.to_lowercase();
assert!(
lower.contains("hello"),
"model response must reference extracted PDF text 'Hello PDF', got: {response}",
);
let _ = tokio::fs::remove_dir_all(&workspace).await;
}
#[tokio::test]
async fn file_read_blocks_null_byte_in_path() {
let dir = std::env::temp_dir().join("zeroclaw_test_file_read_null_byte");
let _ = tokio::fs::remove_dir_all(&dir).await;
tokio::fs::create_dir_all(&dir).await.unwrap();
let tool = FileReadTool::new(test_security(dir.clone()));
let result = tool
.execute(json!({"path": "test\0evil.txt"}))
.await
.unwrap();
assert!(!result.success);
assert!(result.error.as_ref().unwrap().contains("not allowed"));
let _ = tokio::fs::remove_dir_all(&dir).await;
}
}