//! PPTX text-extraction tool: safely read slide text from a PowerPoint file
//! inside the workspace, subject to the configured security policy.
use super::traits::{Tool, ToolResult};
|
|
use crate::security::SecurityPolicy;
|
|
use async_trait::async_trait;
|
|
use serde_json::json;
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::path::{Component, Path};
|
|
use std::sync::Arc;
|
|
|
|
/// Maximum PPTX file size accepted on disk (50 MB); checked against file
/// metadata before the archive is read into memory.
const MAX_PPTX_BYTES: u64 = 50 * 1024 * 1024;

/// Default cap on characters returned to the LLM when `max_chars` is omitted.
const DEFAULT_MAX_CHARS: usize = 50_000;

/// Hard ceiling on returned characters regardless of what the caller requests.
const MAX_OUTPUT_CHARS: usize = 200_000;

/// Upper bound for total uncompressed XML read from slide files
/// (guards against highly-compressed slide payloads).
const MAX_TOTAL_SLIDE_XML_BYTES: u64 = 16 * 1024 * 1024;
|
|
|
|
/// Extract plain text from a PPTX file in the workspace.
|
|
pub struct PptxReadTool {
|
|
security: Arc<SecurityPolicy>,
|
|
}
|
|
|
|
impl PptxReadTool {
|
|
pub fn new(security: Arc<SecurityPolicy>) -> Self {
|
|
Self { security }
|
|
}
|
|
}
|
|
|
|
/// Extract plain text from PPTX bytes.
|
|
///
|
|
/// PPTX is a ZIP archive containing `ppt/slides/slide*.xml`.
|
|
/// Text lives inside `<a:t>` elements; paragraphs are delimited by `<a:p>`.
|
|
fn extract_pptx_text(bytes: &[u8]) -> anyhow::Result<String> {
|
|
extract_pptx_text_with_limits(bytes, MAX_TOTAL_SLIDE_XML_BYTES)
|
|
}
|
|
|
|
fn extract_pptx_text_with_limits(
|
|
bytes: &[u8],
|
|
max_total_slide_xml_bytes: u64,
|
|
) -> anyhow::Result<String> {
|
|
use quick_xml::events::Event;
|
|
use quick_xml::Reader;
|
|
use std::io::Read;
|
|
|
|
let cursor = std::io::Cursor::new(bytes);
|
|
let mut archive = zip::ZipArchive::new(cursor)?;
|
|
|
|
// Collect all slide files and keep a deterministic numeric fallback order.
|
|
let mut fallback_slide_names: Vec<String> = (0..archive.len())
|
|
.filter_map(|i| {
|
|
let name = archive.by_index(i).ok()?.name().to_string();
|
|
if name.starts_with("ppt/slides/slide") && name.ends_with(".xml") {
|
|
Some(name)
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect();
|
|
fallback_slide_names.sort_by(|left, right| {
|
|
let left_idx = slide_numeric_index(left);
|
|
let right_idx = slide_numeric_index(right);
|
|
left_idx.cmp(&right_idx).then_with(|| left.cmp(right))
|
|
});
|
|
|
|
if fallback_slide_names.is_empty() {
|
|
anyhow::bail!("Not a valid PPTX (no slide XML files found)");
|
|
}
|
|
|
|
let manifest_order = parse_slide_order_from_manifest(&mut archive)?;
|
|
let fallback_name_set: HashSet<String> = fallback_slide_names.iter().cloned().collect();
|
|
let mut ordered_slide_names = Vec::new();
|
|
let mut seen = HashSet::new();
|
|
|
|
for slide_name in manifest_order {
|
|
if fallback_name_set.contains(&slide_name) && seen.insert(slide_name.clone()) {
|
|
ordered_slide_names.push(slide_name);
|
|
}
|
|
}
|
|
for slide_name in fallback_slide_names {
|
|
if seen.insert(slide_name.clone()) {
|
|
ordered_slide_names.push(slide_name);
|
|
}
|
|
}
|
|
|
|
let mut text = String::new();
|
|
let mut total_slide_xml_bytes = 0u64;
|
|
|
|
for slide_name in &ordered_slide_names {
|
|
let mut slide_file = archive
|
|
.by_name(slide_name)
|
|
.map_err(|e| anyhow::anyhow!("Failed to read {slide_name}: {e}"))?;
|
|
let slide_xml_size = slide_file.size();
|
|
total_slide_xml_bytes = total_slide_xml_bytes
|
|
.checked_add(slide_xml_size)
|
|
.ok_or_else(|| anyhow::anyhow!("Slide XML payload size overflow"))?;
|
|
if total_slide_xml_bytes > max_total_slide_xml_bytes {
|
|
anyhow::bail!(
|
|
"Slide XML payload too large: {} bytes (limit: {} bytes)",
|
|
total_slide_xml_bytes,
|
|
max_total_slide_xml_bytes
|
|
);
|
|
}
|
|
|
|
let mut xml_content = String::new();
|
|
slide_file.read_to_string(&mut xml_content)?;
|
|
|
|
let mut reader = Reader::from_str(&xml_content);
|
|
let mut in_text = false;
|
|
let slide_start = text.len();
|
|
|
|
loop {
|
|
match reader.read_event() {
|
|
Ok(Event::Start(e)) => {
|
|
let name = e.name();
|
|
if name.as_ref() == b"a:t" {
|
|
in_text = true;
|
|
} else if name.as_ref() == b"a:p" && text.len() > slide_start {
|
|
text.push('\n');
|
|
}
|
|
}
|
|
Ok(Event::Empty(e)) => {
|
|
// Self-closing <a:t/> contains no text and must not flip `in_text`.
|
|
if e.name().as_ref() == b"a:p" && text.len() > slide_start {
|
|
text.push('\n');
|
|
}
|
|
}
|
|
Ok(Event::End(e)) => {
|
|
if e.name().as_ref() == b"a:t" {
|
|
in_text = false;
|
|
}
|
|
}
|
|
Ok(Event::Text(e)) => {
|
|
if in_text {
|
|
text.push_str(&e.unescape()?);
|
|
}
|
|
}
|
|
Ok(Event::Eof) => break,
|
|
Err(e) => return Err(e.into()),
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
// Separate slides with a blank line.
|
|
if text.len() > slide_start && !text.ends_with('\n') {
|
|
text.push('\n');
|
|
}
|
|
}
|
|
|
|
Ok(text)
|
|
}
|
|
|
|
/// Parse the numeric suffix of a `ppt/slides/slideN.xml` path.
///
/// Returns `None` when the file stem does not look like `slideN` or the
/// number does not fit in a `u32`.
fn slide_numeric_index(slide_path: &str) -> Option<u32> {
    Path::new(slide_path).file_stem().and_then(|stem| {
        stem.to_string_lossy()
            .strip_prefix("slide")
            .and_then(|digits| digits.parse().ok())
    })
}
|
|
|
|
/// Strip any XML namespace prefix, returning the part after the last `:`
/// (the whole input when there is no colon).
fn local_name(name: &[u8]) -> &[u8] {
    match name.iter().rposition(|&b| b == b':') {
        Some(colon) => &name[colon + 1..],
        None => name,
    }
}
|
|
|
|
/// Normalize a relationship `Target` (relative to `ppt/`) into a package
/// path, accepting only local `ppt/slides/slide*.xml` parts.
///
/// Returns `None` for external URLs, for `..` sequences that escape the
/// package root, and for anything that is not a slide XML part.
fn normalize_slide_target(target: &str) -> Option<String> {
    // URL targets are external relationships, never package-local parts.
    if target.contains("://") {
        return None;
    }

    let mut parts: Vec<String> = Vec::new();
    for piece in Path::new("ppt").join(target).components() {
        match piece {
            Component::Normal(segment) => {
                parts.push(segment.to_string_lossy().into_owned());
            }
            Component::ParentDir => {
                // `..` above the package root is an escape attempt: reject.
                if parts.pop().is_none() {
                    return None;
                }
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }

    let joined = parts.join("/");
    (joined.starts_with("ppt/slides/slide") && joined.ends_with(".xml")).then_some(joined)
}
|
|
|
|
/// Recover the author-defined slide order from `ppt/presentation.xml`.
///
/// Two passes: first collect the `r:id` of every `<p:sldId>` element in
/// document order, then resolve each ID through
/// `ppt/_rels/presentation.xml.rels` to a normalized local slide path.
///
/// Returns an empty `Vec` when either manifest part is absent or lists no
/// slide IDs — the caller then falls back to numeric filename ordering.
/// Errors only on ZIP or XML read failures.
fn parse_slide_order_from_manifest<R: std::io::Read + std::io::Seek>(
    archive: &mut zip::ZipArchive<R>,
) -> anyhow::Result<Vec<String>> {
    use quick_xml::events::Event;
    use quick_xml::Reader;
    use std::io::Read;

    // A missing manifest part is not an error: signal "no manifest order".
    let mut presentation_xml = String::new();
    match archive.by_name("ppt/presentation.xml") {
        Ok(mut presentation_file) => {
            presentation_file.read_to_string(&mut presentation_xml)?;
        }
        Err(zip::result::ZipError::FileNotFound) => return Ok(Vec::new()),
        Err(err) => return Err(err.into()),
    }

    let mut rels_xml = String::new();
    match archive.by_name("ppt/_rels/presentation.xml.rels") {
        Ok(mut rels_file) => {
            rels_file.read_to_string(&mut rels_xml)?;
        }
        Err(zip::result::ZipError::FileNotFound) => return Ok(Vec::new()),
        Err(err) => return Err(err.into()),
    }

    // Pass 1: relationship IDs of <p:sldId> elements, in presentation order.
    let mut relationship_ids = Vec::new();
    let mut presentation_reader = Reader::from_str(&presentation_xml);
    loop {
        match presentation_reader.read_event() {
            // Match both Start and Empty: <p:sldId .../> is usually self-closing.
            Ok(Event::Start(ref event)) | Ok(Event::Empty(ref event)) => {
                if local_name(event.name().as_ref()) == b"sldId" {
                    for attr in event.attributes().flatten() {
                        let raw_key = attr.key.as_ref();
                        // `r:id` is the conventional spelling; also accept any
                        // other prefix in case the relationships namespace is
                        // bound to a different alias.
                        if raw_key == b"r:id" || raw_key.ends_with(b":id") {
                            let rel_id = attr
                                .decode_and_unescape_value(presentation_reader.decoder())?
                                .into_owned();
                            relationship_ids.push(rel_id);
                        }
                    }
                }
            }
            Ok(Event::Eof) => break,
            Err(err) => return Err(err.into()),
            _ => {}
        }
    }

    if relationship_ids.is_empty() {
        return Ok(Vec::new());
    }

    // Pass 2: map relationship Id -> Target. A duplicate Id keeps the last
    // occurrence (HashMap::insert overwrites).
    let mut relationship_targets: HashMap<String, String> = HashMap::new();
    let mut rels_reader = Reader::from_str(&rels_xml);
    loop {
        match rels_reader.read_event() {
            Ok(Event::Start(ref event)) | Ok(Event::Empty(ref event)) => {
                if local_name(event.name().as_ref()) == b"Relationship" {
                    let mut rel_id = None;
                    let mut target = None;

                    for attr in event.attributes().flatten() {
                        let key = local_name(attr.key.as_ref());
                        if key.eq_ignore_ascii_case(b"id") {
                            rel_id = Some(
                                attr.decode_and_unescape_value(rels_reader.decoder())?
                                    .into_owned(),
                            );
                        } else if key.eq_ignore_ascii_case(b"target") {
                            target = Some(
                                attr.decode_and_unescape_value(rels_reader.decoder())?
                                    .into_owned(),
                            );
                        }
                    }

                    // Only keep relationships that carry both attributes.
                    if let (Some(rel_id), Some(target)) = (rel_id, target) {
                        relationship_targets.insert(rel_id, target);
                    }
                }
            }
            Ok(Event::Eof) => break,
            Err(err) => return Err(err.into()),
            _ => {}
        }
    }

    // Resolve IDs in manifest order; unknown IDs and non-slide or external
    // targets are silently skipped.
    let mut ordered_slide_names = Vec::new();
    for rel_id in relationship_ids {
        if let Some(target) = relationship_targets.get(&rel_id) {
            if let Some(normalized) = normalize_slide_target(target) {
                ordered_slide_names.push(normalized);
            }
        }
    }

    Ok(ordered_slide_names)
}
|
|
|
|
fn parse_max_chars(args: &serde_json::Value) -> anyhow::Result<usize> {
|
|
let Some(value) = args.get("max_chars") else {
|
|
return Ok(DEFAULT_MAX_CHARS);
|
|
};
|
|
|
|
let serde_json::Value::Number(number) = value else {
|
|
anyhow::bail!("Invalid 'max_chars': expected a positive integer");
|
|
};
|
|
let Some(raw) = number.as_u64() else {
|
|
anyhow::bail!("Invalid 'max_chars': expected a positive integer");
|
|
};
|
|
if raw == 0 {
|
|
anyhow::bail!("Invalid 'max_chars': must be >= 1");
|
|
}
|
|
|
|
Ok(usize::try_from(raw)
|
|
.unwrap_or(MAX_OUTPUT_CHARS)
|
|
.min(MAX_OUTPUT_CHARS))
|
|
}
|
|
|
|
#[async_trait]
impl Tool for PptxReadTool {
    /// Stable tool identifier used by the LLM to invoke this tool.
    fn name(&self) -> &str {
        "pptx_read"
    }

    /// One-paragraph capability summary shown to the LLM.
    fn description(&self) -> &str {
        "Extract plain text from a PPTX (PowerPoint) file in the workspace. \
        Returns all readable text content from all slides. No formatting, images, or charts."
    }

    /// JSON Schema for the tool arguments: required `path`, optional
    /// bounded `max_chars`.
    fn parameters_schema(&self) -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the PPTX file. Relative paths resolve from workspace."
                },
                "max_chars": {
                    "type": "integer",
                    "description": "Maximum characters to return (default: 50000, max: 200000)",
                    "minimum": 1,
                    "maximum": 200_000
                }
            },
            "required": ["path"]
        })
    }

    /// Validate arguments, enforce the security policy, then extract and
    /// (possibly truncated) return the deck's text.
    ///
    /// Policy violations and I/O failures are reported as unsuccessful
    /// `ToolResult`s; only a missing `path` argument is a hard `Err`.
    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
        let path = args
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;

        // Validate max_chars before consuming any rate-limit budget.
        let max_chars = match parse_max_chars(&args) {
            Ok(value) => value,
            Err(err) => {
                return Ok(ToolResult {
                    success: false,
                    output: String::new(),
                    error: Some(err.to_string()),
                })
            }
        };

        // Cheap pre-check; the budget itself is consumed by record_action() below.
        if self.security.is_rate_limited() {
            return Ok(ToolResult {
                success: false,
                output: String::new(),
                error: Some("Rate limit exceeded: too many actions in the last hour".into()),
            });
        }

        // First gate: the path as supplied (pattern-based policy check).
        if !self.security.is_path_allowed(path) {
            return Ok(ToolResult {
                success: false,
                output: String::new(),
                error: Some(format!("Path not allowed by security policy: {path}")),
            });
        }

        // Consume one unit of the action budget; refuse when exhausted.
        if !self.security.record_action() {
            return Ok(ToolResult {
                success: false,
                output: String::new(),
                error: Some("Rate limit exceeded: action budget exhausted".into()),
            });
        }

        let full_path = self.security.resolve_user_supplied_path(path);

        // Canonicalize so the second gate sees the real target, with
        // symlinks and `..` resolved.
        let resolved_path = match tokio::fs::canonicalize(&full_path).await {
            Ok(p) => p,
            Err(e) => {
                return Ok(ToolResult {
                    success: false,
                    output: String::new(),
                    error: Some(format!("Failed to resolve file path: {e}")),
                });
            }
        };

        // Second gate: the canonical path must still be inside the allowed
        // area (catches symlink escapes past the raw-path check).
        if !self.security.is_resolved_path_allowed(&resolved_path) {
            return Ok(ToolResult {
                success: false,
                output: String::new(),
                error: Some(
                    self.security
                        .resolved_path_violation_message(&resolved_path),
                ),
            });
        }

        tracing::debug!("Reading PPTX: {}", resolved_path.display());

        // Size check from metadata before pulling the file into memory.
        match tokio::fs::metadata(&resolved_path).await {
            Ok(meta) => {
                if meta.len() > MAX_PPTX_BYTES {
                    return Ok(ToolResult {
                        success: false,
                        output: String::new(),
                        error: Some(format!(
                            "PPTX too large: {} bytes (limit: {MAX_PPTX_BYTES} bytes)",
                            meta.len()
                        )),
                    });
                }
            }
            Err(e) => {
                return Ok(ToolResult {
                    success: false,
                    output: String::new(),
                    error: Some(format!("Failed to read file metadata: {e}")),
                });
            }
        }

        let bytes = match tokio::fs::read(&resolved_path).await {
            Ok(b) => b,
            Err(e) => {
                return Ok(ToolResult {
                    success: false,
                    output: String::new(),
                    error: Some(format!("Failed to read PPTX file: {e}")),
                });
            }
        };

        // ZIP + XML parsing is CPU-bound; keep it off the async runtime.
        let text = match tokio::task::spawn_blocking(move || extract_pptx_text(&bytes)).await {
            Ok(Ok(t)) => t,
            Ok(Err(e)) => {
                return Ok(ToolResult {
                    success: false,
                    output: String::new(),
                    error: Some(format!("PPTX extraction failed: {e}")),
                });
            }
            Err(e) => {
                return Ok(ToolResult {
                    success: false,
                    output: String::new(),
                    error: Some(format!("PPTX extraction task panicked: {e}")),
                });
            }
        };

        // An empty deck is a successful read, reported explicitly.
        if text.trim().is_empty() {
            return Ok(ToolResult {
                success: true,
                output: "PPTX contains no extractable text".into(),
                error: None,
            });
        }

        // Truncate on char boundaries (not bytes) so multi-byte text stays valid.
        let output = if text.chars().count() > max_chars {
            let mut truncated: String = text.chars().take(max_chars).collect();
            use std::fmt::Write as _;
            let _ = write!(truncated, "\n\n... [truncated at {max_chars} chars]");
            truncated
        } else {
            text
        };

        Ok(ToolResult {
            success: true,
            output,
            error: None,
        })
    }
}
|
|
|
|
#[cfg(test)]
mod tests {
    //! Unit tests: tool metadata, security enforcement (paths, rate limits,
    //! symlinks), extraction correctness, slide ordering, and truncation.

    use super::*;
    use crate::security::{AutonomyLevel, SecurityPolicy};
    use tempfile::TempDir;

    // Policy fixture rooted at `workspace`, default limits otherwise.
    fn test_security(workspace: std::path::PathBuf) -> Arc<SecurityPolicy> {
        Arc::new(SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            workspace_dir: workspace,
            ..SecurityPolicy::default()
        })
    }

    // Policy fixture with an explicit hourly action budget (0 = always limited).
    fn test_security_with_limit(
        workspace: std::path::PathBuf,
        max_actions: u32,
    ) -> Arc<SecurityPolicy> {
        Arc::new(SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            workspace_dir: workspace,
            max_actions_per_hour: max_actions,
            ..SecurityPolicy::default()
        })
    }

    /// Build a minimal valid PPTX (ZIP) in memory with one slide containing the given text.
    fn minimal_pptx_bytes(slide_text: &str) -> Vec<u8> {
        use std::io::Write;

        let slide_xml = format!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
       xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
  <p:cSld>
    <p:spTree>
      <p:sp>
        <p:txBody>
          <a:p><a:r><a:t>{slide_text}</a:t></a:r></a:p>
        </p:txBody>
      </p:sp>
    </p:spTree>
  </p:cSld>
</p:sld>"#
        );

        let buf = std::io::Cursor::new(Vec::new());
        let mut zip = zip::ZipWriter::new(buf);
        let options = zip::write::SimpleFileOptions::default()
            .compression_method(zip::CompressionMethod::Stored);

        zip.start_file("ppt/slides/slide1.xml", options).unwrap();
        zip.write_all(slide_xml.as_bytes()).unwrap();

        let buf = zip.finish().unwrap();
        buf.into_inner()
    }

    /// Build a PPTX with two slides.
    fn two_slide_pptx_bytes(text1: &str, text2: &str) -> Vec<u8> {
        use std::io::Write;

        let make_slide = |text: &str| {
            format!(
                r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
       xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
  <p:cSld>
    <p:spTree>
      <p:sp>
        <p:txBody>
          <a:p><a:r><a:t>{text}</a:t></a:r></a:p>
        </p:txBody>
      </p:sp>
    </p:spTree>
  </p:cSld>
</p:sld>"#
            )
        };

        let buf = std::io::Cursor::new(Vec::new());
        let mut zip = zip::ZipWriter::new(buf);
        let options = zip::write::SimpleFileOptions::default()
            .compression_method(zip::CompressionMethod::Stored);

        zip.start_file("ppt/slides/slide1.xml", options).unwrap();
        zip.write_all(make_slide(text1).as_bytes()).unwrap();

        zip.start_file("ppt/slides/slide2.xml", options).unwrap();
        zip.write_all(make_slide(text2).as_bytes()).unwrap();

        let buf = zip.finish().unwrap();
        buf.into_inner()
    }

    // PPTX builder that also writes the presentation manifest + rels, so the
    // extractor's manifest-ordering path can be exercised.
    fn ordered_pptx_bytes(slides: &[(&str, &str)], presentation_order: &[&str]) -> Vec<u8> {
        use std::io::Write;

        let make_slide = |text: &str| {
            format!(
                r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
       xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
  <p:cSld>
    <p:spTree>
      <p:sp>
        <p:txBody>
          <a:p><a:r><a:t>{text}</a:t></a:r></a:p>
        </p:txBody>
      </p:sp>
    </p:spTree>
  </p:cSld>
</p:sld>"#
            )
        };

        // One Relationship + one <p:sldId> per slide, in the requested order.
        let mut rels = Vec::new();
        let mut sld_ids = Vec::new();
        for (index, slide_name) in presentation_order.iter().enumerate() {
            let rel_id = format!("rId{}", index + 1);
            rels.push(format!(
                r#"<Relationship Id="{rel_id}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/{slide_name}"/>"#
            ));
            sld_ids.push(format!(
                r#"<p:sldId id="{}" r:id="{rel_id}"/>"#,
                256 + index
            ));
        }

        let presentation_xml = format!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p:presentation xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
                xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
                xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
  <p:sldIdLst>{}</p:sldIdLst>
</p:presentation>"#,
            sld_ids.join("")
        );
        let rels_xml = format!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
{}
</Relationships>"#,
            rels.join("")
        );

        let buf = std::io::Cursor::new(Vec::new());
        let mut zip = zip::ZipWriter::new(buf);
        let options = zip::write::SimpleFileOptions::default()
            .compression_method(zip::CompressionMethod::Stored);

        zip.start_file("ppt/presentation.xml", options).unwrap();
        zip.write_all(presentation_xml.as_bytes()).unwrap();
        zip.start_file("ppt/_rels/presentation.xml.rels", options)
            .unwrap();
        zip.write_all(rels_xml.as_bytes()).unwrap();

        for (slide_name, text) in slides {
            zip.start_file(format!("ppt/slides/{slide_name}"), options)
                .unwrap();
            zip.write_all(make_slide(text).as_bytes()).unwrap();
        }

        zip.finish().unwrap().into_inner()
    }

    #[test]
    fn name_is_pptx_read() {
        let tool = PptxReadTool::new(test_security(std::env::temp_dir()));
        assert_eq!(tool.name(), "pptx_read");
    }

    #[test]
    fn description_not_empty() {
        let tool = PptxReadTool::new(test_security(std::env::temp_dir()));
        assert!(!tool.description().is_empty());
    }

    #[test]
    fn schema_has_path_required() {
        let tool = PptxReadTool::new(test_security(std::env::temp_dir()));
        let schema = tool.parameters_schema();
        assert!(schema["properties"]["path"].is_object());
        assert!(schema["properties"]["max_chars"].is_object());
        let required = schema["required"].as_array().unwrap();
        assert!(required.contains(&json!("path")));
    }

    #[test]
    fn spec_matches_metadata() {
        let tool = PptxReadTool::new(test_security(std::env::temp_dir()));
        let spec = tool.spec();
        assert_eq!(spec.name, "pptx_read");
        assert!(spec.parameters.is_object());
    }

    // Missing `path` is the one hard error; everything else is a ToolResult.
    #[tokio::test]
    async fn missing_path_param_returns_error() {
        let tool = PptxReadTool::new(test_security(std::env::temp_dir()));
        let result = tool.execute(json!({})).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("path"));
    }

    #[tokio::test]
    async fn absolute_path_is_blocked() {
        let tool = PptxReadTool::new(test_security(std::env::temp_dir()));
        let result = tool.execute(json!({"path": "/etc/passwd"})).await.unwrap();
        assert!(!result.success);
        assert!(result
            .error
            .as_deref()
            .unwrap_or("")
            .contains("not allowed"));
    }

    #[tokio::test]
    async fn path_traversal_is_blocked() {
        let tmp = TempDir::new().unwrap();
        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool
            .execute(json!({"path": "../../../etc/passwd"}))
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result
            .error
            .as_deref()
            .unwrap_or("")
            .contains("not allowed"));
    }

    #[tokio::test]
    async fn nonexistent_file_returns_error() {
        let tmp = TempDir::new().unwrap();
        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool.execute(json!({"path": "missing.pptx"})).await.unwrap();
        assert!(!result.success);
        assert!(result
            .error
            .as_deref()
            .unwrap_or("")
            .contains("Failed to resolve"));
    }

    // Budget of 0 means the very first request is rejected.
    #[tokio::test]
    async fn rate_limit_blocks_request() {
        let tmp = TempDir::new().unwrap();
        let tool = PptxReadTool::new(test_security_with_limit(tmp.path().to_path_buf(), 0));
        let result = tool.execute(json!({"path": "any.pptx"})).await.unwrap();
        assert!(!result.success);
        assert!(result.error.as_deref().unwrap_or("").contains("Rate limit"));
    }

    #[tokio::test]
    async fn extracts_text_from_valid_pptx() {
        let tmp = TempDir::new().unwrap();
        let pptx_path = tmp.path().join("deck.pptx");
        tokio::fs::write(&pptx_path, minimal_pptx_bytes("Hello PPTX"))
            .await
            .unwrap();

        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool.execute(json!({"path": "deck.pptx"})).await.unwrap();
        assert!(result.success);
        assert!(
            result.output.contains("Hello PPTX"),
            "expected extracted text, got: {}",
            result.output
        );
    }

    #[tokio::test]
    async fn extracts_text_from_multiple_slides() {
        let tmp = TempDir::new().unwrap();
        let pptx_path = tmp.path().join("multi.pptx");
        tokio::fs::write(&pptx_path, two_slide_pptx_bytes("Slide One", "Slide Two"))
            .await
            .unwrap();

        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool.execute(json!({"path": "multi.pptx"})).await.unwrap();
        assert!(result.success);
        assert!(result.output.contains("Slide One"));
        assert!(result.output.contains("Slide Two"));
    }

    #[tokio::test]
    async fn invalid_zip_returns_extraction_error() {
        let tmp = TempDir::new().unwrap();
        let pptx_path = tmp.path().join("bad.pptx");
        tokio::fs::write(&pptx_path, b"this is not a zip file")
            .await
            .unwrap();

        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool.execute(json!({"path": "bad.pptx"})).await.unwrap();
        assert!(!result.success);
        assert!(result
            .error
            .as_deref()
            .unwrap_or("")
            .contains("extraction failed"));
    }

    #[tokio::test]
    async fn max_chars_truncates_output() {
        let tmp = TempDir::new().unwrap();
        let long_text = "B".repeat(1000);
        let pptx_path = tmp.path().join("long.pptx");
        tokio::fs::write(&pptx_path, minimal_pptx_bytes(&long_text))
            .await
            .unwrap();

        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool
            .execute(json!({"path": "long.pptx", "max_chars": 50}))
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("truncated"));
    }

    // A string-typed max_chars is rejected before any file I/O happens.
    #[tokio::test]
    async fn invalid_max_chars_returns_tool_error() {
        let tmp = TempDir::new().unwrap();
        let pptx_path = tmp.path().join("deck.pptx");
        tokio::fs::write(&pptx_path, minimal_pptx_bytes("Hello"))
            .await
            .unwrap();

        let tool = PptxReadTool::new(test_security(tmp.path().to_path_buf()));
        let result = tool
            .execute(json!({"path": "deck.pptx", "max_chars": "100"}))
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result.error.as_deref().unwrap_or("").contains("max_chars"));
    }

    // Manifest order (2, 10, 1) must win over numeric filename order (1, 2, 10).
    #[test]
    fn slide_order_follows_presentation_manifest() {
        let bytes = ordered_pptx_bytes(
            &[
                ("slide1.xml", "One"),
                ("slide2.xml", "Two"),
                ("slide10.xml", "Ten"),
            ],
            &["slide2.xml", "slide10.xml", "slide1.xml"],
        );

        let extracted = extract_pptx_text(&bytes).expect("extract text");
        let two = extracted.find("Two").expect("two position");
        let ten = extracted.find("Ten").expect("ten position");
        let one = extracted.find("One").expect("one position");
        assert!(two < ten && ten < one, "unexpected order: {extracted}");
    }

    #[test]
    fn cumulative_slide_xml_limit_is_enforced() {
        let bytes = two_slide_pptx_bytes("Alpha", "Beta");
        let error = extract_pptx_text_with_limits(&bytes, 64).unwrap_err();
        assert!(error.to_string().contains("Slide XML payload too large"));
    }

    // Regression guard: a self-closing <a:t/> must not leave `in_text` set,
    // which would swallow the following paragraph's text.
    #[test]
    fn empty_text_tag_does_not_leak_in_text_state() {
        use std::io::Write;

        let slide_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
       xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
  <p:cSld>
    <p:spTree>
      <p:sp>
        <p:txBody>
          <a:p><a:r><a:t/></a:r></a:p>
          <a:p><a:r><a:t>Visible</a:t></a:r></a:p>
        </p:txBody>
      </p:sp>
    </p:spTree>
  </p:cSld>
</p:sld>"#;

        let buf = std::io::Cursor::new(Vec::new());
        let mut zip = zip::ZipWriter::new(buf);
        let options = zip::write::SimpleFileOptions::default()
            .compression_method(zip::CompressionMethod::Stored);
        zip.start_file("ppt/slides/slide1.xml", options).unwrap();
        zip.write_all(slide_xml.as_bytes()).unwrap();
        let bytes = zip.finish().unwrap().into_inner();

        let extracted = extract_pptx_text(&bytes).expect("extract text");
        assert!(extracted.contains("Visible"));
    }

    // A symlink inside the workspace pointing outside it must be refused
    // after canonicalization.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_escape_is_blocked() {
        use std::os::unix::fs::symlink;

        let root = TempDir::new().unwrap();
        let workspace = root.path().join("workspace");
        let outside = root.path().join("outside");
        tokio::fs::create_dir_all(&workspace).await.unwrap();
        tokio::fs::create_dir_all(&outside).await.unwrap();
        tokio::fs::write(outside.join("secret.pptx"), minimal_pptx_bytes("secret"))
            .await
            .unwrap();
        symlink(outside.join("secret.pptx"), workspace.join("link.pptx")).unwrap();

        let tool = PptxReadTool::new(test_security(workspace));
        let result = tool.execute(json!({"path": "link.pptx"})).await.unwrap();
        assert!(!result.success);
        assert!(result
            .error
            .as_deref()
            .unwrap_or("")
            .contains("escapes workspace"));
    }
}
|