fix(telegram): prevent non-image files from getting [IMAGE:] markers
Add file extension validation before generating [IMAGE:] markers for incoming Telegram attachments. Non-image files (e.g. .md, .txt, .pdf) now always use [Document:] format regardless of how Telegram classifies them, preventing false vision capability errors. Extract format_attachment_content() and is_image_extension() helpers to centralize the logic and make it testable. Fixes #1274 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
905e714659
commit
8331c65bcc
@ -152,6 +152,40 @@ impl TelegramAttachmentKind {
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether a file path has a recognized image extension.
|
||||
fn is_image_extension(path: &Path) -> bool {
|
||||
path.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.map(|ext| {
|
||||
matches!(
|
||||
ext.to_ascii_lowercase().as_str(),
|
||||
"png" | "jpg" | "jpeg" | "gif" | "webp" | "bmp"
|
||||
)
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Build the user-facing content string for an incoming attachment.
|
||||
///
|
||||
/// Photos with a recognized image extension use `[IMAGE:/path]` so the
|
||||
/// multimodal pipeline can validate vision capability. Non-image files
|
||||
/// always use `[Document: name] /path` regardless of how Telegram
|
||||
/// classified them.
|
||||
fn format_attachment_content(
|
||||
kind: IncomingAttachmentKind,
|
||||
local_filename: &str,
|
||||
local_path: &Path,
|
||||
) -> String {
|
||||
match kind {
|
||||
IncomingAttachmentKind::Photo if is_image_extension(local_path) => {
|
||||
format!("[IMAGE:{}]", local_path.display())
|
||||
}
|
||||
_ => {
|
||||
format!("[Document: {}] {}", local_filename, local_path.display())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_http_url(target: &str) -> bool {
|
||||
target.starts_with("http://") || target.starts_with("https://")
|
||||
}
|
||||
@ -971,16 +1005,10 @@ Allowlist Telegram username (without '@') or numeric user ID.",
|
||||
}
|
||||
|
||||
// Build message content.
|
||||
// Photos use [IMAGE:] marker so the multimodal pipeline validates
|
||||
// vision capability and rejects unsupported providers early.
|
||||
let mut content = match attachment.kind {
|
||||
IncomingAttachmentKind::Document => {
|
||||
format!("[Document: {}] {}", local_filename, local_path.display())
|
||||
}
|
||||
IncomingAttachmentKind::Photo => {
|
||||
format!("[IMAGE:{}]", local_path.display())
|
||||
}
|
||||
};
|
||||
// Photos with image extensions use [IMAGE:] marker so the multimodal
|
||||
// pipeline validates vision capability. Non-image files always get
|
||||
// [Document:] format regardless of Telegram's classification.
|
||||
let mut content = format_attachment_content(attachment.kind, &local_filename, &local_path);
|
||||
if let Some(caption) = &attachment.caption {
|
||||
if !caption.is_empty() {
|
||||
use std::fmt::Write;
|
||||
@ -4112,29 +4140,15 @@ mod tests {
|
||||
|
||||
// ── Attachment content format tests ──────────────────────────────
|
||||
|
||||
/// Photo attachments must use `[IMAGE:/path]` marker so the multimodal
|
||||
/// pipeline validates vision capability on the provider.
|
||||
/// Photo attachments with image extension must use `[IMAGE:/path]` marker
|
||||
/// so the multimodal pipeline validates vision capability on the provider.
|
||||
#[test]
|
||||
fn attachment_photo_content_uses_image_marker() {
|
||||
let att = IncomingAttachment {
|
||||
file_id: "photo_file_id".into(),
|
||||
file_name: None,
|
||||
file_size: Some(5000),
|
||||
caption: None,
|
||||
kind: IncomingAttachmentKind::Photo,
|
||||
};
|
||||
|
||||
let local_path = std::path::Path::new("/tmp/workspace/photo_123_45.jpg");
|
||||
let local_filename = "photo_123_45.jpg";
|
||||
|
||||
let content = match att.kind {
|
||||
IncomingAttachmentKind::Document => {
|
||||
format!("[Document: {}] {}", local_filename, local_path.display())
|
||||
}
|
||||
IncomingAttachmentKind::Photo => {
|
||||
format!("[IMAGE:{}]", local_path.display())
|
||||
}
|
||||
};
|
||||
let content =
|
||||
format_attachment_content(IncomingAttachmentKind::Photo, local_filename, local_path);
|
||||
|
||||
assert_eq!(content, "[IMAGE:/tmp/workspace/photo_123_45.jpg]");
|
||||
assert!(content.starts_with("[IMAGE:"));
|
||||
@ -4144,30 +4158,115 @@ mod tests {
|
||||
/// Document attachments keep `[Document: name] /path` format.
|
||||
#[test]
|
||||
fn attachment_document_content_uses_document_label() {
|
||||
let att = IncomingAttachment {
|
||||
file_id: "doc_file_id".into(),
|
||||
file_name: Some("report.pdf".into()),
|
||||
file_size: Some(12345),
|
||||
caption: None,
|
||||
kind: IncomingAttachmentKind::Document,
|
||||
};
|
||||
|
||||
let local_path = std::path::Path::new("/tmp/workspace/report.pdf");
|
||||
let local_filename = att.file_name.as_deref().unwrap();
|
||||
let local_filename = "report.pdf";
|
||||
|
||||
let content = match att.kind {
|
||||
IncomingAttachmentKind::Document => {
|
||||
format!("[Document: {}] {}", local_filename, local_path.display())
|
||||
}
|
||||
IncomingAttachmentKind::Photo => {
|
||||
format!("[IMAGE:{}]", local_path.display())
|
||||
}
|
||||
};
|
||||
let content =
|
||||
format_attachment_content(IncomingAttachmentKind::Document, local_filename, local_path);
|
||||
|
||||
assert_eq!(content, "[Document: report.pdf] /tmp/workspace/report.pdf");
|
||||
assert!(!content.contains("[IMAGE:"));
|
||||
}
|
||||
|
||||
/// Markdown files must never produce `[IMAGE:]` markers (issue #1274).
|
||||
#[test]
|
||||
fn markdown_file_never_produces_image_marker() {
|
||||
let local_path = std::path::Path::new("/tmp/workspace/telegram_files/notes.md");
|
||||
let local_filename = "notes.md";
|
||||
|
||||
// Even if Telegram misclassifies as Photo, extension guard prevents [IMAGE:].
|
||||
let content =
|
||||
format_attachment_content(IncomingAttachmentKind::Photo, local_filename, local_path);
|
||||
assert!(
|
||||
!content.contains("[IMAGE:"),
|
||||
"markdown must not get [IMAGE:] marker: {content}"
|
||||
);
|
||||
assert!(content.starts_with("[Document:"));
|
||||
|
||||
// As Document, it should also be correct.
|
||||
let content_doc =
|
||||
format_attachment_content(IncomingAttachmentKind::Document, local_filename, local_path);
|
||||
assert!(
|
||||
!content_doc.contains("[IMAGE:"),
|
||||
"markdown document must not get [IMAGE:] marker: {content_doc}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Non-image files classified as Photo fall back to `[Document:]` format.
|
||||
#[test]
|
||||
fn non_image_photo_falls_back_to_document_format() {
|
||||
for (filename, ext_path) in [
|
||||
("file.md", "/tmp/ws/file.md"),
|
||||
("file.txt", "/tmp/ws/file.txt"),
|
||||
("file.pdf", "/tmp/ws/file.pdf"),
|
||||
("file.csv", "/tmp/ws/file.csv"),
|
||||
("file.json", "/tmp/ws/file.json"),
|
||||
("file.zip", "/tmp/ws/file.zip"),
|
||||
("file", "/tmp/ws/file"),
|
||||
] {
|
||||
let path = std::path::Path::new(ext_path);
|
||||
let content = format_attachment_content(IncomingAttachmentKind::Photo, filename, path);
|
||||
assert!(
|
||||
!content.contains("[IMAGE:"),
|
||||
"{filename}: non-image file should not get [IMAGE:] marker, got: {content}"
|
||||
);
|
||||
assert!(
|
||||
content.starts_with("[Document:"),
|
||||
"{filename}: should use [Document:] format, got: {content}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// All recognized image extensions produce `[IMAGE:]` when classified as Photo.
|
||||
#[test]
|
||||
fn image_extensions_produce_image_marker() {
|
||||
for ext in ["png", "jpg", "jpeg", "gif", "webp", "bmp"] {
|
||||
let filename = format!("photo_1_2.{ext}");
|
||||
let path_str = format!("/tmp/ws/{filename}");
|
||||
let path = std::path::Path::new(&path_str);
|
||||
let content = format_attachment_content(IncomingAttachmentKind::Photo, &filename, path);
|
||||
assert!(
|
||||
content.starts_with("[IMAGE:"),
|
||||
"{ext}: image should get [IMAGE:] marker, got: {content}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Multimodal pipeline must return 0 image markers for document-formatted
|
||||
/// content — even for a file misclassified as Photo (issue #1274).
|
||||
#[test]
|
||||
fn markdown_attachment_not_detected_by_multimodal_image_markers() {
|
||||
let content = format_attachment_content(
|
||||
IncomingAttachmentKind::Photo,
|
||||
"notes.md",
|
||||
std::path::Path::new("/tmp/ws/notes.md"),
|
||||
);
|
||||
let messages = vec![crate::providers::ChatMessage::user(content)];
|
||||
assert_eq!(
|
||||
crate::multimodal::count_image_markers(&messages),
|
||||
0,
|
||||
"markdown file must not trigger image marker detection"
|
||||
);
|
||||
}
|
||||
|
||||
/// `is_image_extension` helper recognizes image formats and rejects others.
|
||||
#[test]
|
||||
fn is_image_extension_recognizes_images() {
|
||||
assert!(is_image_extension(std::path::Path::new("photo.png")));
|
||||
assert!(is_image_extension(std::path::Path::new("photo.jpg")));
|
||||
assert!(is_image_extension(std::path::Path::new("photo.jpeg")));
|
||||
assert!(is_image_extension(std::path::Path::new("photo.gif")));
|
||||
assert!(is_image_extension(std::path::Path::new("photo.webp")));
|
||||
assert!(is_image_extension(std::path::Path::new("photo.bmp")));
|
||||
assert!(is_image_extension(std::path::Path::new("PHOTO.PNG")));
|
||||
|
||||
assert!(!is_image_extension(std::path::Path::new("file.md")));
|
||||
assert!(!is_image_extension(std::path::Path::new("file.txt")));
|
||||
assert!(!is_image_extension(std::path::Path::new("file.pdf")));
|
||||
assert!(!is_image_extension(std::path::Path::new("file.csv")));
|
||||
assert!(!is_image_extension(std::path::Path::new("file")));
|
||||
}
|
||||
|
||||
/// `count_image_markers` from the multimodal module must detect the
|
||||
/// `[IMAGE:]` marker produced by photo attachment formatting.
|
||||
#[test]
|
||||
@ -4217,7 +4316,8 @@ mod tests {
|
||||
std::fs::write(&doc_path, b"%PDF-1.4 fake").expect("write doc fixture");
|
||||
assert!(doc_path.exists(), "document file must exist on disk");
|
||||
|
||||
let doc_content = format!("[Document: {}] {}", doc_filename, doc_path.display());
|
||||
let doc_content =
|
||||
format_attachment_content(IncomingAttachmentKind::Document, doc_filename, &doc_path);
|
||||
assert!(
|
||||
doc_content.starts_with("[Document: report.pdf]"),
|
||||
"document label format mismatch: {doc_content}"
|
||||
@ -4239,7 +4339,8 @@ mod tests {
|
||||
std::fs::copy(&fixture, &photo_path).expect("copy photo fixture");
|
||||
assert!(photo_path.exists(), "photo file must exist on disk");
|
||||
|
||||
let photo_content = format!("[IMAGE:{}]", photo_path.display());
|
||||
let photo_content =
|
||||
format_attachment_content(IncomingAttachmentKind::Photo, photo_filename, &photo_path);
|
||||
assert!(
|
||||
photo_content.starts_with("[IMAGE:"),
|
||||
"photo must use [IMAGE:] marker: {photo_content}"
|
||||
@ -4271,6 +4372,23 @@ mod tests {
|
||||
captioned.contains("Check this out"),
|
||||
"caption text must be present in content"
|
||||
);
|
||||
|
||||
// ── Markdown file sent as Photo (issue #1274) ────────────────
|
||||
let md_filename = "notes.md";
|
||||
let md_path = workspace.path().join(md_filename);
|
||||
std::fs::write(&md_path, b"# Hello\nSome markdown").expect("write md fixture");
|
||||
let md_content =
|
||||
format_attachment_content(IncomingAttachmentKind::Photo, md_filename, &md_path);
|
||||
assert!(
|
||||
!md_content.contains("[IMAGE:"),
|
||||
"markdown must not get [IMAGE:] marker: {md_content}"
|
||||
);
|
||||
let md_msgs = vec![crate::providers::ChatMessage::user(md_content)];
|
||||
assert_eq!(
|
||||
crate::multimodal::count_image_markers(&md_msgs),
|
||||
0,
|
||||
"markdown file must not trigger image marker detection"
|
||||
);
|
||||
}
|
||||
|
||||
// ── Groq provider rejects photo with vision error ────────────────
|
||||
|
||||
Loading…
Reference in New Issue
Block a user