//! End-to-end integration tests for the telegram-reader skill. //! //! These tests invoke the Python `telegram_reader.py` script directly via //! `tokio::process::Command`, validating JSON output structure and error handling. //! //! Requirements: //! - `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, `TELEGRAM_PHONE` env vars (or `.env` file) //! - Valid Telethon session at `~/.zeroclaw/workspace/skills/telegram-reader/.session/` //! - Network access to Telegram API //! //! Run: //! source .env && cargo test --test telegram_reader_e2e -- --ignored use serde_json::Value; use std::path::PathBuf; use tokio::process::Command; /// Resolved path to the telegram_reader.py script. fn script_path() -> PathBuf { let home = std::env::var("HOME").expect("HOME env var required"); PathBuf::from(home) .join(".zeroclaw/workspace/skills/telegram-reader/scripts/telegram_reader.py") } /// Path to the .env file used as credential fallback. fn dotenv_path() -> PathBuf { let home = std::env::var("HOME").expect("HOME env var required"); PathBuf::from(home).join("work/erp/zeroclaws/.env") } /// Load Telegram credentials from environment or .env file. /// Returns (api_id, api_hash, phone). 
fn load_credentials() -> (String, String, String) { // Try environment first if let (Ok(id), Ok(hash), Ok(phone)) = ( std::env::var("TELEGRAM_API_ID"), std::env::var("TELEGRAM_API_HASH"), std::env::var("TELEGRAM_PHONE"), ) { return (id, hash, phone); } // Fallback: parse .env file let env_path = dotenv_path(); let content = std::fs::read_to_string(&env_path) .unwrap_or_else(|e| panic!("Cannot read .env at {}: {e}", env_path.display())); let mut api_id = String::new(); let mut api_hash = String::new(); let mut phone = String::new(); for line in content.lines() { let line = line.trim(); if let Some(val) = line.strip_prefix("TELEGRAM_API_ID=") { api_id = val.trim().to_string(); } else if let Some(val) = line.strip_prefix("TELEGRAM_API_HASH=") { api_hash = val.trim().to_string(); } else if let Some(val) = line.strip_prefix("TELEGRAM_PHONE=") { phone = val.trim().to_string(); } } assert!( !api_id.is_empty(), "TELEGRAM_API_ID not found in env or .env" ); assert!( !api_hash.is_empty(), "TELEGRAM_API_HASH not found in env or .env" ); assert!(!phone.is_empty(), "TELEGRAM_PHONE not found in env or .env"); (api_id, api_hash, phone) } /// Run the telegram_reader.py script with given arguments and return parsed JSON. async fn run_telegram_reader(args: &[&str]) -> Value { let (api_id, api_hash, phone) = load_credentials(); let output = Command::new("python3") .arg(script_path()) .args(args) .env("TELEGRAM_API_ID", &api_id) .env("TELEGRAM_API_HASH", &api_hash) .env("TELEGRAM_PHONE", &phone) .output() .await .expect("Failed to execute telegram_reader.py"); let stdout = String::from_utf8_lossy(&output.stdout); let stderr = String::from_utf8_lossy(&output.stderr); serde_json::from_str(stdout.trim()).unwrap_or_else(|e| { panic!( "Failed to parse JSON from telegram_reader.py\n\ exit code: {:?}\n\ stdout: {stdout}\n\ stderr: {stderr}\n\ parse error: {e}", output.status.code() ) }) } /// Run the telegram_reader.py script without valid Telegram credentials. 
/// Uses a fake HOME to prevent .env fallback, but preserves PATH and
/// Python paths so the interpreter and packages remain accessible.
/// Returns (exit_code, stdout, stderr).
///
/// `exit_code` is whatever `ExitStatus::code()` reports for the child, so it
/// is `None` when the process did not exit with a regular status code.
/// stdout/stderr are decoded lossily as UTF-8.
///
/// # Panics
/// Panics if `python3` cannot be started, or (via `script_path()`) if `HOME`
/// is not set in the test process.
async fn run_telegram_reader_no_creds(args: &[&str]) -> (Option<i32>, String, String) {
    let fake_home = "/tmp/telegram_reader_e2e_nocreds";

    let mut cmd = Command::new("python3");
    // `script_path()` is resolved against the real HOME of the test process;
    // only the child sees the fake one.
    cmd.arg(script_path())
        .args(args)
        // Clear Telegram vars
        .env_remove("TELEGRAM_API_ID")
        .env_remove("TELEGRAM_API_HASH")
        .env_remove("TELEGRAM_PHONE")
        // Fake HOME so .env fallback path doesn't resolve to real credentials
        .env("HOME", fake_home);

    // Preserve PATH so python3 and system libs are found
    if let Ok(path) = std::env::var("PATH") {
        cmd.env("PATH", path);
    }

    // Preserve Python package paths (user-installed telethon etc.)
    if let Ok(pp) = std::env::var("PYTHONPATH") {
        cmd.env("PYTHONPATH", pp);
    }

    // Preserve the real user site-packages by pointing Python to actual home
    let real_home = std::env::var("HOME").unwrap_or_default();
    cmd.env("PYTHONUSERBASE", format!("{real_home}/.local"));

    let output = cmd
        .output()
        .await
        .expect("Failed to execute telegram_reader.py");

    (
        output.status.code(),
        String::from_utf8_lossy(&output.stdout).into_owned(),
        String::from_utf8_lossy(&output.stderr).into_owned(),
    )
}

// ═══════════════════════════════════════════════════════════════════════════
// Test cases
// ═══════════════════════════════════════════════════════════════════════════

/// Smoke test: `list_dialogs` returns valid JSON with expected structure.
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_list_dialogs_returns_valid_json() { let result = run_telegram_reader(&["list_dialogs", "--limit", "5"]).await; assert_eq!( result["success"], true, "list_dialogs should succeed, got: {result}" ); assert!( result["count"].as_u64().unwrap() > 0, "Expected at least one dialog, got count={}", result["count"] ); let dialogs = result["dialogs"] .as_array() .expect("dialogs should be an array"); assert!(!dialogs.is_empty(), "dialogs array should not be empty"); // Validate dialog structure let first = &dialogs[0]; assert!(first["id"].is_number(), "dialog.id should be a number"); assert!( first["name"].is_string() || first["name"].is_null(), "dialog.name should be string or null" ); assert!(first["type"].is_string(), "dialog.type should be a string"); let valid_types = ["user", "bot", "group", "channel", "supergroup"]; let dtype = first["type"].as_str().unwrap(); assert!( valid_types.contains(&dtype), "dialog.type should be one of {valid_types:?}, got: {dtype}" ); } /// `search_messages` with a known contact returns valid message structure. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_messages_with_contact() { let result = run_telegram_reader(&[ "search_messages", "--contact-name", "zverozabr", "--limit", "3", ]) .await; assert_eq!( result["success"], true, "search_messages should succeed, got: {result}" ); assert!( result["count"].as_u64().unwrap() > 0, "Expected at least one message" ); // Validate chat metadata let chat = &result["chat"]; assert!(chat["id"].is_number(), "chat.id should be a number"); assert!(chat["type"].is_string(), "chat.type should be a string"); // Validate message structure let messages = result["messages"] .as_array() .expect("messages should be an array"); assert!(!messages.is_empty(), "messages array should not be empty"); let msg = &messages[0]; assert!(msg["id"].is_number(), "message.id should be a number"); assert!(msg["date"].is_string(), "message.date should be a string"); assert!( msg["text"].is_string(), "message.text should be a string (possibly empty)" ); assert!( msg["sender_id"].is_number(), "message.sender_id should be a number" ); } /// `search_messages` with a nonexistent contact returns error. #[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_invalid_contact_returns_error() { let result = run_telegram_reader(&[ "search_messages", "--contact-name", "totally_nonexistent_user_xyz_99999", "--limit", "1", ]) .await; assert_eq!( result["success"], false, "search with invalid contact should fail, got: {result}" ); assert!( result["error"].is_string(), "error field should be a string" ); assert!( !result["error"].as_str().unwrap().is_empty(), "error message should not be empty" ); } /// Running without credentials (no env vars, no .env fallback) should fail gracefully. 
#[tokio::test] #[ignore = "requires network"] async fn e2e_missing_credentials_returns_error() { let (exit_code, stdout, stderr) = run_telegram_reader_no_creds(&["list_dialogs", "--limit", "1"]).await; assert_ne!( exit_code, Some(0), "Should exit with non-zero when credentials are missing" ); // The script should output JSON error to stdout or stderr let combined = format!("{stdout}{stderr}"); let parsed: Result = serde_json::from_str(combined.trim()); match parsed { Ok(json) => { assert_eq!( json["success"], false, "Should report success=false without credentials" ); let error_msg = json["error"].as_str().unwrap_or(""); assert!( error_msg.contains("TELEGRAM_API_ID") || error_msg.contains("API") || error_msg.contains("environment"), "Error should mention missing credentials, got: {error_msg}" ); } Err(_) => { // If not valid JSON, at least verify non-zero exit assert_ne!( exit_code, Some(0), "Should exit non-zero without credentials" ); } } } /// `search_messages` with a keyword query returns filtered results. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_messages_with_query() { // Search in Saved Messages (own account) for a common word let result = run_telegram_reader(&[ "search_messages", "--contact-name", "zverozabr", "--query", "юрист", "--limit", "3", ]) .await; assert_eq!( result["success"], true, "search_messages with query should succeed, got: {result}" ); // count may be 0 if no messages match, but structure should be valid assert!(result["count"].is_number(), "count should be a number"); assert!(result["messages"].is_array(), "messages should be an array"); } /// `search_channels` finds channels by name keyword #[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_channels_returns_matching_channels() { let result = run_telegram_reader(&[ "search_channels", "--channel-query", "python", "--limit", "5", ]) .await; assert_eq!( result["success"], true, "search_channels should succeed, got: {result}" ); assert!(result["count"].is_number(), "count should be a number"); assert!(result["channels"].is_array(), "channels should be an array"); assert_eq!(result["query"], "python", "query should be preserved"); // If we found any channels, validate structure if let Some(channels) = result["channels"].as_array() { if !channels.is_empty() { let first = &channels[0]; assert!(first["id"].is_number(), "channel.id should be a number"); assert!( first["name"].is_string() || first["name"].is_null(), "channel.name should be string or null" ); assert!(first["type"].is_string(), "channel.type should be a string"); } } } /// `search_global` with channel_filter performs two-step search #[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_global_with_channel_filter() { let result = run_telegram_reader(&[ "search_global", "--query", "привет", "--channel-filter", "python", "--limit", "10", ]) .await; assert_eq!( result["success"], true, "search_global with channel_filter 
should succeed, got: {result}" ); assert!(result["count"].is_number(), "count should be a number"); assert!(result["results"].is_array(), "results should be an array"); assert_eq!( result["query"], "привет", "message query should be preserved" ); assert_eq!( result["channel_filter"], "python", "channel_filter should be preserved" ); assert!( result["dialogs_scanned"].is_number(), "dialogs_scanned should be a number" ); } #[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_global_returns_results_from_multiple_chats() { let result = run_telegram_reader(&[ "search_global", "--query", "привет", "--limit", "10", "--dialogs-limit", "10", ]) .await; // Validate JSON structure assert_eq!( result["success"], true, "search_global should succeed, got: {result}" ); assert!(result["count"].is_number(), "count should be a number"); assert!(result["results"].is_array(), "results should be an array"); assert!( result["dialogs_scanned"].is_number(), "dialogs_scanned should be a number" ); assert_eq!( result["query"], "привет", "query should match the search term" ); // Validate result structure if any found if result["count"].as_u64().unwrap() > 0 { let first_result = &result["results"][0]; assert!( first_result["id"].is_number(), "message id should be a number" ); assert!( first_result["date"].is_string(), "message date should be a string" ); assert!( first_result["text"].is_string(), "message text should be a string" ); assert!( first_result["chat"].is_object(), "chat info should be an object" ); assert!( first_result["chat"]["name"].is_string(), "chat name should be a string" ); assert!( first_result["chat"]["type"].is_string(), "chat type should be a string" ); } } #[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_search_global_with_no_results_returns_empty() { let result = run_telegram_reader(&[ "search_global", "--query", "xyzqwertynonexistent12345", "--limit", "5", "--dialogs-limit", "5", ]) .await; 
assert_eq!( result["success"], true, "search_global with no results should still succeed, got: {result}" ); assert_eq!( result["count"].as_u64().unwrap(), 0, "count should be 0 when no results found" ); assert!( result["results"].as_array().unwrap().is_empty(), "results array should be empty when no matches" ); } // ═══════════════════════════════════════════════════════════════════════════ // Bug reproduction tests — real-world agent failures (TDD red phase) // // These tests reproduce bugs observed in live agent session 2026-03-04: // Agent asDrgl failed to search Telegram for "кондиционер Самуи" due to // multiple cascading failures documented below. // ═══════════════════════════════════════════════════════════════════════════ /// Bug reproduction: empty string params should be treated as absent by script. /// /// Real-world failure: tool_handler rendered `--date-from '' --date-to '' --channel-filter ''` /// because LLM sent `""` for optional params. Even if tool_handler now strips these, /// the Python script must also be resilient — empty string args from CLI should behave /// identically to absent args (argparse stores "" not None). 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_bug_empty_string_params_treated_as_absent() { // Simulate pre-fix behavior: explicitly pass empty strings for optional params let with_empty = run_telegram_reader(&[ "search_global", "--query", "привет", "--limit", "5", "--dialogs-limit", "5", "--date-from", "", "--date-to", "", "--channel-filter", "", ]) .await; // Baseline: call without optional params at all let without = run_telegram_reader(&[ "search_global", "--query", "привет", "--limit", "5", "--dialogs-limit", "5", ]) .await; assert_eq!( with_empty["success"], true, "should succeed with empty string params, got: {with_empty}" ); assert_eq!( without["success"], true, "should succeed without optional params, got: {without}" ); // Empty string channel_filter must NOT activate channel filtering // (if it does, iter_dialogs scans 200 dialogs matching "" which matches everything) assert_eq!( with_empty["dialogs_scanned"], without["dialogs_scanned"], "empty string channel_filter should behave same as absent.\n\ with empty strings: dialogs_scanned={}, channel_filter={}\n\ without params: dialogs_scanned={}, channel_filter={}", with_empty["dialogs_scanned"], with_empty["channel_filter"], without["dialogs_scanned"], without["channel_filter"] ); } /// Bug reproduction: search_global stops scanning after first dialog fills limit. /// /// Real-world failure: `search_global --query "кондиционер" --dialogs-limit 30` /// returned `dialogs_scanned: 1` — the bot's own chat was first in dialog list, /// contained 10+ messages mentioning "кондиционер" (the bot's own replies!), /// and limit=10 was hit immediately. No real group chats were ever searched. /// /// Expected: search_global should always scan ALL requested dialogs (up to /// dialogs_limit), collecting per-dialog results, then return top N by date. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_bug_search_global_must_scan_all_requested_dialogs() { let result = run_telegram_reader(&[ "search_global", "--query", "привет", "--limit", "50", "--dialogs-limit", "10", ]) .await; assert_eq!( result["success"], true, "search_global should succeed, got: {result}" ); let scanned = result["dialogs_scanned"].as_u64().unwrap(); // With native global search (SearchGlobalRequest), dialogs_scanned = unique // chats that contributed results. For a common word like "привет" across // many chats, we expect results from multiple distinct chats. assert!( scanned >= 2, "search_global should return results from multiple chats, \ but dialogs_scanned={scanned}. This suggests the search is \ still limited to a single chat.", ); } /// Bug reproduction: search_global results dominated by bot's own messages. /// /// Real-world failure: all 10 results came from chat `asDrgl` (type=user), /// which is the bot talking to itself about "кондиционер". No results from /// actual Telegram groups/channels where real users discuss the topic. /// /// Expected: results from groups/channels/supergroups should be present /// when searching a common word across 20 dialogs. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_bug_search_global_results_include_group_chats() { let result = run_telegram_reader(&[ "search_global", "--query", "привет", "--limit", "30", "--dialogs-limit", "20", ]) .await; assert_eq!( result["success"], true, "search_global should succeed, got: {result}" ); let results = result["results"] .as_array() .expect("results should be an array"); assert!( !results.is_empty(), "search for common word 'привет' across 20 dialogs should find something" ); // Collect unique chat types from results let chat_types: Vec<&str> = results .iter() .filter_map(|r| r["chat"]["type"].as_str()) .collect(); let has_group_results = chat_types .iter() .any(|t| *t == "group" || *t == "channel" || *t == "supergroup"); assert!( has_group_results, "search_global across 20 dialogs for 'привет' should include results from \ groups/channels/supergroups, but got only these chat types: {chat_types:?}\n\ First 5 results: {}", results .iter() .take(5) .map(|r| format!( " {}({}) from {}", r["chat"]["name"], r["chat"]["type"], r["date"] )) .collect::>() .join("\n") ); } /// Bug reproduction: search_global returns bot-to-user echo messages. /// /// Real-world failure: the bot searched for "кондиционер" and found its own /// prior replies to the user: "С текущими инструментами я не могу искать...", /// "Ок, в Merry Samuistmas! по слову «кондиционер» ничего не нашлось." /// These are messages FROM the bot TO the user, polluting search results. /// /// Expected: messages sent by bot accounts (is_bot=true or known bot IDs) /// should be excluded or deprioritized in search_global results. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_bug_search_global_excludes_bot_echo_messages() { let result = run_telegram_reader(&[ "search_global", "--query", "кондиционер", "--limit", "20", "--dialogs-limit", "30", ]) .await; assert_eq!( result["success"], true, "search_global should succeed, got: {result}" ); let results = result["results"] .as_array() .expect("results should be an array"); // Count results that are from bot entities let bot_results: Vec<_> = results .iter() .filter(|r| { let sender_type = r["sender"]["type"].as_str().unwrap_or(""); let sender_name = r["sender"]["name"].as_str().unwrap_or(""); let sender_username = r["sender"]["username"].as_str().unwrap_or(""); // Bot messages: sender is a bot account, or the chat is a 1-on-1 with a bot sender_type == "bot" || sender_username.ends_with("_bot") || sender_username.ends_with("Bot") || sender_name == "asDrgl" }) .collect(); let total = results.len(); let bot_count = bot_results.len(); // Bot messages should not dominate results assert!( bot_count <= total / 4, "Bot echo messages should be <=25% of results, but got {bot_count}/{total}.\n\ Bot results: {}", bot_results .iter() .take(3) .map(|r| format!( " sender={}(@{}) in chat {}", r["sender"]["name"], r["sender"]["username"], r["chat"]["name"] )) .collect::>() .join("\n") ); } /// Bug reproduction: search_channels only searches user's existing dialogs. /// /// Real-world failure: user asked to search for Samui-related channels globally /// in Telegram. Agent used search_channels which only calls iter_dialogs — /// finds only channels the user already joined. No Telegram directory search. /// /// Expected: search_channels should search Telegram's global directory /// (via client.search_global or contacts.search) in addition to local dialogs, /// so it can discover NEW channels the user hasn't joined yet. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_bug_search_channels_finds_public_channels() { // "python" is common enough to exist in Telegram's global directory let result = run_telegram_reader(&[ "search_channels", "--channel-query", "python", "--limit", "200", ]) .await; assert_eq!( result["success"], true, "search_channels should succeed, got: {result}" ); let count = result["count"].as_u64().unwrap(); // With only iter_dialogs, this returns very few results (only already-joined chats). // With global search, "python" should find many public channels/groups. assert!( count >= 5, "search_channels for 'python' should find at least 5 channels \ (including from Telegram global directory), but found only {count}.\n\ This suggests search_channels is limited to iter_dialogs (user's existing chats)\n\ and does not search Telegram's global channel directory.\n\ Results: {result}" ); } /// Integration scenario: the exact query from the failed agent session. /// /// Reproduces the full failing scenario: user asks to find AC service on Samui. /// Agent should search Telegram for relevant channels and messages. /// This test verifies the search_global pipeline works end-to-end for this query. 
#[tokio::test] #[ignore = "requires network + Telegram credentials"] async fn e2e_scenario_samui_aircon_search_pipeline() { // Step 1: Find Samui-related channels let channels = run_telegram_reader(&[ "search_channels", "--channel-query", "Samui", "--limit", "200", ]) .await; assert_eq!( channels["success"], true, "channel search should succeed: {channels}" ); let channel_count = channels["count"].as_u64().unwrap(); // Step 2: Global search for aircon-related messages let search = run_telegram_reader(&["search_global", "--query", "кондиционер", "--limit", "20"]).await; assert_eq!( search["success"], true, "global search should succeed: {search}" ); let scanned = search["dialogs_scanned"].as_u64().unwrap(); let result_count = search["count"].as_u64().unwrap(); // With native global search, dialogs_scanned = unique chats with results assert!( scanned >= 2, "search should return results from multiple chats (scanned {scanned})" ); // Print diagnostic summary for manual review eprintln!( "\n=== Samui aircon search pipeline results ===\n\ Channels found for 'Samui': {channel_count}\n\ Dialogs scanned for 'кондиционер': {scanned}\n\ Messages found: {result_count}\n\ Channels: {}\n\ Top 5 search results:\n{}", channels["channels"], search["results"] .as_array() .unwrap() .iter() .take(5) .map(|r| format!( " [{}] {} in {}({}): {}", r["date"].as_str().unwrap_or("?"), r["sender"]["name"].as_str().unwrap_or("?"), r["chat"]["name"].as_str().unwrap_or("?"), r["chat"]["type"].as_str().unwrap_or("?"), &r["text"] .as_str() .unwrap_or("") .chars() .take(80) .collect::() )) .collect::>() .join("\n") ); }