#include #include "enrichers/enrichers.h" using namespace enrichers; // ── is_likely_email ───────────────────────────────────────────────────────── TEST_CASE("is_likely_email: valid emails", "[enrichers]") { CHECK(is_likely_email("info@example.com")); CHECK(is_likely_email("john.doe@company.co.uk")); CHECK(is_likely_email("contact@recycling-firm.de")); CHECK(is_likely_email("hello@my-domain.org")); } TEST_CASE("is_likely_email: rejects non-emails", "[enrichers]") { CHECK_FALSE(is_likely_email("")); CHECK_FALSE(is_likely_email("not-an-email")); CHECK_FALSE(is_likely_email("@no-user.com")); CHECK_FALSE(is_likely_email("user@")); } TEST_CASE("is_likely_email: rejects asset extensions", "[enrichers]") { CHECK_FALSE(is_likely_email("logo@site.png")); CHECK_FALSE(is_likely_email("icon@site.svg")); CHECK_FALSE(is_likely_email("style@site.css")); CHECK_FALSE(is_likely_email("script@site.js")); CHECK_FALSE(is_likely_email("photo@site.jpg")); CHECK_FALSE(is_likely_email("photo@site.webp")); } TEST_CASE("is_likely_email: rejects placeholder/hash patterns", "[enrichers]") { CHECK_FALSE(is_likely_email("user@example.com")); CHECK_FALSE(is_likely_email("test@test.com")); CHECK_FALSE(is_likely_email("a3f2b@hash.com")); CHECK_FALSE(is_likely_email("your@email.com")); CHECK_FALSE(is_likely_email("email@email.com")); CHECK_FALSE(is_likely_email("name@domain.com")); } // ── extract_emails ────────────────────────────────────────────────────────── TEST_CASE("extract_emails: finds emails in text", "[enrichers]") { auto emails = extract_emails("Contact us at info@example.org or sales@company.com"); CHECK(emails.size() >= 2); bool found_info = false, found_sales = false; for (auto& e : emails) { if (e == "info@example.org") found_info = true; if (e == "sales@company.com") found_sales = true; } CHECK(found_info); CHECK(found_sales); } TEST_CASE("extract_emails: deduplicates", "[enrichers]") { auto emails = extract_emails("info@acme.org info@acme.org info@acme.org"); CHECK(emails.size() == 1); } TEST_CASE("extract_emails: empty text returns empty", "[enrichers]") { auto emails = extract_emails(""); CHECK(emails.empty()); } TEST_CASE("extract_emails: filters out asset emails", "[enrichers]") { auto emails = extract_emails("logo@site.png info@real-company.de"); CHECK(emails.size() == 1); CHECK(emails[0] == "info@real-company.de"); } // ── resolve_url ───────────────────────────────────────────────────────────── TEST_CASE("resolve_url: absolute stays absolute", "[enrichers]") { CHECK(resolve_url("https://example.com", "https://other.com/page") == "https://other.com/page"); } TEST_CASE("resolve_url: relative path", "[enrichers]") { auto r = resolve_url("https://example.com/page", "/contact"); CHECK(r == "https://example.com/contact"); } TEST_CASE("resolve_url: protocol-relative", "[enrichers]") { auto r = resolve_url("https://example.com", "//other.com/foo"); CHECK(r == "https://other.com/foo"); } TEST_CASE("resolve_url: relative without slash", "[enrichers]") { auto r = resolve_url("https://example.com/dir/page", "about.html"); CHECK(r == "https://example.com/dir/about.html"); } // ── status_string ─────────────────────────────────────────────────────────── TEST_CASE("status_string: covers all statuses", "[enrichers]") { CHECK(std::string(status_string(EnrichStatus::OK)) == "OK"); CHECK(std::string(status_string(EnrichStatus::NO_EMAIL)) == "NO_EMAIL"); CHECK(std::string(status_string(EnrichStatus::META_TIMEOUT)) == "META_TIMEOUT"); CHECK(std::string(status_string(EnrichStatus::EMAIL_TIMEOUT)) == "EMAIL_TIMEOUT"); CHECK(std::string(status_string(EnrichStatus::FETCH_ERROR)) == "FETCH_ERROR"); CHECK(std::string(status_string(EnrichStatus::NO_PAGES)) == "NO_PAGES"); CHECK(std::string(status_string(EnrichStatus::ERROR)) == "ERROR"); } // ── EnrichConfig defaults ─────────────────────────────────────────────────── TEST_CASE("EnrichConfig: default values", "[enrichers]") { EnrichConfig cfg; CHECK(cfg.meta_timeout_ms == 20000); CHECK(cfg.email_timeout_ms == 30000); CHECK(cfg.email_page_timeout_ms == 10000); CHECK(cfg.email_max_pages == 8); CHECK(cfg.email_abort_after == 1); CHECK_FALSE(cfg.contact_patterns.empty()); CHECK_FALSE(cfg.probe_paths.empty()); }