mono-cpp/tests/unit/test_enrichers.cpp
2026-03-28 13:11:29 +01:00

116 lines
4.8 KiB
C++

#include <catch2/catch_test_macros.hpp>
#include "enrichers/enrichers.h"
using namespace enrichers;
// ── is_likely_email ─────────────────────────────────────────────────────────
// Every address in the table below is expected to be accepted by is_likely_email.
TEST_CASE("is_likely_email: valid emails", "[enrichers]") {
    static constexpr const char* kAccepted[] = {
        "info@example.com",
        "john.doe@company.co.uk",
        "contact@recycling-firm.de",
        "hello@my-domain.org",
    };

    for (const char* candidate : kAccepted) {
        CAPTURE(candidate);  // name the offending input if the check below fails
        CHECK(is_likely_email(candidate));
    }
}
// Inputs that are empty, lack an '@', or are missing the part before or
// after the '@' are expected to be rejected.
TEST_CASE("is_likely_email: rejects non-emails", "[enrichers]") {
    static constexpr const char* kRejected[] = {
        "",
        "not-an-email",
        "@no-user.com",
        "user@",
    };

    for (const char* candidate : kRejected) {
        CAPTURE(candidate);  // name the offending input if the check below fails
        CHECK_FALSE(is_likely_email(candidate));
    }
}
// Strings shaped like "name@host.ext" whose suffix is an image, stylesheet or
// script extension are expected to be rejected.
TEST_CASE("is_likely_email: rejects asset extensions", "[enrichers]") {
    static constexpr const char* kAssetLookalikes[] = {
        "logo@site.png",
        "icon@site.svg",
        "style@site.css",
        "script@site.js",
        "photo@site.jpg",
        "photo@site.webp",
    };

    for (const char* candidate : kAssetLookalikes) {
        CAPTURE(candidate);  // name the offending input if the check below fails
        CHECK_FALSE(is_likely_email(candidate));
    }
}
// Template-style placeholder addresses and a hash-looking local part are
// expected to be rejected even though they are syntactically well-formed.
TEST_CASE("is_likely_email: rejects placeholder/hash patterns", "[enrichers]") {
    static constexpr const char* kPlaceholders[] = {
        "user@example.com",
        "test@test.com",
        "a3f2b@hash.com",
        "your@email.com",
        "email@email.com",
        "name@domain.com",
    };

    for (const char* candidate : kPlaceholders) {
        CAPTURE(candidate);  // name the offending input if the check below fails
        CHECK_FALSE(is_likely_email(candidate));
    }
}
// ── extract_emails ──────────────────────────────────────────────────────────
// Both addresses embedded in a sentence must appear in the extracted result.
TEST_CASE("extract_emails: finds emails in text", "[enrichers]") {
    const auto found = extract_emails("Contact us at info@example.org or sales@company.com");

    // Linear membership probe over the result; position in the result is not asserted.
    const auto contains = [&found](const char* wanted) {
        for (const auto& address : found) {
            if (address == wanted) {
                return true;
            }
        }
        return false;
    };

    CHECK(found.size() >= 2);
    CHECK(contains("info@example.org"));
    CHECK(contains("sales@company.com"));
}
// The same address repeated three times must be reported exactly once.
TEST_CASE("extract_emails: deduplicates", "[enrichers]") {
    CHECK(extract_emails("info@acme.org info@acme.org info@acme.org").size() == 1);
}
// An empty input string must produce an empty result.
TEST_CASE("extract_emails: empty text returns empty", "[enrichers]") {
    CHECK(extract_emails("").empty());
}
// Of the two '@'-containing tokens in the input, only the one that is not an
// asset-style name ("logo@site.png") is expected to be returned.
TEST_CASE("extract_emails: filters out asset emails", "[enrichers]") {
    const auto emails = extract_emails("logo@site.png info@real-company.de");

    // REQUIRE rather than CHECK: a failed CHECK lets the test case continue,
    // which would index into an empty result below. REQUIRE aborts the test
    // case on failure, so emails[0] is only read when it exists.
    REQUIRE(emails.size() == 1);
    CHECK(emails[0] == "info@real-company.de");
}
// ── resolve_url ─────────────────────────────────────────────────────────────
// A reference that is already an absolute URL is returned unchanged,
// regardless of the base it is resolved against.
TEST_CASE("resolve_url: absolute stays absolute", "[enrichers]") {
    const auto resolved = resolve_url("https://example.com", "https://other.com/page");
    CHECK(resolved == "https://other.com/page");
}
// A reference starting with '/' replaces the base URL's path while keeping
// its scheme and host.
TEST_CASE("resolve_url: relative path", "[enrichers]") {
    CHECK(resolve_url("https://example.com/page", "/contact") == "https://example.com/contact");
}
// A reference starting with "//" takes the scheme from the base URL and the
// host and path from the reference.
TEST_CASE("resolve_url: protocol-relative", "[enrichers]") {
    const char* const base = "https://example.com";
    const char* const reference = "//other.com/foo";

    CHECK(resolve_url(base, reference) == "https://other.com/foo");
}
// A reference without a leading '/' replaces only the last path segment of
// the base URL.
TEST_CASE("resolve_url: relative without slash", "[enrichers]") {
    CHECK(resolve_url("https://example.com/dir/page", "about.html") == "https://example.com/dir/about.html");
}
// ── status_string ───────────────────────────────────────────────────────────
// Each EnrichStatus value listed in the table must map to a label equal to
// its enumerator name.
TEST_CASE("status_string: covers all statuses", "[enrichers]") {
    struct Expectation {
        EnrichStatus status;
        const char* label;
    };

    const Expectation kExpectations[] = {
        {EnrichStatus::OK, "OK"},
        {EnrichStatus::NO_EMAIL, "NO_EMAIL"},
        {EnrichStatus::META_TIMEOUT, "META_TIMEOUT"},
        {EnrichStatus::EMAIL_TIMEOUT, "EMAIL_TIMEOUT"},
        {EnrichStatus::FETCH_ERROR, "FETCH_ERROR"},
        {EnrichStatus::NO_PAGES, "NO_PAGES"},
        {EnrichStatus::ERROR, "ERROR"},
    };

    for (const auto& row : kExpectations) {
        CAPTURE(row.label);  // name the expected label if the check below fails
        // Wrap in std::string so the comparison is by content, not by pointer.
        CHECK(std::string(status_string(row.status)) == row.label);
    }
}
// ── EnrichConfig defaults ───────────────────────────────────────────────────
// Pins the values a default-constructed EnrichConfig exposes, so an
// accidental change to a default shows up as a test failure.
TEST_CASE("EnrichConfig: default values", "[enrichers]") {
    EnrichConfig defaults;

    // The pattern and path lists must come pre-populated.
    CHECK_FALSE(defaults.contact_patterns.empty());
    CHECK_FALSE(defaults.probe_paths.empty());

    // Page-crawl limits.
    CHECK(defaults.email_max_pages == 8);
    CHECK(defaults.email_abort_after == 1);

    // Timeouts (the _ms suffix suggests milliseconds; confirm against the header).
    CHECK(defaults.meta_timeout_ms == 20000);
    CHECK(defaults.email_timeout_ms == 30000);
    CHECK(defaults.email_page_timeout_ms == 10000);
}