116 lines
4.8 KiB
C++
116 lines
4.8 KiB
C++
#include <catch2/catch_test_macros.hpp>
|
|
#include "enrichers/enrichers.h"
|
|
|
|
using namespace enrichers;
|
|
|
|
// ── is_likely_email ─────────────────────────────────────────────────────────
|
|
|
|
// Well-formed addresses must be accepted by is_likely_email.
TEST_CASE("is_likely_email: valid emails", "[enrichers]") {
    const char* const accepted[] = {
        "info@example.com",
        "john.doe@company.co.uk",
        "contact@recycling-firm.de",
        "hello@my-domain.org",
    };

    for (const char* candidate : accepted) {
        CAPTURE(candidate);  // report which address failed
        CHECK(is_likely_email(candidate));
    }
}
|
|
|
|
// Strings that are empty or lack a local part / domain must be rejected.
TEST_CASE("is_likely_email: rejects non-emails", "[enrichers]") {
    const char* const rejected[] = {
        "",              // empty input
        "not-an-email",  // no '@' at all
        "@no-user.com",  // nothing before '@'
        "user@",         // nothing after '@'
    };

    for (const char* candidate : rejected) {
        CAPTURE(candidate);  // report which input failed
        CHECK_FALSE(is_likely_email(candidate));
    }
}
|
|
|
|
// Email-shaped strings that end in a static-asset file extension must be
// rejected.
TEST_CASE("is_likely_email: rejects asset extensions", "[enrichers]") {
    const char* const asset_like[] = {
        "logo@site.png",
        "icon@site.svg",
        "style@site.css",
        "script@site.js",
        "photo@site.jpg",
        "photo@site.webp",
    };

    for (const char* candidate : asset_like) {
        CAPTURE(candidate);  // report which input failed
        CHECK_FALSE(is_likely_email(candidate));
    }
}
|
|
|
|
// Syntactically valid addresses that look like template placeholders or
// hash-like local parts must be rejected.
// NOTE(review): "info@example.com" is expected to be accepted by the
// "valid emails" case in this file, so the rejection of "user@example.com"
// presumably keys on the local part rather than the domain — confirm against
// the implementation.
TEST_CASE("is_likely_email: rejects placeholder/hash patterns", "[enrichers]") {
    const char* const placeholders[] = {
        "user@example.com",
        "test@test.com",
        "a3f2b@hash.com",
        "your@email.com",
        "email@email.com",
        "name@domain.com",
    };

    for (const char* candidate : placeholders) {
        CAPTURE(candidate);  // report which input failed
        CHECK_FALSE(is_likely_email(candidate));
    }
}
|
|
|
|
// ── extract_emails ──────────────────────────────────────────────────────────
|
|
|
|
// Both addresses embedded in free-form text must appear in the result.
TEST_CASE("extract_emails: finds emails in text", "[enrichers]") {
    const auto extracted =
        extract_emails("Contact us at info@example.org or sales@company.com");

    CHECK(extracted.size() >= 2);

    // Linear membership probe over the returned addresses; the result order
    // is deliberately not asserted.
    const auto contains = [&extracted](const char* wanted) {
        for (const auto& address : extracted) {
            if (address == wanted) {
                return true;
            }
        }
        return false;
    };

    CHECK(contains("info@example.org"));
    CHECK(contains("sales@company.com"));
}
|
|
|
|
// The same address repeated three times must be reported exactly once.
TEST_CASE("extract_emails: deduplicates", "[enrichers]") {
    CHECK(extract_emails("info@acme.org info@acme.org info@acme.org").size() == 1);
}
|
|
|
|
// Empty input must produce an empty result.
TEST_CASE("extract_emails: empty text returns empty", "[enrichers]") {
    CHECK(extract_emails("").empty());
}
|
|
|
|
// An asset-like token ("logo@site.png") must be dropped while the genuine
// address next to it is kept.
TEST_CASE("extract_emails: filters out asset emails", "[enrichers]") {
    const auto emails = extract_emails("logo@site.png info@real-company.de");

    // REQUIRE rather than CHECK: CHECK continues after a failure, so an empty
    // result would fall through to emails[0] below and read out of bounds.
    REQUIRE(emails.size() == 1);
    CHECK(emails[0] == "info@real-company.de");
}
|
|
|
|
// ── resolve_url ─────────────────────────────────────────────────────────────
|
|
|
|
// An already-absolute second argument must come back unchanged, regardless of
// the first argument.
TEST_CASE("resolve_url: absolute stays absolute", "[enrichers]") {
    const auto resolved = resolve_url("https://example.com", "https://other.com/page");

    CHECK(resolved == "https://other.com/page");
}
|
|
|
|
// A root-relative path ("/contact") is expected to replace the whole path of
// the first argument while keeping its scheme and host.
TEST_CASE("resolve_url: relative path", "[enrichers]") {
    CHECK(resolve_url("https://example.com/page", "/contact") ==
          "https://example.com/contact");
}
|
|
|
|
// A protocol-relative reference ("//host/path") is expected to take the
// scheme from the first argument and everything else from the reference.
TEST_CASE("resolve_url: protocol-relative", "[enrichers]") {
    CHECK(resolve_url("https://example.com", "//other.com/foo") ==
          "https://other.com/foo");
}
|
|
|
|
// A bare relative reference ("about.html") is expected to replace only the
// last path segment of the first argument.
TEST_CASE("resolve_url: relative without slash", "[enrichers]") {
    CHECK(resolve_url("https://example.com/dir/page", "about.html") ==
          "https://example.com/dir/about.html");
}
|
|
|
|
// ── status_string ───────────────────────────────────────────────────────────
|
|
|
|
// Each EnrichStatus enumerator checked here must map to a string that spells
// the enumerator's own name.
TEST_CASE("status_string: covers all statuses", "[enrichers]") {
    // Wrap the result in std::string so the comparison is by content.
    const auto as_text = [](EnrichStatus status) {
        return std::string(status_string(status));
    };

    CHECK(as_text(EnrichStatus::OK) == "OK");
    CHECK(as_text(EnrichStatus::NO_EMAIL) == "NO_EMAIL");
    CHECK(as_text(EnrichStatus::META_TIMEOUT) == "META_TIMEOUT");
    CHECK(as_text(EnrichStatus::EMAIL_TIMEOUT) == "EMAIL_TIMEOUT");
    CHECK(as_text(EnrichStatus::FETCH_ERROR) == "FETCH_ERROR");
    CHECK(as_text(EnrichStatus::NO_PAGES) == "NO_PAGES");
    CHECK(as_text(EnrichStatus::ERROR) == "ERROR");
}
|
|
|
|
// ── EnrichConfig defaults ───────────────────────────────────────────────────
|
|
|
|
// Pins the values a default-constructed EnrichConfig is expected to carry, so
// an accidental change to a default shows up as a test failure.
TEST_CASE("EnrichConfig: default values", "[enrichers]") {
    EnrichConfig defaults;

    // Timeout budgets (the field names indicate milliseconds).
    CHECK(defaults.meta_timeout_ms == 20000);
    CHECK(defaults.email_timeout_ms == 30000);
    CHECK(defaults.email_page_timeout_ms == 10000);

    // Email crawl limits.
    CHECK(defaults.email_max_pages == 8);
    CHECK(defaults.email_abort_after == 1);

    // The pattern/path lists must ship pre-populated.
    CHECK_FALSE(defaults.contact_patterns.empty());
    CHECK_FALSE(defaults.probe_paths.empty());
}
|