From 241b7a87129eb1a8ba0601dc7f3bb88cc9415405 Mon Sep 17 00:00:00 2001 From: babayaga Date: Fri, 26 Dec 2025 11:28:43 +0100 Subject: [PATCH] email worker --- packages/search/dist-in/lib/email.js | 51 ++++++++----- packages/search/dist-in/lib/pupeteer.d.ts | 2 +- packages/search/dist-in/lib/pupeteer.js | 90 +++++++++++++++-------- packages/search/package-lock.json | 28 +++++++ packages/search/package.json | 1 + packages/search/src/lib/email.ts | 56 +++++++++----- packages/search/src/lib/pupeteer.ts | 89 ++++++++++++++-------- 7 files changed, 220 insertions(+), 97 deletions(-) diff --git a/packages/search/dist-in/lib/email.js b/packages/search/dist-in/lib/email.js index 1058f45e..240f63eb 100644 --- a/packages/search/dist-in/lib/email.js +++ b/packages/search/dist-in/lib/email.js @@ -1,4 +1,5 @@ import { logger } from '../index.js'; +import pMap from 'p-map'; import { CheerioWebBaseLoader } from "langchain/document_loaders/web/cheerio"; import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; import { htmlToText } from "html-to-text"; @@ -49,19 +50,18 @@ export const puppeteerLoader = async (url, headless, location, checkCancelled) = try { // Function to detect a valid URL loaderWithOptions = new loader(url, { launchOptions: { - headless, + headless: headless, ignoreHTTPSErrors: true }, gotoOptions: { - timeout: 15000, + timeout: location.pageTimeout || 15000, waitUntil: "networkidle0", }, async evaluate(page, browser) { if (checkCancelled && await checkCancelled()) { - debugger; const pid = browser.process()?.pid; - logger.warn(`Killing browser process ${pid} due to cancellation`); - await browser.close(); + logger.warn(`Cancellation requested inside evaluate for process ${pid}`); + // Do not close browser, it is shared. Page will be closed by finally block in pupeteer.ts throw new Error('CancelledByUser'); } const result = await page.evaluate(() => document.body.innerHTML); @@ -70,19 +70,23 @@ export const puppeteerLoader = async (url, headless, location, checkCancelled) = } }); // Race load against cancellation - const loadPromise = loaderWithOptions.load(); + let isFinished = false; + const loadPromise = loaderWithOptions.load().finally(() => { + isFinished = true; + }); const cancelPromise = new Promise(async (_, reject) => { if (!checkCancelled) return; // Poll for cancellation - while (true) { + while (!isFinished) { await new Promise(r => setTimeout(r, 1000)); + logger.info('Checking cancellation for ' + url); if (await checkCancelled()) { const browser = await getBrowser(); if (browser) { const pid = browser.process()?.pid; - logger.info(`Killing browser process ${pid} due to cancellation`); - await browser.close(); + logger.info(`Cancellation confirmed for process ${pid}`); + // await browser.close(); // Do not close shared browser } reject(new Error('CancelledByUser')); break; @@ -132,7 +136,7 @@ export const findEMail = async (question, url, opts, location) => { return false; } let pageUrl = url; - let docs = await puppeteerLoader(pageUrl, opts.headless, location, opts.checkCancelled); + let docs = await puppeteerLoader(pageUrl, opts.headless, { ...location, pageTimeout: opts.pageTimeout }, opts.checkCancelled); let emails = []; docs.forEach((d) => { if (d.pageContent && d.pageContent.indexOf('@') !== -1) { @@ -156,17 +160,27 @@ export const findEmailEach = async (location, opts, onProgress) => { } const emails = []; const abortAfter = opts.abortAfter ?? 1; - for (const page of location.meta.pages) { + const concurrency = opts.concurrency || 2; + const maxPages = opts.maxPages || 15; + const contactKeywords = ['contact', 'kontakt', 'contacto', 'contatto', 'info', 'imprint', 'impressum', 'help', 'support', 'about']; + // Sort pages: prioritize contact pages + const pagesToSearch = location.meta.pages.sort((a, b) => { + const urlA = a.url.toLowerCase(); + const urlB = b.url.toLowerCase(); + const scoreA = contactKeywords.some(k => urlA.includes(k)) ? 1 : 0; + const scoreB = contactKeywords.some(k => urlB.includes(k)) ? 1 : 0; + return scoreB - scoreA; // Descending order (contact pages first) + }).slice(0, maxPages); + await pMap(pagesToSearch, async (page) => { if (opts.checkCancelled && await opts.checkCancelled()) { - debugger; - logger.info(`[findEmailEach] Cancellation requested for ${location.title}`); - break; + // logger.info(`[findEmailEach] Cancellation requested for ${location.title}`); + return; } if (emails.length >= abortAfter) { - break; + return; } if (page.status !== 'PENDING') { - continue; + return; } page.status = 'SEARCHING_EMAIL'; try { @@ -179,7 +193,6 @@ export const findEmailEach = async (location, opts, onProgress) => { } catch (error) { if (error.message === 'CancelledByUser') { - debugger; throw error; } page.status = 'FAILED'; @@ -189,7 +202,7 @@ export const findEmailEach = async (location, opts, onProgress) => { if (onProgress) { await onProgress(page); } - } + }, { concurrency, stopOnError: false }); // Update location emails if (emails.length > 0) { const uniqueEmails = [...new Set([...(location.emails || []), ...emails])]; @@ -200,4 +213,4 @@ export const findEmailEach = async (location, opts, onProgress) => { } return emails; }; -//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZW1haWwuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL2VtYWlsLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLE9BQU8sRUFBRSxNQUFNLEVBQUUsTUFBTSxhQUFhLENBQUE7QUFDcEMsT0FBTyxFQUFFLG9CQUFvQixFQUFFLE1BQU0sd0NBQXdDLENBQUE7QUFDN0UsT0FBTyxFQUFFLDhCQUE4QixFQUFFLE1BQU0seUJBQXlCLENBQUE7QUFDeEUsT0FBTyxFQUFFLFVBQVUsRUFBRSxNQUFNLGNBQWMsQ0FBQTtBQUN6QyxPQUFPLEVBQUUsMEJBQTBCLEVBQUUsUUFBUSxFQUFFLE1BQU0sMkJBQTJCLENBQUE7QUFFaEYsT0FBTyxFQUFFLFVBQVUsRUFBRSxNQUFNLFdBQVcsQ0FBQTtBQUV0QyxNQUFNLFVBQVUsR0FBRyxpREFBaUQsQ0FBQTtBQUNwRSxNQUFNLFdBQVcsR0FBRyxzQ0FBc0MsQ0FBQTtBQUUxRCxPQUFPLEVBQUUsc0JBQXNCLElBQUksTUFBTSxFQUFFLFVBQVUsRUFBRSxNQUFNLGVBQWUsQ0FBQTtBQUU1RSxNQUFNLE9BQU8scUJBQXNCLFNBQVEsMEJBQTBCO0lBQ2pFLE1BQU0sQ0FBQyxPQUFPO1FBQ1YsT0FBTyx1QkFBdUIsQ0FBQTtJQUNsQyxDQUFDO0lBQ0QsWUFBWSxPQUFPLEdBQUcsRUFBRTtRQUNwQixLQUFLLENBQUMsT0FBTyxDQUFDLENBQUM7UUFDZixNQUFNLENBQUMsY0FBYyxDQUFDLElBQUksRUFBRSxTQUFTLEVBQUU7WUFDbkMsVUFBVSxFQUFFLElBQUk7WUFDaEIsWUFBWSxFQUFFLElBQUk7WUFDbEIsUUFBUSxFQUFFLElBQUk7WUFDZCxLQUFLLEVBQUUsT0FBTztTQUNqQixDQUFDLENBQUE7SUFDTixDQUFDO0lBQ0QsS0FBSyxDQUFDLGtCQUFrQixDQUFDLFFBQWtCO1FBQ3ZDLE1BQU0sZ0JBQWdCLEdBQUcsVUFBVSxDQUFDLFFBQVEsQ0FBQyxXQUFXLEVBQUUsSUFBSSxDQUFDLFNBQVMsQ0FBQyxDQUFDLENBQUM7UUFDM0UsT0FBTyxJQUFJLFFBQVEsQ0FBQztZQUNoQixXQUFXLEVBQUUsZ0JBQWdCO1lBQzdCLFFBQVEsRUFBRSxFQUFFLEdBQUcsUUFBUSxDQUFDLFFBQVEsRUFBRTtTQUNyQyxDQUFDLENBQUM7SUFDUCxDQUFDO0NBQ0o7QUFFRCxNQUFNLENBQUMsTUFBTSxhQUFhLEdBQUcsS0FBSyxFQUFFLEdBQVcsRUFBRSxFQUFFO0lBQy9DLE1BQU0sTUFBTSxHQUFHLElBQUksb0JBQW9CLENBQUMsR0FBRyxDQUFDLENBQUE7SUFDNUMsTUFBTSxJQUFJLEdBQUcsTUFBTSxNQUFNLENBQUMsSUFBSSxFQUFFLENBQUE7SUFDaEMsTUFBTSxRQUFRLEdBQUcsOEJBQThCLENBQUMsWUFBWSxDQUFDLE1BQU0sQ0FBQyxDQUFBO0lBQ3BFLE1BQU0sV0FBVyxHQUFHLElBQUkscUJBQXFCLEVBQUUsQ0FBQTtJQUMvQyxNQUFNLFFBQVEsR0FBRyxRQUFRLENBQUMsSUFBSSxDQUFDLFdBQWtCLENBQUMsQ0FBQTtJQUNsRCxNQUFNLEdBQUcsR0FBRyxNQUFNLFFBQVEsQ0FBQyxNQUFNLENBQUMsSUFBSSxDQUFDLENBQUE7SUFDdkMsT0FBTyxHQUFHLENBQUE7QUFDZCxDQUFDLENBQUE7QUFHRCxNQUFNLENBQUMsTUFBTSxlQUFlLEdBQUcsS0FBSyxFQUFFLEdBQVcsRUFBRSxRQUFpQixFQUFFLFFBQXFCLEVBQUUsY0FBdUMsRUFBRSxFQUFFO0lBQ3BJLElBQUksVUFBVSxDQUFDLEdBQUcsQ0FBQyxLQUFLLEtBQUssSUFBSSxHQUFHLENBQUMsT0FBTyxDQUFDLFFBQVEsQ0FBQyxLQUFLLENBQUMsQ0FBQyxFQUFFLENBQUM7UUFDNUQsT0FBTyxFQUFFLENBQUE7SUFDYixDQUFDO0lBRUQsSUFBSSxjQUFjLElBQUksTUFBTSxjQUFjLEVBQUUsRUFBRSxDQUFDO1FBQzNDLE1BQU0sQ0FBQyxJQUFJLENBQUMsMkJBQTJCLEdBQUcsR0FBRyxDQUFDLENBQUM7UUFDL0MsT0FBTyxFQUFFLENBQUM7SUFDZCxDQUFDO0lBRUQsSUFBSSxpQkFBaUIsQ0FBQTtJQUNyQixJQUFJLENBQUMsQ0FBUSxpQ0FBaUM7UUFDMUMsaUJBQWlCLEdBQUcsSUFBSSxNQUFNLENBQzFCLEdBQUcsRUFDSDtZQUNJLGFBQWEsRUFBRTtnQkFDWCxRQUFRO2dCQUNSLGlCQUFpQixFQUFFLElBQUk7YUFDMUI7WUFFRCxXQUFXLEVBQUU7Z0JBQ1QsT0FBTyxFQUFFLEtBQUs7Z0JBQ2QsU0FBUyxFQUFFLGNBQWM7YUFDNUI7WUFDRCxLQUFLLENBQUMsUUFBUSxDQUFDLElBQUksRUFBRSxPQUFPO2dCQUN4QixJQUFJLGNBQWMsSUFBSSxNQUFNLGNBQWMsRUFBRSxFQUFFLENBQUM7b0JBQzNDLFFBQVEsQ0FBQTtvQkFDUixNQUFNLEdBQUcsR0FBRyxPQUFPLENBQUMsT0FBTyxFQUFFLEVBQUUsR0FBRyxDQUFDO29CQUNuQyxNQUFNLENBQUMsSUFBSSxDQUFDLDJCQUEyQixHQUFHLHNCQUFzQixDQUFDLENBQUM7b0JBQ2xFLE1BQU0sT0FBTyxDQUFDLEtBQUssRUFBRSxDQUFDO29CQUN0QixNQUFNLElBQUksS0FBSyxDQUFDLGlCQUFpQixDQUFDLENBQUM7Z0JBQ3ZDLENBQUM7Z0JBQ0QsTUFBTSxNQUFNLEdBQUcsTUFBTSxJQUFJLENBQUMsUUFBUSxDQUFDLEdBQUcsRUFBRSxDQUFDLFFBQVEsQ0FBQyxJQUFJLENBQUMsU0FBUyxDQUFDLENBQUE7Z0JBQ2pFLHdCQUF3QjtnQkFDeEIsT0FBTyxNQUFNLENBQUE7WUFDakIsQ0FBQztTQUNKLENBQ0osQ0FBQTtRQUNELGlDQUFpQztRQUNqQyxNQUFNLFdBQVcsR0FBRyxpQkFBaUIsQ0FBQyxJQUFJLEVBQUUsQ0FBQztRQUU3QyxNQUFNLGFBQWEsR0FBRyxJQUFJLE9BQU8sQ0FBUSxLQUFLLEVBQUUsQ0FBQyxFQUFFLE1BQU0sRUFBRSxFQUFFO1lBQ3pELElBQUksQ0FBQyxjQUFjO2dCQUFFLE9BQU87WUFDNUIsd0JBQXdCO1lBQ3hCLE9BQU8sSUFBSSxFQUFFLENBQUM7Z0JBQ1YsTUFBTSxJQUFJLE9BQU8sQ0FBQyxDQUFDLENBQUMsRUFBRSxDQUFDLFVBQVUsQ0FBQyxDQUFDLEVBQUUsSUFBSSxDQUFDLENBQUMsQ0FBQztnQkFDNUMsSUFBSSxNQUFNLGNBQWMsRUFBRSxFQUFFLENBQUM7b0JBQ3pCLE1BQU0sT0FBTyxHQUFHLE1BQU0sVUFBVSxFQUFFLENBQUM7b0JBQ25DLElBQUksT0FBTyxFQUFFLENBQUM7d0JBQ1YsTUFBTSxHQUFHLEdBQUcsT0FBTyxDQUFDLE9BQU8sRUFBRSxFQUFFLEdBQUcsQ0FBQzt3QkFDbkMsTUFBTSxDQUFDLElBQUksQ0FBQywyQkFBMkIsR0FBRyxzQkFBc0IsQ0FBQyxDQUFDO3dCQUNsRSxNQUFNLE9BQU8sQ0FBQyxLQUFLLEVBQUUsQ0FBQztvQkFDMUIsQ0FBQztvQkFDRCxNQUFNLENBQUMsSUFBSSxLQUFLLENBQUMsaUJBQWlCLENBQUMsQ0FBQyxDQUFDO29CQUNyQyxNQUFNO2dCQUNWLENBQUM7WUFDTCxDQUFDO1FBQ0wsQ0FBQyxDQUFDLENBQUM7UUFFSCxNQUFNLElBQUksR0FBRyxNQUFNLE9BQU8sQ0FBQyxJQUFJLENBQUMsQ0FBQyxXQUFXLEVBQUUsYUFBYSxDQUFDLENBQUMsQ0FBQztRQUM5RCxNQUFNLFFBQVEsR0FBRyw4QkFBOEIsQ0FBQyxZQUFZLENBQUMsTUFBTSxDQUFDLENBQUE7UUFDcEUsTUFBTSxXQUFXLEdBQUcsSUFBSSxxQkFBcUIsRUFBRSxDQUFBO1FBQy9DLE1BQU0sUUFBUSxHQUFHLFFBQVEsQ0FBQyxJQUFJLENBQUMsV0FBa0IsQ0FBQyxDQUFBO1FBQ2xELE1BQU0sR0FBRyxHQUFHLE1BQU0sUUFBUSxDQUFDLE1BQU0sQ0FBQyxJQUFJLENBQUMsQ0FBQTtRQUN2QyxPQUFPLEdBQUcsQ0FBQTtJQUNkLENBQUM7SUFBQyxPQUFPLEtBQUssRUFBRSxDQUFDO1FBQ2IsSUFBSSxLQUFLLFlBQVksS0FBSyxJQUFJLEtBQUssQ0FBQyxPQUFPLEtBQUssaUJBQWlCLEVBQUUsQ0FBQztZQUNoRSxNQUFNLEtBQUssQ0FBQztRQUNoQixDQUFDO1FBQ0QsTUFBTSxDQUFDLElBQUksQ0FBQyxzQkFBc0IsR0FBRyxHQUFHLEVBQUUsS0FBSyxZQUFZLEtBQUssQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFDLENBQUMsTUFBTSxDQUFDLEtBQUssQ0FBQyxDQUFDLENBQUE7UUFDakcsUUFBUSxDQUFDLFFBQVEsR0FBRyxJQUFJLENBQUE7UUFDeEIsMkNBQTJDO1FBRTNDLE9BQU8sRUFBRSxDQUFBO0lBQ2IsQ0FBQztBQUNMLENBQUMsQ0FBQTtBQUNELE1BQU0scUJBQXFCLEdBQUcsQ0FBQyxJQUFZLEVBQVksRUFBRTtJQUNyRCxNQUFNLEtBQUssR0FBRyxJQUFJLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFBO0lBQ2pDLE1BQU0sY0FBYyxHQUFhLEVBQUUsQ0FBQTtJQUNuQyxNQUFNLGVBQWUsR0FBRyxDQUFDLE1BQU0sRUFBRSxNQUFNLEVBQUUsT0FBTyxFQUFFLE1BQU0sRUFBRSxPQUFPLEVBQUUsTUFBTSxFQUFFLE1BQU0sRUFBRSxNQUFNLEVBQUUsT0FBTyxFQUFFLE9BQU8sQ0FBQyxDQUFDO0lBRTdHLEtBQUssTUFBTSxJQUFJLElBQUksS0FBSyxFQUFFLENBQUM7UUFDdkIsTUFBTSxPQUFPLEdBQUcsSUFBSSxDQUFDLEtBQUssQ0FBQyxVQUFVLENBQUMsQ0FBQTtRQUN0QyxJQUFJLE9BQU8sRUFBRSxDQUFDO1lBQ1YsS0FBSyxNQUFNLEtBQUssSUFBSSxPQUFPLEVBQUUsQ0FBQztnQkFDMUIsdUVBQXVFO2dCQUN2RSxNQUFNLFVBQVUsR0FBRyxLQUFLLENBQUMsV0FBVyxFQUFFLENBQUM7Z0JBQ3ZDLE1BQU0sT0FBTyxHQUFHLGVBQWUsQ0FBQyxJQUFJLENBQUMsR0FBRyxDQUFDLEVBQUUsQ0FBQyxVQUFVLENBQUMsUUFBUSxDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUM7Z0JBQ3RFLElBQUksQ0FBQyxPQUFPLEVBQUUsQ0FBQztvQkFDWCxjQUFjLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxDQUFDO2dCQUMvQixDQUFDO1lBQ0wsQ0FBQztRQUNMLENBQUM7SUFDTCxDQUFDO0lBQ0QsT0FBTyxjQUFjLENBQUE7QUFDekIsQ0FBQyxDQUFBO0FBRUQsTUFBTSxDQUFDLE1BQU0sU0FBUyxHQUFHLEtBQUssRUFBRSxRQUFnQixFQUFFLEdBQVcsRUFBRSxJQUFxRSxFQUFFLFFBQXFCLEVBQUUsRUFBRTtJQUMzSiwrQ0FBK0M7SUFDL0MsSUFBSSxHQUFHLENBQUMsS0FBSyxDQUFDLFVBQVUsQ0FBQyxJQUFJLEdBQUcsQ0FBQyxLQUFLLENBQUMsV0FBVyxDQUFDLElBQUksR0FBRyxDQUFDLE9BQU8sQ0FBQyxRQUFRLENBQUMsS0FBSyxDQUFDLENBQUMsRUFBRSxDQUFDO1FBQ2xGLE1BQU0sQ0FBQyxJQUFJLENBQUMsb0JBQW9CLEVBQUUsR0FBRyxDQUFDLENBQUE7UUFDdEMsT0FBTyxLQUFLLENBQUE7SUFDaEIsQ0FBQztJQUNELElBQUksT0FBTyxHQUFHLEdBQUcsQ0FBQTtJQUNqQixJQUFJLElBQUksR0FBRyxNQUFNLGVBQWUsQ0FBQyxPQUFPLEVBQUUsSUFBSSxDQUFDLFFBQVEsRUFBRSxRQUFRLEVBQUUsSUFBSSxDQUFDLGNBQWMsQ0FBUSxDQUFBO0lBQzlGLElBQUksTUFBTSxHQUFhLEVBQUUsQ0FBQTtJQUN6QixJQUFJLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBTSxFQUFFLEVBQUU7UUFDcEIsSUFBSSxDQUFDLENBQUMsV0FBVyxJQUFJLENBQUMsQ0FBQyxXQUFXLENBQUMsT0FBTyxDQUFDLEdBQUcsQ0FBQyxLQUFLLENBQUMsQ0FBQyxFQUFFLENBQUM7WUFDckQsTUFBTSxLQUFLLEdBQUcscUJBQXFCLENBQUMsQ0FBQyxDQUFDLFdBQVcsQ0FBQyxDQUFBO1lBQ2xELElBQUksS0FBSyxFQUFFLENBQUM7Z0JBQ1IsTUFBTSxDQUFDLElBQUksQ0FBQyxHQUFHLEtBQUssQ0FBQyxDQUFBO1lBQ3pCLENBQUM7UUFDTCxDQUFDO0lBQ0wsQ0FBQyxDQUFDLENBQUE7SUFDRixNQUFNLEdBQUcsQ0FBQyxHQUFHLElBQUksR0FBRyxDQUFDLE1BQU0sQ0FBQyxDQUFDLENBQUE7SUFDN0IsUUFBUSxDQUFDLE1BQU0sR0FBRyxNQUFNLENBQUE7SUFDeEIsSUFBSSxNQUFNLENBQUMsTUFBTSxFQUFFLENBQUM7UUFDaEIsUUFBUSxDQUFDLEtBQUssR0FBRyxNQUFNLENBQUMsQ0FBQyxDQUFDLENBQUE7SUFDOUIsQ0FBQztJQUNELFFBQVEsQ0FBQyxLQUFLLElBQUksTUFBTSxDQUFDLEtBQUssQ0FBQyxtQkFBbUIsR0FBRyxNQUFNLFFBQVEsQ0FBQyxLQUFLLE1BQU0sUUFBUSxDQUFDLElBQUksTUFBTSxRQUFRLENBQUMsS0FBSyxNQUFNLElBQUksQ0FBQyxVQUFVLEVBQUUsQ0FBQyxDQUFBO0lBQ3hJLE9BQU8sTUFBTSxDQUFBO0FBQ2pCLENBQUMsQ0FBQTtBQUdELE1BQU0sQ0FBQyxNQUFNLGFBQWEsR0FBRyxLQUFLLEVBQUUsUUFBcUIsRUFBRSxJQUFtSSxFQUFFLFVBQTBDLEVBQUUsRUFBRTtJQUMxTyxJQUFJLENBQUMsUUFBUSxDQUFDLElBQUksSUFBSSxDQUFDLFFBQVEsQ0FBQyxJQUFJLENBQUMsS0FBSyxFQUFFLENBQUM7UUFDekMsT0FBTyxFQUFFLENBQUE7SUFDYixDQUFDO0lBRUQsTUFBTSxNQUFNLEdBQWEsRUFBRSxDQUFBO0lBQzNCLE1BQU0sVUFBVSxHQUFHLElBQUksQ0FBQyxVQUFVLElBQUksQ0FBQyxDQUFBO0lBRXZDLEtBQUssTUFBTSxJQUFJLElBQUksUUFBUSxDQUFDLElBQUksQ0FBQyxLQUFLLEVBQUUsQ0FBQztRQUNyQyxJQUFJLElBQUksQ0FBQyxjQUFjLElBQUksTUFBTSxJQUFJLENBQUMsY0FBYyxFQUFFLEVBQUUsQ0FBQztZQUNyRCxRQUFRLENBQUE7WUFDUixNQUFNLENBQUMsSUFBSSxDQUFDLDhDQUE4QyxRQUFRLENBQUMsS0FBSyxFQUFFLENBQUMsQ0FBQztZQUM1RSxNQUFNO1FBQ1YsQ0FBQztRQUVELElBQUksTUFBTSxDQUFDLE1BQU0sSUFBSSxVQUFVLEVBQUUsQ0FBQztZQUM5QixNQUFLO1FBQ1QsQ0FBQztRQUVELElBQUksSUFBSSxDQUFDLE1BQU0sS0FBSyxTQUFTLEVBQUUsQ0FBQztZQUM1QixTQUFRO1FBQ1osQ0FBQztRQUVELElBQUksQ0FBQyxNQUFNLEdBQUcsaUJBQWlCLENBQUE7UUFDL0IsSUFBSSxDQUFDO1lBQ0QsTUFBTSxDQUFDLElBQUksQ0FBQyx1QkFBdUIsSUFBSSxDQUFDLEdBQUcsRUFBRSxDQUFDLENBQUM7WUFDL0MsTUFBTSxVQUFVLEdBQUcsTUFBTSxTQUFTLENBQUMsWUFBWSxFQUFFLElBQUksQ0FBQyxHQUFHLEVBQUUsSUFBSSxFQUFFLFFBQVEsQ0FBQyxDQUFBO1lBQzFFLElBQUksVUFBVSxJQUFJLEtBQUssQ0FBQyxPQUFPLENBQUMsVUFBVSxDQUFDLEVBQUUsQ0FBQztnQkFDMUMsTUFBTSxDQUFDLElBQUksQ0FBQyxHQUFHLFVBQVUsQ0FBQyxDQUFBO1lBQzlCLENBQUM7WUFDRCxJQUFJLENBQUMsTUFBTSxHQUFHLGdCQUFnQixDQUFBO1FBQ2xDLENBQUM7UUFBQyxPQUFPLEtBQUssRUFBRSxDQUFDO1lBQ2IsSUFBSSxLQUFLLENBQUMsT0FBTyxLQUFLLGlCQUFpQixFQUFFLENBQUM7Z0JBQ3RDLFFBQVEsQ0FBQTtnQkFDUixNQUFNLEtBQUssQ0FBQztZQUNoQixDQUFDO1lBQ0QsSUFBSSxDQUFDLE1BQU0sR0FBRyxRQUFRLENBQUE7WUFDdEIsSUFBSSxDQUFDLEtBQUssR0FBRyxLQUFLLENBQUMsT0FBTyxDQUFBO1lBQzFCLE1BQU0sQ0FBQyxLQUFLLENBQUMsNkJBQTZCLElBQUksQ0FBQyxHQUFHLEdBQUcsRUFBRSxLQUFLLENBQUMsQ0FBQTtRQUNqRSxDQUFDO1FBRUQsSUFBSSxVQUFVLEVBQUUsQ0FBQztZQUNiLE1BQU0sVUFBVSxDQUFDLElBQUksQ0FBQyxDQUFBO1FBQzFCLENBQUM7SUFDTCxDQUFDO0lBRUQseUJBQXlCO0lBQ3pCLElBQUksTUFBTSxDQUFDLE1BQU0sR0FBRyxDQUFDLEVBQUUsQ0FBQztRQUNwQixNQUFNLFlBQVksR0FBRyxDQUFDLEdBQUcsSUFBSSxHQUFHLENBQUMsQ0FBQyxHQUFHLENBQUMsUUFBUSxDQUFDLE1BQU0sSUFBSSxFQUFFLENBQUMsRUFBRSxHQUFHLE1BQU0sQ0FBQyxDQUFDLENBQUMsQ0FBQTtRQUMxRSxRQUFRLENBQUMsTUFBTSxHQUFHLFlBQVksQ0FBQTtRQUM5QixJQUFJLFlBQVksQ0FBQyxNQUFNLEdBQUcsQ0FBQyxFQUFFLENBQUM7WUFDMUIsUUFBUSxDQUFDLEtBQUssR0FBRyxZQUFZLENBQUMsQ0FBQyxDQUFDLENBQUE7UUFDcEMsQ0FBQztJQUNMLENBQUM7SUFFRCxPQUFPLE1BQU0sQ0FBQTtBQUNqQixDQUFDLENBQUEifQ== \ No newline at end of file +//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZW1haWwuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL2VtYWlsLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLE9BQU8sRUFBRSxNQUFNLEVBQUUsTUFBTSxhQUFhLENBQUE7QUFDcEMsT0FBTyxJQUFJLE1BQU0sT0FBTyxDQUFBO0FBQ3hCLE9BQU8sRUFBRSxvQkFBb0IsRUFBRSxNQUFNLHdDQUF3QyxDQUFBO0FBQzdFLE9BQU8sRUFBRSw4QkFBOEIsRUFBRSxNQUFNLHlCQUF5QixDQUFBO0FBQ3hFLE9BQU8sRUFBRSxVQUFVLEVBQUUsTUFBTSxjQUFjLENBQUE7QUFDekMsT0FBTyxFQUFFLDBCQUEwQixFQUFFLFFBQVEsRUFBRSxNQUFNLDJCQUEyQixDQUFBO0FBRWhGLE9BQU8sRUFBRSxVQUFVLEVBQUUsTUFBTSxXQUFXLENBQUE7QUFFdEMsTUFBTSxVQUFVLEdBQUcsaURBQWlELENBQUE7QUFDcEUsTUFBTSxXQUFXLEdBQUcsc0NBQXNDLENBQUE7QUFFMUQsT0FBTyxFQUFFLHNCQUFzQixJQUFJLE1BQU0sRUFBRSxVQUFVLEVBQUUsTUFBTSxlQUFlLENBQUE7QUFFNUUsTUFBTSxPQUFPLHFCQUFzQixTQUFRLDBCQUEwQjtJQUNqRSxNQUFNLENBQUMsT0FBTztRQUNWLE9BQU8sdUJBQXVCLENBQUE7SUFDbEMsQ0FBQztJQUNELFlBQVksT0FBTyxHQUFHLEVBQUU7UUFDcEIsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFDO1FBQ2YsTUFBTSxDQUFDLGNBQWMsQ0FBQyxJQUFJLEVBQUUsU0FBUyxFQUFFO1lBQ25DLFVBQVUsRUFBRSxJQUFJO1lBQ2hCLFlBQVksRUFBRSxJQUFJO1lBQ2xCLFFBQVEsRUFBRSxJQUFJO1lBQ2QsS0FBSyxFQUFFLE9BQU87U0FDakIsQ0FBQyxDQUFBO0lBQ04sQ0FBQztJQUNELEtBQUssQ0FBQyxrQkFBa0IsQ0FBQyxRQUFrQjtRQUN2QyxNQUFNLGdCQUFnQixHQUFHLFVBQVUsQ0FBQyxRQUFRLENBQUMsV0FBVyxFQUFFLElBQUksQ0FBQyxTQUFTLENBQUMsQ0FBQyxDQUFDO1FBQzNFLE9BQU8sSUFBSSxRQUFRLENBQUM7WUFDaEIsV0FBVyxFQUFFLGdCQUFnQjtZQUM3QixRQUFRLEVBQUUsRUFBRSxHQUFHLFFBQVEsQ0FBQyxRQUFRLEVBQUU7U0FDckMsQ0FBQyxDQUFDO0lBQ1AsQ0FBQztDQUNKO0FBRUQsTUFBTSxDQUFDLE1BQU0sYUFBYSxHQUFHLEtBQUssRUFBRSxHQUFXLEVBQUUsRUFBRTtJQUMvQyxNQUFNLE1BQU0sR0FBRyxJQUFJLG9CQUFvQixDQUFDLEdBQUcsQ0FBQyxDQUFBO0lBQzVDLE1BQU0sSUFBSSxHQUFHLE1BQU0sTUFBTSxDQUFDLElBQUksRUFBRSxDQUFBO0lBQ2hDLE1BQU0sUUFBUSxHQUFHLDhCQUE4QixDQUFDLFlBQVksQ0FBQyxNQUFNLENBQUMsQ0FBQTtJQUNwRSxNQUFNLFdBQVcsR0FBRyxJQUFJLHFCQUFxQixFQUFFLENBQUE7SUFDL0MsTUFBTSxRQUFRLEdBQUcsUUFBUSxDQUFDLElBQUksQ0FBQyxXQUFrQixDQUFDLENBQUE7SUFDbEQsTUFBTSxHQUFHLEdBQUcsTUFBTSxRQUFRLENBQUMsTUFBTSxDQUFDLElBQUksQ0FBQyxDQUFBO0lBQ3ZDLE9BQU8sR0FBRyxDQUFBO0FBQ2QsQ0FBQyxDQUFBO0FBR0QsTUFBTSxDQUFDLE1BQU0sZUFBZSxHQUFHLEtBQUssRUFBRSxHQUFXLEVBQUUsUUFBaUIsRUFBRSxRQUFxQixFQUFFLGNBQXVDLEVBQUUsRUFBRTtJQUNwSSxJQUFJLFVBQVUsQ0FBQyxHQUFHLENBQUMsS0FBSyxLQUFLLElBQUksR0FBRyxDQUFDLE9BQU8sQ0FBQyxRQUFRLENBQUMsS0FBSyxDQUFDLENBQUMsRUFBRSxDQUFDO1FBQzVELE9BQU8sRUFBRSxDQUFBO0lBQ2IsQ0FBQztJQUVELElBQUksY0FBYyxJQUFJLE1BQU0sY0FBYyxFQUFFLEVBQUUsQ0FBQztRQUMzQyxNQUFNLENBQUMsSUFBSSxDQUFDLDJCQUEyQixHQUFHLEdBQUcsQ0FBQyxDQUFDO1FBQy9DLE9BQU8sRUFBRSxDQUFDO0lBQ2QsQ0FBQztJQUVELElBQUksaUJBQWlCLENBQUE7SUFDckIsSUFBSSxDQUFDLENBQVEsaUNBQWlDO1FBQzFDLGlCQUFpQixHQUFHLElBQUksTUFBTSxDQUMxQixHQUFHLEVBQ0g7WUFDSSxhQUFhLEVBQUU7Z0JBQ1gsUUFBUSxFQUFFLFFBQVE7Z0JBQ2xCLGlCQUFpQixFQUFFLElBQUk7YUFDMUI7WUFFRCxXQUFXLEVBQUU7Z0JBQ1QsT0FBTyxFQUFFLFFBQVEsQ0FBQyxXQUFXLElBQUksS0FBSztnQkFDdEMsU0FBUyxFQUFFLGNBQWM7YUFDNUI7WUFDRCxLQUFLLENBQUMsUUFBUSxDQUFDLElBQUksRUFBRSxPQUFPO2dCQUN4QixJQUFJLGNBQWMsSUFBSSxNQUFNLGNBQWMsRUFBRSxFQUFFLENBQUM7b0JBQzNDLE1BQU0sR0FBRyxHQUFHLE9BQU8sQ0FBQyxPQUFPLEVBQUUsRUFBRSxHQUFHLENBQUM7b0JBQ25DLE1BQU0sQ0FBQyxJQUFJLENBQUMsc0RBQXNELEdBQUcsRUFBRSxDQUFDLENBQUM7b0JBQ3pFLDBGQUEwRjtvQkFDMUYsTUFBTSxJQUFJLEtBQUssQ0FBQyxpQkFBaUIsQ0FBQyxDQUFDO2dCQUN2QyxDQUFDO2dCQUNELE1BQU0sTUFBTSxHQUFHLE1BQU0sSUFBSSxDQUFDLFFBQVEsQ0FBQyxHQUFHLEVBQUUsQ0FBQyxRQUFRLENBQUMsSUFBSSxDQUFDLFNBQVMsQ0FBQyxDQUFBO2dCQUNqRSx3QkFBd0I7Z0JBQ3hCLE9BQU8sTUFBTSxDQUFBO1lBQ2pCLENBQUM7U0FDSixDQUNKLENBQUE7UUFDRCxpQ0FBaUM7UUFDakMsSUFBSSxVQUFVLEdBQUcsS0FBSyxDQUFDO1FBQ3ZCLE1BQU0sV0FBVyxHQUFHLGlCQUFpQixDQUFDLElBQUksRUFBRSxDQUFDLE9BQU8sQ0FBQyxHQUFHLEVBQUU7WUFDdEQsVUFBVSxHQUFHLElBQUksQ0FBQztRQUN0QixDQUFDLENBQUMsQ0FBQztRQUVILE1BQU0sYUFBYSxHQUFHLElBQUksT0FBTyxDQUFRLEtBQUssRUFBRSxDQUFDLEVBQUUsTUFBTSxFQUFFLEVBQUU7WUFDekQsSUFBSSxDQUFDLGNBQWM7Z0JBQUUsT0FBTztZQUM1Qix3QkFBd0I7WUFDeEIsT0FBTyxDQUFDLFVBQVUsRUFBRSxDQUFDO2dCQUNqQixNQUFNLElBQUksT0FBTyxDQUFDLENBQUMsQ0FBQyxFQUFFLENBQUMsVUFBVSxDQUFDLENBQUMsRUFBRSxJQUFJLENBQUMsQ0FBQyxDQUFDO2dCQUM1QyxNQUFNLENBQUMsSUFBSSxDQUFDLDRCQUE0QixHQUFHLEdBQUcsQ0FBQyxDQUFDO2dCQUNoRCxJQUFJLE1BQU0sY0FBYyxFQUFFLEVBQUUsQ0FBQztvQkFDekIsTUFBTSxPQUFPLEdBQUcsTUFBTSxVQUFVLEVBQUUsQ0FBQztvQkFDbkMsSUFBSSxPQUFPLEVBQUUsQ0FBQzt3QkFDVixNQUFNLEdBQUcsR0FBRyxPQUFPLENBQUMsT0FBTyxFQUFFLEVBQUUsR0FBRyxDQUFDO3dCQUNuQyxNQUFNLENBQUMsSUFBSSxDQUFDLHNDQUFzQyxHQUFHLEVBQUUsQ0FBQyxDQUFDO3dCQUN6RCx3REFBd0Q7b0JBQzVELENBQUM7b0JBRUQsTUFBTSxDQUFDLElBQUksS0FBSyxDQUFDLGlCQUFpQixDQUFDLENBQUMsQ0FBQztvQkFDckMsTUFBTTtnQkFDVixDQUFDO1lBQ0wsQ0FBQztRQUNMLENBQUMsQ0FBQyxDQUFDO1FBRUgsTUFBTSxJQUFJLEdBQUcsTUFBTSxPQUFPLENBQUMsSUFBSSxDQUFDLENBQUMsV0FBVyxFQUFFLGFBQWEsQ0FBQyxDQUFDLENBQUM7UUFDOUQsTUFBTSxRQUFRLEdBQUcsOEJBQThCLENBQUMsWUFBWSxDQUFDLE1BQU0sQ0FBQyxDQUFBO1FBQ3BFLE1BQU0sV0FBVyxHQUFHLElBQUkscUJBQXFCLEVBQUUsQ0FBQTtRQUMvQyxNQUFNLFFBQVEsR0FBRyxRQUFRLENBQUMsSUFBSSxDQUFDLFdBQWtCLENBQUMsQ0FBQTtRQUNsRCxNQUFNLEdBQUcsR0FBRyxNQUFNLFFBQVEsQ0FBQyxNQUFNLENBQUMsSUFBSSxDQUFDLENBQUE7UUFDdkMsT0FBTyxHQUFHLENBQUE7SUFDZCxDQUFDO0lBQUMsT0FBTyxLQUFLLEVBQUUsQ0FBQztRQUNiLElBQUksS0FBSyxZQUFZLEtBQUssSUFBSSxLQUFLLENBQUMsT0FBTyxLQUFLLGlCQUFpQixFQUFFLENBQUM7WUFDaEUsTUFBTSxLQUFLLENBQUM7UUFDaEIsQ0FBQztRQUNELE1BQU0sQ0FBQyxJQUFJLENBQUMsc0JBQXNCLEdBQUcsR0FBRyxFQUFFLEtBQUssWUFBWSxLQUFLLENBQUMsQ0FBQyxDQUFDLEtBQUssQ0FBQyxPQUFPLENBQUMsQ0FBQyxDQUFDLE1BQU0sQ0FBQyxLQUFLLENBQUMsQ0FBQyxDQUFBO1FBQ2pHLFFBQVEsQ0FBQyxRQUFRLEdBQUcsSUFBSSxDQUFBO1FBQ3hCLDJDQUEyQztRQUUzQyxPQUFPLEVBQUUsQ0FBQTtJQUNiLENBQUM7QUFDTCxDQUFDLENBQUE7QUFDRCxNQUFNLHFCQUFxQixHQUFHLENBQUMsSUFBWSxFQUFZLEVBQUU7SUFDckQsTUFBTSxLQUFLLEdBQUcsSUFBSSxDQUFDLEtBQUssQ0FBQyxPQUFPLENBQUMsQ0FBQTtJQUNqQyxNQUFNLGNBQWMsR0FBYSxFQUFFLENBQUE7SUFDbkMsTUFBTSxlQUFlLEdBQUcsQ0FBQyxNQUFNLEVBQUUsTUFBTSxFQUFFLE9BQU8sRUFBRSxNQUFNLEVBQUUsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLEVBQUUsTUFBTSxFQUFFLE9BQU8sRUFBRSxPQUFPLENBQUMsQ0FBQztJQUU3RyxLQUFLLE1BQU0sSUFBSSxJQUFJLEtBQUssRUFBRSxDQUFDO1FBQ3ZCLE1BQU0sT0FBTyxHQUFHLElBQUksQ0FBQyxLQUFLLENBQUMsVUFBVSxDQUFDLENBQUE7UUFDdEMsSUFBSSxPQUFPLEVBQUUsQ0FBQztZQUNWLEtBQUssTUFBTSxLQUFLLElBQUksT0FBTyxFQUFFLENBQUM7Z0JBQzFCLHVFQUF1RTtnQkFDdkUsTUFBTSxVQUFVLEdBQUcsS0FBSyxDQUFDLFdBQVcsRUFBRSxDQUFDO2dCQUN2QyxNQUFNLE9BQU8sR0FBRyxlQUFlLENBQUMsSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsVUFBVSxDQUFDLFFBQVEsQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDO2dCQUN0RSxJQUFJLENBQUMsT0FBTyxFQUFFLENBQUM7b0JBQ1gsY0FBYyxDQUFDLElBQUksQ0FBQyxLQUFLLENBQUMsQ0FBQztnQkFDL0IsQ0FBQztZQUNMLENBQUM7UUFDTCxDQUFDO0lBQ0wsQ0FBQztJQUNELE9BQU8sY0FBYyxDQUFBO0FBQ3pCLENBQUMsQ0FBQTtBQUVELE1BQU0sQ0FBQyxNQUFNLFNBQVMsR0FBRyxLQUFLLEVBQUUsUUFBZ0IsRUFBRSxHQUFXLEVBQUUsSUFBcUUsRUFBRSxRQUFxQixFQUFFLEVBQUU7SUFDM0osK0NBQStDO0lBQy9DLElBQUksR0FBRyxDQUFDLEtBQUssQ0FBQyxVQUFVLENBQUMsSUFBSSxHQUFHLENBQUMsS0FBSyxDQUFDLFdBQVcsQ0FBQyxJQUFJLEdBQUcsQ0FBQyxPQUFPLENBQUMsUUFBUSxDQUFDLEtBQUssQ0FBQyxDQUFDLEVBQUUsQ0FBQztRQUNsRixNQUFNLENBQUMsSUFBSSxDQUFDLG9CQUFvQixFQUFFLEdBQUcsQ0FBQyxDQUFBO1FBQ3RDLE9BQU8sS0FBSyxDQUFBO0lBQ2hCLENBQUM7SUFDRCxJQUFJLE9BQU8sR0FBRyxHQUFHLENBQUE7SUFDakIsSUFBSSxJQUFJLEdBQUcsTUFBTSxlQUFlLENBQUMsT0FBTyxFQUFFLElBQUksQ0FBQyxRQUFRLEVBQUUsRUFBRSxHQUFHLFFBQVEsRUFBRSxXQUFXLEVBQUUsSUFBSSxDQUFDLFdBQVcsRUFBRSxFQUFFLElBQUksQ0FBQyxjQUFjLENBQVEsQ0FBQTtJQUNwSSxJQUFJLE1BQU0sR0FBYSxFQUFFLENBQUE7SUFDekIsSUFBSSxDQUFDLE9BQU8sQ0FBQyxDQUFDLENBQU0sRUFBRSxFQUFFO1FBQ3BCLElBQUksQ0FBQyxDQUFDLFdBQVcsSUFBSSxDQUFDLENBQUMsV0FBVyxDQUFDLE9BQU8sQ0FBQyxHQUFHLENBQUMsS0FBSyxDQUFDLENBQUMsRUFBRSxDQUFDO1lBQ3JELE1BQU0sS0FBSyxHQUFHLHFCQUFxQixDQUFDLENBQUMsQ0FBQyxXQUFXLENBQUMsQ0FBQTtZQUNsRCxJQUFJLEtBQUssRUFBRSxDQUFDO2dCQUNSLE1BQU0sQ0FBQyxJQUFJLENBQUMsR0FBRyxLQUFLLENBQUMsQ0FBQTtZQUN6QixDQUFDO1FBQ0wsQ0FBQztJQUNMLENBQUMsQ0FBQyxDQUFBO0lBQ0YsTUFBTSxHQUFHLENBQUMsR0FBRyxJQUFJLEdBQUcsQ0FBQyxNQUFNLENBQUMsQ0FBQyxDQUFBO0lBQzdCLFFBQVEsQ0FBQyxNQUFNLEdBQUcsTUFBTSxDQUFBO0lBQ3hCLElBQUksTUFBTSxDQUFDLE1BQU0sRUFBRSxDQUFDO1FBQ2hCLFFBQVEsQ0FBQyxLQUFLLEdBQUcsTUFBTSxDQUFDLENBQUMsQ0FBQyxDQUFBO0lBQzlCLENBQUM7SUFDRCxRQUFRLENBQUMsS0FBSyxJQUFJLE1BQU0sQ0FBQyxLQUFLLENBQUMsbUJBQW1CLEdBQUcsTUFBTSxRQUFRLENBQUMsS0FBSyxNQUFNLFFBQVEsQ0FBQyxJQUFJLE1BQU0sUUFBUSxDQUFDLEtBQUssTUFBTSxJQUFJLENBQUMsVUFBVSxFQUFFLENBQUMsQ0FBQTtJQUN4SSxPQUFPLE1BQU0sQ0FBQTtBQUNqQixDQUFDLENBQUE7QUFHRCxNQUFNLENBQUMsTUFBTSxhQUFhLEdBQUcsS0FBSyxFQUFFLFFBQXFCLEVBQUUsSUFBbUksRUFBRSxVQUEwQyxFQUFFLEVBQUU7SUFDMU8sSUFBSSxDQUFDLFFBQVEsQ0FBQyxJQUFJLElBQUksQ0FBQyxRQUFRLENBQUMsSUFBSSxDQUFDLEtBQUssRUFBRSxDQUFDO1FBQ3pDLE9BQU8sRUFBRSxDQUFBO0lBQ2IsQ0FBQztJQUVELE1BQU0sTUFBTSxHQUFhLEVBQUUsQ0FBQTtJQUMzQixNQUFNLFVBQVUsR0FBRyxJQUFJLENBQUMsVUFBVSxJQUFJLENBQUMsQ0FBQTtJQUV2QyxNQUFNLFdBQVcsR0FBRyxJQUFJLENBQUMsV0FBVyxJQUFJLENBQUMsQ0FBQTtJQUN6QyxNQUFNLFFBQVEsR0FBRyxJQUFJLENBQUMsUUFBUSxJQUFJLEVBQUUsQ0FBQTtJQUNwQyxNQUFNLGVBQWUsR0FBRyxDQUFDLFNBQVMsRUFBRSxTQUFTLEVBQUUsVUFBVSxFQUFFLFVBQVUsRUFBRSxNQUFNLEVBQUUsU0FBUyxFQUFFLFdBQVcsRUFBRSxNQUFNLEVBQUUsU0FBUyxFQUFFLE9BQU8sQ0FBQyxDQUFDO0lBRW5JLHVDQUF1QztJQUN2QyxNQUFNLGFBQWEsR0FBRyxRQUFRLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxJQUFJLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQyxFQUFFLEVBQUU7UUFDcEQsTUFBTSxJQUFJLEdBQUcsQ0FBQyxDQUFDLEdBQUcsQ0FBQyxXQUFXLEVBQUUsQ0FBQztRQUNqQyxNQUFNLElBQUksR0FBRyxDQUFDLENBQUMsR0FBRyxDQUFDLFdBQVcsRUFBRSxDQUFDO1FBRWpDLE1BQU0sTUFBTSxHQUFHLGVBQWUsQ0FBQyxJQUFJLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQyxJQUFJLENBQUMsUUFBUSxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBQ25FLE1BQU0sTUFBTSxHQUFHLGVBQWUsQ0FBQyxJQUFJLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQyxJQUFJLENBQUMsUUFBUSxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBRW5FLE9BQU8sTUFBTSxHQUFHLE1BQU0sQ0FBQyxDQUFDLHlDQUF5QztJQUNyRSxDQUFDLENBQUMsQ0FBQyxLQUFLLENBQUMsQ0FBQyxFQUFFLFFBQVEsQ0FBQyxDQUFBO0lBRXJCLE1BQU0sSUFBSSxDQUFDLGFBQWEsRUFBRSxLQUFLLEVBQUUsSUFBVSxFQUFFLEVBQUU7UUFDM0MsSUFBSSxJQUFJLENBQUMsY0FBYyxJQUFJLE1BQU0sSUFBSSxDQUFDLGNBQWMsRUFBRSxFQUFFLENBQUM7WUFDckQsK0VBQStFO1lBQy9FLE9BQU07UUFDVixDQUFDO1FBRUQsSUFBSSxNQUFNLENBQUMsTUFBTSxJQUFJLFVBQVUsRUFBRSxDQUFDO1lBQzlCLE9BQU07UUFDVixDQUFDO1FBRUQsSUFBSSxJQUFJLENBQUMsTUFBTSxLQUFLLFNBQVMsRUFBRSxDQUFDO1lBQzVCLE9BQU07UUFDVixDQUFDO1FBRUQsSUFBSSxDQUFDLE1BQU0sR0FBRyxpQkFBaUIsQ0FBQTtRQUMvQixJQUFJLENBQUM7WUFDRCxNQUFNLENBQUMsSUFBSSxDQUFDLHVCQUF1QixJQUFJLENBQUMsR0FBRyxFQUFFLENBQUMsQ0FBQTtZQUM5QyxNQUFNLFVBQVUsR0FBRyxNQUFNLFNBQVMsQ0FBQyxZQUFZLEVBQUUsSUFBSSxDQUFDLEdBQUcsRUFBRSxJQUFJLEVBQUUsUUFBUSxDQUFDLENBQUE7WUFDMUUsSUFBSSxVQUFVLElBQUksS0FBSyxDQUFDLE9BQU8sQ0FBQyxVQUFVLENBQUMsRUFBRSxDQUFDO2dCQUMxQyxNQUFNLENBQUMsSUFBSSxDQUFDLEdBQUcsVUFBVSxDQUFDLENBQUE7WUFDOUIsQ0FBQztZQUNELElBQUksQ0FBQyxNQUFNLEdBQUcsZ0JBQWdCLENBQUE7UUFDbEMsQ0FBQztRQUFDLE9BQU8sS0FBSyxFQUFFLENBQUM7WUFDYixJQUFJLEtBQUssQ0FBQyxPQUFPLEtBQUssaUJBQWlCLEVBQUUsQ0FBQztnQkFDdEMsTUFBTSxLQUFLLENBQUM7WUFDaEIsQ0FBQztZQUNELElBQUksQ0FBQyxNQUFNLEdBQUcsUUFBUSxDQUFBO1lBQ3RCLElBQUksQ0FBQyxLQUFLLEdBQUcsS0FBSyxDQUFDLE9BQU8sQ0FBQTtZQUMxQixNQUFNLENBQUMsS0FBSyxDQUFDLDZCQUE2QixJQUFJLENBQUMsR0FBRyxHQUFHLEVBQUUsS0FBSyxDQUFDLENBQUE7UUFDakUsQ0FBQztRQUVELElBQUksVUFBVSxFQUFFLENBQUM7WUFDYixNQUFNLFVBQVUsQ0FBQyxJQUFJLENBQUMsQ0FBQTtRQUMxQixDQUFDO0lBQ0wsQ0FBQyxFQUFFLEVBQUUsV0FBVyxFQUFFLFdBQVcsRUFBRSxLQUFLLEVBQUUsQ0FBQyxDQUFBO0lBRXZDLHlCQUF5QjtJQUN6QixJQUFJLE1BQU0sQ0FBQyxNQUFNLEdBQUcsQ0FBQyxFQUFFLENBQUM7UUFDcEIsTUFBTSxZQUFZLEdBQUcsQ0FBQyxHQUFHLElBQUksR0FBRyxDQUFDLENBQUMsR0FBRyxDQUFDLFFBQVEsQ0FBQyxNQUFNLElBQUksRUFBRSxDQUFDLEVBQUUsR0FBRyxNQUFNLENBQUMsQ0FBQyxDQUFDLENBQUE7UUFDMUUsUUFBUSxDQUFDLE1BQU0sR0FBRyxZQUFZLENBQUE7UUFDOUIsSUFBSSxZQUFZLENBQUMsTUFBTSxHQUFHLENBQUMsRUFBRSxDQUFDO1lBQzFCLFFBQVEsQ0FBQyxLQUFLLEdBQUcsWUFBWSxDQUFDLENBQUMsQ0FBQyxDQUFBO1FBQ3BDLENBQUM7SUFDTCxDQUFDO0lBRUQsT0FBTyxNQUFNLENBQUE7QUFDakIsQ0FBQyxDQUFBIn0= \ No newline at end of file diff --git a/packages/search/dist-in/lib/pupeteer.d.ts b/packages/search/dist-in/lib/pupeteer.d.ts index abe01d54..0109509a 100644 --- a/packages/search/dist-in/lib/pupeteer.d.ts +++ b/packages/search/dist-in/lib/pupeteer.d.ts @@ -17,7 +17,7 @@ export type PuppeteerWebBaseLoaderOptions = { gotoOptions?: PuppeteerGotoOptions; evaluate?: PuppeteerEvaluate; }; -export declare const getBrowser: () => Browser; +export declare const getBrowser: () => Promise; export declare const getPage: (browser: Browser) => Promise; export declare class PuppeteerWebBaseLoader extends BaseDocumentLoader implements DocumentLoader { webPath: string; diff --git a/packages/search/dist-in/lib/pupeteer.js b/packages/search/dist-in/lib/pupeteer.js index d4457706..ad8630d2 100644 --- a/packages/search/dist-in/lib/pupeteer.js +++ b/packages/search/dist-in/lib/pupeteer.js @@ -1,5 +1,6 @@ import { Document } from "@langchain/core/documents"; import { BaseDocumentLoader } from "langchain/document_loaders/base"; +import pLimit from "p-limit"; /** * Class that extends the BaseDocumentLoader class and implements the * DocumentLoader interface. It represents a document loader for scraping @@ -17,25 +18,43 @@ import { BaseDocumentLoader } from "langchain/document_loaders/base"; * const screenshot = await loader.screenshot(); * ``` */ -let browser = null; -let page = null; -const launchBrowser = async (options) => { - if (browser) - return browser; - const { launch } = await PuppeteerWebBaseLoader.imports(); - browser = await launch({ - headless: true, - defaultViewport: null, - ignoreDefaultArgs: ["--disable-extensions"], - ...options?.launchOptions, - }); - return browser; +// Singleton browser promise to prevent race conditions +let browserPromise = null; +let idleTimer = null; +const limit = pLimit(parseInt(process.env.EMAIL_SEARCH_MAX_PUPETEER_PAGES || '10')); +const IDLE_TIMEOUT_SECONDS = parseInt(process.env.EMAIL_SEARCH_PUPETEER_IDLE_TIMEOUT_SECONDS || '60'); +const resetIdleTimer = () => { + if (idleTimer) + clearTimeout(idleTimer); + idleTimer = setTimeout(async () => { + if (browserPromise) { + console.log(`[Puppeteer] Browser idle timeout (${IDLE_TIMEOUT_SECONDS}s) reached, closing browser`); + const browser = await browserPromise; + await browser.close(); + browserPromise = null; + } + }, IDLE_TIMEOUT_SECONDS * 1000); }; -export const getBrowser = () => browser; +const launchBrowser = async (options) => { + resetIdleTimer(); + if (browserPromise) + return browserPromise; + browserPromise = (async () => { + const { launch } = await PuppeteerWebBaseLoader.imports(); + const b = await launch({ + headless: process.env.EMAIL_SEARCH_HEADLESS === 'false' ? false : true, + defaultViewport: null, + ignoreDefaultArgs: ["--disable-extensions"], + ...options?.launchOptions, + }); + return b; + })(); + return browserPromise; +}; +export const getBrowser = () => browserPromise; export const getPage = async (browser) => { - if (page) - return page; - page = await browser.newPage(); + // Always create a new page for concurrency + const page = await browser.newPage(); return page; }; export class PuppeteerWebBaseLoader extends BaseDocumentLoader { @@ -49,18 +68,31 @@ export class PuppeteerWebBaseLoader extends BaseDocumentLoader { static browser; static async _scrape(url, options) { const browser = await launchBrowser(options); - PuppeteerWebBaseLoader.browser = browser; - const page = await getPage(browser); - await page.goto(url, { - timeout: 5000, - waitUntil: "domcontentloaded", - ...options?.gotoOptions, + // PuppeteerWebBaseLoader.browser = browser // Static property usage is deprecated/incorrect with this pattern + return limit(async () => { + console.log(`[Puppeteer] Entering limit (Active: ${limit.activeCount}, Pending: ${limit.pendingCount}) for ${url}`); + try { + const page = await getPage(browser); + try { + await page.goto(url, { + timeout: 5000, + waitUntil: "domcontentloaded", + ...options?.gotoOptions, + }); + const bodyHTML = options?.evaluate + ? await options?.evaluate(page, browser) + : await page.evaluate(() => document.body.innerHTML); + return bodyHTML; + } + finally { + await page.close(); + } + } + finally { + console.log(`[Puppeteer] Exiting limit (Active: ${limit.activeCount}, Pending: ${limit.pendingCount}) for ${url}`); + resetIdleTimer(); + } }); - const bodyHTML = options?.evaluate - ? await options?.evaluate(page, browser) - : await page.evaluate(() => document.body.innerHTML); - //await browser.close() - return bodyHTML; } /** * Method that calls the _scrape method to perform the scraping of the web @@ -135,4 +167,4 @@ export class PuppeteerWebBaseLoader extends BaseDocumentLoader { } } } -//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicHVwZXRlZXIuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL3B1cGV0ZWVyLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQVFBLE9BQU8sRUFBRSxRQUFRLEVBQUUsTUFBTSwyQkFBMkIsQ0FBQTtBQUNwRCxPQUFPLEVBQUUsa0JBQWtCLEVBQWtCLE1BQU0saUNBQWlDLENBQUE7QUF3QnBGOzs7Ozs7Ozs7Ozs7Ozs7O0dBZ0JHO0FBQ0gsSUFBSSxPQUFPLEdBQVksSUFBSSxDQUFBO0FBQzNCLElBQUksSUFBSSxHQUFTLElBQUksQ0FBQTtBQUVyQixNQUFNLGFBQWEsR0FBRyxLQUFLLEVBQUUsT0FBdUMsRUFBb0IsRUFBRTtJQUN4RixJQUFJLE9BQU87UUFBRSxPQUFPLE9BQU8sQ0FBQTtJQUMzQixNQUFNLEVBQUUsTUFBTSxFQUFFLEdBQUcsTUFBTSxzQkFBc0IsQ0FBQyxPQUFPLEVBQUUsQ0FBQztJQUMxRCxPQUFPLEdBQUcsTUFBTSxNQUFNLENBQUM7UUFDckIsUUFBUSxFQUFFLElBQUk7UUFDZCxlQUFlLEVBQUUsSUFBSTtRQUNyQixpQkFBaUIsRUFBRSxDQUFDLHNCQUFzQixDQUFDO1FBQzNDLEdBQUcsT0FBTyxFQUFFLGFBQWE7S0FDMUIsQ0FBQyxDQUFBO0lBQ0YsT0FBTyxPQUFPLENBQUE7QUFDaEIsQ0FBQyxDQUFBO0FBRUQsTUFBTSxDQUFDLE1BQU0sVUFBVSxHQUFHLEdBQUcsRUFBRSxDQUFDLE9BQU8sQ0FBQTtBQUN2QyxNQUFNLENBQUMsTUFBTSxPQUFPLEdBQUcsS0FBSyxFQUFFLE9BQWdCLEVBQUUsRUFBRTtJQUNoRCxJQUFHLElBQUk7UUFDTCxPQUFPLElBQUksQ0FBQTtJQUViLElBQUksR0FBRyxNQUFNLE9BQU8sQ0FBQyxPQUFPLEVBQUUsQ0FBQTtJQUM5QixPQUFPLElBQUksQ0FBQTtBQUNiLENBQUMsQ0FBQTtBQUVELE1BQU0sT0FBTyxzQkFDWCxTQUFRLGtCQUFrQjtJQUlQO0lBRm5CLE9BQU8sQ0FBNEM7SUFFbkQsWUFBbUIsT0FBZSxFQUFFLE9BQXVDO1FBQ3pFLEtBQUssRUFBRSxDQUFDO1FBRFMsWUFBTyxHQUFQLE9BQU8sQ0FBUTtRQUVoQyxJQUFJLENBQUMsT0FBTyxHQUFHLE9BQU8sSUFBSSxTQUFTLENBQUM7SUFDdEMsQ0FBQztJQUVELE1BQU0sQ0FBQyxPQUFPLENBQVU7SUFFeEIsTUFBTSxDQUFDLEtBQUssQ0FBQyxPQUFPLENBQ2xCLEdBQVcsRUFDWCxPQUF1QztRQUd2QyxNQUFNLE9BQU8sR0FBRyxNQUFNLGFBQWEsQ0FBQyxPQUFPLENBQUMsQ0FBQTtRQUU1QyxzQkFBc0IsQ0FBQyxPQUFPLEdBQUcsT0FBTyxDQUFBO1FBQ3hDLE1BQU0sSUFBSSxHQUFHLE1BQU0sT0FBTyxDQUFDLE9BQU8sQ0FBQyxDQUFBO1FBQ25DLE1BQU0sSUFBSSxDQUFDLElBQUksQ0FBQyxHQUFHLEVBQUU7WUFDbkIsT0FBTyxFQUFFLElBQUk7WUFDYixTQUFTLEVBQUUsa0JBQWtCO1lBQzdCLEdBQUcsT0FBTyxFQUFFLFdBQVc7U0FDeEIsQ0FBQyxDQUFDO1FBRUgsTUFBTSxRQUFRLEdBQUcsT0FBTyxFQUFFLFFBQVE7WUFDaEMsQ0FBQyxDQUFDLE1BQU0sT0FBTyxFQUFFLFFBQVEsQ0FBQyxJQUFJLEVBQUUsT0FBTyxDQUFDO1lBQ3hDLENBQUMsQ0FBQyxNQUFNLElBQUksQ0FBQyxRQUFRLENBQUMsR0FBRyxFQUFFLENBQUMsUUFBUSxDQUFDLElBQUksQ0FBQyxTQUFTLENBQUMsQ0FBQztRQUV2RCx1QkFBdUI7UUFFdkIsT0FBTyxRQUFRLENBQUE7SUFDakIsQ0FBQztJQUVEOzs7O09BSUc7SUFDSCxLQUFLLENBQUMsTUFBTTtRQUNWLE9BQU8sc0JBQXNCLENBQUMsT0FBTyxDQUFDLElBQUksQ0FBQyxPQUFPLEVBQUUsSUFBSSxDQUFDLE9BQU8sQ0FBQyxDQUFDO0lBQ3BFLENBQUM7SUFFRDs7OztPQUlHO0lBQ0gsS0FBSyxDQUFDLElBQUk7UUFDUixNQUFNLElBQUksR0FBRyxNQUFNLElBQUksQ0FBQyxNQUFNLEVBQUUsQ0FBQztRQUVqQyxNQUFNLFFBQVEsR0FBRyxFQUFFLE1BQU0sRUFBRSxJQUFJLENBQUMsT0FBTyxFQUFFLENBQUM7UUFDMUMsT0FBTyxDQUFDLElBQUksUUFBUSxDQUFDLEVBQUUsV0FBVyxFQUFFLElBQUksRUFBRSxRQUFRLEVBQUUsQ0FBQyxDQUFDLENBQUM7SUFDekQsQ0FBQztJQUVEOzs7Ozs7OztPQVFHO0lBQ0gsTUFBTSxDQUFDLEtBQUssQ0FBQyxXQUFXLENBQ3RCLEdBQVcsRUFDWCxPQUF1QztRQUV2QyxNQUFNLEVBQUUsTUFBTSxFQUFFLEdBQUcsTUFBTSxzQkFBc0IsQ0FBQyxPQUFPLEVBQUUsQ0FBQztRQUUxRCxNQUFNLE9BQU8sR0FBRyxNQUFNLE1BQU0sQ0FBQztZQUMzQixRQUFRLEVBQUUsSUFBSTtZQUNkLGVBQWUsRUFBRSxJQUFJO1lBQ3JCLElBQUksRUFBRSxDQUFDLGlCQUFpQixDQUFDO1lBQ3pCLGlCQUFpQixFQUFFLENBQUMsc0JBQXNCLENBQUM7WUFDM0MsR0FBRyxPQUFPLEVBQUUsYUFBYTtTQUMxQixDQUFDLENBQUM7UUFDSCxNQUFNLElBQUksR0FBRyxNQUFNLE9BQU8sQ0FBQyxPQUFPLEVBQUUsQ0FBQztRQUVyQyxNQUFNLElBQUksQ0FBQyxJQUFJLENBQUMsR0FBRyxFQUFFO1lBQ25CLE9BQU8sRUFBRSxNQUFNO1lBQ2YsU0FBUyxFQUFFLGtCQUFrQjtZQUM3QixHQUFHLE9BQU8sRUFBRSxXQUFXO1NBQ3hCLENBQUMsQ0FBQztRQUNILE1BQU0sVUFBVSxHQUFHLE1BQU0sSUFBSSxDQUFDLFVBQVUsRUFBRSxDQUFDO1FBQzNDLE1BQU0sTUFBTSxHQUFHLFVBQVUsQ0FBQyxRQUFRLENBQUMsUUFBUSxDQUFDLENBQUM7UUFDN0MsTUFBTSxRQUFRLEdBQUcsRUFBRSxNQUFNLEVBQUUsR0FBRyxFQUFFLENBQUM7UUFDakMsT0FBTyxJQUFJLFFBQVEsQ0FBQyxFQUFFLFdBQVcsRUFBRSxNQUFNLEVBQUUsUUFBUSxFQUFFLENBQUMsQ0FBQztJQUN6RCxDQUFDO0lBRUQ7Ozs7O09BS0c7SUFDSCxLQUFLLENBQUMsVUFBVTtRQUNkLE9BQU8sc0JBQXNCLENBQUMsV0FBVyxDQUFDLElBQUksQ0FBQyxPQUFPLEVBQUUsSUFBSSxDQUFDLE9BQU8sQ0FBQyxDQUFDO0lBQ3hFLENBQUM7SUFFRDs7OztPQUlHO0lBQ0gsTUFBTSxDQUFDLEtBQUssQ0FBQyxPQUFPO1FBR2xCLElBQUksQ0FBQztZQUNILDZEQUE2RDtZQUM3RCxNQUFNLEVBQUUsTUFBTSxFQUFFLEdBQUcsTUFBTSxNQUFNLENBQUMsV0FBVyxDQUFDLENBQUE7WUFDNUMsT0FBTyxFQUFFLE1BQU0sRUFBRSxDQUFDO1FBQ3BCLENBQUM7UUFBQyxPQUFPLENBQUMsRUFBRSxDQUFDO1lBQ1gsT0FBTyxDQUFDLEtBQUssQ0FBQyxDQUFDLENBQUMsQ0FBQztZQUNqQixNQUFNLElBQUksS0FBSyxDQUNiLDBFQUEwRSxDQUMzRSxDQUFDO1FBQ0osQ0FBQztJQUNILENBQUM7Q0FDRiJ9 \ No newline at end of file +//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicHVwZXRlZXIuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL3B1cGV0ZWVyLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQVFBLE9BQU8sRUFBRSxRQUFRLEVBQUUsTUFBTSwyQkFBMkIsQ0FBQTtBQUNwRCxPQUFPLEVBQUUsa0JBQWtCLEVBQWtCLE1BQU0saUNBQWlDLENBQUE7QUFDcEYsT0FBTyxNQUFNLE1BQU0sU0FBUyxDQUFBO0FBd0I1Qjs7Ozs7Ozs7Ozs7Ozs7OztHQWdCRztBQUNILHVEQUF1RDtBQUN2RCxJQUFJLGNBQWMsR0FBNEIsSUFBSSxDQUFDO0FBQ25ELElBQUksU0FBUyxHQUEwQixJQUFJLENBQUM7QUFDNUMsTUFBTSxLQUFLLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQyxPQUFPLENBQUMsR0FBRyxDQUFDLCtCQUErQixJQUFJLElBQUksQ0FBQyxDQUFDLENBQUE7QUFDbkYsTUFBTSxvQkFBb0IsR0FBRyxRQUFRLENBQUMsT0FBTyxDQUFDLEdBQUcsQ0FBQywwQ0FBMEMsSUFBSSxJQUFJLENBQUMsQ0FBQztBQUV0RyxNQUFNLGNBQWMsR0FBRyxHQUFHLEVBQUU7SUFDMUIsSUFBSSxTQUFTO1FBQUUsWUFBWSxDQUFDLFNBQVMsQ0FBQyxDQUFDO0lBQ3ZDLFNBQVMsR0FBRyxVQUFVLENBQUMsS0FBSyxJQUFJLEVBQUU7UUFDaEMsSUFBSSxjQUFjLEVBQUUsQ0FBQztZQUNuQixPQUFPLENBQUMsR0FBRyxDQUFDLHFDQUFxQyxvQkFBb0IsNkJBQTZCLENBQUMsQ0FBQztZQUNwRyxNQUFNLE9BQU8sR0FBRyxNQUFNLGNBQWMsQ0FBQztZQUNyQyxNQUFNLE9BQU8sQ0FBQyxLQUFLLEVBQUUsQ0FBQztZQUN0QixjQUFjLEdBQUcsSUFBSSxDQUFDO1FBQ3hCLENBQUM7SUFDSCxDQUFDLEVBQUUsb0JBQW9CLEdBQUcsSUFBSSxDQUFDLENBQUM7QUFDbEMsQ0FBQyxDQUFBO0FBRUQsTUFBTSxhQUFhLEdBQUcsS0FBSyxFQUFFLE9BQXVDLEVBQW9CLEVBQUU7SUFDeEYsY0FBYyxFQUFFLENBQUM7SUFDakIsSUFBSSxjQUFjO1FBQUUsT0FBTyxjQUFjLENBQUM7SUFFMUMsY0FBYyxHQUFHLENBQUMsS0FBSyxJQUFJLEVBQUU7UUFDM0IsTUFBTSxFQUFFLE1BQU0sRUFBRSxHQUFHLE1BQU0sc0JBQXNCLENBQUMsT0FBTyxFQUFFLENBQUM7UUFDMUQsTUFBTSxDQUFDLEdBQUcsTUFBTSxNQUFNLENBQUM7WUFDckIsUUFBUSxFQUFFLE9BQU8sQ0FBQyxHQUFHLENBQUMscUJBQXFCLEtBQUssT0FBTyxDQUFDLENBQUMsQ0FBQyxLQUFLLENBQUMsQ0FBQyxDQUFDLElBQUk7WUFDdEUsZUFBZSxFQUFFLElBQUk7WUFDckIsaUJBQWlCLEVBQUUsQ0FBQyxzQkFBc0IsQ0FBQztZQUMzQyxHQUFHLE9BQU8sRUFBRSxhQUFhO1NBQzFCLENBQUMsQ0FBQztRQUNILE9BQU8sQ0FBQyxDQUFDO0lBQ1gsQ0FBQyxDQUFDLEVBQUUsQ0FBQztJQUVMLE9BQU8sY0FBYyxDQUFDO0FBQ3hCLENBQUMsQ0FBQTtBQUVELE1BQU0sQ0FBQyxNQUFNLFVBQVUsR0FBRyxHQUFHLEVBQUUsQ0FBQyxjQUFjLENBQUM7QUFDL0MsTUFBTSxDQUFDLE1BQU0sT0FBTyxHQUFHLEtBQUssRUFBRSxPQUFnQixFQUFFLEVBQUU7SUFDaEQsMkNBQTJDO0lBQzNDLE1BQU0sSUFBSSxHQUFHLE1BQU0sT0FBTyxDQUFDLE9BQU8sRUFBRSxDQUFBO0lBQ3BDLE9BQU8sSUFBSSxDQUFBO0FBQ2IsQ0FBQyxDQUFBO0FBRUQsTUFBTSxPQUFPLHNCQUNYLFNBQVEsa0JBQWtCO0lBSVA7SUFGbkIsT0FBTyxDQUE0QztJQUVuRCxZQUFtQixPQUFlLEVBQUUsT0FBdUM7UUFDekUsS0FBSyxFQUFFLENBQUM7UUFEUyxZQUFPLEdBQVAsT0FBTyxDQUFRO1FBRWhDLElBQUksQ0FBQyxPQUFPLEdBQUcsT0FBTyxJQUFJLFNBQVMsQ0FBQztJQUN0QyxDQUFDO0lBRUQsTUFBTSxDQUFDLE9BQU8sQ0FBVTtJQUV4QixNQUFNLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FDbEIsR0FBVyxFQUNYLE9BQXVDO1FBR3ZDLE1BQU0sT0FBTyxHQUFHLE1BQU0sYUFBYSxDQUFDLE9BQU8sQ0FBQyxDQUFBO1FBRTVDLDhHQUE4RztRQUU5RyxPQUFPLEtBQUssQ0FBQyxLQUFLLElBQUksRUFBRTtZQUN0QixPQUFPLENBQUMsR0FBRyxDQUFDLHVDQUF1QyxLQUFLLENBQUMsV0FBVyxjQUFjLEtBQUssQ0FBQyxZQUFZLFNBQVMsR0FBRyxFQUFFLENBQUMsQ0FBQztZQUNwSCxJQUFJLENBQUM7Z0JBQ0gsTUFBTSxJQUFJLEdBQUcsTUFBTSxPQUFPLENBQUMsT0FBTyxDQUFDLENBQUE7Z0JBQ25DLElBQUksQ0FBQztvQkFDSCxNQUFNLElBQUksQ0FBQyxJQUFJLENBQUMsR0FBRyxFQUFFO3dCQUNuQixPQUFPLEVBQUUsSUFBSTt3QkFDYixTQUFTLEVBQUUsa0JBQWtCO3dCQUM3QixHQUFHLE9BQU8sRUFBRSxXQUFXO3FCQUN4QixDQUFDLENBQUM7b0JBRUgsTUFBTSxRQUFRLEdBQUcsT0FBTyxFQUFFLFFBQVE7d0JBQ2hDLENBQUMsQ0FBQyxNQUFNLE9BQU8sRUFBRSxRQUFRLENBQUMsSUFBSSxFQUFFLE9BQU8sQ0FBQzt3QkFDeEMsQ0FBQyxDQUFDLE1BQU0sSUFBSSxDQUFDLFFBQVEsQ0FBQyxHQUFHLEVBQUUsQ0FBQyxRQUFRLENBQUMsSUFBSSxDQUFDLFNBQVMsQ0FBQyxDQUFDO29CQUV2RCxPQUFPLFFBQVEsQ0FBQTtnQkFDakIsQ0FBQzt3QkFBUyxDQUFDO29CQUNULE1BQU0sSUFBSSxDQUFDLEtBQUssRUFBRSxDQUFBO2dCQUNwQixDQUFDO1lBQ0gsQ0FBQztvQkFBUyxDQUFDO2dCQUNULE9BQU8sQ0FBQyxHQUFHLENBQUMsc0NBQXNDLEtBQUssQ0FBQyxXQUFXLGNBQWMsS0FBSyxDQUFDLFlBQVksU0FBUyxHQUFHLEVBQUUsQ0FBQyxDQUFDO2dCQUNuSCxjQUFjLEVBQUUsQ0FBQztZQUNuQixDQUFDO1FBQ0gsQ0FBQyxDQUFDLENBQUE7SUFDSixDQUFDO0lBRUQ7Ozs7T0FJRztJQUNILEtBQUssQ0FBQyxNQUFNO1FBQ1YsT0FBTyxzQkFBc0IsQ0FBQyxPQUFPLENBQUMsSUFBSSxDQUFDLE9BQU8sRUFBRSxJQUFJLENBQUMsT0FBTyxDQUFDLENBQUM7SUFDcEUsQ0FBQztJQUVEOzs7O09BSUc7SUFDSCxLQUFLLENBQUMsSUFBSTtRQUNSLE1BQU0sSUFBSSxHQUFHLE1BQU0sSUFBSSxDQUFDLE1BQU0sRUFBRSxDQUFDO1FBRWpDLE1BQU0sUUFBUSxHQUFHLEVBQUUsTUFBTSxFQUFFLElBQUksQ0FBQyxPQUFPLEVBQUUsQ0FBQztRQUMxQyxPQUFPLENBQUMsSUFBSSxRQUFRLENBQUMsRUFBRSxXQUFXLEVBQUUsSUFBSSxFQUFFLFFBQVEsRUFBRSxDQUFDLENBQUMsQ0FBQztJQUN6RCxDQUFDO0lBRUQ7Ozs7Ozs7O09BUUc7SUFDSCxNQUFNLENBQUMsS0FBSyxDQUFDLFdBQVcsQ0FDdEIsR0FBVyxFQUNYLE9BQXVDO1FBRXZDLE1BQU0sRUFBRSxNQUFNLEVBQUUsR0FBRyxNQUFNLHNCQUFzQixDQUFDLE9BQU8sRUFBRSxDQUFDO1FBRTFELE1BQU0sT0FBTyxHQUFHLE1BQU0sTUFBTSxDQUFDO1lBQzNCLFFBQVEsRUFBRSxJQUFJO1lBQ2QsZUFBZSxFQUFFLElBQUk7WUFDckIsSUFBSSxFQUFFLENBQUMsaUJBQWlCLENBQUM7WUFDekIsaUJBQWlCLEVBQUUsQ0FBQyxzQkFBc0IsQ0FBQztZQUMzQyxHQUFHLE9BQU8sRUFBRSxhQUFhO1NBQzFCLENBQUMsQ0FBQztRQUNILE1BQU0sSUFBSSxHQUFHLE1BQU0sT0FBTyxDQUFDLE9BQU8sRUFBRSxDQUFDO1FBRXJDLE1BQU0sSUFBSSxDQUFDLElBQUksQ0FBQyxHQUFHLEVBQUU7WUFDbkIsT0FBTyxFQUFFLE1BQU07WUFDZixTQUFTLEVBQUUsa0JBQWtCO1lBQzdCLEdBQUcsT0FBTyxFQUFFLFdBQVc7U0FDeEIsQ0FBQyxDQUFDO1FBQ0gsTUFBTSxVQUFVLEdBQUcsTUFBTSxJQUFJLENBQUMsVUFBVSxFQUFFLENBQUM7UUFDM0MsTUFBTSxNQUFNLEdBQUcsVUFBVSxDQUFDLFFBQVEsQ0FBQyxRQUFRLENBQUMsQ0FBQztRQUM3QyxNQUFNLFFBQVEsR0FBRyxFQUFFLE1BQU0sRUFBRSxHQUFHLEVBQUUsQ0FBQztRQUNqQyxPQUFPLElBQUksUUFBUSxDQUFDLEVBQUUsV0FBVyxFQUFFLE1BQU0sRUFBRSxRQUFRLEVBQUUsQ0FBQyxDQUFDO0lBQ3pELENBQUM7SUFFRDs7Ozs7T0FLRztJQUNILEtBQUssQ0FBQyxVQUFVO1FBQ2QsT0FBTyxzQkFBc0IsQ0FBQyxXQUFXLENBQUMsSUFBSSxDQUFDLE9BQU8sRUFBRSxJQUFJLENBQUMsT0FBTyxDQUFDLENBQUM7SUFDeEUsQ0FBQztJQUVEOzs7O09BSUc7SUFDSCxNQUFNLENBQUMsS0FBSyxDQUFDLE9BQU87UUFHbEIsSUFBSSxDQUFDO1lBQ0gsNkRBQTZEO1lBQzdELE1BQU0sRUFBRSxNQUFNLEVBQUUsR0FBRyxNQUFNLE1BQU0sQ0FBQyxXQUFXLENBQUMsQ0FBQTtZQUM1QyxPQUFPLEVBQUUsTUFBTSxFQUFFLENBQUM7UUFDcEIsQ0FBQztRQUFDLE9BQU8sQ0FBQyxFQUFFLENBQUM7WUFDWCxPQUFPLENBQUMsS0FBSyxDQUFDLENBQUMsQ0FBQyxDQUFDO1lBQ2pCLE1BQU0sSUFBSSxLQUFLLENBQ2IsMEVBQTBFLENBQzNFLENBQUM7UUFDSixDQUFDO0lBQ0gsQ0FBQztDQUNGIn0= \ No newline at end of file diff --git a/packages/search/package-lock.json b/packages/search/package-lock.json index 5d3b8dcb..90afc834 100644 --- a/packages/search/package-lock.json +++ b/packages/search/package-lock.json @@ -32,6 +32,7 @@ "md5": "^2.3.0", "node-html-parser": "^6.1.12", "node-xlsx": "^0.23.0", + "p-limit": "^7.2.0", "p-map": "^4.0.0", "publish": "^0.6.0", "puppeteer": "^19.11.1", @@ -12561,6 +12562,21 @@ "node": ">=4" } }, + "node_modules/p-limit": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-7.2.0.tgz", + "integrity": "sha512-ATHLtwoTNDloHRFFxFJdHnG6n2WUeFjaR8XQMFdKIv0xkXjrER8/iG9iu265jOM95zXHAfv9oTkqhrfbIzosrQ==", + "license": "MIT", + "dependencies": { + "yocto-queue": "^1.2.1" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/p-map": { "version": "4.0.0", "license": "MIT", @@ -13917,6 +13933,18 @@ "fd-slicer": "~1.1.0" } }, + "node_modules/yocto-queue": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", + "integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==", + "license": "MIT", + "engines": { + "node": ">=12.20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/zod": { "version": "3.25.76", "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", diff --git a/packages/search/package.json b/packages/search/package.json index 60479221..7071158e 100644 --- a/packages/search/package.json +++ b/packages/search/package.json @@ -38,6 +38,7 @@ "md5": "^2.3.0", "node-html-parser": "^6.1.12", "node-xlsx": "^0.23.0", + "p-limit": "^7.2.0", "p-map": "^4.0.0", "publish": "^0.6.0", "puppeteer": "^19.11.1", diff --git a/packages/search/src/lib/email.ts b/packages/search/src/lib/email.ts index 4332eadd..dbb05572 100644 --- a/packages/search/src/lib/email.ts +++ b/packages/search/src/lib/email.ts @@ -1,4 +1,5 @@ import { logger } from '../index.js' +import pMap from 'p-map' import { CheerioWebBaseLoader } from "langchain/document_loaders/web/cheerio" import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" import { htmlToText } from "html-to-text" @@ -60,20 +61,19 @@ export const puppeteerLoader = async (url: string, headless: boolean, location: url, { launchOptions: { - headless, + headless: headless, ignoreHTTPSErrors: true }, gotoOptions: { - timeout: 15000, + timeout: location.pageTimeout || 15000, waitUntil: "networkidle0", }, async evaluate(page, browser) { if (checkCancelled && await checkCancelled()) { - debugger const pid = browser.process()?.pid; - logger.warn(`Killing browser process ${pid} due to cancellation`); - await browser.close(); + logger.warn(`Cancellation requested inside evaluate for process ${pid}`); + // Do not close browser, it is shared. Page will be closed by finally block in pupeteer.ts throw new Error('CancelledByUser'); } const result = await page.evaluate(() => document.body.innerHTML) @@ -83,20 +83,25 @@ export const puppeteerLoader = async (url: string, headless: boolean, location: } ) // Race load against cancellation - const loadPromise = loaderWithOptions.load(); + let isFinished = false; + const loadPromise = loaderWithOptions.load().finally(() => { + isFinished = true; + }); const cancelPromise = new Promise(async (_, reject) => { if (!checkCancelled) return; // Poll for cancellation - while (true) { + while (!isFinished) { await new Promise(r => setTimeout(r, 1000)); + logger.info('Checking cancellation for ' + url); if (await checkCancelled()) { const browser = await getBrowser(); if (browser) { const pid = browser.process()?.pid; - logger.info(`Killing browser process ${pid} due to cancellation`); - await browser.close(); + logger.info(`Cancellation confirmed for process ${pid}`); + // await browser.close(); // Do not close shared browser } + reject(new Error('CancelledByUser')); break; } @@ -148,7 +153,7 @@ export const findEMail = async (question: string, url: string, opts: { headless? return false } let pageUrl = url - let docs = await puppeteerLoader(pageUrl, opts.headless, location, opts.checkCancelled) as any + let docs = await puppeteerLoader(pageUrl, opts.headless, { ...location, pageTimeout: opts.pageTimeout }, opts.checkCancelled) as any let emails: string[] = [] docs.forEach((d: any) => { if (d.pageContent && d.pageContent.indexOf('@') !== -1) { @@ -176,24 +181,38 @@ export const findEmailEach = async (location: LocalResult, opts: { headless?: bo const emails: string[] = [] const abortAfter = opts.abortAfter ?? 1 - for (const page of location.meta.pages) { + const concurrency = opts.concurrency || 2 + const maxPages = opts.maxPages || 15 + const contactKeywords = ['contact', 'kontakt', 'contacto', 'contatto', 'info', 'imprint', 'impressum', 'help', 'support', 'about']; + + // Sort pages: prioritize contact pages + const pagesToSearch = location.meta.pages.sort((a, b) => { + const urlA = a.url.toLowerCase(); + const urlB = b.url.toLowerCase(); + + const scoreA = contactKeywords.some(k => urlA.includes(k)) ? 1 : 0; + const scoreB = contactKeywords.some(k => urlB.includes(k)) ? 1 : 0; + + return scoreB - scoreA; // Descending order (contact pages first) + }).slice(0, maxPages) + + await pMap(pagesToSearch, async (page: Page) => { if (opts.checkCancelled && await opts.checkCancelled()) { - debugger - logger.info(`[findEmailEach] Cancellation requested for ${location.title}`); - break; + // logger.info(`[findEmailEach] Cancellation requested for ${location.title}`); + return } if (emails.length >= abortAfter) { - break + return } if (page.status !== 'PENDING') { - continue + return } page.status = 'SEARCHING_EMAIL' try { - logger.info(`Scraping email from ${page.url}`); + logger.info(`Scraping email from ${page.url}`) const pageEmails = await findEMail('find email', page.url, opts, location) if (pageEmails && Array.isArray(pageEmails)) { emails.push(...pageEmails) @@ -201,7 +220,6 @@ export const findEmailEach = async (location: LocalResult, opts: { headless?: bo page.status = 'SEARCHED_EMAIL' } catch (error) { if (error.message === 'CancelledByUser') { - debugger throw error; } page.status = 'FAILED' @@ -212,7 +230,7 @@ export const findEmailEach = async (location: LocalResult, opts: { headless?: bo if (onProgress) { await onProgress(page) } - } + }, { concurrency, stopOnError: false }) // Update location emails if (emails.length > 0) { diff --git a/packages/search/src/lib/pupeteer.ts b/packages/search/src/lib/pupeteer.ts index 5c15de51..472c3e24 100644 --- a/packages/search/src/lib/pupeteer.ts +++ b/packages/search/src/lib/pupeteer.ts @@ -8,6 +8,7 @@ import type { import { Document } from "@langchain/core/documents" import { BaseDocumentLoader, DocumentLoader } from "langchain/document_loaders/base" +import pLimit from "p-limit" export { Page, Browser } export type PuppeteerGotoOptions = WaitForOptions & { @@ -48,27 +49,46 @@ export type PuppeteerWebBaseLoaderOptions = { * const screenshot = await loader.screenshot(); * ``` */ -let browser: Browser = null -let page: Page = null +// Singleton browser promise to prevent race conditions +let browserPromise: Promise | null = null; +let idleTimer: NodeJS.Timeout | null = null; +const limit = pLimit(parseInt(process.env.EMAIL_SEARCH_MAX_PUPETEER_PAGES || '10')) +const IDLE_TIMEOUT_SECONDS = parseInt(process.env.EMAIL_SEARCH_PUPETEER_IDLE_TIMEOUT_SECONDS || '60'); -const launchBrowser = async (options?: PuppeteerWebBaseLoaderOptions): Promise => { - if (browser) return browser - const { launch } = await PuppeteerWebBaseLoader.imports(); - browser = await launch({ - headless: true, - defaultViewport: null, - ignoreDefaultArgs: ["--disable-extensions"], - ...options?.launchOptions, - }) - return browser +const resetIdleTimer = () => { + if (idleTimer) clearTimeout(idleTimer); + idleTimer = setTimeout(async () => { + if (browserPromise) { + console.log(`[Puppeteer] Browser idle timeout (${IDLE_TIMEOUT_SECONDS}s) reached, closing browser`); + const browser = await browserPromise; + await browser.close(); + browserPromise = null; + } + }, IDLE_TIMEOUT_SECONDS * 1000); } -export const getBrowser = () => browser +const launchBrowser = async (options?: PuppeteerWebBaseLoaderOptions): Promise => { + resetIdleTimer(); + if (browserPromise) return browserPromise; + + browserPromise = (async () => { + const { launch } = await PuppeteerWebBaseLoader.imports(); + const b = await launch({ + headless: process.env.EMAIL_SEARCH_HEADLESS === 'false' ? false : true, + defaultViewport: null, + ignoreDefaultArgs: ["--disable-extensions"], + ...options?.launchOptions, + }); + return b; + })(); + + return browserPromise; +} + +export const getBrowser = () => browserPromise; export const getPage = async (browser: Browser) => { - if(page) - return page - - page = await browser.newPage() + // Always create a new page for concurrency + const page = await browser.newPage() return page } @@ -91,21 +111,32 @@ export class PuppeteerWebBaseLoader const browser = await launchBrowser(options) - PuppeteerWebBaseLoader.browser = browser - const page = await getPage(browser) - await page.goto(url, { - timeout: 5000, - waitUntil: "domcontentloaded", - ...options?.gotoOptions, - }); + // PuppeteerWebBaseLoader.browser = browser // Static property usage is deprecated/incorrect with this pattern - const bodyHTML = options?.evaluate - ? await options?.evaluate(page, browser) - : await page.evaluate(() => document.body.innerHTML); + return limit(async () => { + console.log(`[Puppeteer] Entering limit (Active: ${limit.activeCount}, Pending: ${limit.pendingCount}) for ${url}`); + try { + const page = await getPage(browser) + try { + await page.goto(url, { + timeout: 5000, + waitUntil: "domcontentloaded", + ...options?.gotoOptions, + }); - //await browser.close() + const bodyHTML = options?.evaluate + ? await options?.evaluate(page, browser) + : await page.evaluate(() => document.body.innerHTML); - return bodyHTML + return bodyHTML + } finally { + await page.close() + } + } finally { + console.log(`[Puppeteer] Exiting limit (Active: ${limit.activeCount}, Pending: ${limit.pendingCount}) for ${url}`); + resetIdleTimer(); + } + }) } /**