mono/packages/ai-tools/dist/lib/tools/web.js
2025-02-20 18:15:43 +01:00

100 lines
9.3 KiB
JavaScript

import * as path from 'path';
import puppeteer from 'puppeteer';
import TurndownService from 'turndown';
import { toolLogger } from '../../index.js';
// Module-level HTML -> Markdown converter, constructed once with Turndown's
// default options and used by the `browse_page` tool below.
const turndown = new TurndownService();
/**
 * Collects the data `browse_page` reports for the currently loaded document.
 *
 * This function is handed to `page.evaluate`, so it executes inside the
 * browser page rather than in Node. It must therefore stay self-contained:
 * it may only use its own locals and browser globals such as `document`.
 *
 * @returns {{content: string, links: Array<{text: string, href: string}>,
 *   images: Array<{src: string, alt: string, width: number, height: number}>,
 *   ogImage: (string|undefined)}} Cleaned body HTML, up to 20 absolute links,
 *   up to 20 absolute images, and the Open Graph image URL when present.
 */
function extractPageData() {
    // Read the Open Graph image FIRST: the clean-up below removes every
    // <meta> element, so querying it afterwards could never find anything.
    const ogImage = document.querySelector('meta[property="og:image"]')?.getAttribute('content') ||
        document.querySelector('meta[name="og:image"]')?.getAttribute('content');

    // Strip non-content and hidden nodes so they neither pollute the markdown
    // nor contribute links/images.
    document
        .querySelectorAll('script, style, link, meta, noscript, iframe, [style*="display:none"],[style*="display: none"], .hidden')
        .forEach((el) => el.remove());

    const links = Array.from(document.querySelectorAll('a'))
        .map((a) => ({
            text: a.textContent?.trim() || '',
            href: a.href
        }))
        .filter((link) => link.href && link.href.startsWith('http'))
        .slice(0, 20);

    const images = Array.from(document.querySelectorAll('img'))
        .map((img) => ({
            src: img.src,
            alt: img.alt || '',
            width: img.width,
            height: img.height
        }))
        .filter((img) => img.src && img.src.startsWith('http'))
        .slice(0, 20);

    return {
        content: document.body ? document.body.innerHTML : '',
        links,
        images,
        ogImage
    };
}

/**
 * Builds the web tool definitions (currently only `browse_page`).
 *
 * @param {*} target - Not used by this module; kept for signature parity with callers.
 * @param {*} options - Passed through unchanged to `toolLogger`.
 * @returns {Array<object>} Tool descriptors of the form
 *   `{ type: 'function', function: { name, description, parameters, function, parse } }`.
 *   The `browse_page` handler never rejects for browsing errors: it resolves to
 *   `{ success: true, markdown, links, images, mainImage, url }` on success and
 *   to `{ success: false, error, url }` on failure.
 */
export const tools = (target, options) => {
    // `__filename` is not defined in native ES modules. Keep using it when the
    // runtime/bundler provides it, otherwise derive the path from import.meta.url.
    const moduleFile = typeof __filename === 'string'
        ? __filename
        : decodeURIComponent(new URL(import.meta.url).pathname);
    const logger = toolLogger(path.parse(moduleFile).name, options);
    return [
        {
            type: 'function',
            function: {
                name: 'browse_page',
                description: 'Browse a webpage and return its content as markdown, all links, images and pages main image',
                parameters: {
                    type: 'object',
                    properties: {
                        url: {
                            type: 'string',
                            description: 'URL of the webpage to browse'
                        }
                    },
                    required: ['url']
                },
                function: async (params) => {
                    // Read the URL once so the error path cannot throw on a missing `params`.
                    const url = params?.url;
                    let browser;
                    try {
                        logger.debug(`Tool::BrowsePage Browsing ${url}`);
                        browser = await puppeteer.launch({
                            headless: true,
                            args: ['--no-sandbox', '--disable-setuid-sandbox']
                        });
                        const page = await browser.newPage();
                        logger.debug(`Tool::Web::BrowsePage Opening page ${url}`);
                        await page.goto(url, {
                            waitUntil: 'networkidle2'
                        });
                        const pageData = await page.evaluate(extractPageData);
                        return {
                            success: true,
                            markdown: turndown.turndown(pageData.content),
                            links: pageData.links,
                            images: pageData.images,
                            mainImage: pageData.ogImage,
                            url
                        };
                    }
                    catch (error) {
                        logger.debug('Error browsing page:', error.message, error);
                        return {
                            success: false,
                            error: error.message,
                            url
                        };
                    }
                    finally {
                        // Close the browser exactly once on every path. A failing close
                        // must not replace the result (or the original error) above.
                        if (browser) {
                            try {
                                await browser.close();
                            }
                            catch (closeError) {
                                logger.debug('Error closing browser:', closeError.message);
                            }
                        }
                    }
                },
                parse: JSON.parse
            }
        }
    ];
};
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoid2ViLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vLi4vc3JjL2xpYi90b29scy93ZWIudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQUEsT0FBTyxLQUFLLElBQUksTUFBTSxNQUFNLENBQUE7QUFHNUIsT0FBTyxTQUFTLE1BQU0sV0FBVyxDQUFBO0FBQ2pDLE9BQU8sZUFBZSxNQUFNLFVBQVUsQ0FBQTtBQUV0QyxPQUFPLEVBQUUsVUFBVSxFQUFFLE1BQU0sZ0JBQWdCLENBQUE7QUFHM0MsTUFBTSxRQUFRLEdBQUcsSUFBSSxlQUFlLEVBQUUsQ0FBQTtBQUV0QyxNQUFNLENBQUMsTUFBTSxLQUFLLEdBQUcsQ0FBQyxNQUFjLEVBQUUsT0FBa0IsRUFBYyxFQUFFO0lBQ3BFLE1BQU0sTUFBTSxHQUFHLFVBQVUsQ0FBQyxJQUFJLENBQUMsS0FBSyxDQUFDLFVBQVUsQ0FBQyxDQUFDLElBQUksRUFBRSxPQUFPLENBQUMsQ0FBQTtJQUMvRCxPQUFPO1FBQ0g7WUFDSSxJQUFJLEVBQUUsVUFBVTtZQUNoQixRQUFRLEVBQUU7Z0JBQ04sSUFBSSxFQUFFLGFBQWE7Z0JBQ25CLFdBQVcsRUFBRSw2RkFBNkY7Z0JBQzFHLFVBQVUsRUFBRTtvQkFDUixJQUFJLEVBQUUsUUFBUTtvQkFDZCxVQUFVLEVBQUU7d0JBQ1IsR0FBRyxFQUFFOzRCQUNELElBQUksRUFBRSxRQUFROzRCQUNkLFdBQVcsRUFBRSw4QkFBOEI7eUJBQzlDO3FCQUNKO29CQUNELFFBQVEsRUFBRSxDQUFDLEtBQUssQ0FBQztpQkFDcEI7Z0JBQ0QsUUFBUSxFQUFFLEtBQUssRUFBRSxNQUFXLEVBQUUsRUFBRTtvQkFDNUIsSUFBSTt3QkFDQSxNQUFNLENBQUMsS0FBSyxDQUFDLDZCQUE2QixNQUFNLENBQUMsR0FBRyxFQUFFLENBQUMsQ0FBQzt3QkFDeEQsTUFBTSxPQUFPLEdBQUcsTUFBTSxTQUFTLENBQUMsTUFBTSxDQUFDOzRCQUNuQyxRQUFRLEVBQUUsSUFBSTs0QkFDZCxJQUFJLEVBQUUsQ0FBQyxjQUFjLEVBQUUsMEJBQTBCLENBQUM7eUJBQ3JELENBQUMsQ0FBQTt3QkFFRixJQUFJOzRCQUNBLE1BQU0sSUFBSSxHQUFHLE1BQU0sT0FBTyxDQUFDLE9BQU8sRUFBRSxDQUFBOzRCQUNwQyxNQUFNLENBQUMsS0FBSyxDQUFDLHNDQUFzQyxNQUFNLENBQUMsR0FBRyxFQUFFLENBQUMsQ0FBQTs0QkFDaEUsTUFBTSxJQUFJLENBQUMsSUFBSSxDQUFDLE1BQU0sQ0FBQyxHQUFHLEVBQUU7Z0NBQ3hCLFNBQVMsRUFBRSxjQUFjOzZCQUM1QixDQUFDLENBQUE7NEJBRUYsTUFBTSxRQUFRLEdBQUcsTUFBTSxJQUFJLENBQUMsUUFBUSxDQUFDLENBQUMsUUFBUSxFQUFFLEVBQUU7Z0NBQzlDLE1BQU0sZ0JBQWdCLEdBQUcsUUFBUSxDQUFDLGdCQUFnQixDQUM5Qyx3R0FBd0csQ0FDM0csQ0FBQTtnQ0FDRCxnQkFBZ0IsQ0FBQyxPQUFPLENBQUMsRUFBRSxDQUFDLEVBQUUsQ0FBQyxFQUFFLENBQUMsTUFBTSxFQUFFLENBQUMsQ0FBQTtnQ0FFM0MsTUFBTSxLQUFLLEdBQUcsS0FBSyxDQUFDLElBQUksQ0FBQyxRQUFRLENBQUMsZ0JBQWdCLENBQUMsR0FBRyxDQUFDLENBQUM7cUNBQ2
5ELEdBQUcsQ0FBQyxDQUFDLENBQUMsRUFBRSxDQUFDLENBQUM7b0NBQ1AsSUFBSSxFQUFFLENBQUMsQ0FBQyxXQUFXLEVBQUUsSUFBSSxFQUFFLElBQUksRUFBRTtvQ0FDakMsSUFBSSxFQUFFLENBQUMsQ0FBQyxJQUFJO2lDQUNmLENBQUMsQ0FBQztxQ0FDRixNQUFNLENBQUMsSUFBSSxDQUFDLEVBQUUsQ0FBQyxJQUFJLENBQUMsSUFBSSxJQUFJLElBQUksQ0FBQyxJQUFJLENBQUMsVUFBVSxDQUFDLE1BQU0sQ0FBQyxDQUFDO3FDQUN6RCxLQUFLLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FBQyxDQUFBO2dDQUVqQixNQUFNLE1BQU0sR0FBRyxLQUFLLENBQUMsSUFBSSxDQUFDLFFBQVEsQ0FBQyxnQkFBZ0IsQ0FBQyxLQUFLLENBQUMsQ0FBQztxQ0FDdEQsR0FBRyxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsQ0FBQztvQ0FDVCxHQUFHLEVBQUUsR0FBRyxDQUFDLEdBQUc7b0NBQ1osR0FBRyxFQUFFLEdBQUcsQ0FBQyxHQUFHLElBQUksRUFBRTtvQ0FDbEIsS0FBSyxFQUFFLEdBQUcsQ0FBQyxLQUFLO29DQUNoQixNQUFNLEVBQUUsR0FBRyxDQUFDLE1BQU07aUNBQ3JCLENBQUMsQ0FBQztxQ0FDRixNQUFNLENBQUMsR0FBRyxDQUFDLEVBQUUsQ0FBQyxHQUFHLENBQUMsR0FBRyxJQUFJLEdBQUcsQ0FBQyxHQUFHLENBQUMsVUFBVSxDQUFDLE1BQU0sQ0FBQyxDQUFDO3FDQUNwRCxLQUFLLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FBQyxDQUFBO2dDQUVqQixNQUFNLFNBQVMsR0FBRyxRQUFRLENBQUMsYUFBYSxDQUFDLDJCQUEyQixDQUFDLEVBQUUsWUFBWSxDQUFDLFNBQVMsQ0FBQztvQ0FDMUYsUUFBUSxDQUFDLGFBQWEsQ0FBQyx1QkFBdUIsQ0FBQyxFQUFFLFlBQVksQ0FBQyxTQUFTLENBQUMsQ0FBQTtnQ0FFNUUsSUFBSSxPQUFPLENBQUE7Z0NBQ1gsTUFBTSxJQUFJLEdBQUcsUUFBUSxDQUFDLElBQUksQ0FBQTtnQ0FDMUIsT0FBTyxHQUFHLElBQUksQ0FBQyxDQUFDLENBQUMsSUFBSSxDQUFDLFNBQVMsQ0FBQyxDQUFDLENBQUMsRUFBRSxDQUFBO2dDQUNwQyxPQUFPO29DQUNILE9BQU87b0NBQ1AsS0FBSztvQ0FDTCxNQUFNO29DQUNOLE9BQU8sRUFBRSxTQUFTO2lDQUNyQixDQUFBOzRCQUNMLENBQUMsRUFBRSxJQUFJLENBQUMsQ0FBQTs0QkFFUixNQUFNLFFBQVEsR0FBRyxRQUFRLENBQUMsUUFBUSxDQUFDLFFBQVEsQ0FBQyxPQUFPLENBQUMsQ0FBQTs0QkFDcEQsTUFBTSxPQUFPLENBQUMsS0FBSyxFQUFFLENBQUE7NEJBQ3JCLE1BQU0sR0FBRyxHQUFHO2dDQUNSLE9BQU8sRUFBRSxJQUFJO2dDQUNiLFFBQVEsRUFBRSxRQUFRO2dDQUNsQixLQUFLLEVBQUUsUUFBUSxDQUFDLEtBQUs7Z0NBQ3JCLE1BQU0sRUFBRSxRQUFRLENBQUMsTUFBTTtnQ0FDdkIsU0FBUyxFQUFFLFFBQVEsQ0FBQyxPQUFPO2dDQUMzQixHQUFHLEVBQUUsTUFBTSxDQUFDLEdBQUc7NkJBQ2xCLENBQUM7NEJBQ0YsT0FBTyxHQUFHLENBQUE7eUJBQ2I7d0JBQUMsT0FBTyxLQUFVLEVBQUU7NEJBQ2pCLE1BQU0sQ0FBQyxLQUFLLENBQUMsc0JBQXNCLEVBQUUsS0FBSyxDQU
FDLE9BQU8sRUFBRSxLQUFLLENBQUMsQ0FBQzs0QkFDM0QsTUFBTSxPQUFPLENBQUMsS0FBSyxFQUFFLENBQUE7NEJBQ3JCLE1BQU0sS0FBSyxDQUFBO3lCQUNkO3FCQUNKO29CQUFDLE9BQU8sS0FBVSxFQUFFO3dCQUNqQixNQUFNLENBQUMsS0FBSyxDQUFDLHNCQUFzQixFQUFFLEtBQUssQ0FBQyxPQUFPLENBQUMsQ0FBQzt3QkFDcEQsT0FBTzs0QkFDSCxPQUFPLEVBQUUsS0FBSzs0QkFDZCxLQUFLLEVBQUUsS0FBSyxDQUFDLE9BQU87NEJBQ3BCLEdBQUcsRUFBRSxNQUFNLENBQUMsR0FBRzt5QkFDbEIsQ0FBQztxQkFDTDtnQkFDTCxDQUFDO2dCQUNELEtBQUssRUFBRSxJQUFJLENBQUMsS0FBSzthQUNwQjtTQUN5QjtLQUNqQyxDQUFBO0FBQ0wsQ0FBQyxDQUFBIn0=