99 lines
9.2 KiB
JavaScript
99 lines
9.2 KiB
JavaScript
import puppeteer from 'puppeteer';
|
|
import TurndownService from 'turndown';
|
|
import { toolLogger } from '../../index.js';
|
|
// Module-level HTML-to-Markdown converter, constructed with no options and
// used by the browse_page tool to convert the fetched page body.
const turndown = new TurndownService();
|
|
/**
 * Builds the tool descriptors for the `web` tool group.
 *
 * The returned array contains a single function tool, `browse_page`, which
 * loads a URL in headless Chromium (via puppeteer), strips non-content
 * elements, and returns the page body converted to Markdown together with
 * up to 20 absolute links, up to 20 absolute images and the Open Graph
 * image of the page.
 *
 * @param {*} target - Not read by this factory.
 * @param {*} options - Forwarded unchanged to `toolLogger('web', options)`.
 * @returns {Array<object>} A one-element array holding the `browse_page`
 *   tool descriptor (`type`, and `function` with `name`, `description`,
 *   `parameters`, the async `function` implementation and `parse`).
 */
export const tools = (target, options) => {
    const logger = toolLogger('web', options);
    return [
        {
            type: 'function',
            function: {
                name: 'browse_page',
                description: 'Browse a webpage and return its content as markdown, all links, images and pages main image',
                parameters: {
                    type: 'object',
                    properties: {
                        url: {
                            type: 'string',
                            description: 'URL of the webpage to browse'
                        }
                    },
                    required: ['url']
                },
                /**
                 * Fetches `params.url` and extracts its content.
                 *
                 * Never rejects: every failure is reported as
                 * `{ success: false, error, url }`.
                 *
                 * @param {{ url: string }} params - Tool arguments.
                 * @returns {Promise<object>} On success
                 *   `{ success: true, markdown, links, images, mainImage, url }`.
                 */
                function: async (params) => {
                    try {
                        logger.debug(`Tool::BrowsePage Browsing ${params.url}`);
                        const browser = await puppeteer.launch({
                            headless: true,
                            args: ['--no-sandbox', '--disable-setuid-sandbox']
                        });
                        try {
                            const page = await browser.newPage();
                            logger.debug(`Tool::Web::BrowsePage Opening page ${params.url}`);
                            await page.goto(params.url, {
                                waitUntil: 'networkidle2'
                            });
                            // This callback runs inside the page, so it must not
                            // reference anything from the enclosing Node scope.
                            const pageData = await page.evaluate(() => {
                                // Read the Open Graph image FIRST: the cleanup below
                                // removes every <meta> element, after which this
                                // lookup could never match.
                                const mainImage = document.querySelector('meta[property="og:image"]')?.getAttribute('content') ||
                                    document.querySelector('meta[name="og:image"]')?.getAttribute('content');

                                // Drop non-content and hidden elements so they do not
                                // end up in the Markdown output.
                                document
                                    .querySelectorAll('script, style, link, meta, noscript, iframe, [style*="display:none"],[style*="display: none"], .hidden')
                                    .forEach((el) => el.remove());

                                const links = Array.from(document.querySelectorAll('a'))
                                    .map((a) => ({
                                        text: a.textContent?.trim() || '',
                                        href: a.href
                                    }))
                                    // Keep only absolute http(s) targets.
                                    .filter((link) => link.href && link.href.startsWith('http'))
                                    .slice(0, 20);

                                const images = Array.from(document.querySelectorAll('img'))
                                    .map((img) => ({
                                        src: img.src,
                                        alt: img.alt || '',
                                        width: img.width,
                                        height: img.height
                                    }))
                                    // Skips data: URIs and images without a source.
                                    .filter((img) => img.src && img.src.startsWith('http'))
                                    .slice(0, 20);

                                const body = document.body;
                                return {
                                    content: body ? body.innerHTML : '',
                                    links,
                                    images,
                                    ogImage: mainImage
                                };
                            });
                            return {
                                success: true,
                                markdown: turndown.turndown(pageData.content),
                                links: pageData.links,
                                images: pageData.images,
                                mainImage: pageData.ogImage,
                                url: params.url
                            };
                        }
                        catch (error) {
                            logger.debug('Error browsing page:', error.message, error);
                            throw error;
                        }
                        finally {
                            // Always release the browser exactly once. A failing
                            // close must neither mask the original error nor
                            // discard an already extracted result.
                            try {
                                await browser.close();
                            }
                            catch (closeError) {
                                logger.debug('Error closing browser:', closeError.message);
                            }
                        }
                    }
                    catch (error) {
                        logger.debug('Error browsing page:', error.message);
                        return {
                            success: false,
                            error: error.message,
                            url: params.url
                        };
                    }
                },
                parse: JSON.parse
            }
        }
    ];
};
|
|
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoid2ViLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vLi4vc3JjL2xpYi90b29scy93ZWIudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBR0EsT0FBTyxTQUFTLE1BQU0sV0FBVyxDQUFBO0FBQ2pDLE9BQU8sZUFBZSxNQUFNLFVBQVUsQ0FBQTtBQUV0QyxPQUFPLEVBQUUsVUFBVSxFQUFFLE1BQU0sZ0JBQWdCLENBQUE7QUFHM0MsTUFBTSxRQUFRLEdBQUcsSUFBSSxlQUFlLEVBQUUsQ0FBQTtBQUV0QyxNQUFNLENBQUMsTUFBTSxLQUFLLEdBQUcsQ0FBQyxNQUFjLEVBQUUsT0FBa0IsRUFBYyxFQUFFO0lBQ3BFLE1BQU0sTUFBTSxHQUFHLFVBQVUsQ0FBQyxLQUFLLEVBQUUsT0FBTyxDQUFDLENBQUE7SUFDekMsT0FBTztRQUNIO1lBQ0ksSUFBSSxFQUFFLFVBQVU7WUFDaEIsUUFBUSxFQUFFO2dCQUNOLElBQUksRUFBRSxhQUFhO2dCQUNuQixXQUFXLEVBQUUsNkZBQTZGO2dCQUMxRyxVQUFVLEVBQUU7b0JBQ1IsSUFBSSxFQUFFLFFBQVE7b0JBQ2QsVUFBVSxFQUFFO3dCQUNSLEdBQUcsRUFBRTs0QkFDRCxJQUFJLEVBQUUsUUFBUTs0QkFDZCxXQUFXLEVBQUUsOEJBQThCO3lCQUM5QztxQkFDSjtvQkFDRCxRQUFRLEVBQUUsQ0FBQyxLQUFLLENBQUM7aUJBQ3BCO2dCQUNELFFBQVEsRUFBRSxLQUFLLEVBQUUsTUFBVyxFQUFFLEVBQUU7b0JBQzVCLElBQUk7d0JBQ0EsTUFBTSxDQUFDLEtBQUssQ0FBQyw2QkFBNkIsTUFBTSxDQUFDLEdBQUcsRUFBRSxDQUFDLENBQUM7d0JBQ3hELE1BQU0sT0FBTyxHQUFHLE1BQU0sU0FBUyxDQUFDLE1BQU0sQ0FBQzs0QkFDbkMsUUFBUSxFQUFFLElBQUk7NEJBQ2QsSUFBSSxFQUFFLENBQUMsY0FBYyxFQUFFLDBCQUEwQixDQUFDO3lCQUNyRCxDQUFDLENBQUE7d0JBRUYsSUFBSTs0QkFDQSxNQUFNLElBQUksR0FBRyxNQUFNLE9BQU8sQ0FBQyxPQUFPLEVBQUUsQ0FBQTs0QkFDcEMsTUFBTSxDQUFDLEtBQUssQ0FBQyxzQ0FBc0MsTUFBTSxDQUFDLEdBQUcsRUFBRSxDQUFDLENBQUE7NEJBQ2hFLE1BQU0sSUFBSSxDQUFDLElBQUksQ0FBQyxNQUFNLENBQUMsR0FBRyxFQUFFO2dDQUN4QixTQUFTLEVBQUUsY0FBYzs2QkFDNUIsQ0FBQyxDQUFBOzRCQUVGLE1BQU0sUUFBUSxHQUFHLE1BQU0sSUFBSSxDQUFDLFFBQVEsQ0FBQyxDQUFDLFFBQVEsRUFBRSxFQUFFO2dDQUM5QyxNQUFNLGdCQUFnQixHQUFHLFFBQVEsQ0FBQyxnQkFBZ0IsQ0FDOUMsd0dBQXdHLENBQzNHLENBQUE7Z0NBQ0QsZ0JBQWdCLENBQUMsT0FBTyxDQUFDLEVBQUUsQ0FBQyxFQUFFLENBQUMsRUFBRSxDQUFDLE1BQU0sRUFBRSxDQUFDLENBQUE7Z0NBRTNDLE1BQU0sS0FBSyxHQUFHLEtBQUssQ0FBQyxJQUFJLENBQUMsUUFBUSxDQUFDLGdCQUFnQixDQUFDLEdBQUcsQ0FBQyxDQUFDO3FDQUNuRCxHQUFHLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQyxDQUFDO29DQUNQLElBQUksRUFBRSxDQUFDLENBQUMsV0FBVyxFQU
FFLElBQUksRUFBRSxJQUFJLEVBQUU7b0NBQ2pDLElBQUksRUFBRSxDQUFDLENBQUMsSUFBSTtpQ0FDZixDQUFDLENBQUM7cUNBQ0YsTUFBTSxDQUFDLElBQUksQ0FBQyxFQUFFLENBQUMsSUFBSSxDQUFDLElBQUksSUFBSSxJQUFJLENBQUMsSUFBSSxDQUFDLFVBQVUsQ0FBQyxNQUFNLENBQUMsQ0FBQztxQ0FDekQsS0FBSyxDQUFDLENBQUMsRUFBRSxFQUFFLENBQUMsQ0FBQTtnQ0FFakIsTUFBTSxNQUFNLEdBQUcsS0FBSyxDQUFDLElBQUksQ0FBQyxRQUFRLENBQUMsZ0JBQWdCLENBQUMsS0FBSyxDQUFDLENBQUM7cUNBQ3RELEdBQUcsQ0FBQyxHQUFHLENBQUMsRUFBRSxDQUFDLENBQUM7b0NBQ1QsR0FBRyxFQUFFLEdBQUcsQ0FBQyxHQUFHO29DQUNaLEdBQUcsRUFBRSxHQUFHLENBQUMsR0FBRyxJQUFJLEVBQUU7b0NBQ2xCLEtBQUssRUFBRSxHQUFHLENBQUMsS0FBSztvQ0FDaEIsTUFBTSxFQUFFLEdBQUcsQ0FBQyxNQUFNO2lDQUNyQixDQUFDLENBQUM7cUNBQ0YsTUFBTSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsR0FBRyxDQUFDLEdBQUcsSUFBSSxHQUFHLENBQUMsR0FBRyxDQUFDLFVBQVUsQ0FBQyxNQUFNLENBQUMsQ0FBQztxQ0FDcEQsS0FBSyxDQUFDLENBQUMsRUFBRSxFQUFFLENBQUMsQ0FBQTtnQ0FFakIsTUFBTSxTQUFTLEdBQUcsUUFBUSxDQUFDLGFBQWEsQ0FBQywyQkFBMkIsQ0FBQyxFQUFFLFlBQVksQ0FBQyxTQUFTLENBQUM7b0NBQzFGLFFBQVEsQ0FBQyxhQUFhLENBQUMsdUJBQXVCLENBQUMsRUFBRSxZQUFZLENBQUMsU0FBUyxDQUFDLENBQUE7Z0NBRTVFLElBQUksT0FBTyxDQUFBO2dDQUNYLE1BQU0sSUFBSSxHQUFHLFFBQVEsQ0FBQyxJQUFJLENBQUE7Z0NBQzFCLE9BQU8sR0FBRyxJQUFJLENBQUMsQ0FBQyxDQUFDLElBQUksQ0FBQyxTQUFTLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQTtnQ0FDcEMsT0FBTztvQ0FDSCxPQUFPO29DQUNQLEtBQUs7b0NBQ0wsTUFBTTtvQ0FDTixPQUFPLEVBQUUsU0FBUztpQ0FDckIsQ0FBQTs0QkFDTCxDQUFDLEVBQUUsSUFBSSxDQUFDLENBQUE7NEJBRVIsTUFBTSxRQUFRLEdBQUcsUUFBUSxDQUFDLFFBQVEsQ0FBQyxRQUFRLENBQUMsT0FBTyxDQUFDLENBQUE7NEJBQ3BELE1BQU0sT0FBTyxDQUFDLEtBQUssRUFBRSxDQUFBOzRCQUNyQixNQUFNLEdBQUcsR0FBRztnQ0FDUixPQUFPLEVBQUUsSUFBSTtnQ0FDYixRQUFRLEVBQUUsUUFBUTtnQ0FDbEIsS0FBSyxFQUFFLFFBQVEsQ0FBQyxLQUFLO2dDQUNyQixNQUFNLEVBQUUsUUFBUSxDQUFDLE1BQU07Z0NBQ3ZCLFNBQVMsRUFBRSxRQUFRLENBQUMsT0FBTztnQ0FDM0IsR0FBRyxFQUFFLE1BQU0sQ0FBQyxHQUFHOzZCQUNsQixDQUFDOzRCQUNGLE9BQU8sR0FBRyxDQUFBO3lCQUNiO3dCQUFDLE9BQU8sS0FBVSxFQUFFOzRCQUNqQixNQUFNLENBQUMsS0FBSyxDQUFDLHNCQUFzQixFQUFFLEtBQUssQ0FBQyxPQUFPLEVBQUUsS0FBSyxDQUFDLENBQUM7NEJBQzNELE1BQU0sT0FBTyxDQUFDLEtBQUssRUFBRSxDQUFBOzRCQUNyQi
xNQUFNLEtBQUssQ0FBQTt5QkFDZDtxQkFDSjtvQkFBQyxPQUFPLEtBQVUsRUFBRTt3QkFDakIsTUFBTSxDQUFDLEtBQUssQ0FBQyxzQkFBc0IsRUFBRSxLQUFLLENBQUMsT0FBTyxDQUFDLENBQUM7d0JBQ3BELE9BQU87NEJBQ0gsT0FBTyxFQUFFLEtBQUs7NEJBQ2QsS0FBSyxFQUFFLEtBQUssQ0FBQyxPQUFPOzRCQUNwQixHQUFHLEVBQUUsTUFBTSxDQUFDLEdBQUc7eUJBQ2xCLENBQUM7cUJBQ0w7Z0JBQ0wsQ0FBQztnQkFDRCxLQUFLLEVBQUUsSUFBSSxDQUFDLEtBQUs7YUFDcEI7U0FDeUI7S0FDakMsQ0FBQTtBQUNMLENBQUMsQ0FBQSJ9
|