This commit is contained in:
2025-03-28 17:12:22 +01:00
parent 6c8e235118
commit 8ec21cf2c8
8 changed files with 2366 additions and 13 deletions
+93
View File
@@ -0,0 +1,93 @@
import { describe, it, expect, jest } from '@jest/globals';
import { PuppeteerUrlChecker, FetchUrlChecker, checkUrl, UrlCheckResult } from './url.js';
/**
 * Test suite for the URL checkers exported from './url.js'.
 *
 * The PuppeteerUrlChecker and checkUrl cases call the checker against live
 * hosts with no mocking, so they need network access and a launchable browser.
 * The FetchUrlChecker cases run against a mocked global `fetch`.
 */
describe('URL Checker', () => {
// Fixture URLs. Only the FetchUrlChecker cases use them, and because `fetch`
// is mocked there they are never actually requested.
const validUrl = 'https://backend.orbit.dtu.dk/ws/portalfiles/portal/278424474/Bertelsen_et_al_2022.pdf';
const invalidUrl = 'https://example.com/404';
const timeoutUrl = 'https://example.com/timeout';
// Increase timeout for real browser tests
jest.setTimeout(30000);
// Live tests: each case below goes through a real PuppeteerUrlChecker.check call.
describe('PuppeteerUrlChecker', () => {
const checker = new PuppeteerUrlChecker();
it('should validate a valid URL', async () => {
const result = await checker.check('https://www.google.com');
expect(result.valid).toBe(true);
});
// Relies on the remote host answering an unknown path with HTTP 404.
it('should handle invalid URLs', async () => {
const result = await checker.check('https://www.google.com/nonexistent-page-123456789');
expect(result.valid).toBe(false);
expect(result.error).toContain('404');
});
// NOTE(review): `toContain` is case-sensitive. Confirm that the installed
// Puppeteer version reports navigation timeouts with a capital-T "Timeout";
// otherwise this assertion cannot pass — TODO confirm.
it('should handle timeouts', async () => {
const result = await checker.check('http://example.com:81', 1000); // Port 81 should timeout quickly
expect(result.valid).toBe(false);
expect(result.error).toContain('Timeout');
});
// Only asserts that some error text is reported; the exact message is not pinned.
it('should handle network errors', async () => {
const result = await checker.check('http://invalid.domain.thisisnotreal');
expect(result.valid).toBe(false);
expect(result.error).toBeTruthy();
});
});
// Mocked tests: `global.fetch` is replaced, so no request leaves the process.
describe('FetchUrlChecker', () => {
const checker = new FetchUrlChecker();
// Install a fresh mock before every case so call state does not carry over.
// NOTE(review): `beforeEach` is not in the '@jest/globals' import list of this
// file, so this relies on Jest's injected globals — confirm the project config.
// NOTE(review): the original `fetch` is never restored after these cases.
beforeEach(() => {
global.fetch = jest.fn() as unknown as typeof fetch;
});
it('should validate a valid URL', async () => {
// Minimal stand-in for a Response; only the fields listed here are provided.
const mockResponse = {
ok: true,
status: 200,
statusText: 'OK'
} as unknown as Response;
(global.fetch as unknown as jest.Mock).mockResolvedValue(mockResponse);
const result = await checker.check(validUrl);
expect(result).toEqual({ valid: true });
});
it('should handle invalid URLs', async () => {
const mockResponse = {
ok: false,
status: 404,
statusText: 'Not Found'
} as unknown as Response;
(global.fetch as unknown as jest.Mock).mockResolvedValue(mockResponse);
const result = await checker.check(invalidUrl);
// A non-ok response is expected to be reported as "HTTP <status>: <statusText>".
expect(result).toEqual({
valid: false,
error: 'HTTP 404: Not Found'
});
});
// Despite the name, this simulates a fetch that rejects immediately with an
// Error whose message is 'Timeout'; the checker's own abort timer is not exercised.
it('should handle timeouts', async () => {
(global.fetch as unknown as jest.Mock).mockRejectedValue(new Error('Timeout'));
const result = await checker.check(timeoutUrl);
expect(result).toEqual({
valid: false,
error: 'Timeout'
});
});
});
// Live test: calls checkUrl against a real host with no mocking.
describe('checkUrl convenience function', () => {
it('should use the default checker', async () => {
const result = await checkUrl('https://www.google.com');
expect(result.valid).toBe(true);
});
});
});
+114
View File
@@ -0,0 +1,114 @@
import puppeteer from 'puppeteer';
/**
 * Outcome of a single URL check.
 */
export interface UrlCheckResult {
/** Whether the check succeeded; each checker decides which responses count as success. */
valid: boolean;
/** Human-readable failure reason; the checkers in this file set it whenever `valid` is false. */
error?: string;
}
/**
 * Contract implemented by every URL-checking strategy in this module.
 */
export interface UrlChecker {
/**
 * Checks a single URL.
 *
 * @param url - Address to check.
 * @param timeout - Optional time limit; the implementations in this file
 *   treat it as milliseconds and fall back to 10000 when it is omitted.
 * @returns A promise for the outcome of the check.
 */
check(url: string, timeout?: number): Promise<UrlCheckResult>;
}
/**
 * Checks a URL by loading it in a headless browser driven by Puppeteer.
 *
 * A fresh browser is launched for every call and closed before the call
 * settles. Failures are reported through the returned result, never thrown.
 */
export class PuppeteerUrlChecker implements UrlChecker {
  /** Navigation timeout in milliseconds used when the caller passes none. */
  private readonly defaultTimeout: number = 10000;
  /** User-Agent string set on the page before navigating. */
  private readonly userAgent: string = 'Mozilla/5.0 (compatible; PolymechBot/1.0; +http://polymech.org)';

  /**
   * Navigates to `url` and classifies the main response.
   *
   * @param url - Address to load.
   * @param timeout - Navigation time limit in milliseconds.
   * @returns `{ valid: true }` for a status in the 200–399 range; otherwise
   *   `{ valid: false, error }`, where `error` is either
   *   `"HTTP <status>: <statusText>"`, `"No response received"`, the message
   *   of the error raised during launch/navigation, or
   *   `"Unknown error occurred"` for a non-Error throwable.
   */
  async check(url: string, timeout: number = this.defaultTimeout): Promise<UrlCheckResult> {
    // Typed from the launch call itself so no extra import is needed.
    let browser: Awaited<ReturnType<typeof puppeteer.launch>> | undefined;
    try {
      browser = await puppeteer.launch({
        // Cast kept from the original: the accepted `headless` values differ
        // between Puppeteer releases.
        headless: 'new' as any,
        args: ['--ignore-certificate-errors', '--no-sandbox', '--disable-setuid-sandbox']
      });
      const page = await browser.newPage();
      await page.setUserAgent(this.userAgent);
      // Synchronous setter, so there is nothing to await here.
      page.setDefaultNavigationTimeout(timeout);

      const response = await page.goto(url, {
        waitUntil: 'networkidle0',
        timeout
      });
      if (!response) {
        return { valid: false, error: 'No response received' };
      }

      const status = response.status();
      if (status >= 200 && status < 400) {
        return { valid: true };
      }
      return {
        valid: false,
        error: `HTTP ${status}: ${response.statusText()}`
      };
    } catch (error) {
      return {
        valid: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    } finally {
      if (browser) {
        try {
          await browser.close();
        } catch {
          // A throw from `finally` would replace the result computed above and
          // make check() reject, so shutdown failures are deliberately ignored.
        }
      }
    }
  }
}
/**
 * Checks a URL with a plain `fetch` request (no browser involved).
 *
 * Failures are reported through the returned result, never thrown.
 */
export class FetchUrlChecker implements UrlChecker {
  /** Request timeout in milliseconds used when the caller passes none. */
  private readonly defaultTimeout: number = 10000;
  /** User-Agent header sent with the request. */
  private readonly userAgent: string = 'Mozilla/5.0 (compatible; PolymechBot/1.0; +http://polymech.org)';

  /**
   * Requests `url`, following redirects, and classifies the response.
   *
   * @param url - Address to request.
   * @param timeout - Time limit in milliseconds after which the request is aborted.
   * @returns `{ valid: true }` when `response.ok` is true; otherwise
   *   `{ valid: false, error }`, where `error` is either
   *   `"HTTP <status>: <statusText>"`, a `"Timeout: ..."` message when the
   *   time limit fired, the message of the error raised by `fetch`, or
   *   `"Unknown error occurred"` for a non-Error throwable.
   */
  async check(url: string, timeout: number = this.defaultTimeout): Promise<UrlCheckResult> {
    const controller = new AbortController();
    // Records that the abort came from our own timer, so it can be reported as
    // a timeout instead of as the runtime's generic abort message.
    let timedOut = false;
    const timeoutId = setTimeout(() => {
      timedOut = true;
      controller.abort();
    }, timeout);

    try {
      const response = await fetch(url, {
        signal: controller.signal,
        redirect: 'follow',
        headers: {
          'User-Agent': this.userAgent
        }
      });
      // Only the status matters; release the unread body (best effort) so the
      // connection is not held open until garbage collection.
      void response.body?.cancel().catch(() => undefined);

      if (!response.ok) {
        return {
          valid: false,
          error: `HTTP ${response.status}: ${response.statusText}`
        };
      }
      return { valid: true };
    } catch (error) {
      if (timedOut) {
        return {
          valid: false,
          error: `Timeout: no response within ${timeout}ms`
        };
      }
      if (error instanceof Error) {
        return {
          valid: false,
          error: error.message
        };
      }
      return {
        valid: false,
        error: 'Unknown error occurred'
      };
    } finally {
      // Cleared on every path: a rejected fetch must not leave the abort timer
      // pending for the rest of the timeout window.
      clearTimeout(timeoutId);
    }
  }
}
/**
 * Default checker instance, used by `checkUrl`.
 * It is a `PuppeteerUrlChecker`, so every check launches a headless browser.
 */
export const defaultChecker: UrlChecker = new PuppeteerUrlChecker();
/**
 * Convenience wrapper that checks `url` with the module's default checker.
 *
 * @param url - Address to check.
 * @param timeout - Optional time limit, forwarded unchanged to the checker.
 * @returns The result produced by `defaultChecker.check`.
 */
export async function checkUrl(url: string, timeout?: number): Promise<UrlCheckResult> {
  const outcome = await defaultChecker.check(url, timeout);
  return outcome;
}