generated from polymech/site-template
latest
This commit is contained in:
@@ -0,0 +1,93 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, jest } from '@jest/globals';
import { PuppeteerUrlChecker, FetchUrlChecker, checkUrl, UrlCheckResult } from './url.js';
|
||||
|
||||
/*
 * Test suite for the URL checkers exported by `./url.js`.
 *
 * - The `PuppeteerUrlChecker` and `checkUrl` cases drive a real browser against
 *   real hosts, so they need network access and a generous Jest timeout.
 * - The `FetchUrlChecker` cases replace `global.fetch` with a Jest mock, so no
 *   request leaves the process; the URLs used there are labels only.
 */
describe('URL Checker', () => {
  // Test URLs
  const validUrl = 'https://backend.orbit.dtu.dk/ws/portalfiles/portal/278424474/Bertelsen_et_al_2022.pdf';
  const invalidUrl = 'https://example.com/404';
  const timeoutUrl = 'https://example.com/timeout';

  // Increase timeout for real browser tests
  jest.setTimeout(30000);

  describe('PuppeteerUrlChecker', () => {
    const checker = new PuppeteerUrlChecker();

    it('should validate a valid URL', async () => {
      const result = await checker.check('https://www.google.com');
      expect(result.valid).toBe(true);
    });

    it('should handle invalid URLs', async () => {
      const result = await checker.check('https://www.google.com/nonexistent-page-123456789');
      expect(result.valid).toBe(false);
      expect(result.error).toContain('404');
    });

    it('should handle timeouts', async () => {
      const result = await checker.check('http://example.com:81', 1000); // Port 81 should timeout quickly
      expect(result.valid).toBe(false);
      // The wording and casing of the browser's timeout message is not under our
      // control ("Navigation timeout of 1000 ms exceeded", "...TIMED_OUT", ...),
      // so match it case-insensitively instead of requiring a capital "T".
      expect(result.error).toMatch(/time(?:d[ _])?out/i);
    });

    it('should handle network errors', async () => {
      const result = await checker.check('http://invalid.domain.thisisnotreal');
      expect(result.valid).toBe(false);
      expect(result.error).toBeTruthy();
    });
  });

  describe('FetchUrlChecker', () => {
    const checker = new FetchUrlChecker();

    // Keep a handle on the real implementation so the mock installed for each
    // test can be removed again and never leaks into later suites.
    const originalFetch = global.fetch;

    beforeEach(() => {
      global.fetch = jest.fn() as unknown as typeof fetch;
    });

    afterEach(() => {
      global.fetch = originalFetch;
    });

    it('should validate a valid URL', async () => {
      const mockResponse = {
        ok: true,
        status: 200,
        statusText: 'OK'
      } as unknown as Response;

      (global.fetch as unknown as jest.Mock).mockResolvedValue(mockResponse);

      const result = await checker.check(validUrl);
      expect(result).toEqual({ valid: true });
    });

    it('should handle invalid URLs', async () => {
      const mockResponse = {
        ok: false,
        status: 404,
        statusText: 'Not Found'
      } as unknown as Response;

      (global.fetch as unknown as jest.Mock).mockResolvedValue(mockResponse);

      const result = await checker.check(invalidUrl);
      expect(result).toEqual({
        valid: false,
        error: 'HTTP 404: Not Found'
      });
    });

    it('should handle timeouts', async () => {
      // A rejected fetch must be reported through the result, not thrown.
      (global.fetch as unknown as jest.Mock).mockRejectedValue(new Error('Timeout'));

      const result = await checker.check(timeoutUrl);
      expect(result).toEqual({
        valid: false,
        error: 'Timeout'
      });
    });
  });

  describe('checkUrl convenience function', () => {
    it('should use the default checker', async () => {
      const result = await checkUrl('https://www.google.com');
      expect(result.valid).toBe(true);
    });
  });
});
|
||||
+114
@@ -0,0 +1,114 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Outcome of checking a single URL.
 *
 * The checkers in this module return `{ valid: true }` on success and
 * `{ valid: false, error }` on failure.
 */
export interface UrlCheckResult {
  /** `true` when the URL was considered reachable, `false` otherwise. */
  valid: boolean;
  /** Human-readable failure reason; the checkers in this module set it whenever `valid` is `false`. */
  error?: string;
}
|
||||
|
||||
/**
 * Contract implemented by every URL-checking strategy in this module.
 */
export interface UrlChecker {
  /**
   * Checks whether `url` can be reached.
   *
   * @param url - Address to check.
   * @param timeout - Optional time budget; the implementations in this module
   *   treat it as milliseconds and fall back to their own default when omitted.
   * @returns A promise for the outcome of the check.
   */
  check(url: string, timeout?: number): Promise<UrlCheckResult>;
}
|
||||
|
||||
/**
 * URL checker that navigates to the address in a headless browser started
 * through Puppeteer.
 *
 * Every call launches its own browser and closes it again before returning.
 * Failures (HTTP status outside 200-399, navigation errors, timeouts) are
 * reported through the returned {@link UrlCheckResult}; `check` is written so
 * that it does not reject.
 */
export class PuppeteerUrlChecker implements UrlChecker {
  /** Navigation timeout in milliseconds used when the caller passes none. */
  private readonly defaultTimeout: number = 10000;
  /** User-Agent header sent with the navigation request. */
  private readonly userAgent: string = 'Mozilla/5.0 (compatible; PolymechBot/1.0; +http://polymech.org)';

  /**
   * Loads `url` in a fresh page and classifies the main response.
   *
   * @param url - Address to navigate to.
   * @param timeout - Navigation timeout in milliseconds (default 10000).
   * @returns `{ valid: true }` for a 2xx/3xx response; otherwise
   *   `{ valid: false, error }` where `error` is `HTTP <status>: <statusText>`,
   *   `No response received`, the thrown error's message, or
   *   `Unknown error occurred` for non-`Error` throwables.
   */
  async check(url: string, timeout: number = this.defaultTimeout): Promise<UrlCheckResult> {
    let browser;
    try {
      browser = await puppeteer.launch({
        // NOTE(review): the `any` cast hides a type mismatch; which `headless`
        // values are accepted depends on the installed Puppeteer version — confirm.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        headless: 'new' as any,
        // Presumably lets hosts with broken TLS certificates still count as
        // reachable, and allows running without a sandbox (e.g. in containers).
        args: ['--ignore-certificate-errors', '--no-sandbox', '--disable-setuid-sandbox']
      });

      const page = await browser.newPage();
      await page.setUserAgent(this.userAgent);
      // Synchronous setter: nothing to await here.
      page.setDefaultNavigationTimeout(timeout);

      const response = await page.goto(url, {
        waitUntil: 'networkidle0',
        timeout
      });

      if (!response) {
        return { valid: false, error: 'No response received' };
      }

      const status = response.status();
      if (status >= 200 && status < 400) {
        return { valid: true };
      }

      return {
        valid: false,
        error: `HTTP ${status}: ${response.statusText()}`
      };
    } catch (error) {
      return {
        valid: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    } finally {
      if (browser) {
        try {
          await browser.close();
        } catch {
          // Best-effort cleanup: an exception thrown from `finally` would replace
          // the result computed above and make `check` reject, so a failure to
          // close the browser is deliberately not propagated.
        }
      }
    }
  }
}
|
||||
|
||||
/**
 * URL checker built on the global `fetch` API.
 *
 * Issues a single request (following redirects) and treats any response whose
 * `ok` flag is set as valid. Failures are reported through the returned
 * {@link UrlCheckResult}; `check` is written so that it does not reject.
 */
export class FetchUrlChecker implements UrlChecker {
  /** Request timeout in milliseconds used when the caller passes none. */
  private readonly defaultTimeout: number = 10000;
  /** User-Agent header sent with the request. */
  private readonly userAgent: string = 'Mozilla/5.0 (compatible; PolymechBot/1.0; +http://polymech.org)';

  /**
   * Requests `url` and classifies the response.
   *
   * @param url - Address to request.
   * @param timeout - Milliseconds after which the request is aborted (default 10000).
   * @returns `{ valid: true }` when `response.ok`; otherwise
   *   `{ valid: false, error }` where `error` is `HTTP <status>: <statusText>`,
   *   `Timeout after <timeout>ms` when the abort timer fired, the thrown
   *   error's message, or `Unknown error occurred` for non-`Error` throwables.
   */
  async check(url: string, timeout: number = this.defaultTimeout): Promise<UrlCheckResult> {
    let controller: AbortController | undefined;
    let timeoutId: ReturnType<typeof setTimeout> | undefined;

    try {
      controller = new AbortController();
      const abortSource = controller;
      timeoutId = setTimeout(() => abortSource.abort(), timeout);

      const response = await fetch(url, {
        signal: controller.signal,
        redirect: 'follow',
        headers: {
          'User-Agent': this.userAgent
        }
      });

      // Only the status matters; discard the unread body so the underlying
      // connection is not kept busy. Best effort: a failing cancel must not
      // change the verdict. Optional chaining covers responses without a body.
      await response.body?.cancel().catch(() => undefined);

      if (!response.ok) {
        return {
          valid: false,
          error: `HTTP ${response.status}: ${response.statusText}`
        };
      }

      return { valid: true };
    } catch (error) {
      // The signal is only ever aborted by our own timer, so an aborted signal
      // means the time budget ran out; say so instead of surfacing the
      // generic abort message.
      if (controller?.signal.aborted) {
        return { valid: false, error: `Timeout after ${timeout}ms` };
      }
      return {
        valid: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    } finally {
      // Always release the timer, including when fetch rejects; otherwise it
      // would stay pending for the remainder of `timeout`.
      if (timeoutId !== undefined) {
        clearTimeout(timeoutId);
      }
    }
  }
}
|
||||
|
||||
// Default checker instance
|
||||
/**
 * Checker instance that `checkUrl` delegates to: a `PuppeteerUrlChecker`
 * exposed through the `UrlChecker` interface.
 */
export const defaultChecker: UrlChecker = new PuppeteerUrlChecker();
|
||||
|
||||
/**
 * Convenience wrapper that checks `url` with the module-level `defaultChecker`.
 *
 * @param url - Address to check.
 * @param timeout - Optional timeout forwarded unchanged to the checker.
 * @returns The result produced by `defaultChecker.check`.
 */
export async function checkUrl(url: string, timeout?: number): Promise<UrlCheckResult> {
  const outcome = await defaultChecker.check(url, timeout);
  return outcome;
}
|
||||
Reference in New Issue
Block a user