base:url - meta

This commit is contained in:
lovebird 2025-03-29 11:13:30 +01:00
parent 5020f624c9
commit ed0d8b60f2
24 changed files with 6569 additions and 8629 deletions

View File

@ -23,7 +23,7 @@
"format": "unix-time"
}
],
"default": "2025-03-29T08:36:39.384Z"
"default": "2025-03-29T08:49:25.330Z"
},
"description": {
"type": "string",

File diff suppressed because one or more lines are too long

View File

@ -4790,5 +4790,24 @@
"https://www.youtube.com/watch?v=yODwM9c1srg": {
"isValid": true,
"timestamp": 1743237262353
},
"https://www.alibaba.com/product-detail/SJ25-SJ35-SJ45-SJ65-single-screw_1600600262552.html": {
"isValid": {
"url": "https://www.alibaba.com/product-detail/SJ25-SJ35-SJ45-SJ65-single-screw_1600600262552.html",
"title": "Sj25 Sj35 Sj45 Sj65 Single Screw Extruder Small Lab Plastic Extruder - Buy Plastic Extruder,Single Screw Extruder,Lab Plastic Extruder Product on Alibaba.com",
"siteName": "www.alibaba.com",
"description": "Sj25 Sj35 Sj45 Sj65 Single Screw Extruder Small Lab Plastic Extruder - Buy Plastic Extruder,Single Screw Extruder,Lab Plastic Extruder Product on Alibaba.com",
"mediaType": "product",
"contentType": "text/html",
"images": [
"https://sc04.alicdn.com/kf/H72f50510a1934196a62b0dafd881bd61u.jpg"
],
"videos": [],
"favicons": [
"https://www.alibaba.com/favicon.ico"
],
"charset": "UTF-8"
},
"timestamp": 1743241420363
}
}

12
jest.config.js Normal file
View File

@ -0,0 +1,12 @@
/**
 * Jest configuration for the ESM + TypeScript test suites.
 *
 * - TypeScript sources (`.ts` and `.tsx`) are compiled by ts-jest in ESM mode.
 * - Both extensions are declared as ESM so that every file matched by the
 *   transform pattern is actually loaded as an ES module; previously only
 *   `.ts` was listed even though the transform also handles `.tsx`.
 * - Relative imports written with a `.js` suffix are mapped back to their
 *   extension-less form so they resolve to the TypeScript sources.
 * - Any `config/*` import is redirected to the test mock.
 */
const config = {
  testEnvironment: 'node',
  transform: {
    '^.+\\.tsx?$': ['ts-jest', { useESM: true }]
  },
  extensionsToTreatAsEsm: ['.ts', '.tsx'],
  moduleNameMapper: {
    // Order matters: Jest uses the first pattern that matches.
    '^(\\.{1,2}/.*)\\.js$': '$1',
    '^config/(.*)$': '<rootDir>/src/model/__tests__/__mocks__/config.js'
  },
  moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node']
};

export default config;

137
package-lock.json generated
View File

@ -46,6 +46,7 @@
"imagetools": "file:../astro-components/packages/imagetools",
"jsonpath-plus": "^10.3.0",
"lighthouse": "^12.3.0",
"link-preview-js": "^3.0.14",
"linkinator": "^6.1.2",
"markdown-it": "^14.1.0",
"marked": "^15.0.7",
@ -6304,6 +6305,12 @@
"integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==",
"license": "MIT"
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
"license": "ISC"
},
"node_modules/boxen": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/boxen/-/boxen-8.0.1.tgz",
@ -6808,6 +6815,45 @@
"node": "*"
}
},
"node_modules/cheerio": {
"version": "1.0.0-rc.11",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.11.tgz",
"integrity": "sha512-bQwNaDIBKID5ts/DsdhxrjqFXYfLw4ste+wMKqWA8DyKcS4qwsPP4Bk8ZNaTJjvpiX/qW3BT4sU7d6Bh5i+dag==",
"license": "MIT",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"htmlparser2": "^8.0.1",
"parse5": "^7.0.0",
"parse5-htmlparser2-tree-adapter": "^7.0.0",
"tslib": "^2.4.0"
},
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/chokidar": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
@ -7508,6 +7554,34 @@
"integrity": "sha512-N3ASg0C4kNPUaNxt1XAvzHIVuzdtr8KLgfk1O8WDyimp1GisPAHESupArO2ieHk9QWbrJ/WkQODyh21Ps/xhxw==",
"license": "Apache-2.0"
},
"node_modules/css-select": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
"license": "BSD-2-Clause",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cssesc": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz",
@ -13614,6 +13688,19 @@
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
"license": "MIT"
},
"node_modules/link-preview-js": {
"version": "3.0.14",
"resolved": "https://registry.npmjs.org/link-preview-js/-/link-preview-js-3.0.14.tgz",
"integrity": "sha512-BAGZGCogqsWfF3msPt0c6DXr4+4zv7fregAxPioFYZJKoQEbKhJOhmu7VQjZmtKd1VRQ6CbL80Ok2KhpIuWJnQ==",
"license": "MIT",
"dependencies": {
"cheerio": "1.0.0-rc.11",
"url": "0.11.0"
},
"engines": {
"node": ">=18"
}
},
"node_modules/linkify-it": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.0.tgz",
@ -16586,6 +16673,18 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/number-is-nan": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/number-is-nan/-/number-is-nan-1.0.1.tgz",
@ -17137,6 +17236,19 @@
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz",
"integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==",
"license": "MIT",
"dependencies": {
"domhandler": "^5.0.3",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/pascalcase": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/pascalcase/-/pascalcase-0.1.1.tgz",
@ -17977,6 +18089,15 @@
],
"license": "MIT"
},
"node_modules/querystring": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz",
"integrity": "sha512-X/xY82scca2tau62i9mDyU9K+I+djTMUsvwf7xnUX5GLvVzgJybOJf4Y6o9Zx3oJK/LSXg5tTZBjwzqVPaPO2g==",
"deprecated": "The querystring API is considered Legacy. new code should use the URLSearchParams API instead.",
"engines": {
"node": ">=0.4.x"
}
},
"node_modules/queue-microtask": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@ -21432,6 +21553,22 @@
"deprecated": "Please see https://github.com/lydell/urix#deprecated",
"license": "MIT"
},
"node_modules/url": {
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/url/-/url-0.11.0.tgz",
"integrity": "sha512-kbailJa29QrtXnxgq+DdCEGlbTeYM2eJUxsz6vjZavrCYPMIFHMKQmSKYAIuUK2i7hgPm28a8piX5NTUtM/LKQ==",
"license": "MIT",
"dependencies": {
"punycode": "1.3.2",
"querystring": "0.2.0"
}
},
"node_modules/url/node_modules/punycode": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz",
"integrity": "sha512-RofWgt/7fL5wP1Y7fxE7/EmTLzQVnB0ycyibJ0OOHIlJqTNzglYFxVwETOcIoJqJmpDXJ9xImDv+Fq34F/d4Dw==",
"license": "MIT"
},
"node_modules/urlpattern-polyfill": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",

View File

@ -23,7 +23,8 @@
"test:base:watch": "vitest watch src/base",
"test:model": "vitest run src/model",
"test:model:watch": "vitest watch src/model",
"test:watch": "node --experimental-vm-modules node_modules/jest/bin/jest.js --watch"
"test:watch": "node --experimental-vm-modules node_modules/jest/bin/jest.js --watch",
"test:url": "vitest run src/base/url.test.ts src/base/link-preview.test.ts"
},
"dependencies": {
"@astrojs/compiler": "^2.10.4",
@ -64,6 +65,7 @@
"imagetools": "file:../astro-components/packages/imagetools",
"jsonpath-plus": "^10.3.0",
"lighthouse": "^12.3.0",
"link-preview-js": "^3.0.14",
"linkinator": "^6.1.2",
"markdown-it": "^14.1.0",
"marked": "^15.0.7",
@ -104,36 +106,16 @@
"@types/jest": "^29.5.14",
"@typescript-eslint/eslint-plugin": "^7.1.0",
"@typescript-eslint/parser": "^7.1.0",
"@vitest/coverage-v8": "^1.3.1",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.1.3",
"prettier": "^3.2.5",
"@vitest/coverage-v8": "^1.3.1",
"eslint-plugin-prettier": "^5.1.0",
"jest": "^29.7.0",
"micromark-util-sanitize-uri": "^2.0.1",
"normalize-url": "^8.0.1",
"prettier": "^3.2.5",
"sass-embedded": "^1.83.4",
"ts-jest": "^29.3.0",
"vitest": "^1.3.1"
},
"jest": {
"preset": "ts-jest/presets/default-esm",
"testEnvironment": "node",
"extensionsToTreatAsEsm": [
".ts",
".tsx"
],
"moduleNameMapper": {
"^@/(.*)$": "<rootDir>/src/$1",
"^(\\.{1,2}/.*)\\.js$": "$1"
},
"transform": {
"^.+\\.tsx?$": [
"ts-jest",
{
"useESM": true
}
]
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
import { jest } from '@jest/globals';
// Fixed metadata payload handed back by the mocked `meta` for every URL.
const mockedMetaResult = {
  title: 'Test Title',
  description: 'Test Description',
  image: 'https://example.com/image.jpg',
  favicon: 'https://example.com/favicon.ico',
  siteName: 'Example Site'
};

/** Jest mock for `meta`: every call resolves to the same canned metadata. */
export const meta = jest.fn().mockResolvedValue(mockedMetaResult);

View File

@ -1,60 +0,0 @@
import { describe, it, expect } from 'vitest';
import { fixMarkdownLint } from '../markdown.js';
// Test suite for fixMarkdownLint. Every case awaits the function and inspects
// the two fields of its result: `fixed` (the rewritten markdown) and `errors`
// (expected to be empty in all cases below).
describe('fixMarkdownLint', () => {
// Headings at levels 1-3 are expected to come back with each word capitalized.
it('should fix heading capitalization', async () => {
const input = `# hello world
## this is a test heading
### another heading here`;
const result = await fixMarkdownLint(input);
expect(result.fixed).toBe(`# Hello World
## This Is A Test Heading
### Another Heading Here`);
expect(result.errors.length).toBe(0);
});
// An empty string must round-trip unchanged and report no errors.
it('should handle empty input', async () => {
const result = await fixMarkdownLint('');
expect(result.fixed).toBe('');
expect(result.errors.length).toBe(0);
});
// Headings around a fenced code block are capitalized while the fence and
// its contents are expected to stay exactly as written.
it('should handle markdown with code blocks', async () => {
const input = `# test heading
\`\`\`typescript
const hello = "world";
\`\`\`
## another heading`;
const result = await fixMarkdownLint(input);
expect(result.fixed).toBe(`# Test Heading
\`\`\`typescript
const hello = "world";
\`\`\`
## Another Heading`);
expect(result.errors.length).toBe(0);
});
// List items are expected to get a capitalized first letter, alongside the
// capitalized headings.
it('should handle markdown with lists', async () => {
const input = `# test heading
- item 1
- item 2
- subitem 1
- subitem 2
## another heading`;
const result = await fixMarkdownLint(input);
expect(result.fixed).toBe(`# Test Heading
- Item 1
- Item 2
- Subitem 1
- Subitem 2
## Another Heading`);
expect(result.errors.length).toBe(0);
});
});

View File

@ -0,0 +1,105 @@
import { jest } from '@jest/globals';
import { urlCache } from '../url-cache.js';
import { validateUrl } from '../../model/filters.js';
import fs from 'fs/promises';
import path from 'path';
// Swap the URL module for its Jest mock before the tests run.
jest.mock('../url.js');

// Stand-in for the global fetch: every request succeeds with an empty 200 response.
const mockFetch = jest.fn().mockImplementation(
  async (): Promise<Response> => {
    const okResponse = new Response(null, {
      status: 200,
      statusText: 'OK'
    });
    return okResponse;
  }
);
(global as any).fetch = mockFetch;

// Metadata fixture shared by the cache tests.
const testMeta = {
  title: 'Test Title',
  description: 'Test Description',
  image: 'https://example.com/image.jpg',
  favicon: 'https://example.com/favicon.ico',
  siteName: 'Example Site'
};
/**
 * Tests for the persistent URL cache: storing validity flags and metadata,
 * expiry of stale entries, and the integration points that fill in metadata
 * (`validateUrl` and `expandUrls`).
 */
describe('UrlCache', () => {
  const testUrl = 'https://example.com';

  beforeEach(async () => {
    // Start every test from an empty cache and a fresh fetch mock.
    await urlCache.clear();
    mockFetch.mockClear();
  });

  afterAll(async () => {
    // Do not leave a cache file behind once the suite is done.
    await urlCache.clear();
  });

  test('should store and retrieve URL validity', async () => {
    await urlCache.set(testUrl, true);
    const result = await urlCache.get(testUrl);
    expect(result).toBeTruthy();
    expect(result?.isValid).toBe(true);
  });

  test('should store and retrieve meta information', async () => {
    await urlCache.set(testUrl, true, testMeta);
    const result = await urlCache.get(testUrl);
    expect(result?.meta).toEqual(testMeta);
  });

  test('should handle invalid URLs', async () => {
    await urlCache.set(testUrl, false);
    const result = await urlCache.get(testUrl);
    expect(result?.isValid).toBe(false);
  });

  test('should expire cache entries', async () => {
    // Seed the cache file directly with an entry older than the expiry window.
    const oldEntry = {
      isValid: true,
      timestamp: Date.now() - (8 * 24 * 60 * 60 * 1000), // 8 days old
      meta: testMeta
    };
    const cacheFile = path.join(process.cwd(), '.cache', 'url-cache.json');
    // The cache directory may not exist yet (clearing the cache only removes
    // the file), so create it before writing; otherwise this test fails when
    // run on its own in a clean checkout.
    await fs.mkdir(path.dirname(cacheFile), { recursive: true });
    await fs.writeFile(cacheFile, JSON.stringify({ [testUrl]: oldEntry }));

    const result = await urlCache.get(testUrl);
    expect(result).toBeNull();
  });

  test('validateUrl should store meta information', async () => {
    const isValid = await validateUrl(testUrl);
    expect(isValid).toBe(true);
    expect(mockFetch).toHaveBeenCalledWith(
      testUrl,
      expect.objectContaining({
        signal: expect.any(AbortSignal),
        redirect: 'follow'
      })
    );

    const result = await urlCache.get(testUrl);
    expect(result?.isValid).toBe(true);
    expect(result?.meta).toEqual(testMeta);
  });

  test('expandUrls should add meta information to valid URLs without meta', async () => {
    // Add a URL without meta info
    await urlCache.set(testUrl, true);
    let result = await urlCache.get(testUrl);
    expect(result?.meta).toBeUndefined();

    // Expand URLs
    await urlCache.expandUrls();

    // Check that meta info was added
    result = await urlCache.get(testUrl);
    expect(result?.meta).toEqual(testMeta);
  });
});

107
src/base/url-cache.ts Normal file
View File

@ -0,0 +1,107 @@
import fs from 'fs/promises';
import path from 'path';
import { meta } from '../base/url.js';
/** One cached record for a URL. */
interface CacheEntry {
  /** Validity flag supplied by the caller of `set`. */
  isValid: boolean;
  /** Epoch milliseconds of the last write; used for expiry. */
  timestamp: number;
  /** Optional page metadata stored alongside the validity flag. */
  meta?: {
    title?: string;
    description?: string;
    image?: string;
    favicon?: string;
    siteName?: string;
  };
}

/** On-disk / in-memory cache shape: URL -> entry. */
interface CacheData {
  [url: string]: CacheEntry;
}

const CACHE_FILE = path.join(process.cwd(), '.cache', 'url-cache.json');
const CACHE_EXPIRY = 7 * 24 * 60 * 60 * 1000; // 1 week in milliseconds

/**
 * File-backed cache of URL validity and metadata.
 *
 * The cache is loaded lazily from `CACHE_FILE` on first use and written back
 * after every mutation. Entries older than `CACHE_EXPIRY` are dropped on read.
 */
class UrlCache {
  private cache: CacheData = {};
  private initialized = false;

  /**
   * Shallow structural check for parsed cache content: a non-null, non-array
   * object. Individual entries are not validated.
   */
  private static isCacheData(value: unknown): value is CacheData {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /** Reads the cache file once; any read/parse problem yields an empty cache. */
  private async loadCache(): Promise<void> {
    if (this.initialized) return;
    let parsed: unknown;
    try {
      const data = await fs.readFile(CACHE_FILE, 'utf-8');
      parsed = JSON.parse(data);
    } catch (error) {
      // If file doesn't exist or is invalid, start with empty cache
      parsed = undefined;
    }
    // Valid JSON that is not an object (e.g. `null` or an array) would break
    // lookups later on, so it is treated like a missing file.
    this.cache = UrlCache.isCacheData(parsed) ? parsed : {};
    this.initialized = true;
  }

  /** Persists the in-memory cache; failures are logged, never thrown. */
  private async saveCache(): Promise<void> {
    try {
      await fs.mkdir(path.dirname(CACHE_FILE), { recursive: true });
      await fs.writeFile(CACHE_FILE, JSON.stringify(this.cache, null, 2));
    } catch (error) {
      console.error('Error saving cache:', error);
    }
  }

  /** True when the entry is older than `CACHE_EXPIRY`. */
  private isExpired(entry: CacheEntry): boolean {
    return Date.now() - entry.timestamp > CACHE_EXPIRY;
  }

  /**
   * Looks up a URL.
   * @returns the cached entry, or `null` when the URL is unknown or expired
   *          (expired entries are removed and the cache is saved).
   */
  async get(url: string): Promise<CacheEntry | null> {
    await this.loadCache();
    const entry = this.cache[url];
    if (!entry) return null;

    if (this.isExpired(entry)) {
      delete this.cache[url];
      await this.saveCache();
      return null;
    }
    return entry;
  }

  /**
   * Stores (or overwrites) the entry for a URL with the current timestamp.
   * @param url     cache key
   * @param isValid validity flag to record
   * @param meta    optional metadata to record with the entry
   */
  async set(url: string, isValid: boolean, meta?: CacheEntry['meta']): Promise<void> {
    await this.loadCache();
    this.cache[url] = {
      isValid,
      timestamp: Date.now(),
      meta
    };
    await this.saveCache();
  }

  /** Empties the cache and removes the cache file if it exists. */
  async clear(): Promise<void> {
    this.cache = {};
    this.initialized = false;
    try {
      await fs.unlink(CACHE_FILE);
    } catch (error) {
      // Ignore if file doesn't exist
    }
  }

  /**
   * Fetches metadata for every valid entry that has none yet.
   *
   * `meta()` reports failures through an `error` field on its result rather
   * than by throwing, so such results are logged and skipped: the entry keeps
   * its original timestamp and stays eligible for a later retry instead of
   * having the error object stored as if it were metadata.
   */
  async expandUrls(): Promise<void> {
    await this.loadCache();
    for (const [url, entry] of Object.entries(this.cache)) {
      if (!entry.isValid || entry.meta) continue;
      try {
        const { error: fetchError, ...metaInfo } = await meta(url);
        if (fetchError !== undefined) {
          console.error(`Error expanding meta for ${url}:`, fetchError);
          continue;
        }
        entry.meta = metaInfo;
        entry.timestamp = Date.now(); // Reset expiry
      } catch (error) {
        console.error(`Error expanding meta for ${url}:`, error);
      }
    }
    await this.saveCache();
  }
}

export const urlCache = new UrlCache();

View File

@ -1,4 +1,4 @@
import { describe, it, expect, jest, afterAll } from '@jest/globals';
import { describe, it, expect, afterAll, beforeEach, vi } from 'vitest';
import { PuppeteerUrlChecker, FetchUrlChecker, checkUrl, UrlCheckResult, clean } from './url.js';
describe('URL Checker', () => {
@ -9,7 +9,7 @@ describe('URL Checker', () => {
const timeoutUrl = 'https://example.com/timeout';
// Increase timeout for real browser tests
jest.setTimeout(30000);
vi.setConfig({ testTimeout: 30000 });
// Clean up after all tests
afterAll(async () => {
@ -45,16 +45,16 @@ describe('URL Checker', () => {
describe('FetchUrlChecker', () => {
const checker = new FetchUrlChecker();
let mockFetch: jest.Mock;
let mockFetch: ReturnType<typeof vi.fn>;
beforeEach(() => {
mockFetch = jest.fn();
mockFetch = vi.fn();
global.fetch = mockFetch as unknown as typeof fetch;
});
afterAll(() => {
// Restore the original fetch
jest.restoreAllMocks();
vi.restoreAllMocks();
});
it('should validate a valid URL', async () => {
@ -84,7 +84,7 @@ describe('URL Checker', () => {
valid: false,
error: 'HTTP 404: Not Found'
});
});
});
it('should handle timeouts', async () => {
mockFetch.mockRejectedValue(new Error('Timeout'));

View File

@ -1,9 +1,39 @@
import puppeteer from 'puppeteer';
import { getLinkPreview } from 'link-preview-js';
/** TODOS
*/
/**
 * Shape this module expects from `getLinkPreview` (its result is cast to this
 * interface before the fields are read).
 */
interface LinkPreviewResult {
  // Always-present fields
  url: string;
  title: string;
  mediaType: string;
  images: string[];
  favicons: string[];
  videos: {
    url?: string;
    secureUrl?: string;
    type?: string;
    width?: string;
    height?: string;
  }[];

  // Fields that may be missing from a preview
  siteName?: string;
  description?: string;
  contentType?: string;
}
// Global browser instance cache: once set, getGlobalBrowser() returns this
// instance instead of creating a new one.
let globalBrowser: puppeteer.Browser | null = null;
// NOTE(review): presumably holds the in-flight browser launch so concurrent
// callers share it — its use is not visible in this section; confirm.
let browserInitPromise: Promise<puppeteer.Browser> | null = null;
// In-memory cache for meta data, keyed by URL. meta() reads it before doing
// any work and writes to it after a successful lookup.
const metaCache = new Map<string, {
// The MetaResult produced by meta() for this URL.
data: any;
// Epoch milliseconds at which the result was cached.
timestamp: number;
}>();
// Maximum age of a metaCache entry before meta() fetches the data again.
const CACHE_DURATION = 24 * 60 * 60 * 1000; // 24 hours in milliseconds
async function getGlobalBrowser(): Promise<puppeteer.Browser> {
if (globalBrowser) {
return globalBrowser;
@ -149,4 +179,53 @@ export const defaultChecker: UrlChecker = new PuppeteerUrlChecker();
/**
 * Convenience wrapper that checks a URL with the module's `defaultChecker`
 * (declared above as a `PuppeteerUrlChecker`).
 *
 * @param url - URL to check.
 * @param timeout - Optional timeout, passed through unchanged to the checker.
 * @returns Whatever `defaultChecker.check` resolves to.
 */
export async function checkUrl(url: string, timeout?: number): Promise<UrlCheckResult> {
return defaultChecker.check(url, timeout);
}
/**
 * Result of a `meta()` lookup. Every field is optional: a failed lookup
 * carries only `error`, a successful one carries whichever preview fields
 * were available.
 */
export interface MetaResult {
  /** Failure description; set instead of the preview fields when the lookup failed. */
  error?: string;

  /** Page title. */
  title?: string;
  /** Name of the site hosting the page. */
  siteName?: string;
  /** Page description. */
  description?: string;
  /** First preview image URL. */
  image?: string;
  /** First favicon URL. */
  favicon?: string;
}
/**
 * Fetches preview metadata (title, description, first image, first favicon,
 * site name) for a URL.
 *
 * Successful results are kept in the in-memory `metaCache` and reused for
 * `CACHE_DURATION`. Failures are never cached and never thrown: they are
 * reported through the `error` field of the returned object.
 *
 * @param url - URL to inspect.
 * @returns The metadata, or an object whose `error` is a non-empty message.
 */
export async function meta(url: string): Promise<MetaResult> {
  // Serve a fresh cache hit without touching the network.
  const cached = metaCache.get(url);
  if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
    return cached.data;
  }

  try {
    // Validate the URL before asking for a preview.
    const urlCheck = await checkUrl(url);
    if (!urlCheck.valid) {
      // Always return a message: an `{ error: undefined }` result would be
      // indistinguishable from an empty successful lookup for callers that
      // test `result.error`.
      return { error: urlCheck.error ?? `URL check failed for ${url}` };
    }

    const preview = await getLinkPreview(url) as LinkPreviewResult;
    // Empty strings and missing values are normalized to `undefined`.
    const result: MetaResult = {
      title: preview.title || undefined,
      description: preview.description || undefined,
      image: preview.images?.[0] || undefined,
      favicon: preview.favicons?.[0] || undefined,
      siteName: preview.siteName || undefined
    };

    metaCache.set(url, {
      data: result,
      timestamp: Date.now()
    });
    return result;
  } catch (error) {
    if (error instanceof Error) {
      return { error: error.message };
    }
    return { error: 'Unknown error occurred while fetching meta data' };
  }
}

View File

@ -22,7 +22,7 @@ export const I18N_ASSET_PATH = "${SRC_DIR}/${SRC_NAME}-${DST_LANG}${SRC_EXT}"
export const HOWTO_GLOB = '**/config.json'
export const FILES_WEB = 'https://files.polymech.io/files/machines/howtos/'
export const HOWTO_FILTER_LLM = true
export const HOWTO_FILTER_LLM = false
export const HOWTO_ANNOTATIONS = false
export const HOWTO_ANNOTATIONS_CACHE = false
export const HOWTO_COMPLETE_RESOURCES = true
@ -31,6 +31,7 @@ export const HOWTO_ADD_RESOURCES = false
export const HOWTO_ADD_REFERENCES = false
export const HOWTO_COMPLETE_SKILLS = false
export const HOWTO_LOCAL_RESOURCES = false
export const HOWTO_SEO_LLM = false
export const HOWTO_MIGRATION = () => path.resolve(resolve("./data/last.json"))
export const HOWTO_ROOT_INTERN = () => path.resolve(resolve("./public/resources/howtos"))

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
/** Test double for the config's `HOWTO_ROOT`: always yields a fixed fake path. */
export const HOWTO_ROOT = (): string => {
  return '/test/howto';
};

View File

@ -3,7 +3,8 @@ process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = '0';
export * from './howto-model.js'
import { HOWTO_ROOT } from "config/config.js";
import { filterMarkdownLinks } from "../base/markdown.js";
import { linkCache } from './link-cache.js';
import { urlCache } from '../base/url-cache.js';
import { meta } from '../base/url.js';
interface Item {
data: {
@ -85,8 +86,6 @@ export async function validateUrl(
const response = await fetch(url, {
signal: controller.signal,
redirect: 'follow',
// A more “real” set of headers:
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
@ -94,7 +93,6 @@ export async function validateUrl(
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive',
// Some sites also watch for these:
'Sec-Fetch-Site': 'none',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-User': '?1',
@ -104,11 +102,17 @@ export async function validateUrl(
if (!response.ok || response.status === 404) {
console.log(`URL ${url} is 404`, response);
await urlCache.set(url, false);
return false;
}
// Get meta information for valid URLs
const metaInfo = await meta(url);
await urlCache.set(url, true, metaInfo);
return true;
} catch (error) {
console.log(`Error validateUrl ${url}`, error);
await urlCache.set(url, false);
return false;
} finally {
clearTimeout(timer);
@ -169,9 +173,9 @@ export const validateLinks = async (text: string): Promise<string> => {
const [fullMatch, linkText, url] = match;
try {
// Check cache first
const cachedResult = await linkCache.get(url);
const cachedResult = await urlCache.get(url);
if (cachedResult !== null) {
if (!cachedResult) {
if (!cachedResult.isValid) {
processedText = processedText.replace(fullMatch, `~~[${linkText}](${url})~~`);
}
continue;
@ -180,7 +184,6 @@ export const validateLinks = async (text: string): Promise<string> => {
// Encode the URL to handle special characters
const encodedUrl = encodeURI(url);
const isValid = await validateUrl(encodedUrl);
await linkCache.set(url, isValid);
// Add strikethrough for invalid links while preserving the link
if (!isValid) {
@ -188,7 +191,7 @@ export const validateLinks = async (text: string): Promise<string> => {
}
} catch (error) {
// If there's an error checking the link, assume it's invalid
await linkCache.set(url, false);
await urlCache.set(url, false);
processedText = processedText.replace(fullMatch, `~~[${linkText}](${url})~~`);
}
}

View File

@ -1,80 +0,0 @@
import fs from 'fs/promises';
import path from 'path';
/** A single cached link check. */
interface CacheEntry {
  isValid: boolean;
  timestamp: number;
}

/** Cache contents: URL -> entry. */
interface CacheData {
  [url: string]: CacheEntry;
}

const CACHE_FILE = path.join(process.cwd(), '.cache', 'link-cache.json');
const CACHE_EXPIRY = 7 * 24 * 60 * 60 * 1000; // 1 week in milliseconds

/**
 * File-backed cache of link validity. Content is read lazily on first access
 * and written back after every change; entries expire after `CACHE_EXPIRY`.
 */
class LinkCache {
  private cache: CacheData = {};
  private initialized = false;

  /** Creates the cache directory when it cannot be accessed. */
  private async ensureCacheDir() {
    const cacheDir = path.dirname(CACHE_FILE);
    let accessible = true;
    try {
      await fs.access(cacheDir);
    } catch {
      accessible = false;
    }
    if (!accessible) {
      await fs.mkdir(cacheDir, { recursive: true });
    }
  }

  /** Loads the cache file once; unreadable or invalid content means an empty cache. */
  private async loadCache() {
    if (!this.initialized) {
      let loaded: CacheData;
      try {
        const raw = await fs.readFile(CACHE_FILE, 'utf-8');
        loaded = JSON.parse(raw);
      } catch (error) {
        // Missing or malformed file: begin with nothing cached.
        loaded = {};
      }
      this.cache = loaded;
      this.initialized = true;
    }
  }

  /** Writes the whole cache to disk, creating the directory first if needed. */
  private async saveCache() {
    await this.ensureCacheDir();
    const serialized = JSON.stringify(this.cache, null, 2);
    await fs.writeFile(CACHE_FILE, serialized);
  }

  /** Whether the entry's age exceeds `CACHE_EXPIRY`. */
  private isExpired(entry: CacheEntry): boolean {
    const age = Date.now() - entry.timestamp;
    return age > CACHE_EXPIRY;
  }

  /**
   * Returns the cached validity for `url`, or `null` when there is no entry
   * or the entry has expired (expired entries are removed and persisted).
   */
  async get(url: string): Promise<boolean | null> {
    await this.loadCache();
    const hit = this.cache[url];
    if (!hit) {
      return null;
    }
    if (!this.isExpired(hit)) {
      return hit.isValid;
    }
    delete this.cache[url];
    await this.saveCache();
    return null;
  }

  /** Records the validity of `url` with the current time and persists it. */
  async set(url: string, isValid: boolean): Promise<void> {
    await this.loadCache();
    const now = Date.now();
    this.cache[url] = { isValid, timestamp: now };
    await this.saveCache();
  }

  /** Drops every entry and persists the now-empty cache. */
  async clear(): Promise<void> {
    this.cache = {};
    await this.saveCache();
  }
}

export const linkCache = new LinkCache();

42
test/base/url.test.ts Normal file
View File

@ -0,0 +1,42 @@
import { describe, it, expect } from 'vitest';
import { meta } from '../../src/base/url.js';
// Live-network tests for `meta`: each case gets 30 s because it performs real
// requests.
describe('url.ts', () => {
  describe('meta', () => {
    const NETWORK_TIMEOUT = 30000;
    const productUrl = 'https://www.alibaba.com/product-detail/SJ25-SJ35-SJ45-SJ65-single-screw_1600600262552.html';
    const unreachableUrl = 'https://invalid-url-that-does-not-exist.com';

    it('should fetch meta data from a valid URL', async () => {
      const result = await meta(productUrl);

      expect(result).toBeDefined();
      expect(result.error).toBeUndefined();
      // Every preview field must be populated for a real product page.
      for (const field of ['title', 'description', 'image', 'siteName'] as const) {
        expect(result[field]).toBeDefined();
      }
    }, NETWORK_TIMEOUT);

    it('should handle invalid URLs', async () => {
      const result = await meta(unreachableUrl);

      expect(result).toBeDefined();
      expect(result.error).toBeDefined();
      // A failed lookup carries an error and none of the preview fields.
      for (const field of ['title', 'description', 'image', 'siteName'] as const) {
        expect(result[field]).toBeUndefined();
      }
    }, NETWORK_TIMEOUT);

    it('should use cache for subsequent requests', async () => {
      // The second call for the same URL must yield the same data as the first.
      const firstResult = await meta(productUrl);
      const secondResult = await meta(productUrl);

      expect(secondResult).toEqual(firstResult);
    }, NETWORK_TIMEOUT);
  });
});