From b324473b6509013dd599c08b97f655947b0e1a4e Mon Sep 17 00:00:00 2001 From: babayaga Date: Fri, 14 Mar 2025 08:06:06 +0100 Subject: [PATCH] mupdf implementation --- packages/content/ref/pdf-to-images/README.md | 113 -------- .../pdf-to-images/dist/commands/convert.js | 57 ++++ .../content/ref/pdf-to-images/dist/index.js | 7 + .../content/ref/pdf-to-images/dist/lib/pdf.js | 28 ++ .../content/ref/pdf-to-images/dist/types.js | 13 + .../ref/pdf-to-images/package-lock.json | 273 ++++++++++++++++++ .../content/ref/pdf-to-images/package.json | 49 +--- .../pdf-to-images/src/__tests__/index.test.ts | 203 ------------- packages/content/ref/pdf-to-images/src/cli.ts | 67 ----- .../ref/pdf-to-images/src/commands/convert.ts | 68 +++++ .../content/ref/pdf-to-images/src/index.ts | 70 +---- .../content/ref/pdf-to-images/src/lib/pdf.ts | 51 ++++ .../content/ref/pdf-to-images/src/types.ts | 29 +- .../content/ref/pdf-to-images/tsconfig.json | 11 +- .../content/ref/pdf-to-images/tsup.config.ts | 11 - .../ref/pdf-to-images/vitest.config.ts | 11 - 16 files changed, 537 insertions(+), 524 deletions(-) delete mode 100644 packages/content/ref/pdf-to-images/README.md create mode 100644 packages/content/ref/pdf-to-images/dist/commands/convert.js create mode 100644 packages/content/ref/pdf-to-images/dist/index.js create mode 100644 packages/content/ref/pdf-to-images/dist/lib/pdf.js create mode 100644 packages/content/ref/pdf-to-images/dist/types.js create mode 100644 packages/content/ref/pdf-to-images/package-lock.json delete mode 100644 packages/content/ref/pdf-to-images/src/__tests__/index.test.ts delete mode 100644 packages/content/ref/pdf-to-images/src/cli.ts create mode 100644 packages/content/ref/pdf-to-images/src/commands/convert.ts create mode 100644 packages/content/ref/pdf-to-images/src/lib/pdf.ts delete mode 100644 packages/content/ref/pdf-to-images/tsup.config.ts delete mode 100644 packages/content/ref/pdf-to-images/vitest.config.ts diff --git a/packages/content/ref/pdf-to-images/README.md b/packages/content/ref/pdf-to-images/README.md deleted file mode 100644 index 03ddb505..00000000 --- a/packages/content/ref/pdf-to-images/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# pdf-to-images - -A TypeScript CLI tool and library for converting PDF files to images (PNG/JPEG) with ESM support. - -## Features - -- Convert PDF files to PNG or JPEG images -- Select specific pages to convert -- Adjustable image quality -- ESM support -- TypeScript types included -- Command-line interface -- Programmatic API - -## Installation - -```bash -# Using pnpm (recommended) -pnpm add pdf-to-images - -# Using npm -npm install pdf-to-images - -# Using yarn -yarn add pdf-to-images -``` - -## CLI Usage - -```bash -# Convert all pages to PNG (default) -pdf-to-images convert --input input.pdf --output ./output - -# Convert specific pages to JPEG -pdf-to-images convert --input input.pdf --output ./output --format jpeg --pages "1,2,3" - -# Convert with custom quality -pdf-to-images convert --input input.pdf --output ./output --quality 80 -``` - -### CLI Options - -- `--input, -i`: Input PDF file path (required) -- `--output, -o`: Output directory path (required) -- `--format, -f`: Output format ('png' or 'jpeg', default: 'png') -- `--quality, -q`: Output image quality (1-100, default: 90) -- `--pages, -p`: Pages to convert ("all" or comma-separated page numbers, default: "all") - -## Programmatic Usage - -```typescript -import { convertPdfToImages } from 'pdf-to-images'; - -async function example() { - // Convert all pages to PNG - const files = await convertPdfToImages({ - inputPath: 'input.pdf', - outputDir: './output', - format: 'png', - quality: 90, - pages: 'all' - }); - - console.log('Generated files:', files); - - // Convert specific pages to JPEG - const specificPages = await convertPdfToImages({ - inputPath: 'input.pdf', - outputDir: './output', - format: 'jpeg', - quality: 85, - pages: [0, 2, 4] // Convert pages 1, 3, and 5 (0-based indexing) - }); - - console.log('Generated files:', specificPages); -} -``` - -### API Options - -```typescript -interface ConversionOptions { - inputPath: string; // Path to input PDF file - outputDir: string; // Path to output directory - format?: 'png' | 'jpeg'; // Output format (default: 'png') - quality?: number; // Output quality 1-100 (default: 90) - pages?: number[] | 'all'; // Pages to convert (default: 'all') -} -``` - -## Development - -```bash -# Install dependencies -pnpm install - -# Build the project -pnpm build - -# Run tests -pnpm test - -# Run tests with coverage -pnpm test:coverage - -# Watch mode during development -pnpm dev -``` - -## License - -ISC - diff --git a/packages/content/ref/pdf-to-images/dist/commands/convert.js b/packages/content/ref/pdf-to-images/dist/commands/convert.js new file mode 100644 index 00000000..98df0e7d --- /dev/null +++ b/packages/content/ref/pdf-to-images/dist/commands/convert.js @@ -0,0 +1,57 @@ +import { Logger } from 'tslog'; +import { ConvertCommandSchema } from '../types.js'; +import { convertPdfToImages } from '../lib/pdf.js'; +import { existsSync } from 'node:fs'; +import { dirname } from 'node:path'; +import { mkdir, readFile } from 'node:fs/promises'; +export const command = 'convert'; +export const desc = 'Convert PDF to images'; +export const builder = { + input: { + alias: 'i', + type: 'string', + description: 'Input PDF file', + demandOption: true + }, + output: { + alias: 'o', + type: 'string', + description: 'Output directory for images', + demandOption: true + }, + dpi: { + type: 'number', + description: 'DPI for output images', + default: 300 + }, + format: { + type: 'string', + choices: ['png', 'jpg'], + default: 'png', + description: 'Output image format' + } +}; +export async function handler(argv) { + const logger = new Logger(); + try { + const config = ConvertCommandSchema.parse(argv); + if (!existsSync(config.input)) { + throw new Error(`Input file ${config.input} does not exist`); + } + await mkdir(dirname(config.output), { recursive: true }); + logger.info(`Converting PDF ${config.input} to images...`); + const pdfData = await readFile(config.input); + const outputFiles = await convertPdfToImages(pdfData, { + outputPathPrefix: config.output, + dpi: config.dpi, + format: config.format, + logger + }); + logger.info('Conversion completed successfully'); + logger.info(`Generated ${outputFiles.length} images`); + } + catch (error) { + logger.error('Error during conversion:', error); + process.exit(1); + } +} diff --git a/packages/content/ref/pdf-to-images/dist/index.js b/packages/content/ref/pdf-to-images/dist/index.js new file mode 100644 index 00000000..b0b3fbef --- /dev/null +++ b/packages/content/ref/pdf-to-images/dist/index.js @@ -0,0 +1,7 @@ +import yargs from 'yargs'; +import { hideBin } from 'yargs/helpers'; +yargs(hideBin(process.argv)) + .command(require('./commands/convert.js')) + .demandCommand(1, 'You need to specify a command') + .strict() + .argv; diff --git a/packages/content/ref/pdf-to-images/dist/lib/pdf.js b/packages/content/ref/pdf-to-images/dist/lib/pdf.js new file mode 100644 index 00000000..1615cf0c --- /dev/null +++ b/packages/content/ref/pdf-to-images/dist/lib/pdf.js @@ -0,0 +1,28 @@ +import * as mupdf from 'mupdf'; +import { Logger } from 'tslog'; +import { writeFile } from 'node:fs/promises'; +export async function convertPdfToImages(pdfData, options) { + const logger = options.logger || new Logger(); + const outputFiles = []; + try { + const doc = mupdf.Document.openDocument(pdfData, 'pdf'); + const pageCount = doc.countPages(); + logger.info(`Processing ${pageCount} pages`); + for (let i = 0; i < pageCount; i++) { + const page = doc.loadPage(i); + const pixmap = page.toPixmap([1, 0, 0, 1, 0, 0], mupdf.ColorSpace.DeviceRGB, false); + const outputPath = `${options.outputPathPrefix}_${i + 1}.${options.format}`; + const imageData = options.format === 'png' + ? pixmap.asPNG() + : pixmap.asJPEG(90, false); + await writeFile(outputPath, imageData); + outputFiles.push(outputPath); + logger.info(`Converted page ${i + 1} to ${outputPath}`); + } + return outputFiles; + } + catch (error) { + logger.error('Error converting PDF to images:', error); + throw error; + } +} diff --git a/packages/content/ref/pdf-to-images/dist/types.js b/packages/content/ref/pdf-to-images/dist/types.js new file mode 100644 index 00000000..82dab42d --- /dev/null +++ b/packages/content/ref/pdf-to-images/dist/types.js @@ -0,0 +1,13 @@ +import { z } from 'zod'; +export const ConvertCommandSchema = z.object({ + input: z.string(), + output: z.string(), + dpi: z.number().default(300), + format: z.enum(['png', 'jpg']).default('png') +}); +export const ConfigSchema = z.object({ + input: z.string().min(1), + output: z.string().min(1), + dpi: z.number().int().positive().default(300), + format: z.enum(['png', 'jpg']).default('png') +}); diff --git a/packages/content/ref/pdf-to-images/package-lock.json b/packages/content/ref/pdf-to-images/package-lock.json new file mode 100644 index 00000000..31228b34 --- /dev/null +++ b/packages/content/ref/pdf-to-images/package-lock.json @@ -0,0 +1,273 @@ +{ + "name": "mu", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "mu", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "@types/yargs": "^17.0.33", + "mupdf": "^1.3.3", + "tslog": "^4.9.3", + "typescript": "^5.8.2", + "yargs": "^17.7.2", + "zod": "^3.24.2" + }, + "devDependencies": { + "@types/node": "^22.13.10" + } + }, + "node_modules/@types/node": { + "version": "22.13.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.10.tgz", + "integrity": "sha512-I6LPUvlRH+O6VRUqYOcMudhaIdUVWfsjnZavnsraHvpBwaEyMN29ry+0UVJhImYL16xsscu0aske3yA+uPOWfw==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.20.0" + } + }, + "node_modules/@types/yargs": { + "version": "17.0.33", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz", + "integrity": "sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==", + "license": "MIT", + "dependencies": { + "@types/yargs-parser": "*" + } + }, + "node_modules/@types/yargs-parser": { + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", + "license": "MIT" + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "license": "MIT" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/mupdf": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/mupdf/-/mupdf-1.3.3.tgz", + "integrity": "sha512-uS/uqQZ1+3zSkaL5ngauT98o5gIIRtQPW54vYTVlqBYS0tho9TrjHr0RmAUJlp/XTJyghNhDUjD7l++EZkMyyA==", + "license": "AGPL-3.0-or-later" + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/tslog": { + "version": "4.9.3", + "resolved": "https://registry.npmjs.org/tslog/-/tslog-4.9.3.tgz", + "integrity": "sha512-oDWuGVONxhVEBtschLf2cs/Jy8i7h1T+CpdkTNWQgdAF7DhRo2G8vMCgILKe7ojdEkLhICWgI1LYSSKaJsRgcw==", + "license": "MIT", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/fullstack-build/tslog?sponsor=1" + } + }, + "node_modules/typescript": { + "version": "5.8.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.2.tgz", + "integrity": "sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==", + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true, + "license": "MIT" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/zod": { + "version": "3.24.2", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz", + "integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/packages/content/ref/pdf-to-images/package.json b/packages/content/ref/pdf-to-images/package.json index 52c44082..fd1b5fce 100644 --- a/packages/content/ref/pdf-to-images/package.json +++ b/packages/content/ref/pdf-to-images/package.json @@ -1,52 +1,25 @@ { - "name": "pdf-to-images", + "name": "mu", "version": "1.0.0", - "description": "CLI tool to convert PDF files to images", - "type": "module", - "main": "./dist/index.js", - "types": "./dist/index.d.ts", - "bin": { - "pdf-to-images": "./dist/cli.js" - }, - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js" - } - }, + "description": "", + "main": "dist/index.js", "scripts": { - "build": "tsup", - "dev": "tsx watch src/cli.ts", - "start": "node dist/cli.js", - "test": "vitest run", - "test:watch": "vitest", - "test:coverage": "vitest run --coverage" + "build": "tsc", + "start": "node dist/index.js" }, - "keywords": [ - "pdf", - "images", - "conversion", - "cli" - ], + "keywords": [], "author": "", "license": "ISC", + "type": "module", "dependencies": { - "canvas": "^3.1.0", - "pdf-img-convert": "^2.0.0", - "sharp": "^0.33.2", + "@types/yargs": "^17.0.33", + "mupdf": "^1.3.3", "tslog": "^4.9.3", + "typescript": "^5.8.2", "yargs": "^17.7.2", "zod": "^3.24.2" }, "devDependencies": { - "@types/node": "^22.13.10", - "@types/yargs": "^17.0.33", - "@typescript-eslint/eslint-plugin": "^6.21.0", - "@typescript-eslint/parser": "^6.21.0", - "@vitest/coverage-v8": "^2.1.9", - "tsup": "^8.4.0", - "tsx": "^4.19.3", - "typescript": "^5.8.2", - "vitest": "^3.0.8" + "@types/node": "^22.13.10" } } diff --git a/packages/content/ref/pdf-to-images/src/__tests__/index.test.ts b/packages/content/ref/pdf-to-images/src/__tests__/index.test.ts deleted file mode 100644 index aec817f7..00000000 --- a/packages/content/ref/pdf-to-images/src/__tests__/index.test.ts +++ /dev/null @@ -1,203 +0,0 @@ -import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest'; -import { convertPdfToImages } from '../index.js'; -import { ConversionOptionsSchema } from '../types.js'; -import { mkdtemp, writeFile, rm, mkdir } from 'fs/promises'; -import { join, dirname, resolve } from 'path'; -import { existsSync } from 'fs'; - -// No mocks for sharp or pdf-img-convert as we want to test real file conversion - -describe('convertPdfToImages', () => { - let tempDir: string; - const realPdfPath = resolve(__dirname, '../../tests/ifb.pdf'); - - beforeAll(async () => { - // Verify the test PDF file exists before running tests - if (!existsSync(realPdfPath)) { - throw new Error(`Test PDF file not found: ${realPdfPath}. Make sure it exists before running tests.`); - } - - // Create a temporary directory for tests under ./tests/.temp - const testsDir = resolve(__dirname, '../../tests'); - tempDir = resolve(testsDir, '.temp'); - - // Create the temp directory if it doesn't exist - if (!existsSync(tempDir)) { - await mkdir(tempDir, { recursive: true }); - } - }); - - afterAll(async () => { - // Cleanup temporary directory and files - if (tempDir && existsSync(tempDir)) { - try { - await rm(tempDir, { recursive: true, force: true }); - } catch (error) { - console.error(`Error cleaning up temporary directory: ${error}`); - } - } - }); - - describe('options validation', () => { - it('should validate basic conversion options', () => { - const validOptions = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 90, - pages: 'all', - }; - - expect(() => ConversionOptionsSchema.parse(validOptions)).not.toThrow(); - }); - - it('should validate JPEG format', () => { - const jpegOptions = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'jpeg', - quality: 90, - pages: 'all', - }; - - expect(() => ConversionOptionsSchema.parse(jpegOptions)).not.toThrow(); - }); - - it('should validate specific page selection', () => { - const pageOptions = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 90, - pages: [1, 2, 3], - }; - - expect(() => ConversionOptionsSchema.parse(pageOptions)).not.toThrow(); - }); - - it('should validate quality range', () => { - // Minimum quality - const minQuality = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 1, - pages: 'all', - }; - expect(() => ConversionOptionsSchema.parse(minQuality)).not.toThrow(); - - // Maximum quality - const maxQuality = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 100, - pages: 'all', - }; - expect(() => ConversionOptionsSchema.parse(maxQuality)).not.toThrow(); - }); - - it('should throw on invalid input path', () => { - const invalidPath = { - inputPath: '', - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 90, - pages: 'all', - } as any; - - expect(() => ConversionOptionsSchema.parse(invalidPath)).toThrow(); - }); - - it('should throw on invalid format', () => { - const invalidFormat = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'gif', // Not supported - quality: 90, - pages: 'all', - } as any; - - expect(() => ConversionOptionsSchema.parse(invalidFormat)).toThrow(); - }); - - it('should throw on invalid quality value', () => { - const tooLowQuality = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 0, - pages: 'all', - } as any; - - const tooHighQuality = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 101, - pages: 'all', - } as any; - - expect(() => ConversionOptionsSchema.parse(tooLowQuality)).toThrow(); - expect(() => ConversionOptionsSchema.parse(tooHighQuality)).toThrow(); - }); - - it('should throw on invalid pages format', () => { - const invalidPages = { - inputPath: realPdfPath, - outputDir: resolve(tempDir, 'output'), - format: 'png', - quality: 90, - pages: 'some', // Not 'all' or array - } as any; - - expect(() => ConversionOptionsSchema.parse(invalidPages)).toThrow(); - }); - }); - - describe('real PDF file conversion', () => { - it('should convert a real PDF file to PNG images', async () => { - const pngOutputDir = resolve(tempDir, 'output-ifb-png'); - const pngOptions = { - inputPath: realPdfPath, - outputDir: pngOutputDir, - format: 'png', - quality: 90, - pages: 'all', - }; - - const pngOutputFiles = await convertPdfToImages(pngOptions); - - // Check number of files and naming - expect(pngOutputFiles.length).toBeGreaterThan(0); - expect(pngOutputFiles[0]).toBe(resolve(pngOutputDir, 'page-1.png')); - - // Check if files actually exist - for (const file of pngOutputFiles) { - expect(existsSync(file)).toBe(true, `Expected ${file} to exist`); - } - }); - - it('should convert a real PDF file to JPEG images', async () => { - const jpegOutputDir = resolve(tempDir, 'output-ifb-jpeg'); - const jpegOptions = { - inputPath: realPdfPath, - outputDir: jpegOutputDir, - format: 'jpeg', - quality: 90, - pages: 'all', - }; - - const jpegOutputFiles = await convertPdfToImages(jpegOptions); - - // Check number of files and naming - expect(jpegOutputFiles.length).toBeGreaterThan(0); - expect(jpegOutputFiles[0]).toBe(resolve(jpegOutputDir, 'page-1.jpeg')); - - // Check if files actually exist - for (const file of jpegOutputFiles) { - expect(existsSync(file)).toBe(true, `Expected ${file} to exist`); - } - }); - }); -}); diff --git a/packages/content/ref/pdf-to-images/src/cli.ts b/packages/content/ref/pdf-to-images/src/cli.ts deleted file mode 100644 index d3d43a19..00000000 --- a/packages/content/ref/pdf-to-images/src/cli.ts +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env node -import yargs from 'yargs'; -import { hideBin } from 'yargs/helpers'; -import { Logger } from 'tslog'; -import { convertPdfToImages } from './index.js'; -import { ConversionOptionsSchema } from './types.js'; - -const logger = new Logger(); - -yargs(hideBin(process.argv)) - .command('convert', 'Convert PDF to images', { - input: { - alias: 'i', - type: 'string', - description: 'Input PDF file path', - demandOption: true, - }, - output: { - alias: 'o', - type: 'string', - description: 'Output directory path', - demandOption: true, - }, - format: { - alias: 'f', - type: 'string', - choices: ['png', 'jpeg'], - default: 'png', - description: 'Output image format', - }, - quality: { - alias: 'q', - type: 'number', - description: 'Output image quality (1-100)', - default: 90, - }, - pages: { - alias: 'p', - type: 'string', - description: 'Pages to convert (e.g., "1,2,3" or "all")', - default: 'all', - }, - }, async (argv) => { - try { - const pages = argv.pages === 'all' - ? 'all' - : argv.pages.split(',').map(p => parseInt(p.trim(), 10) - 1); - - const options = ConversionOptionsSchema.parse({ - inputPath: argv.input, - outputDir: argv.output, - format: argv.format, - quality: argv.quality, - pages, - }); - - const outputFiles = await convertPdfToImages(options); - logger.info(`Successfully converted ${outputFiles.length} pages`); - logger.info('Output files:', outputFiles); - } catch (error) { - logger.error('Error:', error); - process.exit(1); - } - }) - .help() - .argv; - diff --git a/packages/content/ref/pdf-to-images/src/commands/convert.ts b/packages/content/ref/pdf-to-images/src/commands/convert.ts new file mode 100644 index 00000000..06ad8b37 --- /dev/null +++ b/packages/content/ref/pdf-to-images/src/commands/convert.ts @@ -0,0 +1,68 @@ + +import { Arguments } from 'yargs'; +import { Logger } from 'tslog'; +import { ConvertCommandSchema, ConvertCommandConfig } from '../types.js'; +import { convertPdfToImages } from '../lib/pdf.js'; +import { existsSync } from 'node:fs'; +import { dirname } from 'node:path'; +import { mkdir, readFile } from 'node:fs/promises'; + +export const command = 'convert'; +export const desc = 'Convert PDF to images'; + +export const builder = { + input: { + alias: 'i', + type: 'string', + description: 'Input PDF file', + demandOption: true + }, + output: { + alias: 'o', + type: 'string', + description: 'Output directory for images', + demandOption: true + }, + dpi: { + type: 'number', + description: 'DPI for output images', + default: 300 + }, + format: { + type: 'string', + choices: ['png', 'jpg'], + default: 'png', + description: 'Output image format' + } +}; + +export async function handler(argv: Arguments): Promise { + const logger = new Logger(); + + try { + const config = ConvertCommandSchema.parse(argv); + + if (!existsSync(config.input)) { + throw new Error(`Input file ${config.input} does not exist`); + } + + await mkdir(dirname(config.output), { recursive: true }); + + logger.info(`Converting PDF ${config.input} to images...`); + + const pdfData = await readFile(config.input); + const outputFiles = await convertPdfToImages(pdfData, { + outputPathPrefix: config.output, + dpi: config.dpi, + format: config.format, + logger + }); + + logger.info('Conversion completed successfully'); + logger.info(`Generated ${outputFiles.length} images`); + } catch (error) { + logger.error('Error during conversion:', error); + process.exit(1); + } +} + diff --git a/packages/content/ref/pdf-to-images/src/index.ts b/packages/content/ref/pdf-to-images/src/index.ts index 6bc1a8b4..109c6cc9 100644 --- a/packages/content/ref/pdf-to-images/src/index.ts +++ b/packages/content/ref/pdf-to-images/src/index.ts @@ -1,64 +1,8 @@ -import { convert } from 'pdf-img-convert'; -import { mkdir } from 'fs/promises'; -import { join } from 'path'; -import { Logger } from 'tslog'; -import sharp from 'sharp'; -import { ConversionOptions } from './types.js'; - -const logger = new Logger(); - -async function saveImage(imageData: string, outputPath: string, format: 'png' | 'jpeg', quality: number): Promise { - const base64Data = imageData.replace(/^data:image\/(png|jpeg);base64,/, ''); - const buffer = Buffer.from(base64Data, 'base64'); - - const sharpImage = sharp(buffer); - if (format === 'jpeg') { - await sharpImage - .jpeg({ quality }) - .toFile(outputPath); - } else { - await sharpImage - .png() - .toFile(outputPath); - } -} -export async function convertPdfToImages(options: ConversionOptions): Promise { - const { inputPath, outputDir, format, quality, pages } = options; - - try { - // Ensure output directory exists - await mkdir(outputDir, { recursive: true }); - - // Convert PDF to images - const pdfPages = await convert(inputPath, { - width: 2048, // Reasonable default width - height: 2048, // Maintain aspect ratio - quality: quality / 100, // Convert 1-100 range to 0-1 - }); - const pagesToProcess = pages === 'all' - ? Array.from({ length: pdfPages.length }, (_, i) => i) - : pages; - - const outputFiles: string[] = []; - - for (const pageNum of pagesToProcess) { - if (pageNum >= pdfPages.length) { - logger.warn(`Page ${pageNum + 1} does not exist in the PDF`); - continue; - } - - const outputPath = join(outputDir, `page-${pageNum + 1}.${format}`); - await saveImage(pdfPages[pageNum], outputPath, format, quality); - logger.info(`Saved page ${pageNum + 1} to ${outputPath}`); - outputFiles.push(outputPath); - } - - return outputFiles; - } catch (error) { - logger.error('Error converting PDF to images:', error); - throw error; - } -} - -export { ConversionOptions, ConversionOptionsSchema } from './types.js'; +import yargs from 'yargs'; +import { hideBin } from 'yargs/helpers'; +yargs(hideBin(process.argv)) + .command(require('./commands/convert.js')) + .demandCommand(1, 'You need to specify a command') + .strict() + .argv; diff --git a/packages/content/ref/pdf-to-images/src/lib/pdf.ts b/packages/content/ref/pdf-to-images/src/lib/pdf.ts new file mode 100644 index 00000000..6b78a510 --- /dev/null +++ b/packages/content/ref/pdf-to-images/src/lib/pdf.ts @@ -0,0 +1,51 @@ +import * as mupdf from 'mupdf'; +import { Logger } from 'tslog'; +import { writeFile } from 'node:fs/promises'; + +export type ImageFormat = 'png' | 'jpg'; + +export interface PdfToImageOptions { + outputPathPrefix: string; + dpi: number; + format: ImageFormat; + logger?: Logger; +} + +export async function convertPdfToImages( + pdfData: Buffer, + options: PdfToImageOptions +): Promise { + const logger = options.logger || new Logger (); + const outputFiles: string[] = []; + + try { + const doc = mupdf.Document.openDocument(pdfData, 'pdf'); + const pageCount = doc.countPages(); + + logger.info(`Processing ${pageCount} pages`); + + for (let i = 0; i < pageCount; i++) { + const page = doc.loadPage(i); + const pixmap = page.toPixmap( + [1, 0, 0, 1, 0, 0], + mupdf.ColorSpace.DeviceRGB, + false + ); + + const outputPath = `${options.outputPathPrefix}_${i + 1}.${options.format}`; + const imageData = options.format === 'png' + ? pixmap.asPNG() + : pixmap.asJPEG(90, false); + + await writeFile(outputPath, imageData); + outputFiles.push(outputPath); + logger.info(`Converted page ${i + 1} to ${outputPath}`); + } + + return outputFiles; + } catch (error) { + logger.error('Error converting PDF to images:', error); + throw error; + } +} + diff --git a/packages/content/ref/pdf-to-images/src/types.ts b/packages/content/ref/pdf-to-images/src/types.ts index 5334bf70..6dc21045 100644 --- a/packages/content/ref/pdf-to-images/src/types.ts +++ b/packages/content/ref/pdf-to-images/src/types.ts @@ -1,15 +1,22 @@ -import { z } from 'zod'; -export const ConversionOptionsSchema = z.object({ - inputPath: z.string(), - outputDir: z.string(), - format: z.enum(['png', 'jpeg']).default('png'), - quality: z.number().min(1).max(100).default(90), - pages: z.union([ - z.array(z.number()), - z.literal('all') - ]).default('all'), +import { z } from 'zod'; +import type { ImageFormat } from './lib/pdf.js'; + +export const ConvertCommandSchema = z.object({ + input: z.string(), + output: z.string(), + dpi: z.number().default(300), + format: z.enum(['png', 'jpg'] as const).default('png') }); -export type ConversionOptions = z.infer; +export type ConvertCommandConfig = z.infer; + +export const ConfigSchema = z.object({ + input: z.string().min(1), + output: z.string().min(1), + dpi: z.number().int().positive().default(300), + format: z.enum(['png', 'jpg']).default('png') +}); + +export type Config = z.infer; diff --git a/packages/content/ref/pdf-to-images/tsconfig.json b/packages/content/ref/pdf-to-images/tsconfig.json index 8ea82c9d..2abf6975 100644 --- a/packages/content/ref/pdf-to-images/tsconfig.json +++ b/packages/content/ref/pdf-to-images/tsconfig.json @@ -3,16 +3,13 @@ "target": "ES2022", "module": "NodeNext", "moduleResolution": "NodeNext", - "lib": ["ES2022"], - "outDir": "dist", - "rootDir": "src", + "outDir": "./dist", + "rootDir": "./src", "strict": true, "esModuleInterop": true, "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "declaration": true + "forceConsistentCasingInFileNames": true }, "include": ["src/**/*"], - "exclude": ["node_modules", "dist", "**/*.test.ts"] + "exclude": ["node_modules", "dist"] } - diff --git a/packages/content/ref/pdf-to-images/tsup.config.ts b/packages/content/ref/pdf-to-images/tsup.config.ts deleted file mode 100644 index dd623643..00000000 --- a/packages/content/ref/pdf-to-images/tsup.config.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { defineConfig } from 'tsup'; - -export default defineConfig({ - entry: ['src/index.ts', 'src/cli.ts'], - format: ['esm'], - dts: true, - splitting: false, - sourcemap: true, - clean: true, -}); - diff --git a/packages/content/ref/pdf-to-images/vitest.config.ts b/packages/content/ref/pdf-to-images/vitest.config.ts deleted file mode 100644 index eab961f1..00000000 --- a/packages/content/ref/pdf-to-images/vitest.config.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { defineConfig } from 'vitest/config'; - -export default defineConfig({ - test: { - coverage: { - provider: 'v8', - reporter: ['text', 'json', 'html'], - }, - }, -}); -