diff --git a/packages/content/ref/pdf-to-images/README.md b/packages/content/ref/pdf-to-images/README.md new file mode 100644 index 00000000..03ddb505 --- /dev/null +++ b/packages/content/ref/pdf-to-images/README.md @@ -0,0 +1,113 @@ +# pdf-to-images + +A TypeScript CLI tool and library for converting PDF files to images (PNG/JPEG) with ESM support. + +## Features + +- Convert PDF files to PNG or JPEG images +- Select specific pages to convert +- Adjustable image quality +- ESM support +- TypeScript types included +- Command-line interface +- Programmatic API + +## Installation + +```bash +# Using pnpm (recommended) +pnpm add pdf-to-images + +# Using npm +npm install pdf-to-images + +# Using yarn +yarn add pdf-to-images +``` + +## CLI Usage + +```bash +# Convert all pages to PNG (default) +pdf-to-images convert --input input.pdf --output ./output + +# Convert specific pages to JPEG +pdf-to-images convert --input input.pdf --output ./output --format jpeg --pages "1,2,3" + +# Convert with custom quality +pdf-to-images convert --input input.pdf --output ./output --quality 80 +``` + +### CLI Options + +- `--input, -i`: Input PDF file path (required) +- `--output, -o`: Output directory path (required) +- `--format, -f`: Output format ('png' or 'jpeg', default: 'png') +- `--quality, -q`: Output image quality (1-100, default: 90) +- `--pages, -p`: Pages to convert ("all" or comma-separated 1-based page numbers, e.g. "1,3,5"; default: "all"). Note that the programmatic API below uses 0-based page indices instead. + +## Programmatic Usage + +```typescript +import { convertPdfToImages } from 'pdf-to-images'; + +async function example() { + // Convert all pages to PNG + const files = await convertPdfToImages({ + inputPath: 'input.pdf', + outputDir: './output', + format: 'png', + quality: 90, + pages: 'all' + }); + + console.log('Generated files:', files); + + // Convert specific pages to JPEG + const specificPages = await convertPdfToImages({ + inputPath: 'input.pdf', + outputDir: './output', + format: 'jpeg', + quality: 85, + pages: [0, 2, 4] // Convert pages 1, 3, 
and 5 (0-based indexing) + }); + + console.log('Generated files:', specificPages); +} +``` + +### API Options + +```typescript +interface ConversionOptions { + inputPath: string; // Path to input PDF file + outputDir: string; // Path to output directory + format?: 'png' | 'jpeg'; // Output format (default: 'png') + quality?: number; // Output quality 1-100 (default: 90) + pages?: number[] | 'all'; // Pages to convert (default: 'all') +} +``` + +## Development + +```bash +# Install dependencies +pnpm install + +# Build the project +pnpm build + +# Run tests +pnpm test + +# Run tests with coverage +pnpm test:coverage + +# Watch mode during development +pnpm dev +``` + +## License + +ISC + diff --git a/packages/content/ref/pdf-to-images/package.json b/packages/content/ref/pdf-to-images/package.json new file mode 100644 index 00000000..52c44082 --- /dev/null +++ b/packages/content/ref/pdf-to-images/package.json @@ -0,0 +1,52 @@ +{ + "name": "pdf-to-images", + "version": "1.0.0", + "description": "CLI tool to convert PDF files to images", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "bin": { + "pdf-to-images": "./dist/cli.js" + }, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "scripts": { + "build": "tsup", + "dev": "tsx watch src/cli.ts", + "start": "node dist/cli.js", + "test": "vitest run", + "test:watch": "vitest", + "test:coverage": "vitest run --coverage" + }, + "keywords": [ + "pdf", + "images", + "conversion", + "cli" + ], + "author": "", + "license": "ISC", + "dependencies": { + "canvas": "^3.1.0", + "pdf-img-convert": "^2.0.0", + "sharp": "^0.33.2", + "tslog": "^4.9.3", + "yargs": "^17.7.2", + "zod": "^3.24.2" + }, + "devDependencies": { + "@types/node": "^22.13.10", + "@types/yargs": "^17.0.33", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "@typescript-eslint/parser": "^6.21.0", + "@vitest/coverage-v8": "^2.1.9", + "tsup": "^8.4.0", + "tsx": "^4.19.3", + 
"typescript": "^5.8.2", + "vitest": "^3.0.8" + } +} diff --git a/packages/content/ref/pdf-to-images/src/__tests__/index.test.ts b/packages/content/ref/pdf-to-images/src/__tests__/index.test.ts new file mode 100644 index 00000000..aec817f7 --- /dev/null +++ b/packages/content/ref/pdf-to-images/src/__tests__/index.test.ts @@ -0,0 +1,203 @@ +import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest'; +import { convertPdfToImages } from '../index.js'; +import { ConversionOptionsSchema } from '../types.js'; +import { mkdtemp, writeFile, rm, mkdir } from 'fs/promises'; +import { join, dirname, resolve } from 'path'; +import { existsSync } from 'fs'; + +// No mocks for sharp or pdf-img-convert as we want to test real file conversion + +describe('convertPdfToImages', () => { + let tempDir: string; + const realPdfPath = resolve(__dirname, '../../tests/ifb.pdf'); + + beforeAll(async () => { + // Verify the test PDF file exists before running tests + if (!existsSync(realPdfPath)) { + throw new Error(`Test PDF file not found: ${realPdfPath}. 
Make sure it exists before running tests.`); + } + + // Create a temporary directory for tests under ./tests/.temp + const testsDir = resolve(__dirname, '../../tests'); + tempDir = resolve(testsDir, '.temp'); + + // Create the temp directory if it doesn't exist + if (!existsSync(tempDir)) { + await mkdir(tempDir, { recursive: true }); + } + }); + + afterAll(async () => { + // Cleanup temporary directory and files + if (tempDir && existsSync(tempDir)) { + try { + await rm(tempDir, { recursive: true, force: true }); + } catch (error) { + console.error(`Error cleaning up temporary directory: ${error}`); + } + } + }); + + describe('options validation', () => { + it('should validate basic conversion options', () => { + const validOptions = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 90, + pages: 'all', + }; + + expect(() => ConversionOptionsSchema.parse(validOptions)).not.toThrow(); + }); + + it('should validate JPEG format', () => { + const jpegOptions = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'jpeg', + quality: 90, + pages: 'all', + }; + + expect(() => ConversionOptionsSchema.parse(jpegOptions)).not.toThrow(); + }); + + it('should validate specific page selection', () => { + const pageOptions = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 90, + pages: [1, 2, 3], + }; + + expect(() => ConversionOptionsSchema.parse(pageOptions)).not.toThrow(); + }); + + it('should validate quality range', () => { + // Minimum quality + const minQuality = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 1, + pages: 'all', + }; + expect(() => ConversionOptionsSchema.parse(minQuality)).not.toThrow(); + + // Maximum quality + const maxQuality = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 100, + pages: 'all', + }; + expect(() => 
ConversionOptionsSchema.parse(maxQuality)).not.toThrow(); + }); + + it('should throw on invalid input path', () => { + const invalidPath = { + inputPath: '', + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 90, + pages: 'all', + } as any; + + expect(() => ConversionOptionsSchema.parse(invalidPath)).toThrow(); + }); + + it('should throw on invalid format', () => { + const invalidFormat = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'gif', // Not supported + quality: 90, + pages: 'all', + } as any; + + expect(() => ConversionOptionsSchema.parse(invalidFormat)).toThrow(); + }); + + it('should throw on invalid quality value', () => { + const tooLowQuality = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 0, + pages: 'all', + } as any; + + const tooHighQuality = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 101, + pages: 'all', + } as any; + + expect(() => ConversionOptionsSchema.parse(tooLowQuality)).toThrow(); + expect(() => ConversionOptionsSchema.parse(tooHighQuality)).toThrow(); + }); + + it('should throw on invalid pages format', () => { + const invalidPages = { + inputPath: realPdfPath, + outputDir: resolve(tempDir, 'output'), + format: 'png', + quality: 90, + pages: 'some', // Not 'all' or array + } as any; + + expect(() => ConversionOptionsSchema.parse(invalidPages)).toThrow(); + }); + }); + + describe('real PDF file conversion', () => { + it('should convert a real PDF file to PNG images', async () => { + const pngOutputDir = resolve(tempDir, 'output-ifb-png'); + const pngOptions = { + inputPath: realPdfPath, + outputDir: pngOutputDir, + format: 'png', + quality: 90, + pages: 'all', + }; + + const pngOutputFiles = await convertPdfToImages(pngOptions); + + // Check number of files and naming + expect(pngOutputFiles.length).toBeGreaterThan(0); + expect(pngOutputFiles[0]).toBe(resolve(pngOutputDir, 
'page-1.png')); + + // Check if files actually exist + for (const file of pngOutputFiles) { + expect(existsSync(file)).toBe(true, `Expected ${file} to exist`); + } + }); + + it('should convert a real PDF file to JPEG images', async () => { + const jpegOutputDir = resolve(tempDir, 'output-ifb-jpeg'); + const jpegOptions = { + inputPath: realPdfPath, + outputDir: jpegOutputDir, + format: 'jpeg', + quality: 90, + pages: 'all', + }; + + const jpegOutputFiles = await convertPdfToImages(jpegOptions); + + // Check number of files and naming + expect(jpegOutputFiles.length).toBeGreaterThan(0); + expect(jpegOutputFiles[0]).toBe(resolve(jpegOutputDir, 'page-1.jpeg')); + + // Check if files actually exist + for (const file of jpegOutputFiles) { + expect(existsSync(file)).toBe(true, `Expected ${file} to exist`); + } + }); + }); +}); diff --git a/packages/content/ref/pdf-to-images/src/cli.ts b/packages/content/ref/pdf-to-images/src/cli.ts new file mode 100644 index 00000000..d3d43a19 --- /dev/null +++ b/packages/content/ref/pdf-to-images/src/cli.ts @@ -0,0 +1,67 @@ +#!/usr/bin/env node +import yargs from 'yargs'; +import { hideBin } from 'yargs/helpers'; +import { Logger } from 'tslog'; +import { convertPdfToImages } from './index.js'; +import { ConversionOptionsSchema } from './types.js'; + +const logger = new Logger(); + +yargs(hideBin(process.argv)) + .command('convert', 'Convert PDF to images', { + input: { + alias: 'i', + type: 'string', + description: 'Input PDF file path', + demandOption: true, + }, + output: { + alias: 'o', + type: 'string', + description: 'Output directory path', + demandOption: true, + }, + format: { + alias: 'f', + type: 'string', + choices: ['png', 'jpeg'], + default: 'png', + description: 'Output image format', + }, + quality: { + alias: 'q', + type: 'number', + description: 'Output image quality (1-100)', + default: 90, + }, + pages: { + alias: 'p', + type: 'string', + description: 'Pages to convert (e.g., "1,2,3" or "all")', + default: 'all', + 
/** Matches the `data:image/<type>;base64,` prefix of a PNG/JPEG data URL. */
const DATA_URL_PREFIX = /^data:image\/(png|jpeg);base64,/;

/**
 * Normalises one rendered page into a Buffer of encoded image bytes.
 *
 * Strings are decoded as base64 after stripping an optional data-URL prefix;
 * byte arrays are copied as they are.
 */
function toImageBuffer(imageData: string | Uint8Array): Buffer {
  if (typeof imageData === 'string') {
    return Buffer.from(imageData.replace(DATA_URL_PREFIX, ''), 'base64');
  }
  return Buffer.from(imageData);
}

/** Tells whether `pageIndex` is a usable 0-based index into a document of `pageCount` pages. */
function isRenderablePage(pageIndex: number, pageCount: number): boolean {
  return Number.isInteger(pageIndex) && pageIndex >= 0 && pageIndex < pageCount;
}

/**
 * Re-encodes one rendered page with sharp and writes it to `outputPath`.
 *
 * @param imageData - Rendered page, either base64 text (optionally a data URL) or raw bytes.
 * @param outputPath - Destination file path.
 * @param format - Target encoding.
 * @param quality - JPEG quality; it is not passed to the PNG encoder.
 */
async function saveImage(
  imageData: string | Uint8Array,
  outputPath: string,
  format: 'png' | 'jpeg',
  quality: number,
): Promise<void> {
  const image = sharp(toImageBuffer(imageData));
  const encoded = format === 'jpeg' ? image.jpeg({ quality }) : image.png();
  await encoded.toFile(outputPath);
}

/**
 * Renders pages of a PDF and writes each one as `page-<n>.<format>` into
 * `options.outputDir` (created if missing), where `<n>` is the 1-based page number.
 *
 * @param options - Conversion options. `pages` holds 0-based page indices or `'all'`.
 *   `format`, `quality` and `pages` fall back to `'png'`, `90` and `'all'` when a
 *   plain-JavaScript caller leaves them out.
 * @returns Paths of the written files, in the order the pages were requested.
 * @throws Rethrows any error from directory creation, PDF rendering or image
 *   encoding after logging it.
 */
export async function convertPdfToImages(options: ConversionOptions): Promise<string[]> {
  const { inputPath, outputDir } = options;
  // Same defaults as ConversionOptionsSchema, for callers that skip schema parsing.
  const format = options.format ?? 'png';
  const quality = options.quality ?? 90;
  const pages = options.pages ?? 'all';

  try {
    // Ensure output directory exists
    await mkdir(outputDir, { recursive: true });

    // NOTE(review): pdf-img-convert appears to return Uint8Array entries unless
    // `base64: true` is requested, and does not seem to document a `quality`
    // option. The call is left unchanged; saveImage accepts either result shape.
    const pdfPages = await convert(inputPath, {
      width: 2048,
      height: 2048,
      quality: quality / 100, // Convert 1-100 range to 0-1
    });

    const pagesToProcess = pages === 'all'
      ? Array.from({ length: pdfPages.length }, (_, i) => i)
      : pages;

    const outputFiles: string[] = [];

    for (const pageNum of pagesToProcess) {
      // Skip negative, fractional, NaN and out-of-range indices instead of
      // handing `undefined` to saveImage.
      if (!isRenderablePage(pageNum, pdfPages.length)) {
        logger.warn(`Page ${pageNum + 1} does not exist in the PDF`);
        continue;
      }

      const outputPath = join(outputDir, `page-${pageNum + 1}.${format}`);
      await saveImage(pdfPages[pageNum], outputPath, format, quality);
      logger.info(`Saved page ${pageNum + 1} to ${outputPath}`);
      outputFiles.push(outputPath);
    }

    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  }
}
/dev/null and b/packages/content/ref/pdf-to-images/tests/ifb.pdf differ diff --git a/packages/content/ref/pdf-to-images/tsconfig.json b/packages/content/ref/pdf-to-images/tsconfig.json new file mode 100644 index 00000000..8ea82c9d --- /dev/null +++ b/packages/content/ref/pdf-to-images/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "lib": ["ES2022"], + "outDir": "dist", + "rootDir": "src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts"] +} + diff --git a/packages/content/ref/pdf-to-images/tsup.config.ts b/packages/content/ref/pdf-to-images/tsup.config.ts new file mode 100644 index 00000000..dd623643 --- /dev/null +++ b/packages/content/ref/pdf-to-images/tsup.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/index.ts', 'src/cli.ts'], + format: ['esm'], + dts: true, + splitting: false, + sourcemap: true, + clean: true, +}); + diff --git a/packages/content/ref/pdf-to-images/vitest.config.ts b/packages/content/ref/pdf-to-images/vitest.config.ts new file mode 100644 index 00000000..eab961f1 --- /dev/null +++ b/packages/content/ref/pdf-to-images/vitest.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html'], + }, + }, +}); +