content:pdf->image

This commit is contained in:
lovebird 2025-03-13 23:25:20 +01:00
parent be98cd1942
commit f7d1b8bdce
11 changed files with 567 additions and 0 deletions

View File

@ -0,0 +1,113 @@
# pdf-to-images
A TypeScript CLI tool and library for converting PDF files to images (PNG/JPEG) with ESM support.
## Features
- Convert PDF files to PNG or JPEG images
- Select specific pages to convert
- Adjustable image quality
- ESM support
- TypeScript types included
- Command-line interface
- Programmatic API
## Installation
```bash
# Using pnpm (recommended)
pnpm add pdf-to-images
# Using npm
npm install pdf-to-images
# Using yarn
yarn add pdf-to-images
```
## CLI Usage
```bash
# Convert all pages to PNG (default)
pdf-to-images convert --input input.pdf --output ./output
# Convert specific pages to JPEG
pdf-to-images convert --input input.pdf --output ./output --format jpeg --pages "1,2,3"
# Convert with custom quality
pdf-to-images convert --input input.pdf --output ./output --quality 80
```
### CLI Options
- `--input, -i`: Input PDF file path (required)
- `--output, -o`: Output directory path (required)
- `--format, -f`: Output format ('png' or 'jpeg', default: 'png')
- `--quality, -q`: Output image quality (1-100, default: 90)
- `--pages, -p`: Pages to convert ("all" or comma-separated 1-based page numbers, e.g. "1,2,3"; default: "all")
## Programmatic Usage
```typescript
import { convertPdfToImages } from 'pdf-to-images';
async function example() {
// Convert all pages to PNG
const files = await convertPdfToImages({
inputPath: 'input.pdf',
outputDir: './output',
format: 'png',
quality: 90,
pages: 'all'
});
console.log('Generated files:', files);
// Convert specific pages to JPEG
const specificPages = await convertPdfToImages({
inputPath: 'input.pdf',
outputDir: './output',
format: 'jpeg',
quality: 85,
pages: [0, 2, 4] // Convert pages 1, 3, and 5 (0-based indexing)
});
console.log('Generated files:', specificPages);
}
```
### API Options
```typescript
interface ConversionOptions {
inputPath: string; // Path to input PDF file
outputDir: string; // Path to output directory
format?: 'png' | 'jpeg'; // Output format (default: 'png')
quality?: number; // Output quality 1-100 (default: 90)
pages?: number[] | 'all'; // 0-based page indices to convert (default: 'all')
}
```
## Development
```bash
# Install dependencies
pnpm install
# Build the project
pnpm build
# Run tests
pnpm test
# Run tests with coverage
pnpm test:coverage
# Watch mode during development
pnpm dev
```
## License
ISC

View File

@ -0,0 +1,52 @@
{
"name": "pdf-to-images",
"version": "1.0.0",
"description": "CLI tool to convert PDF files to images",
"type": "module",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"bin": {
"pdf-to-images": "./dist/cli.js"
},
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js"
}
},
"scripts": {
"build": "tsup",
"dev": "tsx watch src/cli.ts",
"start": "node dist/cli.js",
"test": "vitest run",
"test:watch": "vitest",
"test:coverage": "vitest run --coverage"
},
"keywords": [
"pdf",
"images",
"conversion",
"cli"
],
"author": "",
"license": "ISC",
"dependencies": {
"canvas": "^3.1.0",
"pdf-img-convert": "^2.0.0",
"sharp": "^0.33.2",
"tslog": "^4.9.3",
"yargs": "^17.7.2",
"zod": "^3.24.2"
},
"devDependencies": {
"@types/node": "^22.13.10",
"@types/yargs": "^17.0.33",
"@typescript-eslint/eslint-plugin": "^6.21.0",
"@typescript-eslint/parser": "^6.21.0",
"@vitest/coverage-v8": "^2.1.9",
"tsup": "^8.4.0",
"tsx": "^4.19.3",
"typescript": "^5.8.2",
"vitest": "^3.0.8"
}
}

View File

@ -0,0 +1,203 @@
import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
import { convertPdfToImages } from '../index.js';
import { ConversionOptionsSchema } from '../types.js';
import { mkdtemp, writeFile, rm, mkdir } from 'fs/promises';
import { join, dirname, resolve } from 'path';
import { existsSync } from 'fs';
// No mocks for sharp or pdf-img-convert as we want to test real file conversion
/**
 * Validation and end-to-end tests for `convertPdfToImages`.
 *
 * sharp and pdf-img-convert are intentionally not mocked: the conversion
 * tests render the real fixture `tests/ifb.pdf` into `tests/.temp`.
 */
describe('convertPdfToImages', () => {
  const testsDir = resolve(__dirname, '../../tests');
  const realPdfPath = resolve(testsDir, 'ifb.pdf');
  // Scratch directory for generated images; removed again in afterAll.
  const tempDir = resolve(testsDir, '.temp');

  /** Returns a complete, valid options object with individual fields overridden. */
  const buildOptions = (overrides: Record<string, unknown> = {}): Record<string, unknown> => ({
    inputPath: realPdfPath,
    outputDir: resolve(tempDir, 'output'),
    format: 'png',
    quality: 90,
    pages: 'all',
    ...overrides,
  });

  /** Converts every page of the fixture to `format` and checks the reported files. */
  const expectConversion = async (format: 'png' | 'jpeg'): Promise<void> => {
    const outputDir = resolve(tempDir, `output-ifb-${format}`);
    // Passing the literal inline keeps `format`/`pages` as literal types
    // instead of widening them to `string`.
    const outputFiles = await convertPdfToImages({
      inputPath: realPdfPath,
      outputDir,
      format,
      quality: 90,
      pages: 'all',
    });

    // Check number of files and naming
    expect(outputFiles.length).toBeGreaterThan(0);
    expect(outputFiles[0]).toBe(resolve(outputDir, `page-1.${format}`));

    // Check if files actually exist
    for (const file of outputFiles) {
      // The failure message belongs to expect(); toBe() takes a single argument.
      expect(existsSync(file), `Expected ${file} to exist`).toBe(true);
    }
  };

  beforeAll(async () => {
    // Verify the test PDF file exists before running tests
    if (!existsSync(realPdfPath)) {
      throw new Error(`Test PDF file not found: ${realPdfPath}. Make sure it exists before running tests.`);
    }
    // Recursive mkdir succeeds when the directory already exists.
    await mkdir(tempDir, { recursive: true });
  });

  afterAll(async () => {
    // Best-effort cleanup: `force` ignores a missing directory, and any other
    // failure is only reported so it cannot fail the suite.
    try {
      await rm(tempDir, { recursive: true, force: true });
    } catch (error) {
      console.error(`Error cleaning up temporary directory: ${error}`);
    }
  });

  describe('options validation', () => {
    it('should validate basic conversion options', () => {
      expect(() => ConversionOptionsSchema.parse(buildOptions())).not.toThrow();
    });

    it('should validate JPEG format', () => {
      expect(() => ConversionOptionsSchema.parse(buildOptions({ format: 'jpeg' }))).not.toThrow();
    });

    it('should validate specific page selection', () => {
      expect(() => ConversionOptionsSchema.parse(buildOptions({ pages: [1, 2, 3] }))).not.toThrow();
    });

    it('should validate quality range', () => {
      // Minimum quality
      expect(() => ConversionOptionsSchema.parse(buildOptions({ quality: 1 }))).not.toThrow();
      // Maximum quality
      expect(() => ConversionOptionsSchema.parse(buildOptions({ quality: 100 }))).not.toThrow();
    });

    it('should throw on invalid input path', () => {
      expect(() => ConversionOptionsSchema.parse(buildOptions({ inputPath: '' }))).toThrow();
    });

    it('should throw on invalid format', () => {
      // 'gif' is not a supported format
      expect(() => ConversionOptionsSchema.parse(buildOptions({ format: 'gif' }))).toThrow();
    });

    it('should throw on invalid quality value', () => {
      expect(() => ConversionOptionsSchema.parse(buildOptions({ quality: 0 }))).toThrow();
      expect(() => ConversionOptionsSchema.parse(buildOptions({ quality: 101 }))).toThrow();
    });

    it('should throw on invalid pages format', () => {
      // Neither 'all' nor an array
      expect(() => ConversionOptionsSchema.parse(buildOptions({ pages: 'some' }))).toThrow();
    });
  });

  describe('real PDF file conversion', () => {
    it('should convert a real PDF file to PNG images', async () => {
      await expectConversion('png');
    });

    it('should convert a real PDF file to JPEG images', async () => {
      await expectConversion('jpeg');
    });
  });
});

View File

@ -0,0 +1,67 @@
#!/usr/bin/env node
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import { Logger } from 'tslog';
import { convertPdfToImages } from './index.js';
import { ConversionOptionsSchema } from './types.js';
const logger = new Logger();

/**
 * Translates the `--pages` CLI value into the page selection used by the
 * conversion options.
 *
 * @param spec - Either "all" or comma-separated 1-based page numbers ("1,2,3").
 * @returns `'all'`, or the requested pages as 0-based indices.
 * @throws Error when any entry is not a positive whole number.
 */
function parsePageSelection(spec: string): number[] | 'all' {
  const trimmedSpec = spec.trim();
  if (trimmedSpec.toLowerCase() === 'all') {
    return 'all';
  }
  return trimmedSpec.split(',').map((entry) => {
    const pageText = entry.trim();
    // parseInt would silently accept "2abc" or "1.5", and "0" would turn into
    // index -1; require plain digits naming a page >= 1 instead.
    if (!/^\d+$/.test(pageText) || Number(pageText) < 1) {
      throw new Error(
        `Invalid page number "${pageText}" in --pages "${spec}": expected "all" or comma-separated positive integers (e.g. "1,2,3")`,
      );
    }
    // The CLI is 1-based; the library API expects 0-based indices.
    return Number(pageText) - 1;
  });
}

// `convert` command: validates the arguments against the shared schema, runs
// the conversion, and exits with status 1 on any failure.
yargs(hideBin(process.argv))
  .command('convert', 'Convert PDF to images', {
    input: {
      alias: 'i',
      type: 'string',
      description: 'Input PDF file path',
      demandOption: true,
    },
    output: {
      alias: 'o',
      type: 'string',
      description: 'Output directory path',
      demandOption: true,
    },
    format: {
      alias: 'f',
      type: 'string',
      choices: ['png', 'jpeg'],
      default: 'png',
      description: 'Output image format',
    },
    quality: {
      alias: 'q',
      type: 'number',
      description: 'Output image quality (1-100)',
      default: 90,
    },
    pages: {
      alias: 'p',
      type: 'string',
      description: 'Pages to convert (e.g., "1,2,3" or "all")',
      default: 'all',
    },
  }, async (argv) => {
    try {
      const options = ConversionOptionsSchema.parse({
        inputPath: argv.input,
        outputDir: argv.output,
        format: argv.format,
        quality: argv.quality,
        pages: parsePageSelection(argv.pages),
      });
      const outputFiles = await convertPdfToImages(options);
      logger.info(`Successfully converted ${outputFiles.length} pages`);
      logger.info('Output files:', outputFiles);
    } catch (error) {
      logger.error('Error:', error);
      process.exit(1);
    }
  })
  .help()
  .argv;

View File

@ -0,0 +1,64 @@
import { convert } from 'pdf-img-convert';
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { Logger } from 'tslog';
import sharp from 'sharp';
import { ConversionOptions } from './types.js';
// Module-level tslog logger used to report conversion progress, warnings and errors.
const logger = new Logger();
/**
 * Encodes one rendered PDF page in the requested format and writes it to disk.
 *
 * @param imageData - The rendered page, either as raw image bytes or as a
 *   base64 string (with or without a `data:image/(png|jpeg);base64,` prefix).
 * @param outputPath - Destination file path.
 * @param format - Target encoding of the written file.
 * @param quality - JPEG quality; not applied when writing PNG.
 */
async function saveImage(
  imageData: string | Uint8Array,
  outputPath: string,
  format: 'png' | 'jpeg',
  quality: number,
): Promise<void> {
  // Renderers may hand back raw bytes rather than base64 text; calling
  // `.replace` on a Uint8Array would throw, so branch on the actual type.
  const buffer = typeof imageData === 'string'
    ? Buffer.from(imageData.replace(/^data:image\/(png|jpeg);base64,/, ''), 'base64')
    : Buffer.from(imageData);

  const pipeline = sharp(buffer);
  if (format === 'jpeg') {
    await pipeline.jpeg({ quality }).toFile(outputPath);
  } else {
    await pipeline.png().toFile(outputPath);
  }
}
/**
 * Renders a PDF and writes the selected pages as image files named
 * `page-<n>.<format>` (1-based `n`) inside `outputDir`.
 *
 * @param options - Conversion options. `pages` holds 0-based page indices or
 *   `'all'`. `format`, `quality` and `pages` fall back to `'png'`, `90` and
 *   `'all'` when a caller that skipped schema parsing leaves them undefined.
 * @returns Paths of the files written, in the order they were processed.
 *   Requested pages that do not exist are skipped with a warning.
 * @throws Rethrows any error raised while creating the directory, rendering
 *   the PDF or writing an image, after logging it.
 */
export async function convertPdfToImages(options: ConversionOptions): Promise<string[]> {
  // Same defaults as ConversionOptionsSchema; without them an omitted format
  // would produce files named `page-1.undefined`.
  const { inputPath, outputDir, format = 'png', quality = 90, pages = 'all' } = options;
  try {
    // Ensure output directory exists
    await mkdir(outputDir, { recursive: true });

    // Convert PDF to images
    // NOTE(review): confirm how pdf-img-convert combines `width` with `height`
    // and whether it honours `quality` at all.
    const pdfPages = await convert(inputPath, {
      width: 2048, // Reasonable default width
      height: 2048,
      quality: quality / 100, // Convert 1-100 range to 0-1
    });

    const pagesToProcess = pages === 'all'
      ? Array.from({ length: pdfPages.length }, (_, i) => i)
      : pages;

    const outputFiles: string[] = [];
    for (const pageNum of pagesToProcess) {
      // Negative or fractional indices would read `undefined` from the page
      // array and crash while saving, so treat them like out-of-range pages.
      const pageExists = Number.isInteger(pageNum) && pageNum >= 0 && pageNum < pdfPages.length;
      if (!pageExists) {
        logger.warn(`Page ${pageNum + 1} does not exist in the PDF`);
        continue;
      }
      const outputPath = join(outputDir, `page-${pageNum + 1}.${format}`);
      await saveImage(pdfPages[pageNum], outputPath, format, quality);
      logger.info(`Saved page ${pageNum + 1} to ${outputPath}`);
      outputFiles.push(outputPath);
    }
    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  }
}
// Public re-exports. `ConversionOptions` exists only at the type level, so it
// must be re-exported with `export type`; re-exporting it as a value breaks
// per-file transpilation (isolatedModules), which cannot tell it is erased.
export type { ConversionOptions } from './types.js';
export { ConversionOptionsSchema } from './types.js';

View File

@ -0,0 +1,15 @@
import { z } from 'zod';
/**
 * Runtime schema for conversion options.
 *
 * - `inputPath` / `outputDir`: non-empty path strings.
 * - `format`: `'png'` or `'jpeg'` (default `'png'`).
 * - `quality`: number from 1 to 100 (default `90`).
 * - `pages`: `'all'` or an array of 0-based page indices (default `'all'`).
 */
export const ConversionOptionsSchema = z.object({
  // An empty string can never name a file or directory; reject it here
  // instead of failing later inside the converter.
  inputPath: z.string().min(1),
  outputDir: z.string().min(1),
  format: z.enum(['png', 'jpeg']).default('png'),
  quality: z.number().min(1).max(100).default(90),
  pages: z.union([
    // Page indices address an array, so they must be whole and non-negative.
    z.array(z.number().int().nonnegative()),
    z.literal('all'),
  ]).default('all'),
});
/** Conversion options as produced by parsing with `ConversionOptionsSchema` (its output type, i.e. after defaults are applied). */
export type ConversionOptions = z.infer<typeof ConversionOptionsSchema>;

View File

@ -0,0 +1,13 @@
%PDF-1.4
1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj 3 0 obj<</Type/Page/MediaBox[0 0 3 3]>>endobj
xref
0 4
0000000000 65535 f
0000000010 00000 n
0000000053 00000 n
0000000102 00000 n
trailer<</Size 4/Root 1 0 R>>
startxref
149
%%EOF

Binary file not shown.

View File

@ -0,0 +1,18 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"lib": ["ES2022"],
"outDir": "dist",
"rootDir": "src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"declaration": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "**/*.test.ts"]
}

View File

@ -0,0 +1,11 @@
import { defineConfig } from 'tsup';
// Entry files handed to tsup; each one is built as its own output.
const entryPoints = ['src/index.ts', 'src/cli.ts'];

/** tsup build configuration: ESM output with declarations and source maps. */
export default defineConfig({
  clean: true,
  dts: true,
  entry: entryPoints,
  format: ['esm'],
  sourcemap: true,
  splitting: false,
});

View File

@ -0,0 +1,11 @@
import { defineConfig } from 'vitest/config';
/** Vitest configuration: only coverage collection is customised. */
export default defineConfig({
  test: {
    coverage: {
      // Reports are emitted in text, JSON and HTML form.
      reporter: ['text', 'json', 'html'],
      provider: 'v8',
    },
  },
});
});