content:pdf->image
This commit is contained in:
parent
be98cd1942
commit
f7d1b8bdce
113
packages/content/ref/pdf-to-images/README.md
Normal file
113
packages/content/ref/pdf-to-images/README.md
Normal file
@ -0,0 +1,113 @@
|
||||
# pdf-to-images
|
||||
|
||||
A TypeScript CLI tool and library for converting PDF files to images (PNG/JPEG) with ESM support.
|
||||
|
||||
## Features
|
||||
|
||||
- Convert PDF files to PNG or JPEG images
|
||||
- Select specific pages to convert
|
||||
- Adjustable image quality
|
||||
- ESM support
|
||||
- TypeScript types included
|
||||
- Command-line interface
|
||||
- Programmatic API
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# Using pnpm (recommended)
|
||||
pnpm add pdf-to-images
|
||||
|
||||
# Using npm
|
||||
npm install pdf-to-images
|
||||
|
||||
# Using yarn
|
||||
yarn add pdf-to-images
|
||||
```
|
||||
|
||||
## CLI Usage
|
||||
|
||||
```bash
|
||||
# Convert all pages to PNG (default)
|
||||
pdf-to-images convert --input input.pdf --output ./output
|
||||
|
||||
# Convert specific pages to JPEG
|
||||
pdf-to-images convert --input input.pdf --output ./output --format jpeg --pages "1,2,3"
|
||||
|
||||
# Convert with custom quality
|
||||
pdf-to-images convert --input input.pdf --output ./output --quality 80
|
||||
```
|
||||
|
||||
### CLI Options
|
||||
|
||||
- `--input, -i`: Input PDF file path (required)
|
||||
- `--output, -o`: Output directory path (required)
|
||||
- `--format, -f`: Output format ('png' or 'jpeg', default: 'png')
|
||||
- `--quality, -q`: Output image quality (1-100, default: 90)
|
||||
- `--pages, -p`: Pages to convert ("all" or comma-separated 1-based page numbers such as "1,2,3"; default: "all")
|
||||
|
||||
## Programmatic Usage
|
||||
|
||||
```typescript
|
||||
import { convertPdfToImages } from 'pdf-to-images';
|
||||
|
||||
async function example() {
|
||||
// Convert all pages to PNG
|
||||
const files = await convertPdfToImages({
|
||||
inputPath: 'input.pdf',
|
||||
outputDir: './output',
|
||||
format: 'png',
|
||||
quality: 90,
|
||||
pages: 'all'
|
||||
});
|
||||
|
||||
console.log('Generated files:', files);
|
||||
|
||||
// Convert specific pages to JPEG
|
||||
const specificPages = await convertPdfToImages({
|
||||
inputPath: 'input.pdf',
|
||||
outputDir: './output',
|
||||
format: 'jpeg',
|
||||
quality: 85,
|
||||
pages: [0, 2, 4] // Convert pages 1, 3, and 5 (0-based indexing)
|
||||
});
|
||||
|
||||
console.log('Generated files:', specificPages);
|
||||
}
|
||||
```
|
||||
|
||||
### API Options
|
||||
|
||||
```typescript
|
||||
interface ConversionOptions {
|
||||
inputPath: string; // Path to input PDF file
|
||||
outputDir: string; // Path to output directory
|
||||
format?: 'png' | 'jpeg'; // Output format (default: 'png')
|
||||
quality?: number; // Output quality 1-100 (default: 90)
|
||||
pages?: number[] | 'all'; // Zero-based page indices to convert, or 'all' (default: 'all')
|
||||
}
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
pnpm install
|
||||
|
||||
# Build the project
|
||||
pnpm build
|
||||
|
||||
# Run tests
|
||||
pnpm test
|
||||
|
||||
# Run tests with coverage
|
||||
pnpm test:coverage
|
||||
|
||||
# Watch mode during development
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
ISC
|
||||
|
||||
52
packages/content/ref/pdf-to-images/package.json
Normal file
52
packages/content/ref/pdf-to-images/package.json
Normal file
@ -0,0 +1,52 @@
|
||||
{
|
||||
"name": "pdf-to-images",
|
||||
"version": "1.0.0",
|
||||
"description": "CLI tool to convert PDF files to images",
|
||||
"type": "module",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"bin": {
|
||||
"pdf-to-images": "./dist/cli.js"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsup",
|
||||
"dev": "tsx watch src/cli.ts",
|
||||
"start": "node dist/cli.js",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest",
|
||||
"test:coverage": "vitest run --coverage"
|
||||
},
|
||||
"keywords": [
|
||||
"pdf",
|
||||
"images",
|
||||
"conversion",
|
||||
"cli"
|
||||
],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"canvas": "^3.1.0",
|
||||
"pdf-img-convert": "^2.0.0",
|
||||
"sharp": "^0.33.2",
|
||||
"tslog": "^4.9.3",
|
||||
"yargs": "^17.7.2",
|
||||
"zod": "^3.24.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.13.10",
|
||||
"@types/yargs": "^17.0.33",
|
||||
"@typescript-eslint/eslint-plugin": "^6.21.0",
|
||||
"@typescript-eslint/parser": "^6.21.0",
|
||||
"@vitest/coverage-v8": "^2.1.9",
|
||||
"tsup": "^8.4.0",
|
||||
"tsx": "^4.19.3",
|
||||
"typescript": "^5.8.2",
|
||||
"vitest": "^3.0.8"
|
||||
}
|
||||
}
|
||||
203
packages/content/ref/pdf-to-images/src/__tests__/index.test.ts
Normal file
203
packages/content/ref/pdf-to-images/src/__tests__/index.test.ts
Normal file
@ -0,0 +1,203 @@
|
||||
import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
|
||||
import { convertPdfToImages } from '../index.js';
|
||||
import { ConversionOptionsSchema } from '../types.js';
|
||||
import { mkdtemp, writeFile, rm, mkdir } from 'fs/promises';
|
||||
import { join, dirname, resolve } from 'path';
|
||||
import { existsSync } from 'fs';
|
||||
|
||||
// No mocks for sharp or pdf-img-convert as we want to test real file conversion
|
||||
|
||||
describe('convertPdfToImages', () => {
  // Option type taken from the function under test, so typed literals below are checked
  // without needing an extra import.
  type Options = Parameters<typeof convertPdfToImages>[0];

  // Real PDF fixture stored alongside the package's tests.
  const realPdfPath = resolve(__dirname, '../../tests/ifb.pdf');

  // Per-run scratch directory; assigned in beforeAll, so only read it inside hooks and tests.
  let tempDir: string;

  /** Builds a valid raw option set; each test overrides only the field it exercises. */
  const makeOptions = (overrides: Record<string, unknown> = {}): Record<string, unknown> => ({
    inputPath: realPdfPath,
    outputDir: resolve(tempDir, 'output'),
    format: 'png',
    quality: 90,
    pages: 'all',
    ...overrides,
  });

  beforeAll(async () => {
    // Fail fast with a clear message when the fixture is missing.
    if (!existsSync(realPdfPath)) {
      throw new Error(`Test PDF file not found: ${realPdfPath}. Make sure it exists before running tests.`);
    }

    // Work inside a unique subdirectory of ./tests/.temp so cleanup never removes
    // anything else that lives in that directory.
    const tempRoot = resolve(__dirname, '../../tests/.temp');
    await mkdir(tempRoot, { recursive: true });
    tempDir = await mkdtemp(join(tempRoot, 'run-'));
  });

  afterAll(async () => {
    // Remove only the directory this run created; cleanup failures are logged, not fatal.
    if (tempDir && existsSync(tempDir)) {
      try {
        await rm(tempDir, { recursive: true, force: true });
      } catch (error) {
        console.error(`Error cleaning up temporary directory: ${error}`);
      }
    }
  });

  describe('options validation', () => {
    const parseWith = (overrides: Record<string, unknown>) => () =>
      ConversionOptionsSchema.parse(makeOptions(overrides));

    it('should validate basic conversion options', () => {
      expect(parseWith({})).not.toThrow();
    });

    it('should validate JPEG format', () => {
      expect(parseWith({ format: 'jpeg' })).not.toThrow();
    });

    it('should validate specific page selection', () => {
      expect(parseWith({ pages: [1, 2, 3] })).not.toThrow();
    });

    it('should validate quality range', () => {
      // Both ends of the 1-100 range are inclusive.
      expect(parseWith({ quality: 1 })).not.toThrow();
      expect(parseWith({ quality: 100 })).not.toThrow();
    });

    it('should throw on invalid input path', () => {
      expect(parseWith({ inputPath: '' })).toThrow();
    });

    it('should throw on invalid format', () => {
      // 'gif' is not a supported output format.
      expect(parseWith({ format: 'gif' })).toThrow();
    });

    it('should throw on invalid quality value', () => {
      expect(parseWith({ quality: 0 })).toThrow();
      expect(parseWith({ quality: 101 })).toThrow();
    });

    it('should throw on invalid pages format', () => {
      // Neither 'all' nor an array of numbers.
      expect(parseWith({ pages: 'some' })).toThrow();
    });
  });

  describe('real PDF file conversion', () => {
    // No mocks here: these tests render the fixture and write real files.
    for (const format of ['png', 'jpeg'] as const) {
      it(`should convert a real PDF file to ${format.toUpperCase()} images`, async () => {
        const outputDir = resolve(tempDir, `output-ifb-${format}`);
        const options: Options = {
          inputPath: realPdfPath,
          outputDir,
          format,
          quality: 90,
          pages: 'all',
        };

        const outputFiles = await convertPdfToImages(options);

        // Check number of files and naming
        expect(outputFiles.length).toBeGreaterThan(0);
        expect(outputFiles[0]).toBe(resolve(outputDir, `page-1.${format}`));

        // Check that every reported file exists; the failure message goes on expect(),
        // because toBe() accepts only the expected value.
        for (const file of outputFiles) {
          expect(existsSync(file), `Expected ${file} to exist`).toBe(true);
        }
      });
    }
  });
});
|
||||
67
packages/content/ref/pdf-to-images/src/cli.ts
Normal file
67
packages/content/ref/pdf-to-images/src/cli.ts
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env node
|
||||
import yargs from 'yargs';
|
||||
import { hideBin } from 'yargs/helpers';
|
||||
import { Logger } from 'tslog';
|
||||
import { convertPdfToImages } from './index.js';
|
||||
import { ConversionOptionsSchema } from './types.js';
|
||||
|
||||
const logger = new Logger();

/**
 * Translates the raw `--pages` value into what the library expects.
 *
 * The CLI takes 1-based page numbers, while the conversion options use zero-based indices.
 *
 * @param raw - Either `"all"` or a comma-separated list of page numbers such as `"1,2,3"`.
 * @returns `'all'`, or the zero-based indices in the order given.
 * @throws Error when a token is not a positive integer (for example `"0"`, `"2abc"`, or an empty token).
 */
function parsePagesArgument(raw: string): number[] | 'all' {
  if (raw.trim() === 'all') {
    return 'all';
  }

  return raw.split(',').map((token) => {
    const pageText = token.trim();
    // Strict match: parseInt would accept "2abc" as 2 and map "0" to index -1.
    if (!/^[1-9]\d*$/.test(pageText)) {
      throw new Error(
        `Invalid page "${pageText}" in --pages: expected "all" or comma-separated page numbers starting at 1 (e.g. "1,2,3")`,
      );
    }
    return Number(pageText) - 1;
  });
}

// Define the `convert` command and run it against the process arguments.
yargs(hideBin(process.argv))
  .command(
    'convert',
    'Convert PDF to images',
    {
      input: {
        alias: 'i',
        type: 'string',
        description: 'Input PDF file path',
        demandOption: true,
      },
      output: {
        alias: 'o',
        type: 'string',
        description: 'Output directory path',
        demandOption: true,
      },
      format: {
        alias: 'f',
        type: 'string',
        choices: ['png', 'jpeg'],
        default: 'png',
        description: 'Output image format',
      },
      quality: {
        alias: 'q',
        type: 'number',
        description: 'Output image quality (1-100)',
        default: 90,
      },
      pages: {
        alias: 'p',
        type: 'string',
        description: 'Pages to convert (e.g., "1,2,3" or "all")',
        default: 'all',
      },
    },
    async (argv) => {
      try {
        // Validate everything through the shared schema before touching the filesystem.
        const options = ConversionOptionsSchema.parse({
          inputPath: argv.input,
          outputDir: argv.output,
          format: argv.format,
          quality: argv.quality,
          pages: parsePagesArgument(argv.pages),
        });

        const outputFiles = await convertPdfToImages(options);
        logger.info(`Successfully converted ${outputFiles.length} pages`);
        logger.info('Output files:', outputFiles);
      } catch (error) {
        // Any validation or conversion failure ends the process with a non-zero exit code.
        logger.error('Error:', error);
        process.exit(1);
      }
    },
  )
  .help()
  .argv;
|
||||
|
||||
64
packages/content/ref/pdf-to-images/src/index.ts
Normal file
64
packages/content/ref/pdf-to-images/src/index.ts
Normal file
@ -0,0 +1,64 @@
|
||||
import { convert } from 'pdf-img-convert';
|
||||
import { mkdir } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { Logger } from 'tslog';
|
||||
import sharp from 'sharp';
|
||||
import { ConversionOptions } from './types.js';
|
||||
|
||||
// Module-level tslog logger used for conversion progress, warnings, and errors.
const logger = new Logger();
|
||||
|
||||
/**
 * Writes one rendered page to disk as PNG or JPEG.
 *
 * @param imageData - The rendered page: raw encoded image bytes, a bare base64 string,
 *   or a `data:image/(png|jpeg);base64,` URL.
 *   NOTE(review): pdf-img-convert appears to return raw bytes unless its `base64` option
 *   is set — confirm against the installed version.
 * @param outputPath - Destination file path.
 * @param format - Output encoding.
 * @param quality - JPEG quality (1-100); not used for PNG output.
 */
async function saveImage(
  imageData: string | Uint8Array,
  outputPath: string,
  format: 'png' | 'jpeg',
  quality: number,
): Promise<void> {
  // Strings are treated as base64 (with an optional data-URL prefix); bytes are used as-is.
  const buffer =
    typeof imageData === 'string'
      ? Buffer.from(imageData.replace(/^data:image\/(png|jpeg);base64,/, ''), 'base64')
      : Buffer.from(imageData);

  const image = sharp(buffer);
  const encoded = format === 'jpeg' ? image.jpeg({ quality }) : image.png();
  await encoded.toFile(outputPath);
}
|
||||
/**
 * Renders a PDF and writes the selected pages as image files.
 *
 * @param options - Conversion settings. `pages` is `'all'` or a list of zero-based page indices.
 * @returns Paths of the files written, in processing order, named `page-<n>.<format>`
 *   where `<n>` is the 1-based page number.
 * @throws Re-throws any error from creating the output directory, rendering, or saving,
 *   after logging it.
 */
export async function convertPdfToImages(options: ConversionOptions): Promise<string[]> {
  // Defaults match ConversionOptionsSchema, so callers that skip schema parsing
  // (plain JS, or the optional fields shown in the README) still get usable values.
  const { inputPath, outputDir, format = 'png', quality = 90, pages = 'all' } = options;

  try {
    // Ensure output directory exists
    await mkdir(outputDir, { recursive: true });

    // Convert PDF to images
    const pdfPages = await convert(inputPath, {
      width: 2048, // Reasonable default width
      // NOTE(review): the original comment claimed this maintains aspect ratio — confirm
      // how pdf-img-convert combines `width` and `height`.
      height: 2048,
      quality: quality / 100, // Convert 1-100 range to 0-1
    });

    const pagesToProcess = pages === 'all'
      ? Array.from({ length: pdfPages.length }, (_, i) => i)
      : pages;

    const outputFiles: string[] = [];

    for (const pageNum of pagesToProcess) {
      // Negative or fractional indices would read outside the rendered pages and fail
      // while saving; skip them with a warning, as for pages past the end.
      if (!Number.isInteger(pageNum) || pageNum < 0) {
        logger.warn(`Skipping invalid page index ${pageNum}: expected a non-negative integer`);
        continue;
      }
      if (pageNum >= pdfPages.length) {
        logger.warn(`Page ${pageNum + 1} does not exist in the PDF`);
        continue;
      }

      // File names use 1-based page numbers.
      const outputPath = join(outputDir, `page-${pageNum + 1}.${format}`);
      await saveImage(pdfPages[pageNum], outputPath, format, quality);
      logger.info(`Saved page ${pageNum + 1} to ${outputPath}`);
      outputFiles.push(outputPath);
    }

    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  }
}
|
||||
|
||||
// Re-export the public schema and its option type. The type uses `export type` so that
// per-file transpilers (isolatedModules-style tooling) can erase it instead of emitting a
// runtime re-export of a name that has no value in the compiled module.
export { ConversionOptionsSchema } from './types.js';
export type { ConversionOptions } from './types.js';
|
||||
|
||||
15
packages/content/ref/pdf-to-images/src/types.ts
Normal file
15
packages/content/ref/pdf-to-images/src/types.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
 * Runtime validation schema for conversion options.
 *
 * `format`, `quality`, and `pages` are optional on input and filled with defaults
 * ('png', 90, 'all') when omitted.
 */
export const ConversionOptionsSchema = z.object({
  // Paths must be non-empty so a blank value is rejected here, not deep inside the conversion.
  inputPath: z.string().min(1),
  outputDir: z.string().min(1),
  format: z.enum(['png', 'jpeg']).default('png'),
  quality: z.number().min(1).max(100).default(90),
  // Either the literal 'all' or zero-based page indices; indices must be non-negative integers.
  pages: z.union([
    z.array(z.number().int().nonnegative()),
    z.literal('all'),
  ]).default('all'),
});

/**
 * Options after validation: the parsed output of {@link ConversionOptionsSchema},
 * in which the defaulted fields are always present.
 */
export type ConversionOptions = z.infer<typeof ConversionOptionsSchema>;
|
||||
|
||||
13
packages/content/ref/pdf-to-images/tests/.temp/test.pdf
Normal file
13
packages/content/ref/pdf-to-images/tests/.temp/test.pdf
Normal file
@ -0,0 +1,13 @@
|
||||
%PDF-1.4
|
||||
1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj 3 0 obj<</Type/Page/MediaBox[0 0 3 3]>>endobj
|
||||
xref
|
||||
0 4
|
||||
0000000000 65535 f
|
||||
0000000010 00000 n
|
||||
0000000053 00000 n
|
||||
0000000102 00000 n
|
||||
|
||||
trailer<</Size 4/Root 1 0 R>>
|
||||
startxref
|
||||
149
|
||||
%%EOF
|
||||
BIN
packages/content/ref/pdf-to-images/tests/ifb.pdf
Normal file
BIN
packages/content/ref/pdf-to-images/tests/ifb.pdf
Normal file
Binary file not shown.
18
packages/content/ref/pdf-to-images/tsconfig.json
Normal file
18
packages/content/ref/pdf-to-images/tsconfig.json
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "**/*.test.ts"]
|
||||
}
|
||||
|
||||
11
packages/content/ref/pdf-to-images/tsup.config.ts
Normal file
11
packages/content/ref/pdf-to-images/tsup.config.ts
Normal file
@ -0,0 +1,11 @@
|
||||
import { defineConfig } from 'tsup';
|
||||
|
||||
// Build both the library entry point and the CLI entry point.
const entryPoints = ['src/index.ts', 'src/cli.ts'];

// ESM-only output with type declarations and source maps; the output directory is
// cleaned before each build and code splitting is disabled.
export default defineConfig({
  clean: true,
  dts: true,
  entry: entryPoints,
  format: ['esm'],
  sourcemap: true,
  splitting: false,
});
|
||||
|
||||
11
packages/content/ref/pdf-to-images/vitest.config.ts
Normal file
11
packages/content/ref/pdf-to-images/vitest.config.ts
Normal file
@ -0,0 +1,11 @@
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
// Vitest configuration: coverage is collected with the V8 provider and reported
// as console text, JSON, and HTML.
export default defineConfig({
  test: {
    coverage: { provider: 'v8', reporter: ['text', 'json', 'html'] },
  },
});
|
||||
|
||||
Loading…
Reference in New Issue
Block a user