mupdf implementation

This commit is contained in:
lovebird 2025-03-14 08:06:06 +01:00
parent f7d1b8bdce
commit b324473b65
16 changed files with 537 additions and 524 deletions

View File

@ -1,113 +0,0 @@
# pdf-to-images
A TypeScript CLI tool and library for converting PDF files to images (PNG/JPEG) with ESM support.
## Features
- Convert PDF files to PNG or JPEG images
- Select specific pages to convert
- Adjustable image quality
- ESM support
- TypeScript types included
- Command-line interface
- Programmatic API
## Installation
```bash
# Using pnpm (recommended)
pnpm add pdf-to-images
# Using npm
npm install pdf-to-images
# Using yarn
yarn add pdf-to-images
```
## CLI Usage
```bash
# Convert all pages to PNG (default)
pdf-to-images convert --input input.pdf --output ./output
# Convert specific pages to JPEG
pdf-to-images convert --input input.pdf --output ./output --format jpeg --pages "1,2,3"
# Convert with custom quality
pdf-to-images convert --input input.pdf --output ./output --quality 80
```
### CLI Options
- `--input, -i`: Input PDF file path (required)
- `--output, -o`: Output directory path (required)
- `--format, -f`: Output format ('png' or 'jpeg', default: 'png')
- `--quality, -q`: Output image quality (1-100, default: 90)
- `--pages, -p`: Pages to convert ("all" or comma-separated 1-based page numbers, e.g. "1,2,3"; default: "all")
## Programmatic Usage
```typescript
import { convertPdfToImages } from 'pdf-to-images';
async function example() {
// Convert all pages to PNG
const files = await convertPdfToImages({
inputPath: 'input.pdf',
outputDir: './output',
format: 'png',
quality: 90,
pages: 'all'
});
console.log('Generated files:', files);
// Convert specific pages to JPEG
const specificPages = await convertPdfToImages({
inputPath: 'input.pdf',
outputDir: './output',
format: 'jpeg',
quality: 85,
pages: [0, 2, 4] // Convert pages 1, 3, and 5 (0-based indexing)
});
console.log('Generated files:', specificPages);
}
```
### API Options
```typescript
interface ConversionOptions {
inputPath: string; // Path to input PDF file
outputDir: string; // Path to output directory
format?: 'png' | 'jpeg'; // Output format (default: 'png')
quality?: number; // Output quality 1-100 (default: 90)
pages?: number[] | 'all'; // Pages to convert (default: 'all')
}
```
## Development
```bash
# Install dependencies
pnpm install
# Build the project
pnpm build
# Run tests
pnpm test
# Run tests with coverage
pnpm test:coverage
# Watch mode during development
pnpm dev
```
## License
ISC

View File

@ -0,0 +1,57 @@
import { Logger } from 'tslog';
import { ConvertCommandSchema } from '../types.js';
import { convertPdfToImages } from '../lib/pdf.js';
import { existsSync } from 'node:fs';
import { dirname } from 'node:path';
import { mkdir, readFile } from 'node:fs/promises';
export const command = 'convert';
export const desc = 'Convert PDF to images';
/**
 * yargs option definitions for the `convert` command.
 *
 * `output` is a path prefix, not a directory: the handler only creates
 * `dirname(output)` and forwards the value as `outputPathPrefix`, and the
 * converter appends `_<page>.<format>` to it.
 */
export const builder = {
  input: {
    alias: 'i',
    type: 'string',
    description: 'Input PDF file',
    demandOption: true
  },
  output: {
    alias: 'o',
    type: 'string',
    description: 'Output path prefix for images (files are written as <prefix>_<page>.<format>)',
    demandOption: true
  },
  dpi: {
    type: 'number',
    description: 'DPI for output images',
    default: 300
  },
  format: {
    type: 'string',
    choices: ['png', 'jpg'],
    default: 'png',
    description: 'Output image format'
  }
};
/**
 * yargs handler for `convert`: validates the arguments, reads the PDF and
 * delegates rendering to `convertPdfToImages`.
 *
 * Any failure is logged and terminates the process with exit code 1.
 *
 * @param argv - Parsed yargs arguments; validated with `ConvertCommandSchema`.
 */
export async function handler(argv) {
  const log = new Logger();
  try {
    const { input, output, dpi, format } = ConvertCommandSchema.parse(argv);
    const inputMissing = !existsSync(input);
    if (inputMissing) {
      throw new Error(`Input file ${input} does not exist`);
    }
    // `output` is a path prefix, so only its parent directory has to exist.
    const parentDir = dirname(output);
    await mkdir(parentDir, { recursive: true });
    log.info(`Converting PDF ${input} to images...`);
    const pdfBytes = await readFile(input);
    const conversionOptions = {
      outputPathPrefix: output,
      dpi,
      format,
      logger: log
    };
    const written = await convertPdfToImages(pdfBytes, conversionOptions);
    log.info('Conversion completed successfully');
    log.info(`Generated ${written.length} images`);
  } catch (failure) {
    log.error('Error during conversion:', failure);
    process.exit(1);
  }
}

View File

@ -0,0 +1,7 @@
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
// This package is an ES module ("type": "module"), where the CommonJS `require`
// function is not defined. Load the command module with a dynamic import instead;
// its namespace object (command / desc / builder / handler) is what yargs expects.
const convertCommand = await import('./commands/convert.js');
yargs(hideBin(process.argv))
  .command(convertCommand)
  .demandCommand(1, 'You need to specify a command')
  .strict()
  .parse();

View File

@ -0,0 +1,28 @@
import * as mupdf from 'mupdf';
import { Logger } from 'tslog';
import { writeFile } from 'node:fs/promises';
/**
 * Renders every page of a PDF document to an image file.
 *
 * @param pdfData - Raw PDF bytes.
 * @param options - `outputPathPrefix`, `dpi`, `format` and an optional `logger`.
 *   `format === 'png'` writes PNG; any other value is encoded as JPEG (quality 90).
 *   When `dpi` is omitted the page is rendered at 72 DPI (scale 1).
 * @returns Paths of the written files in page order, named
 *   `<outputPathPrefix>_<page>.<format>` with 1-based page numbers.
 * @throws RangeError when `dpi` is not a positive finite number. Errors from
 *   mupdf or the file system are logged and rethrown unchanged.
 */
export async function convertPdfToImages(pdfData, options) {
  const logger = options.logger ?? new Logger();
  const outputFiles = [];
  let doc;
  try {
    const dpi = options.dpi ?? 72;
    if (!Number.isFinite(dpi) || dpi <= 0) {
      throw new RangeError(`dpi must be a positive number, received ${dpi}`);
    }
    // PDF user space is 72 units per inch, so dpi / 72 is the zoom factor that
    // yields the requested resolution (the identity matrix always gave 72 DPI).
    const scale = dpi / 72;
    const renderMatrix = [scale, 0, 0, scale, 0, 0];

    doc = mupdf.Document.openDocument(pdfData, 'pdf');
    const pageCount = doc.countPages();
    logger.info(`Processing ${pageCount} pages`);
    for (let i = 0; i < pageCount; i++) {
      const page = doc.loadPage(i);
      let pixmap;
      try {
        pixmap = page.toPixmap(renderMatrix, mupdf.ColorSpace.DeviceRGB, false);
        const outputPath = `${options.outputPathPrefix}_${i + 1}.${options.format}`;
        const imageData = options.format === 'png'
          ? pixmap.asPNG()
          : pixmap.asJPEG(90, false);
        await writeFile(outputPath, imageData);
        outputFiles.push(outputPath);
        logger.info(`Converted page ${i + 1} to ${outputPath}`);
      } finally {
        // Release native (WASM) memory eagerly; high-DPI pixmaps are large and
        // garbage collection of the JS wrappers is not tied to WASM heap pressure.
        pixmap?.destroy();
        page.destroy();
      }
    }
    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  } finally {
    doc?.destroy();
  }
}

View File

@ -0,0 +1,13 @@
import { z } from 'zod';
/**
 * Validates the arguments of the `convert` command.
 *
 * Paths must be non-empty and `dpi` must be a positive, finite number, so that
 * invalid values are rejected here instead of failing later during rendering.
 * Defaults: `dpi` 300, `format` 'png'.
 */
export const ConvertCommandSchema = z.object({
  input: z.string().min(1),
  output: z.string().min(1),
  dpi: z.number().positive().finite().default(300),
  format: z.enum(['png', 'jpg']).default('png')
});
/**
 * Stricter configuration schema: `input` and `output` must be non-empty
 * strings, `dpi` must be a positive integer (default 300) and `format` must be
 * 'png' or 'jpg' (default 'png').
 */
export const ConfigSchema = z.object({
input: z.string().min(1),
output: z.string().min(1),
dpi: z.number().int().positive().default(300),
format: z.enum(['png', 'jpg']).default('png')
});

View File

@ -0,0 +1,273 @@
{
"name": "mu",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "mu",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@types/yargs": "^17.0.33",
"mupdf": "^1.3.3",
"tslog": "^4.9.3",
"typescript": "^5.8.2",
"yargs": "^17.7.2",
"zod": "^3.24.2"
},
"devDependencies": {
"@types/node": "^22.13.10"
}
},
"node_modules/@types/node": {
"version": "22.13.10",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.10.tgz",
"integrity": "sha512-I6LPUvlRH+O6VRUqYOcMudhaIdUVWfsjnZavnsraHvpBwaEyMN29ry+0UVJhImYL16xsscu0aske3yA+uPOWfw==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.20.0"
}
},
"node_modules/@types/yargs": {
"version": "17.0.33",
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz",
"integrity": "sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==",
"license": "MIT",
"dependencies": {
"@types/yargs-parser": "*"
}
},
"node_modules/@types/yargs-parser": {
"version": "21.0.3",
"resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz",
"integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==",
"license": "MIT"
},
"node_modules/ansi-regex": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/ansi-styles": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
"license": "MIT",
"dependencies": {
"color-convert": "^2.0.1"
},
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/cliui": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
"license": "ISC",
"dependencies": {
"string-width": "^4.2.0",
"strip-ansi": "^6.0.1",
"wrap-ansi": "^7.0.0"
},
"engines": {
"node": ">=12"
}
},
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
"license": "MIT",
"dependencies": {
"color-name": "~1.1.4"
},
"engines": {
"node": ">=7.0.0"
}
},
"node_modules/color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
"license": "MIT"
},
"node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
"license": "MIT"
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/get-caller-file": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"license": "ISC",
"engines": {
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/is-fullwidth-code-point": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/mupdf": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/mupdf/-/mupdf-1.3.3.tgz",
"integrity": "sha512-uS/uqQZ1+3zSkaL5ngauT98o5gIIRtQPW54vYTVlqBYS0tho9TrjHr0RmAUJlp/XTJyghNhDUjD7l++EZkMyyA==",
"license": "AGPL-3.0-or-later"
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"license": "MIT",
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/strip-ansi": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
"license": "MIT",
"dependencies": {
"ansi-regex": "^5.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/tslog": {
"version": "4.9.3",
"resolved": "https://registry.npmjs.org/tslog/-/tslog-4.9.3.tgz",
"integrity": "sha512-oDWuGVONxhVEBtschLf2cs/Jy8i7h1T+CpdkTNWQgdAF7DhRo2G8vMCgILKe7ojdEkLhICWgI1LYSSKaJsRgcw==",
"license": "MIT",
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/fullstack-build/tslog?sponsor=1"
}
},
"node_modules/typescript": {
"version": "5.8.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.2.tgz",
"integrity": "sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==",
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "6.20.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz",
"integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==",
"dev": true,
"license": "MIT"
},
"node_modules/wrap-ansi": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
"license": "MIT",
"dependencies": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
"strip-ansi": "^6.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
"license": "ISC",
"engines": {
"node": ">=10"
}
},
"node_modules/yargs": {
"version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
"license": "MIT",
"dependencies": {
"cliui": "^8.0.1",
"escalade": "^3.1.1",
"get-caller-file": "^2.0.5",
"require-directory": "^2.1.1",
"string-width": "^4.2.3",
"y18n": "^5.0.5",
"yargs-parser": "^21.1.1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/yargs-parser": {
"version": "21.1.1",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/zod": {
"version": "3.24.2",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz",
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
}
}
}

View File

@ -1,52 +1,25 @@
{
"name": "pdf-to-images",
"name": "mu",
"version": "1.0.0",
"description": "CLI tool to convert PDF files to images",
"type": "module",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"bin": {
"pdf-to-images": "./dist/cli.js"
},
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js"
}
},
"description": "",
"main": "dist/index.js",
"scripts": {
"build": "tsup",
"dev": "tsx watch src/cli.ts",
"start": "node dist/cli.js",
"test": "vitest run",
"test:watch": "vitest",
"test:coverage": "vitest run --coverage"
"build": "tsc",
"start": "node dist/index.js"
},
"keywords": [
"pdf",
"images",
"conversion",
"cli"
],
"keywords": [],
"author": "",
"license": "ISC",
"type": "module",
"dependencies": {
"canvas": "^3.1.0",
"pdf-img-convert": "^2.0.0",
"sharp": "^0.33.2",
"@types/yargs": "^17.0.33",
"mupdf": "^1.3.3",
"tslog": "^4.9.3",
"typescript": "^5.8.2",
"yargs": "^17.7.2",
"zod": "^3.24.2"
},
"devDependencies": {
"@types/node": "^22.13.10",
"@types/yargs": "^17.0.33",
"@typescript-eslint/eslint-plugin": "^6.21.0",
"@typescript-eslint/parser": "^6.21.0",
"@vitest/coverage-v8": "^2.1.9",
"tsup": "^8.4.0",
"tsx": "^4.19.3",
"typescript": "^5.8.2",
"vitest": "^3.0.8"
"@types/node": "^22.13.10"
}
}

View File

@ -1,203 +0,0 @@
import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
import { convertPdfToImages } from '../index.js';
import { ConversionOptionsSchema } from '../types.js';
import { mkdtemp, writeFile, rm, mkdir } from 'fs/promises';
import { join, dirname, resolve } from 'path';
import { existsSync } from 'fs';
// No mocks for sharp or pdf-img-convert as we want to test real file conversion
describe('convertPdfToImages', () => {
let tempDir: string;
const realPdfPath = resolve(__dirname, '../../tests/ifb.pdf');
beforeAll(async () => {
  // Fail fast with a clear message when the PDF fixture is missing.
  const fixturePresent = existsSync(realPdfPath);
  if (!fixturePresent) {
    throw new Error(`Test PDF file not found: ${realPdfPath}. Make sure it exists before running tests.`);
  }
  // All generated files go to <tests>/.temp, created on demand.
  tempDir = resolve(resolve(__dirname, '../../tests'), '.temp');
  if (existsSync(tempDir)) {
    return;
  }
  await mkdir(tempDir, { recursive: true });
});
afterAll(async () => {
  // Nothing to remove when setup never created the scratch directory.
  if (!tempDir || !existsSync(tempDir)) {
    return;
  }
  try {
    await rm(tempDir, { recursive: true, force: true });
  } catch (cleanupError) {
    // Cleanup is best-effort: report the problem but do not fail the suite.
    console.error(`Error cleaning up temporary directory: ${cleanupError}`);
  }
});
describe('options validation', () => {
  // Builds a complete, valid option set; each test overrides only the field it exercises.
  const optionsWith = (overrides: Record<string, unknown> = {}): Record<string, unknown> => ({
    inputPath: realPdfPath,
    outputDir: resolve(tempDir, 'output'),
    format: 'png',
    quality: 90,
    pages: 'all',
    ...overrides,
  });

  it('should validate basic conversion options', () => {
    const candidate = optionsWith();
    expect(() => ConversionOptionsSchema.parse(candidate)).not.toThrow();
  });

  it('should validate JPEG format', () => {
    const candidate = optionsWith({ format: 'jpeg' });
    expect(() => ConversionOptionsSchema.parse(candidate)).not.toThrow();
  });

  it('should validate specific page selection', () => {
    const candidate = optionsWith({ pages: [1, 2, 3] });
    expect(() => ConversionOptionsSchema.parse(candidate)).not.toThrow();
  });

  it('should validate quality range', () => {
    // Lower bound
    const lowest = optionsWith({ quality: 1 });
    expect(() => ConversionOptionsSchema.parse(lowest)).not.toThrow();
    // Upper bound
    const highest = optionsWith({ quality: 100 });
    expect(() => ConversionOptionsSchema.parse(highest)).not.toThrow();
  });

  it('should throw on invalid input path', () => {
    const candidate = optionsWith({ inputPath: '' });
    expect(() => ConversionOptionsSchema.parse(candidate)).toThrow();
  });

  it('should throw on invalid format', () => {
    // 'gif' is not a supported output format
    const candidate = optionsWith({ format: 'gif' });
    expect(() => ConversionOptionsSchema.parse(candidate)).toThrow();
  });

  it('should throw on invalid quality value', () => {
    const belowRange = optionsWith({ quality: 0 });
    const aboveRange = optionsWith({ quality: 101 });
    expect(() => ConversionOptionsSchema.parse(belowRange)).toThrow();
    expect(() => ConversionOptionsSchema.parse(aboveRange)).toThrow();
  });

  it('should throw on invalid pages format', () => {
    // Neither 'all' nor an array of page indices
    const candidate = optionsWith({ pages: 'some' });
    expect(() => ConversionOptionsSchema.parse(candidate)).toThrow();
  });
});
describe('real PDF file conversion', () => {
it('should convert a real PDF file to PNG images', async () => {
  const pngOutputDir = resolve(tempDir, 'output-ifb-png');
  // `as const` keeps the literal types so the object matches ConversionOptions.
  const pngOptions = {
    inputPath: realPdfPath,
    outputDir: pngOutputDir,
    format: 'png' as const,
    quality: 90,
    pages: 'all' as const,
  };
  const pngOutputFiles = await convertPdfToImages(pngOptions);
  // Check number of files and naming
  expect(pngOutputFiles.length).toBeGreaterThan(0);
  expect(pngOutputFiles[0]).toBe(resolve(pngOutputDir, 'page-1.png'));
  // Check that every reported file exists. The failure message is the second
  // argument of expect(); toBe() accepts only the expected value.
  for (const file of pngOutputFiles) {
    expect(existsSync(file), `Expected ${file} to exist`).toBe(true);
  }
});
it('should convert a real PDF file to JPEG images', async () => {
  const jpegOutputDir = resolve(tempDir, 'output-ifb-jpeg');
  // `as const` keeps the literal types so the object matches ConversionOptions.
  const jpegOptions = {
    inputPath: realPdfPath,
    outputDir: jpegOutputDir,
    format: 'jpeg' as const,
    quality: 90,
    pages: 'all' as const,
  };
  const jpegOutputFiles = await convertPdfToImages(jpegOptions);
  // Check number of files and naming
  expect(jpegOutputFiles.length).toBeGreaterThan(0);
  expect(jpegOutputFiles[0]).toBe(resolve(jpegOutputDir, 'page-1.jpeg'));
  // Check that every reported file exists. The failure message is the second
  // argument of expect(); toBe() accepts only the expected value.
  for (const file of jpegOutputFiles) {
    expect(existsSync(file), `Expected ${file} to exist`).toBe(true);
  }
});
});
});

View File

@ -1,67 +0,0 @@
#!/usr/bin/env node
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import { Logger } from 'tslog';
import { convertPdfToImages } from './index.js';
import { ConversionOptionsSchema } from './types.js';
const logger = new Logger();
// Registers the single `convert` command and parses the process arguments.
yargs(hideBin(process.argv))
  .command(
    'convert',
    'Convert PDF to images',
    {
      input: {
        description: 'Input PDF file path',
        type: 'string',
        alias: 'i',
        demandOption: true,
      },
      output: {
        description: 'Output directory path',
        type: 'string',
        alias: 'o',
        demandOption: true,
      },
      format: {
        description: 'Output image format',
        type: 'string',
        alias: 'f',
        choices: ['png', 'jpeg'],
        default: 'png',
      },
      quality: {
        description: 'Output image quality (1-100)',
        type: 'number',
        alias: 'q',
        default: 90,
      },
      pages: {
        description: 'Pages to convert (e.g., "1,2,3" or "all")',
        type: 'string',
        alias: 'p',
        default: 'all',
      },
    },
    async (argv) => {
      try {
        // The CLI takes 1-based page numbers; each one is shifted down by one
        // before being handed to the library.
        let requestedPages: 'all' | number[];
        if (argv.pages === 'all') {
          requestedPages = 'all';
        } else {
          requestedPages = argv.pages
            .split(',')
            .map((token) => Number.parseInt(token.trim(), 10) - 1);
        }
        const validated = ConversionOptionsSchema.parse({
          inputPath: argv.input,
          outputDir: argv.output,
          format: argv.format,
          quality: argv.quality,
          pages: requestedPages,
        });
        const written = await convertPdfToImages(validated);
        logger.info(`Successfully converted ${written.length} pages`);
        logger.info('Output files:', written);
      } catch (failure) {
        logger.error('Error:', failure);
        process.exit(1);
      }
    },
  )
  .help()
  .argv;

View File

@ -0,0 +1,68 @@
import { Arguments } from 'yargs';
import { Logger } from 'tslog';
import { ConvertCommandSchema, ConvertCommandConfig } from '../types.js';
import { convertPdfToImages } from '../lib/pdf.js';
import { existsSync } from 'node:fs';
import { dirname } from 'node:path';
import { mkdir, readFile } from 'node:fs/promises';
export const command = 'convert';
export const desc = 'Convert PDF to images';
/**
 * yargs option definitions for the `convert` command.
 *
 * `output` is a path prefix, not a directory: the handler only creates
 * `dirname(output)` and forwards the value as `outputPathPrefix`, and the
 * converter appends `_<page>.<format>` to it.
 *
 * Declared `as const` so the option `type` fields keep their literal types
 * instead of widening to `string`.
 */
export const builder = {
  input: {
    alias: 'i',
    type: 'string',
    description: 'Input PDF file',
    demandOption: true
  },
  output: {
    alias: 'o',
    type: 'string',
    description: 'Output path prefix for images (files are written as <prefix>_<page>.<format>)',
    demandOption: true
  },
  dpi: {
    type: 'number',
    description: 'DPI for output images',
    default: 300
  },
  format: {
    type: 'string',
    choices: ['png', 'jpg'],
    default: 'png',
    description: 'Output image format'
  }
} as const;
/**
 * yargs handler for `convert`: validates the arguments, reads the PDF and
 * delegates rendering to `convertPdfToImages`.
 *
 * Any failure is logged and terminates the process with exit code 1.
 *
 * @param argv - Parsed yargs arguments; validated with `ConvertCommandSchema`.
 */
export async function handler(argv: Arguments): Promise<void> {
  const log = new Logger();
  try {
    const { input, output, dpi, format } = ConvertCommandSchema.parse(argv);
    const inputMissing = !existsSync(input);
    if (inputMissing) {
      throw new Error(`Input file ${input} does not exist`);
    }
    // `output` is a path prefix, so only its parent directory has to exist.
    const parentDir = dirname(output);
    await mkdir(parentDir, { recursive: true });
    log.info(`Converting PDF ${input} to images...`);
    const pdfBytes = await readFile(input);
    const written = await convertPdfToImages(pdfBytes, {
      outputPathPrefix: output,
      dpi,
      format,
      logger: log
    });
    log.info('Conversion completed successfully');
    log.info(`Generated ${written.length} images`);
  } catch (failure) {
    log.error('Error during conversion:', failure);
    process.exit(1);
  }
}

View File

@ -1,64 +1,8 @@
import { convert } from 'pdf-img-convert';
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { Logger } from 'tslog';
import sharp from 'sharp';
import { ConversionOptions } from './types.js';
const logger = new Logger();
/**
 * Decodes a base64 image (with or without a PNG/JPEG data-URL prefix) and
 * writes it to `outputPath` re-encoded in the requested format.
 *
 * @param imageData - Base64 string, optionally prefixed with `data:image/(png|jpeg);base64,`.
 * @param outputPath - Destination file path.
 * @param format - 'jpeg' re-encodes as JPEG using `quality`; otherwise PNG is written.
 * @param quality - JPEG quality passed to sharp; unused for PNG.
 */
async function saveImage(imageData: string, outputPath: string, format: 'png' | 'jpeg', quality: number): Promise<void> {
  const payload = imageData.replace(/^data:image\/(png|jpeg);base64,/, '');
  const pipeline = sharp(Buffer.from(payload, 'base64'));
  const encoder = format === 'jpeg' ? pipeline.jpeg({ quality }) : pipeline.png();
  await encoder.toFile(outputPath);
}
/**
 * Converts the selected pages of a PDF file to image files.
 *
 * @param options - Input path, output directory, image format, quality (1-100)
 *   and the pages to convert (`'all'` or 0-based page indices).
 * @returns Paths of the written files, named `page-<n>.<format>` with 1-based `n`.
 *   Requested pages that do not exist are skipped with a warning.
 * @throws Rethrows (after logging) any error raised while creating the
 *   directory, rendering the PDF or writing an image.
 */
export async function convertPdfToImages(options: ConversionOptions): Promise<string[]> {
  const { inputPath, outputDir, format, quality, pages } = options;
  try {
    // Ensure output directory exists
    await mkdir(outputDir, { recursive: true });
    // Render the PDF pages
    const pdfPages = await convert(inputPath, {
      width: 2048,
      height: 2048,
      quality: quality / 100, // Convert 1-100 range to 0-1
    });
    const pagesToProcess = pages === 'all'
      ? Array.from({ length: pdfPages.length }, (_, i) => i)
      : pages;
    const outputFiles: string[] = [];
    for (const pageNum of pagesToProcess) {
      // Only valid 0-based indices may be used. A negative or fractional value
      // (e.g. CLI page "0" becomes -1) would index `undefined` and crash in saveImage.
      const isValidIndex = Number.isInteger(pageNum) && pageNum >= 0 && pageNum < pdfPages.length;
      if (!isValidIndex) {
        logger.warn(`Page ${pageNum + 1} does not exist in the PDF`);
        continue;
      }
      const outputPath = join(outputDir, `page-${pageNum + 1}.${format}`);
      await saveImage(pdfPages[pageNum], outputPath, format, quality);
      logger.info(`Saved page ${pageNum + 1} to ${outputPath}`);
      outputFiles.push(outputPath);
    }
    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  }
}
export { ConversionOptions, ConversionOptionsSchema } from './types.js';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
// This package is an ES module ("type": "module"), where the CommonJS `require`
// function is not defined at runtime. Load the command module with a dynamic
// import instead; top-level await is available with the ES2022 / NodeNext settings.
// The assertion is needed because the module's `builder` literal widens its
// `type` fields to `string`.
const convertCommand = (await import('./commands/convert.js')) as import('yargs').CommandModule;
yargs(hideBin(process.argv))
  .command(convertCommand)
  .demandCommand(1, 'You need to specify a command')
  .strict()
  .parse();

View File

@ -0,0 +1,51 @@
import * as mupdf from 'mupdf';
import { Logger } from 'tslog';
import { writeFile } from 'node:fs/promises';
/** Image formats accepted by the converter. */
export type ImageFormat = 'png' | 'jpg';
/** Options accepted by `convertPdfToImages`. */
export interface PdfToImageOptions {
/** Prefix of every output path; the converter appends `_<page>.<format>`. */
outputPathPrefix: string;
/** Requested output resolution in dots per inch. */
dpi: number;
/** 'png' selects PNG encoding; 'jpg' is encoded as JPEG. */
format: ImageFormat;
/** Optional logger; a new `Logger` is created when omitted. */
logger?: Logger<any>;
}
/**
 * Renders every page of a PDF document to an image file.
 *
 * @param pdfData - Raw PDF bytes.
 * @param options - Output prefix, resolution, format and optional logger.
 *   `format === 'png'` writes PNG; 'jpg' is encoded as JPEG (quality 90).
 * @returns Paths of the written files in page order, named
 *   `<outputPathPrefix>_<page>.<format>` with 1-based page numbers.
 * @throws RangeError when `options.dpi` is not a positive finite number. Errors
 *   from mupdf or the file system are logged and rethrown unchanged.
 */
export async function convertPdfToImages(
  pdfData: Buffer,
  options: PdfToImageOptions
): Promise<string[]> {
  const logger = options.logger ?? new Logger<any>();
  const outputFiles: string[] = [];
  let doc: mupdf.Document | undefined;
  try {
    const { dpi } = options;
    if (!Number.isFinite(dpi) || dpi <= 0) {
      throw new RangeError(`dpi must be a positive number, received ${dpi}`);
    }
    // PDF user space is 72 units per inch, so dpi / 72 is the zoom factor that
    // yields the requested resolution (the identity matrix always gave 72 DPI).
    const scale = dpi / 72;
    const renderMatrix: [number, number, number, number, number, number] =
      [scale, 0, 0, scale, 0, 0];

    doc = mupdf.Document.openDocument(pdfData, 'pdf');
    const pageCount = doc.countPages();
    logger.info(`Processing ${pageCount} pages`);

    for (let i = 0; i < pageCount; i++) {
      const page = doc.loadPage(i);
      let pixmap: mupdf.Pixmap | undefined;
      try {
        pixmap = page.toPixmap(renderMatrix, mupdf.ColorSpace.DeviceRGB, false);
        const outputPath = `${options.outputPathPrefix}_${i + 1}.${options.format}`;
        const imageData = options.format === 'png'
          ? pixmap.asPNG()
          : pixmap.asJPEG(90, false);
        await writeFile(outputPath, imageData);
        outputFiles.push(outputPath);
        logger.info(`Converted page ${i + 1} to ${outputPath}`);
      } finally {
        // Release native (WASM) memory eagerly; high-DPI pixmaps are large and
        // garbage collection of the JS wrappers is not tied to WASM heap pressure.
        pixmap?.destroy();
        page.destroy();
      }
    }
    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  } finally {
    doc?.destroy();
  }
}

View File

@ -1,15 +1,22 @@
import { z } from 'zod';
export const ConversionOptionsSchema = z.object({
inputPath: z.string(),
outputDir: z.string(),
format: z.enum(['png', 'jpeg']).default('png'),
quality: z.number().min(1).max(100).default(90),
pages: z.union([
z.array(z.number()),
z.literal('all')
]).default('all'),
import { z } from 'zod';
import type { ImageFormat } from './lib/pdf.js';
/**
 * Validates the arguments of the `convert` command.
 *
 * Paths must be non-empty and `dpi` must be a positive, finite number, so that
 * invalid values are rejected here instead of failing later during rendering.
 * Defaults: `dpi` 300, `format` 'png'.
 */
export const ConvertCommandSchema = z.object({
  input: z.string().min(1),
  output: z.string().min(1),
  dpi: z.number().positive().finite().default(300),
  format: z.enum(['png', 'jpg']).default('png')
});
export type ConversionOptions = z.infer<typeof ConversionOptionsSchema>;
export type ConvertCommandConfig = z.infer<typeof ConvertCommandSchema>;
/**
 * Stricter configuration schema: `input` and `output` must be non-empty
 * strings, `dpi` must be a positive integer (default 300) and `format` must be
 * 'png' or 'jpg' (default 'png').
 */
export const ConfigSchema = z.object({
input: z.string().min(1),
output: z.string().min(1),
dpi: z.number().int().positive().default(300),
format: z.enum(['png', 'jpg']).default('png')
});
export type Config = z.infer<typeof ConfigSchema>;

View File

@ -3,16 +3,13 @@
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"lib": ["ES2022"],
"outDir": "dist",
"rootDir": "src",
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"declaration": true
"forceConsistentCasingInFileNames": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "**/*.test.ts"]
"exclude": ["node_modules", "dist"]
}

View File

@ -1,11 +0,0 @@
import { defineConfig } from 'tsup';
// tsup build configuration: two entry points (library index and CLI), ESM
// output only, with the dts, sourcemap and clean options enabled and
// splitting disabled.
export default defineConfig({
entry: ['src/index.ts', 'src/cli.ts'],
format: ['esm'],
dts: true,
splitting: false,
sourcemap: true,
clean: true,
});

View File

@ -1,11 +0,0 @@
import { defineConfig } from 'vitest/config';
// Vitest configuration: only coverage is customised, using the 'v8' provider
// with the text, json and html reporters.
export default defineConfig({
test: {
coverage: {
provider: 'v8',
reporter: ['text', 'json', 'html'],
},
},
});