mupdf implementation
This commit is contained in:
parent
f7d1b8bdce
commit
b324473b65
@ -1,113 +0,0 @@
|
||||
# pdf-to-images
|
||||
|
||||
A TypeScript CLI tool and library for converting PDF files to images (PNG/JPEG) with ESM support.
|
||||
|
||||
## Features
|
||||
|
||||
- Convert PDF files to PNG or JPEG images
|
||||
- Select specific pages to convert
|
||||
- Adjustable image quality
|
||||
- ESM support
|
||||
- TypeScript types included
|
||||
- Command-line interface
|
||||
- Programmatic API
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# Using pnpm (recommended)
|
||||
pnpm add pdf-to-images
|
||||
|
||||
# Using npm
|
||||
npm install pdf-to-images
|
||||
|
||||
# Using yarn
|
||||
yarn add pdf-to-images
|
||||
```
|
||||
|
||||
## CLI Usage
|
||||
|
||||
```bash
|
||||
# Convert all pages to PNG (default)
|
||||
pdf-to-images convert --input input.pdf --output ./output
|
||||
|
||||
# Convert specific pages to JPEG
|
||||
pdf-to-images convert --input input.pdf --output ./output --format jpeg --pages "1,2,3"
|
||||
|
||||
# Convert with custom quality
|
||||
pdf-to-images convert --input input.pdf --output ./output --quality 80
|
||||
```
|
||||
|
||||
### CLI Options
|
||||
|
||||
- `--input, -i`: Input PDF file path (required)
|
||||
- `--output, -o`: Output directory path (required)
|
||||
- `--format, -f`: Output format ('png' or 'jpeg', default: 'png')
|
||||
- `--quality, -q`: Output image quality (1-100, default: 90)
|
||||
- `--pages, -p`: Pages to convert ("all" or comma-separated 1-based page numbers, e.g. "1,2,3"; default: "all")
|
||||
|
||||
## Programmatic Usage
|
||||
|
||||
```typescript
|
||||
import { convertPdfToImages } from 'pdf-to-images';
|
||||
|
||||
async function example() {
|
||||
// Convert all pages to PNG
|
||||
const files = await convertPdfToImages({
|
||||
inputPath: 'input.pdf',
|
||||
outputDir: './output',
|
||||
format: 'png',
|
||||
quality: 90,
|
||||
pages: 'all'
|
||||
});
|
||||
|
||||
console.log('Generated files:', files);
|
||||
|
||||
// Convert specific pages to JPEG
|
||||
const specificPages = await convertPdfToImages({
|
||||
inputPath: 'input.pdf',
|
||||
outputDir: './output',
|
||||
format: 'jpeg',
|
||||
quality: 85,
|
||||
pages: [0, 2, 4] // Convert pages 1, 3, and 5 (0-based indexing)
|
||||
});
|
||||
|
||||
console.log('Generated files:', specificPages);
|
||||
}
|
||||
```
|
||||
|
||||
### API Options
|
||||
|
||||
```typescript
|
||||
interface ConversionOptions {
|
||||
inputPath: string; // Path to input PDF file
|
||||
outputDir: string; // Path to output directory
|
||||
format?: 'png' | 'jpeg'; // Output format (default: 'png')
|
||||
quality?: number; // Output quality 1-100 (default: 90)
|
||||
pages?: number[] | 'all'; // Zero-based page indices to convert, or 'all' (default: 'all')
|
||||
}
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
pnpm install
|
||||
|
||||
# Build the project
|
||||
pnpm build
|
||||
|
||||
# Run tests
|
||||
pnpm test
|
||||
|
||||
# Run tests with coverage
|
||||
pnpm test:coverage
|
||||
|
||||
# Watch mode during development
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
ISC
|
||||
|
||||
57
packages/content/ref/pdf-to-images/dist/commands/convert.js
vendored
Normal file
57
packages/content/ref/pdf-to-images/dist/commands/convert.js
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
import { Logger } from 'tslog';
|
||||
import { ConvertCommandSchema } from '../types.js';
|
||||
import { convertPdfToImages } from '../lib/pdf.js';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { dirname } from 'node:path';
|
||||
import { mkdir, readFile } from 'node:fs/promises';
|
||||
export const command = 'convert';
|
||||
export const desc = 'Convert PDF to images';
|
||||
/**
 * yargs option definitions for the `convert` command.
 *
 * - `input`  (`-i`, required): path of the PDF file to read.
 * - `output` (`-o`, required): path prefix for the generated images. The
 *   command handler only creates the prefix's parent directory and the
 *   converter appends `_<page>.<format>` to the prefix, so this is not a
 *   directory name.
 * - `dpi`: requested output resolution, default 300.
 * - `format`: `png` or `jpg`, default `png`.
 */
export const builder = {
    input: {
        alias: 'i',
        type: 'string',
        description: 'Input PDF file',
        demandOption: true
    },
    output: {
        alias: 'o',
        type: 'string',
        // The value is used as a file-name prefix, not as a directory.
        description: 'Output path prefix for images (files are written as <prefix>_<page>.<format>)',
        demandOption: true
    },
    dpi: {
        type: 'number',
        description: 'DPI for output images',
        default: 300
    },
    format: {
        type: 'string',
        choices: ['png', 'jpg'],
        default: 'png',
        description: 'Output image format'
    }
};
|
||||
/**
 * yargs handler for the `convert` command.
 *
 * Validates the parsed arguments with `ConvertCommandSchema`, checks that the
 * input file exists, creates the parent directory of the output prefix, reads
 * the PDF into memory and passes it to `convertPdfToImages`.
 *
 * Any failure is logged and the process exits with status 1.
 *
 * @param argv Arguments object produced by yargs.
 */
export async function handler(argv) {
    const log = new Logger();
    try {
        const { input, output, dpi, format } = ConvertCommandSchema.parse(argv);
        if (existsSync(input) === false) {
            throw new Error(`Input file ${input} does not exist`);
        }
        // `output` is passed on as a path prefix, so only its parent directory is created.
        const parentDir = dirname(output);
        await mkdir(parentDir, { recursive: true });
        log.info(`Converting PDF ${input} to images...`);
        const pdfBytes = await readFile(input);
        const written = await convertPdfToImages(pdfBytes, {
            outputPathPrefix: output,
            dpi,
            format,
            logger: log
        });
        log.info('Conversion completed successfully');
        log.info(`Generated ${written.length} images`);
    }
    catch (failure) {
        log.error('Error during conversion:', failure);
        process.exit(1);
    }
}
|
||||
7
packages/content/ref/pdf-to-images/dist/index.js
vendored
Normal file
7
packages/content/ref/pdf-to-images/dist/index.js
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
// This file is an ES module (it uses `import`, and the package declares
// "type": "module"), where `require` is not defined. The command module is
// therefore loaded with a static import instead of `require(...)`.
import * as convertCommand from './commands/convert.js';

// CLI entry point: registers the `convert` command, requires that a command
// is given, rejects unknown commands/options, then parses the arguments.
yargs(hideBin(process.argv))
    .command(convertCommand)
    .demandCommand(1, 'You need to specify a command')
    .strict()
    .parse();
|
||||
28
packages/content/ref/pdf-to-images/dist/lib/pdf.js
vendored
Normal file
28
packages/content/ref/pdf-to-images/dist/lib/pdf.js
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
import * as mupdf from 'mupdf';
|
||||
import { Logger } from 'tslog';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
/**
 * Render every page of a PDF to an image file using MuPDF.
 *
 * Files are written to `${options.outputPathPrefix}_${pageNumber}.${options.format}`
 * with 1-based page numbers.
 *
 * @param pdfData Raw bytes of the PDF document.
 * @param options Conversion settings:
 *   - `outputPathPrefix`: path prefix of every generated file.
 *   - `dpi`: output resolution. A missing, non-finite or non-positive value
 *     falls back to 72, which is what the previous identity transform produced.
 *   - `format`: `'png'` writes PNG; any other value writes JPEG at quality 90.
 *   - `logger`: optional logger; a new `Logger` is created when omitted.
 * @returns The paths of the written files, in page order.
 * @throws Re-throws any error from MuPDF or the file system after logging it.
 */
export async function convertPdfToImages(pdfData, options) {
    const logger = options.logger || new Logger();
    const outputFiles = [];
    // MuPDF renders at 72 dpi under an identity transform, so the requested
    // resolution is applied as a uniform scale of dpi / 72.
    const BASE_DPI = 72;
    const dpi = Number.isFinite(options.dpi) && options.dpi > 0 ? options.dpi : BASE_DPI;
    const scale = dpi / BASE_DPI;
    const matrix = [scale, 0, 0, scale, 0, 0];
    let doc;
    try {
        doc = mupdf.Document.openDocument(pdfData, 'pdf');
        const pageCount = doc.countPages();
        logger.info(`Processing ${pageCount} pages`);
        for (let i = 0; i < pageCount; i++) {
            const page = doc.loadPage(i);
            let pixmap;
            try {
                pixmap = page.toPixmap(matrix, mupdf.ColorSpace.DeviceRGB, false);
                const outputPath = `${options.outputPathPrefix}_${i + 1}.${options.format}`;
                const imageData = options.format === 'png'
                    ? pixmap.asPNG()
                    : pixmap.asJPEG(90, false);
                await writeFile(outputPath, imageData);
                outputFiles.push(outputPath);
                logger.info(`Converted page ${i + 1} to ${outputPath}`);
            }
            finally {
                // Release the native objects now instead of waiting for
                // garbage collection; at high DPI each pixmap is large.
                if (pixmap) {
                    pixmap.destroy();
                }
                page.destroy();
            }
        }
        return outputFiles;
    }
    catch (error) {
        logger.error('Error converting PDF to images:', error);
        throw error;
    }
    finally {
        if (doc) {
            doc.destroy();
        }
    }
}
|
||||
13
packages/content/ref/pdf-to-images/dist/types.js
vendored
Normal file
13
packages/content/ref/pdf-to-images/dist/types.js
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
import { z } from 'zod';
|
||||
/**
 * Validation schema for the arguments of the `convert` command.
 *
 * Uses the same rules as `ConfigSchema`, so values that cannot produce a
 * usable conversion are rejected before any file is touched:
 * - `input` / `output`: non-empty strings.
 * - `dpi`: positive integer, default 300.
 * - `format`: `'png'` or `'jpg'`, default `'png'`.
 */
export const ConvertCommandSchema = z.object({
    input: z.string().min(1),
    output: z.string().min(1),
    dpi: z.number().int().positive().default(300),
    format: z.enum(['png', 'jpg']).default('png')
});
|
||||
/**
 * Validation schema for a conversion configuration.
 *
 * - `input` / `output`: non-empty strings.
 * - `dpi`: positive integer, default 300.
 * - `format`: `'png'` or `'jpg'`, default `'png'`.
 */
export const ConfigSchema = z.object({
    // Paths must contain at least one character.
    input: z.string().min(1),
    output: z.string().min(1),
    // Resolution must be a whole number greater than zero.
    dpi: z.number().int().positive().default(300),
    format: z.enum(['png', 'jpg']).default('png')
});
|
||||
273
packages/content/ref/pdf-to-images/package-lock.json
generated
Normal file
273
packages/content/ref/pdf-to-images/package-lock.json
generated
Normal file
@ -0,0 +1,273 @@
|
||||
{
|
||||
"name": "mu",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "mu",
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@types/yargs": "^17.0.33",
|
||||
"mupdf": "^1.3.3",
|
||||
"tslog": "^4.9.3",
|
||||
"typescript": "^5.8.2",
|
||||
"yargs": "^17.7.2",
|
||||
"zod": "^3.24.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.13.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "22.13.10",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.10.tgz",
|
||||
"integrity": "sha512-I6LPUvlRH+O6VRUqYOcMudhaIdUVWfsjnZavnsraHvpBwaEyMN29ry+0UVJhImYL16xsscu0aske3yA+uPOWfw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.20.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/yargs": {
|
||||
"version": "17.0.33",
|
||||
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz",
|
||||
"integrity": "sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/yargs-parser": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/yargs-parser": {
|
||||
"version": "21.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz",
|
||||
"integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ansi-regex": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-styles": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-convert": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cliui": {
|
||||
"version": "8.0.1",
|
||||
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
|
||||
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"string-width": "^4.2.0",
|
||||
"strip-ansi": "^6.0.1",
|
||||
"wrap-ansi": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/color-convert": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
|
||||
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-name": "~1.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/color-name": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
||||
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/get-caller-file": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
|
||||
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": "6.* || 8.* || >= 10.*"
|
||||
}
|
||||
},
|
||||
"node_modules/is-fullwidth-code-point": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/mupdf": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://registry.npmjs.org/mupdf/-/mupdf-1.3.3.tgz",
|
||||
"integrity": "sha512-uS/uqQZ1+3zSkaL5ngauT98o5gIIRtQPW54vYTVlqBYS0tho9TrjHr0RmAUJlp/XTJyghNhDUjD7l++EZkMyyA==",
|
||||
"license": "AGPL-3.0-or-later"
|
||||
},
|
||||
"node_modules/require-directory": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width": {
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
"strip-ansi": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-ansi": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/tslog": {
|
||||
"version": "4.9.3",
|
||||
"resolved": "https://registry.npmjs.org/tslog/-/tslog-4.9.3.tgz",
|
||||
"integrity": "sha512-oDWuGVONxhVEBtschLf2cs/Jy8i7h1T+CpdkTNWQgdAF7DhRo2G8vMCgILKe7ojdEkLhICWgI1LYSSKaJsRgcw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fullstack-build/tslog?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.8.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.2.tgz",
|
||||
"integrity": "sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.20.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz",
|
||||
"integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/wrap-ansi": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/y18n": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs": {
|
||||
"version": "17.7.2",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
||||
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cliui": "^8.0.1",
|
||||
"escalade": "^3.1.1",
|
||||
"get-caller-file": "^2.0.5",
|
||||
"require-directory": "^2.1.1",
|
||||
"string-width": "^4.2.3",
|
||||
"y18n": "^5.0.5",
|
||||
"yargs-parser": "^21.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs-parser": {
|
||||
"version": "21.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
|
||||
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.24.2",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz",
|
||||
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,52 +1,25 @@
|
||||
{
|
||||
"name": "pdf-to-images",
|
||||
"name": "mu",
|
||||
"version": "1.0.0",
|
||||
"description": "CLI tool to convert PDF files to images",
|
||||
"type": "module",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"bin": {
|
||||
"pdf-to-images": "./dist/cli.js"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"description": "",
|
||||
"main": "dist/index.js",
|
||||
"scripts": {
|
||||
"build": "tsup",
|
||||
"dev": "tsx watch src/cli.ts",
|
||||
"start": "node dist/cli.js",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest",
|
||||
"test:coverage": "vitest run --coverage"
|
||||
"build": "tsc",
|
||||
"start": "node dist/index.js"
|
||||
},
|
||||
"keywords": [
|
||||
"pdf",
|
||||
"images",
|
||||
"conversion",
|
||||
"cli"
|
||||
],
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"canvas": "^3.1.0",
|
||||
"pdf-img-convert": "^2.0.0",
|
||||
"sharp": "^0.33.2",
|
||||
"@types/yargs": "^17.0.33",
|
||||
"mupdf": "^1.3.3",
|
||||
"tslog": "^4.9.3",
|
||||
"typescript": "^5.8.2",
|
||||
"yargs": "^17.7.2",
|
||||
"zod": "^3.24.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.13.10",
|
||||
"@types/yargs": "^17.0.33",
|
||||
"@typescript-eslint/eslint-plugin": "^6.21.0",
|
||||
"@typescript-eslint/parser": "^6.21.0",
|
||||
"@vitest/coverage-v8": "^2.1.9",
|
||||
"tsup": "^8.4.0",
|
||||
"tsx": "^4.19.3",
|
||||
"typescript": "^5.8.2",
|
||||
"vitest": "^3.0.8"
|
||||
"@types/node": "^22.13.10"
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,203 +0,0 @@
|
||||
import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
|
||||
import { convertPdfToImages } from '../index.js';
|
||||
import { ConversionOptionsSchema } from '../types.js';
|
||||
import { mkdtemp, writeFile, rm, mkdir } from 'fs/promises';
|
||||
import { join, dirname, resolve } from 'path';
|
||||
import { existsSync } from 'fs';
|
||||
|
||||
// No mocks for sharp or pdf-img-convert as we want to test real file conversion
|
||||
|
||||
describe('convertPdfToImages', () => {
  let tempDir: string;
  const realPdfPath = resolve(__dirname, '../../tests/ifb.pdf');

  /**
   * Returns a known-valid options object with `overrides` applied on top.
   * Call it from inside a test only: `tempDir` is assigned in `beforeAll`.
   */
  const optionsWith = (overrides: Record<string, unknown> = {}): Record<string, unknown> => ({
    inputPath: realPdfPath,
    outputDir: resolve(tempDir, 'output'),
    format: 'png',
    quality: 90,
    pages: 'all',
    ...overrides,
  });

  /** Asserts that the schema accepts the base options plus `overrides`. */
  const expectAccepted = (overrides?: Record<string, unknown>): void => {
    expect(() => ConversionOptionsSchema.parse(optionsWith(overrides))).not.toThrow();
  };

  /** Asserts that the schema rejects the base options plus `overrides`. */
  const expectRejected = (overrides: Record<string, unknown>): void => {
    expect(() => ConversionOptionsSchema.parse(optionsWith(overrides))).toThrow();
  };

  /**
   * Converts the real test PDF to `format` and checks that the first file is
   * named as expected and that every reported file exists on disk.
   */
  const convertAndVerify = async (format: 'png' | 'jpeg'): Promise<void> => {
    const outputDir = resolve(tempDir, `output-ifb-${format}`);
    const outputFiles = await convertPdfToImages({
      inputPath: realPdfPath,
      outputDir,
      format,
      quality: 90,
      pages: 'all',
    });

    // Check number of files and naming
    expect(outputFiles.length).toBeGreaterThan(0);
    expect(outputFiles[0]).toBe(resolve(outputDir, `page-1.${format}`));

    // Check if files actually exist. The failure message is the second
    // argument of `expect`; `toBe` takes only the expected value.
    for (const file of outputFiles) {
      expect(existsSync(file), `Expected ${file} to exist`).toBe(true);
    }
  };

  beforeAll(async () => {
    // Verify the test PDF file exists before running tests
    if (!existsSync(realPdfPath)) {
      throw new Error(`Test PDF file not found: ${realPdfPath}. Make sure it exists before running tests.`);
    }

    // Create a temporary directory for tests under ./tests/.temp
    const testsDir = resolve(__dirname, '../../tests');
    tempDir = resolve(testsDir, '.temp');

    // Create the temp directory if it doesn't exist
    if (!existsSync(tempDir)) {
      await mkdir(tempDir, { recursive: true });
    }
  });

  afterAll(async () => {
    // Cleanup temporary directory and files
    if (tempDir && existsSync(tempDir)) {
      try {
        await rm(tempDir, { recursive: true, force: true });
      } catch (error) {
        console.error(`Error cleaning up temporary directory: ${error}`);
      }
    }
  });

  describe('options validation', () => {
    it('should validate basic conversion options', () => {
      expectAccepted();
    });

    it('should validate JPEG format', () => {
      expectAccepted({ format: 'jpeg' });
    });

    it('should validate specific page selection', () => {
      expectAccepted({ pages: [1, 2, 3] });
    });

    it('should validate quality range', () => {
      // Minimum quality
      expectAccepted({ quality: 1 });
      // Maximum quality
      expectAccepted({ quality: 100 });
    });

    it('should throw on invalid input path', () => {
      expectRejected({ inputPath: '' });
    });

    it('should throw on invalid format', () => {
      // 'gif' is not a supported format
      expectRejected({ format: 'gif' });
    });

    it('should throw on invalid quality value', () => {
      expectRejected({ quality: 0 });
      expectRejected({ quality: 101 });
    });

    it('should throw on invalid pages format', () => {
      // Not 'all' or an array
      expectRejected({ pages: 'some' });
    });
  });

  describe('real PDF file conversion', () => {
    it('should convert a real PDF file to PNG images', async () => {
      await convertAndVerify('png');
    });

    it('should convert a real PDF file to JPEG images', async () => {
      await convertAndVerify('jpeg');
    });
  });
});
|
||||
@ -1,67 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
import yargs from 'yargs';
|
||||
import { hideBin } from 'yargs/helpers';
|
||||
import { Logger } from 'tslog';
|
||||
import { convertPdfToImages } from './index.js';
|
||||
import { ConversionOptionsSchema } from './types.js';
|
||||
|
||||
const logger = new Logger();

// CLI definition: a single `convert` command that validates its arguments
// with `ConversionOptionsSchema` and passes them to `convertPdfToImages`.
yargs(hideBin(process.argv))
  .command(
    'convert',
    'Convert PDF to images',
    {
      input: {
        alias: 'i',
        type: 'string',
        description: 'Input PDF file path',
        demandOption: true,
      },
      output: {
        alias: 'o',
        type: 'string',
        description: 'Output directory path',
        demandOption: true,
      },
      format: {
        alias: 'f',
        type: 'string',
        choices: ['png', 'jpeg'],
        default: 'png',
        description: 'Output image format',
      },
      quality: {
        alias: 'q',
        type: 'number',
        description: 'Output image quality (1-100)',
        default: 90,
      },
      pages: {
        alias: 'p',
        type: 'string',
        description: 'Pages to convert (e.g., "1,2,3" or "all")',
        default: 'all',
      },
    },
    async (argv) => {
      try {
        // The CLI takes 1-based page numbers; the library works with
        // 0-based indices, so each number is shifted down by one.
        let pages: 'all' | number[];
        if (argv.pages === 'all') {
          pages = 'all';
        } else {
          const tokens = argv.pages.split(',');
          pages = tokens.map((token) => parseInt(token.trim(), 10) - 1);
        }

        const options = ConversionOptionsSchema.parse({
          inputPath: argv.input,
          outputDir: argv.output,
          format: argv.format,
          quality: argv.quality,
          pages,
        });

        const written = await convertPdfToImages(options);
        logger.info(`Successfully converted ${written.length} pages`);
        logger.info('Output files:', written);
      } catch (failure) {
        logger.error('Error:', failure);
        process.exit(1);
      }
    },
  )
  .help()
  .argv;
|
||||
|
||||
68
packages/content/ref/pdf-to-images/src/commands/convert.ts
Normal file
68
packages/content/ref/pdf-to-images/src/commands/convert.ts
Normal file
@ -0,0 +1,68 @@
|
||||
|
||||
import { Arguments } from 'yargs';
|
||||
import { Logger } from 'tslog';
|
||||
import { ConvertCommandSchema, ConvertCommandConfig } from '../types.js';
|
||||
import { convertPdfToImages } from '../lib/pdf.js';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { dirname } from 'node:path';
|
||||
import { mkdir, readFile } from 'node:fs/promises';
|
||||
|
||||
export const command = 'convert';
|
||||
export const desc = 'Convert PDF to images';
|
||||
|
||||
/**
 * yargs option definitions for the `convert` command.
 *
 * - `input`  (`-i`, required): path of the PDF file to read.
 * - `output` (`-o`, required): path prefix for the generated images. The
 *   command handler only creates the prefix's parent directory and the
 *   converter appends `_<page>.<format>` to the prefix, so this is not a
 *   directory name.
 * - `dpi`: requested output resolution, default 300.
 * - `format`: `png` or `jpg`, default `png`.
 */
export const builder = {
  input: {
    alias: 'i',
    type: 'string',
    description: 'Input PDF file',
    demandOption: true
  },
  output: {
    alias: 'o',
    type: 'string',
    // The value is used as a file-name prefix, not as a directory.
    description: 'Output path prefix for images (files are written as <prefix>_<page>.<format>)',
    demandOption: true
  },
  dpi: {
    type: 'number',
    description: 'DPI for output images',
    default: 300
  },
  format: {
    type: 'string',
    choices: ['png', 'jpg'],
    default: 'png',
    description: 'Output image format'
  }
};
|
||||
|
||||
/**
 * yargs handler for the `convert` command.
 *
 * Validates the parsed arguments with `ConvertCommandSchema`, checks that the
 * input file exists, creates the parent directory of the output prefix, reads
 * the PDF into memory and passes it to `convertPdfToImages`.
 *
 * Any failure is logged and the process exits with status 1.
 *
 * @param argv Arguments object produced by yargs.
 */
export async function handler(argv: Arguments): Promise<void> {
  const log = new Logger();

  try {
    const { input, output, dpi, format } = ConvertCommandSchema.parse(argv);

    if (existsSync(input) === false) {
      throw new Error(`Input file ${input} does not exist`);
    }

    // `output` is passed on as a path prefix, so only its parent directory is created.
    const parentDir = dirname(output);
    await mkdir(parentDir, { recursive: true });

    log.info(`Converting PDF ${input} to images...`);

    const pdfBytes = await readFile(input);
    const written = await convertPdfToImages(pdfBytes, {
      outputPathPrefix: output,
      dpi,
      format,
      logger: log
    });

    log.info('Conversion completed successfully');
    log.info(`Generated ${written.length} images`);
  } catch (failure) {
    log.error('Error during conversion:', failure);
    process.exit(1);
  }
}
|
||||
|
||||
@ -1,64 +1,8 @@
|
||||
import { convert } from 'pdf-img-convert';
|
||||
import { mkdir } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { Logger } from 'tslog';
|
||||
import sharp from 'sharp';
|
||||
import { ConversionOptions } from './types.js';
|
||||
|
||||
const logger = new Logger();
|
||||
|
||||
/**
 * Decodes a base64 image and writes it to disk in the requested format.
 *
 * @param imageData Base64 image data, optionally prefixed with a
 *   `data:image/png;base64,` or `data:image/jpeg;base64,` header.
 * @param outputPath Destination file path.
 * @param format Encoding of the written file.
 * @param quality JPEG quality passed to sharp; not used for PNG output.
 */
async function saveImage(imageData: string, outputPath: string, format: 'png' | 'jpeg', quality: number): Promise<void> {
  // Strip the data-URI header, if present, so only the base64 payload remains.
  const payload = imageData.replace(/^data:image\/(png|jpeg);base64,/, '');
  const pipeline = sharp(Buffer.from(payload, 'base64'));

  const encoder = format === 'jpeg' ? pipeline.jpeg({ quality }) : pipeline.png();
  await encoder.toFile(outputPath);
}
|
||||
/**
 * Converts a PDF file to one image file per requested page.
 *
 * Files are written to `<outputDir>/page-<n>.<format>`, where `<n>` is the
 * 1-based page number. The output directory is created if it does not exist.
 *
 * @param options Conversion options; `pages` is either `'all'` or a list of
 *   zero-based page indices.
 * @returns The paths of the written files, in processing order.
 * @throws Re-throws any conversion or file-system error after logging it.
 */
export async function convertPdfToImages(options: ConversionOptions): Promise<string[]> {
  const { inputPath, outputDir, format, quality, pages } = options;

  try {
    // Ensure output directory exists
    await mkdir(outputDir, { recursive: true });

    // Convert PDF to images
    const pdfPages = await convert(inputPath, {
      width: 2048, // Reasonable default width
      height: 2048, // Maintain aspect ratio
      quality: quality / 100, // Convert 1-100 range to 0-1
    });

    const pagesToProcess = pages === 'all'
      ? Array.from({ length: pdfPages.length }, (_, i) => i)
      : pages;

    const outputFiles: string[] = [];

    for (const pageNum of pagesToProcess) {
      // Skip anything that is not a valid index into `pdfPages`. Negative or
      // fractional values (e.g. CLI page "0", which becomes -1) would
      // otherwise reach `saveImage` as `undefined` and abort the whole run.
      const isValidIndex = Number.isInteger(pageNum) && pageNum >= 0 && pageNum < pdfPages.length;
      if (!isValidIndex) {
        logger.warn(`Page ${pageNum + 1} does not exist in the PDF`);
        continue;
      }

      const outputPath = join(outputDir, `page-${pageNum + 1}.${format}`);
      await saveImage(pdfPages[pageNum], outputPath, format, quality);
      logger.info(`Saved page ${pageNum + 1} to ${outputPath}`);
      outputFiles.push(outputPath);
    }

    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  }
}
|
||||
|
||||
export { ConversionOptions, ConversionOptionsSchema } from './types.js';
|
||||
import yargs from 'yargs';
|
||||
import { hideBin } from 'yargs/helpers';
|
||||
|
||||
// CLI entry point: register the convert command, require that a command is
// given, and reject unknown arguments.
// NOTE(review): `require` is not available in an ES module; confirm the
// module type this file is compiled and run as.
const cli = yargs(hideBin(process.argv))
  .command(require('./commands/convert.js'))
  .demandCommand(1, 'You need to specify a command')
  .strict();

// Accessing `argv` runs the parser.
cli.argv;
|
||||
|
||||
51
packages/content/ref/pdf-to-images/src/lib/pdf.ts
Normal file
51
packages/content/ref/pdf-to-images/src/lib/pdf.ts
Normal file
@ -0,0 +1,51 @@
|
||||
import * as mupdf from 'mupdf';
|
||||
import { Logger } from 'tslog';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
|
||||
/** Image encodings that the converter can emit. */
export type ImageFormat = 'png' | 'jpg';

/** Options accepted by {@link convertPdfToImages}. */
export interface PdfToImageOptions {
  /**
   * Prefix of every output file; the converter appends `_<page>.<format>`
   * with a 1-based page number.
   */
  outputPathPrefix: string;

  /** Requested output resolution in dots per inch. */
  dpi: number;

  /** Encoding of the written files; also used as the file extension. */
  format: ImageFormat;

  /** Optional logger; the converter creates its own when this is omitted. */
  logger?: Logger<any>;
}
|
||||
|
||||
/** PDF user space is defined in points, 72 per inch, so a scale of 1 renders at 72 dpi. */
const PDF_POINTS_PER_INCH = 72;

/**
 * Renders every page of a PDF to an image file using MuPDF.
 *
 * Pages are rendered in RGB without an alpha channel at `options.dpi` and
 * written to `<outputPathPrefix>_<page>.<format>`, where `<page>` is 1-based.
 *
 * @param pdfData - Raw bytes of the PDF document.
 * @param options - Output prefix, resolution, format and optional logger.
 * @returns Paths of the written image files, in page order.
 * @throws RangeError if `options.dpi` is not a finite positive number.
 * @throws Re-throws any MuPDF or file-system error after logging it.
 */
export async function convertPdfToImages(
  pdfData: Buffer,
  options: PdfToImageOptions
): Promise<string[]> {
  const logger = options.logger ?? new Logger<any>();
  const outputFiles: string[] = [];

  try {
    const { dpi, format, outputPathPrefix } = options;
    if (!Number.isFinite(dpi) || dpi <= 0) {
      throw new RangeError(`dpi must be a finite positive number, received ${dpi}`);
    }

    // Scale from PDF points to the requested resolution; the identity matrix
    // would always render at 72 dpi regardless of `dpi`.
    const scale = dpi / PDF_POINTS_PER_INCH;
    const renderMatrix = mupdf.Matrix.scale(scale, scale);

    const doc = mupdf.Document.openDocument(pdfData, 'pdf');
    try {
      const pageCount = doc.countPages();

      logger.info(`Processing ${pageCount} pages`);

      for (let i = 0; i < pageCount; i++) {
        const page = doc.loadPage(i);
        try {
          const pixmap = page.toPixmap(
            renderMatrix,
            mupdf.ColorSpace.DeviceRGB,
            false
          );
          try {
            const outputPath = `${outputPathPrefix}_${i + 1}.${format}`;
            const imageData = format === 'png'
              ? pixmap.asPNG()
              : pixmap.asJPEG(90, false);

            await writeFile(outputPath, imageData);
            outputFiles.push(outputPath);
            logger.info(`Converted page ${i + 1} to ${outputPath}`);
          } finally {
            // Release native pixel memory now instead of waiting for GC;
            // high-dpi pixmaps are large.
            pixmap.destroy();
          }
        } finally {
          page.destroy();
        }
      }

      return outputFiles;
    } finally {
      doc.destroy();
    }
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  }
}
|
||||
|
||||
@ -1,15 +1,22 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
export const ConversionOptionsSchema = z.object({
|
||||
inputPath: z.string(),
|
||||
outputDir: z.string(),
|
||||
format: z.enum(['png', 'jpeg']).default('png'),
|
||||
quality: z.number().min(1).max(100).default(90),
|
||||
pages: z.union([
|
||||
z.array(z.number()),
|
||||
z.literal('all')
|
||||
]).default('all'),
|
||||
import { z } from 'zod';
|
||||
import type { ImageFormat } from './lib/pdf.js';
|
||||
|
||||
/**
 * Validation schema for the convert command's options.
 *
 * - `input` / `output`: non-empty strings.
 * - `dpi`: positive integer, defaults to 300.
 * - `format`: `'png'` or `'jpg'`, defaults to `'png'`.
 *
 * The constraints mirror `ConfigSchema` so both schemas reject the same
 * invalid values (empty paths, zero/negative/fractional dpi).
 */
export const ConvertCommandSchema = z.object({
  input: z.string().min(1),
  output: z.string().min(1),
  dpi: z.number().int().positive().default(300),
  format: z.enum(['png', 'jpg']).default('png'),
});
|
||||
|
||||
export type ConversionOptions = z.infer<typeof ConversionOptionsSchema>;
|
||||
export type ConvertCommandConfig = z.infer<typeof ConvertCommandSchema>;
|
||||
|
||||
/**
 * Validation schema for a conversion configuration.
 *
 * - `input` / `output`: non-empty strings.
 * - `dpi`: positive integer, defaults to 300.
 * - `format`: `'png'` or `'jpg'`, defaults to `'png'`.
 */
export const ConfigSchema = z.object({
  input: z.string().min(1),
  output: z.string().min(1),
  dpi: z.number().int().positive().default(300),
  format: z.enum(['png', 'jpg']).default('png'),
});
|
||||
|
||||
export type Config = z.infer<typeof ConfigSchema>;
|
||||
|
||||
|
||||
@ -3,16 +3,13 @@
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true
|
||||
"forceConsistentCasingInFileNames": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "**/*.test.ts"]
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
import { defineConfig } from 'tsup';
|
||||
|
||||
// tsup build configuration: two entry points (library and CLI), ESM output.
export default defineConfig({
  // Inputs
  entry: ['src/index.ts', 'src/cli.ts'],

  // Output shape
  format: ['esm'],
  splitting: false,

  // Extra artifacts
  dts: true,
  sourcemap: true,

  // Housekeeping
  clean: true,
});
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
// Vitest configuration: only coverage settings are customised.
export default defineConfig({
  test: {
    coverage: {
      // Coverage provider and the report formats to emit.
      provider: 'v8',
      reporter: ['text', 'json', 'html'],
    },
  },
});
|
||||
|
||||
Loading…
Reference in New Issue
Block a user