70 lines
3.3 KiB
JavaScript
70 lines
3.3 KiB
JavaScript
import * as mupdf from 'mupdf';
|
|
import { Logger } from 'tslog';
|
|
import { dirname } from 'node:path';
|
|
import { resolveVariables } from '@polymech/commons';
|
|
import { sync as mkdir } from '@polymech/fs/dir';
|
|
import { writeFileSync } from 'node:fs';
|
|
import { Buffer } from 'node:buffer';
|
|
// Helper function to convert object-like image data to Buffer
|
|
/**
 * Converts byte-like image data into a Node.js Buffer.
 *
 * Two input shapes are supported:
 *  - a `Uint8Array` (including `Buffer`), which is copied directly;
 *  - a plain object whose own enumerable keys are decimal byte indices
 *    ("0", "1", ...) mapping to byte values.
 *
 * For the object form the resulting length is `maxIndex + 1`. Keys that are
 * not non-negative decimal integers are ignored, and any index that is absent
 * from the object is zero-filled.
 *
 * @param {Uint8Array|Object<string, number>} imageDataObject - Source bytes.
 * @returns {Buffer} A newly allocated Buffer holding a copy of the bytes.
 * @throws {TypeError} If `imageDataObject` is `null` or `undefined`.
 */
function imageDataObjectToBuffer(imageDataObject) {
  // Fast path: a typed array can be copied in one call instead of
  // materialising, sorting and re-parsing one string key per byte.
  if (imageDataObject instanceof Uint8Array) {
    return Buffer.from(imageDataObject);
  }

  // Collect only keys that are plain decimal indices. A stray key such as
  // "length" would otherwise turn into NaN and poison the length computation.
  const byteEntries = [];
  let bufferLength = 0;
  for (const key of Object.keys(imageDataObject)) {
    if (!/^\d+$/.test(key)) {
      continue;
    }
    const index = Number(key);
    byteEntries.push([index, imageDataObject[key]]);
    if (index + 1 > bufferLength) {
      bufferLength = index + 1;
    }
  }

  // Zero-filled allocation: with sparse input, allocUnsafe would leave
  // uninitialised process memory in the gaps and that would reach the file.
  const buffer = Buffer.alloc(bufferLength);
  for (const [index, value] of byteEntries) {
    buffer[index] = value;
  }
  return buffer;
}
|
|
/**
 * Renders a range of pages from a PDF to image files, one file per page.
 *
 * For every page in the range the output path is produced by resolving
 * `options.outputPathTemplate` with `options.baseVariables` plus a `PAGE`
 * variable (the 1-based page number as a string). The parent directory is
 * created before the file is written.
 *
 * @param {*} pdfData - PDF content, passed unchanged to
 *   `mupdf.Document.openDocument(pdfData, 'pdf')`.
 * @param {object} options
 * @param {object} [options.logger] - Logger exposing `info` and `error`;
 *   a new tslog `Logger` is created when omitted.
 * @param {number} [options.startPage=1] - First page to convert (1-based).
 * @param {number} [options.endPage] - Last page to convert (1-based,
 *   inclusive); defaults to the document's page count.
 * @param {object} [options.baseVariables] - Variables merged into every
 *   page's template variables.
 * @param {string} options.outputPathTemplate - Template for output paths.
 * @param {string} [options.format] - `'png'` writes PNG; any other value
 *   writes JPEG.
 * @returns {Promise<string[]>} Paths of the written files, in page order.
 * @throws {Error} If the page range is outside `1..pageCount` or
 *   `startPage > endPage`; any rendering or I/O error is logged and rethrown.
 */
export async function convertPdfToImages(pdfData, options) {
  const logger = options.logger || new Logger();
  const outputFiles = [];
  // Declared outside `try` so the `finally` clause can release it.
  let doc;

  try {
    doc = mupdf.Document.openDocument(pdfData, 'pdf');
    const pageCount = doc.countPages();

    // Validate and determine page range (adjusting for 0-based index)
    const start = (options.startPage ?? 1) - 1;
    const end = (options.endPage ?? pageCount) - 1;

    if (start < 0 || start >= pageCount) {
      throw new Error(`startPage (${options.startPage}) is out of valid range (1-${pageCount})`);
    }
    if (end < 0 || end >= pageCount) {
      throw new Error(`endPage (${options.endPage}) is out of valid range (1-${pageCount})`);
    }
    if (start > end) {
      // This should also be caught by Zod schema, but good to double-check
      throw new Error(`startPage (${options.startPage}) cannot be greater than endPage (${options.endPage})`);
    }

    const numPagesToProcess = end - start + 1;
    logger.info(`Processing pages ${start + 1} to ${end + 1} (${numPagesToProcess} pages) of ${pageCount} total`);

    for (let i = start; i <= end; i++) {
      const pageNumber = i + 1; // User-facing page number (1-based)

      // Page-specific variables: base variables plus PAGE (later key wins).
      const pageVariables = {
        ...options.baseVariables,
        PAGE: pageNumber.toString(),
      };

      // Resolve the output path using the template and page-specific variables
      const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables);

      let page;
      let pixmap;
      try {
        page = doc.loadPage(i);
        // Identity transform matrix, i.e. no scaling or rotation is applied.
        // NOTE(review): the trailing `false` is presumably "no alpha channel" — confirm.
        pixmap = page.toPixmap([1, 0, 0, 1, 0, 0], mupdf.ColorSpace.DeviceRGB, false);

        // NOTE(review): asJPEG(100, false) is presumably quality 100 — confirm.
        const imageData = options.format === 'png'
          ? pixmap.asPNG()
          : pixmap.asJPEG(100, false);

        mkdir(dirname(outputPath));
        writeFileSync(outputPath, imageDataObjectToBuffer(imageData));
      } finally {
        // Release per-page native handles promptly so a long page range does
        // not accumulate them. Guarded call: a no-op if `destroy` is absent.
        pixmap?.destroy?.();
        page?.destroy?.();
      }

      outputFiles.push(outputPath);
      logger.info(`Converted page ${pageNumber} to ${outputPath}`);
    }

    return outputFiles;
  } catch (error) {
    logger.error('Error converting PDF to images:', error);
    throw error;
  } finally {
    // Release the document on both the success and the failure path.
    doc?.destroy?.();
  }
}
|