From d2e960d231aebdbb4bfc61abf64e181d34e833e8 Mon Sep 17 00:00:00 2001 From: babayaga Date: Sun, 4 May 2025 17:32:46 +0200 Subject: [PATCH] CLI : scale --- .../ref/pdf-to-images/dist/lib/convert.js | 1 + .../content/ref/pdf-to-images/dist/lib/pdf.js | 12 ++++++++++-- packages/content/ref/pdf-to-images/dist/types.js | 4 +++- .../content/ref/pdf-to-images/src/lib/convert.ts | 1 + .../content/ref/pdf-to-images/src/lib/pdf.ts | 16 ++++++++++++++-- packages/content/ref/pdf-to-images/src/types.ts | 4 +++- 6 files changed, 32 insertions(+), 6 deletions(-) diff --git a/packages/content/ref/pdf-to-images/dist/lib/convert.js b/packages/content/ref/pdf-to-images/dist/lib/convert.js index 4c839d77..773afc49 100644 --- a/packages/content/ref/pdf-to-images/dist/lib/convert.js +++ b/packages/content/ref/pdf-to-images/dist/lib/convert.js @@ -109,6 +109,7 @@ export async function runConversion(config, logger) { outputPathTemplate, dpi: config.dpi, format: config.format, + scale: config.scale, startPage: config.startPage, endPage: config.endPage, logger diff --git a/packages/content/ref/pdf-to-images/dist/lib/pdf.js b/packages/content/ref/pdf-to-images/dist/lib/pdf.js index 362fe194..d1910577 100644 --- a/packages/content/ref/pdf-to-images/dist/lib/pdf.js +++ b/packages/content/ref/pdf-to-images/dist/lib/pdf.js @@ -41,6 +41,10 @@ export async function convertPdfToImages(pdfData, options) { } const numPagesToProcess = end - start + 1; logger.info(`Processing pages ${start + 1} to ${end + 1} (${numPagesToProcess} pages) of ${pageCount} total`); + // Determine the scaling matrix + const scaleValue = options.scale ?? 2; + const matrix = scaleValue === 1 ? 
mupdf.Matrix.identity : mupdf.Matrix.scale(scaleValue, scaleValue); + logger.info(`Using scale factor: ${scaleValue}`); for (let i = start; i <= end; i++) { const pageNumber = i + 1; // User-facing page number (1-based) // Create page-specific variables @@ -51,10 +55,14 @@ export async function convertPdfToImages(pdfData, options) { // Resolve the output path using the template and page-specific variables const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables); const page = doc.loadPage(i); - const pixmap = page.toPixmap([1, 0, 0, 1, 0, 0], mupdf.ColorSpace.DeviceRGB, false); + // Use the scaling matrix here + const pixmap = page.toPixmap(matrix, mupdf.ColorSpace.DeviceRGB, false); + // Note: DPI is implicitly handled by the scaling factor now. + // The pixmap dimensions are the page dimensions multiplied by scaleValue. + // We might want to remove the explicit DPI option later if it's confusing. const imageData = options.format === 'png' ? pixmap.asPNG() - : pixmap.asJPEG(100, false); + : pixmap.asJPEG(60, false); mkdir(dirname(outputPath)); writeFileSync(outputPath, imageDataObjectToBuffer(imageData)); outputFiles.push(outputPath); diff --git a/packages/content/ref/pdf-to-images/dist/types.js b/packages/content/ref/pdf-to-images/dist/types.js index e29259d3..fb55bf2f 100644 --- a/packages/content/ref/pdf-to-images/dist/types.js +++ b/packages/content/ref/pdf-to-images/dist/types.js @@ -4,6 +4,7 @@ export const ConvertCommandArgsSchema = z.object({ input: z.string().describe('Path to the input PDF file'), output: z.string().describe('Output path template (e.g., output/page_{PAGE}.png)').optional(), dpi: z.number().int().positive().default(300).describe('Resolution for the output images'), + scale: z.number().positive().default(2).describe('Scaling factor to apply before rendering (e.g., 2 for 2x size)').optional(), format: z.enum(['png', 'jpg']).default('png').describe('Output image format'), startPage: z.number().int().positive().describe('First
page to convert (1-based index)').optional(), endPage: z.number().int().positive().describe('Last page to convert (1-based index)').optional() @@ -20,10 +21,11 @@ export const ConvertCommandSchema = ConvertCommandArgsSchema format: data.format, startPage: data.startPage, endPage: data.endPage, + scale: data.scale, }; // Keep only extra properties (like var-*) const extras = Object.keys(data) - .filter(key => !['input', 'output', 'dpi', 'format', 'startPage', 'endPage', '_', '$0'].includes(key)) + .filter(key => !['input', 'output', 'dpi', 'format', 'startPage', 'endPage', 'scale', '_', '$0'].includes(key)) .reduce((acc, key) => { acc[key] = data[key]; return acc; }, {}); return { ...known, ...extras }; }) diff --git a/packages/content/ref/pdf-to-images/src/lib/convert.ts b/packages/content/ref/pdf-to-images/src/lib/convert.ts index 1c86be4d..3adcee16 100644 --- a/packages/content/ref/pdf-to-images/src/lib/convert.ts +++ b/packages/content/ref/pdf-to-images/src/lib/convert.ts @@ -119,6 +119,7 @@ export async function runConversion(config: ConvertCommandConfig, logger: Logger outputPathTemplate, dpi: config.dpi, format: config.format, + scale: config.scale, startPage: config.startPage, endPage: config.endPage, logger diff --git a/packages/content/ref/pdf-to-images/src/lib/pdf.ts b/packages/content/ref/pdf-to-images/src/lib/pdf.ts index 0bdd9983..6f684b82 100644 --- a/packages/content/ref/pdf-to-images/src/lib/pdf.ts +++ b/packages/content/ref/pdf-to-images/src/lib/pdf.ts @@ -31,6 +31,7 @@ export interface PdfToImageOptions { baseVariables: Record; outputPathTemplate: string; dpi: number; + scale?: number; format: ImageFormat; startPage?: number; endPage?: number; @@ -66,6 +67,12 @@ export async function convertPdfToImages( const numPagesToProcess = end - start + 1; logger.info(`Processing pages ${start + 1} to ${end + 1} (${numPagesToProcess} pages) of ${pageCount} total`); + // Determine the scaling matrix + const scaleValue = options.scale ?? 
2; + const matrix = scaleValue === 1 ? mupdf.Matrix.identity : mupdf.Matrix.scale(scaleValue, scaleValue); + + logger.info(`Using scale factor: ${scaleValue}`); + for (let i = start; i <= end; i++) { const pageNumber = i + 1; // User-facing page number (1-based) @@ -79,15 +86,20 @@ export async function convertPdfToImages( const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables); const page = doc.loadPage(i); + // Use the scaling matrix here const pixmap = page.toPixmap( - [1, 0, 0, 1, 0, 0], + matrix, mupdf.ColorSpace.DeviceRGB, false ); + // Note: DPI is implicitly handled by the scaling factor now. + // The pixmap dimensions are the page dimensions multiplied by scaleValue. + // We might want to remove the explicit DPI option later if it's confusing. + const imageData = options.format === 'png' ? pixmap.asPNG() - : pixmap.asJPEG(100, false); + : pixmap.asJPEG(60, false); mkdir(dirname(outputPath)); writeFileSync(outputPath, imageDataObjectToBuffer(imageData as any)) diff --git a/packages/content/ref/pdf-to-images/src/types.ts b/packages/content/ref/pdf-to-images/src/types.ts index 66fd601d..a0850fc2 100644 --- a/packages/content/ref/pdf-to-images/src/types.ts +++ b/packages/content/ref/pdf-to-images/src/types.ts @@ -5,6 +5,7 @@ export const ConvertCommandArgsSchema = z.object({ input: z.string().describe('Path to the input PDF file'), output: z.string().describe('Output path template (e.g., output/page_{PAGE}.png)').optional(), dpi: z.number().int().positive().default(300).describe('Resolution for the output images'), + scale: z.number().positive().default(2).describe('Scaling factor to apply before rendering (e.g., 2 for 2x size)').optional(), format: z.enum(['png', 'jpg']).default('png').describe('Output image format'), startPage: z.number().int().positive().describe('First page to convert (1-based index)').optional(), endPage: z.number().int().positive().describe('Last page to convert (1-based index)').optional() @@ -22,10 +23,11 @@ export
const ConvertCommandSchema = ConvertCommandArgsSchema format: data.format, startPage: data.startPage, endPage: data.endPage, + scale: data.scale, }; // Keep only extra properties (like var-*) const extras = Object.keys(data) - .filter(key => !['input', 'output', 'dpi', 'format', 'startPage', 'endPage', '_', '$0'].includes(key)) + .filter(key => !['input', 'output', 'dpi', 'format', 'startPage', 'endPage', 'scale', '_', '$0'].includes(key)) .reduce((acc, key) => { acc[key] = data[key]; return acc; }, {} as any); return { ...known, ...extras };