sacktreten pro

This commit is contained in:
lovebird 2025-04-23 16:19:22 +02:00
parent 289130448d
commit 0762a888fd
55 changed files with 1407212 additions and 104 deletions

View File

@ -0,0 +1,21 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\dist\\index.js",
"preLaunchTask": "tsc: build - tsconfig.json",
"outFiles": [
"${workspaceFolder}/dist/**/*.js"
]
}
]
}

View File

@ -1,10 +1,8 @@
import { Logger } from 'tslog';
import { ConvertCommandSchema } from '../types.js';
import { convertPdfToImages } from '../lib/pdf.js';
import { existsSync } from 'node:fs';
import { dirname, sep, extname, basename } from 'node:path';
import { mkdir, readFile } from 'node:fs/promises';
import * as z from 'zod';
import { runConversion } from '../lib/convert.js';
export const command = 'convert';
export const desc = 'Convert PDF to images';
export const builder = {
@ -17,8 +15,7 @@ export const builder = {
output: {
alias: 'o',
type: 'string',
description: 'Output directory prefix for images',
demandOption: true
description: 'Output path pattern or directory. Variables like ${SRC_DIR}, ${PAGE} etc. are supported. Uses a default pattern if omitted.',
},
dpi: {
type: 'number',
@ -51,30 +48,9 @@ export async function handler(argv) {
if (!existsSync(config.input)) {
throw new Error(`Input file ${config.input} does not exist`);
}
// Ensure the full output directory path exists
// config.output is the prefix, e.g., "tests/e5dc/image"
// We need to create the directory part, e.g., "tests/e5dc/"
const outputDir = dirname(config.output);
// Check if output path itself ends with a separator or if the base name contains no extension
// This helps determine if the output path is intended as a directory.
const isOutputDir = config.output.endsWith(sep) || config.output.endsWith('/') || !extname(basename(config.output));
const dirToCreate = isOutputDir ? config.output : outputDir;
// Check if dirToCreate is not empty and not the root directory before creating
if (dirToCreate && dirToCreate !== '.' && dirToCreate !== '/' && dirToCreate !== sep) {
await mkdir(dirToCreate, { recursive: true });
logger.info(`Ensured output directory exists: ${dirToCreate}`);
}
logger.info(`Converting PDF ${config.input} to images...`);
const pdfData = await readFile(config.input);
const outputFiles = await convertPdfToImages(pdfData, {
outputPathPrefix: config.output,
dpi: config.dpi,
format: config.format,
startPage: config.startPage,
endPage: config.endPage,
logger
});
logger.info('Conversion completed successfully');
logger.info("Calling conversion library function...");
const outputFiles = await runConversion(config);
logger.info(`Conversion completed successfully`);
logger.info(`Generated ${outputFiles.length} images`);
}
catch (error) {
@ -83,7 +59,7 @@ export async function handler(argv) {
}
else {
const message = error instanceof Error ? error.message : String(error);
logger.error('Error during conversion:', message, error);
logger.error('Error during conversion command:', message, error);
}
process.exit(1);
}

View File

@ -0,0 +1,5 @@
/**
 * Default output path template when no output is specified.
 *
 * Placeholders are substituted once per page before an image is written:
 * - ${SRC_DIR}  - directory of the (resolved) input PDF path
 * - ${SRC_NAME} - input file name without its extension
 * - ${PAGE}     - 1-based page number
 * - ${FORMAT}   - output image format taken from the conversion options
 *
 * Variables: ${SRC_DIR}, ${SRC_NAME}, ${PAGE}, ${FORMAT}
 */
export const DEFAULT_OUTPUT_TEMPLATE = "${SRC_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";

View File

@ -10,6 +10,5 @@ const commandModule = {
yargs(hideBin(process.argv))
.command(commandModule)
.demandCommand(1, 'You need to specify a command')
.strict()
.help()
.parse();

View File

@ -0,0 +1,118 @@
import { Logger } from "tslog";
import { statSync } from "node:fs";
import { sep, resolve as pathResolve, parse as pathParse, relative as pathRelative } from "node:path";
import { readFile } from "node:fs/promises";
import { DEFAULT_ROOTS, DEFAULT_VARS, pathInfoEx } from "@polymech/commons";
import { convertPdfToImages } from "./pdf.js"; // Import the actual PDF conversion function
import { DEFAULT_OUTPUT_TEMPLATE } from "../constants.js"; // Import the constant
/**
 * Adds one variable per segment of `name` when it is split on `delimiter`,
 * keyed as `SRC_NAME<delimiter><index>` (e.g. `SRC_NAME-0`, `SRC_NAME_1`).
 * Nothing is added when `name` does not contain the delimiter.
 */
function addNameSegments(variables, name, delimiter) {
    const segments = name.split(delimiter);
    if (segments.length < 2) {
        return;
    }
    segments.forEach((segment, index) => {
        variables[`SRC_NAME${delimiter}${index}`] = segment;
    });
}
/**
 * Runs the PDF to images conversion process.
 * Generates variables, determines output path, reads PDF, and calls the conversion engine.
 *
 * Output handling:
 * - `config.output` is an existing directory, or does not exist but ends with a
 *   path separator: images are written to `${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}`.
 * - any other `config.output`: the value is used verbatim as the path template.
 * - no `config.output`: DEFAULT_OUTPUT_TEMPLATE is used.
 *
 * Any `var-*` keys on `config` are exposed as upper-cased template variables
 * (without the `var-` prefix) and take precedence over the generated ones.
 *
 * @param config - The conversion configuration options.
 * @returns A promise that resolves with an array of generated image file paths.
 */
export async function runConversion(config) {
    const logger = config.logger || new Logger();
    const inputPath = pathResolve(config.input);
    // The SRC_* basics come from path.parse and must be available even when
    // pathInfoEx fails, otherwise the output templates cannot be resolved.
    const parsed = pathParse(inputPath);
    const basicInfo = {
        SRC_DIR: parsed.dir,
        SRC_NAME: parsed.name,
        SRC_EXT: parsed.ext,
    };
    let srcInfo = basicInfo;
    try {
        srcInfo = { ...pathInfoEx(inputPath), ...basicInfo };
    }
    catch (e) {
        logger.warn("pathInfoEx not found or failed, using basic path.parse");
    }
    let baseVariables = {
        ...DEFAULT_ROOTS,
        ...DEFAULT_VARS({}),
        ...srcInfo,
        DPI: config.dpi,
        FORMAT: config.format,
    };
    if (baseVariables.ROOT && baseVariables.SRC_DIR) {
        baseVariables.SRC_REL = pathRelative(baseVariables.ROOT, baseVariables.SRC_DIR);
    }
    const srcName = baseVariables.SRC_NAME || '';
    for (const delimiter of ['-', '.', '_']) {
        addNameSegments(baseVariables, srcName, delimiter);
    }
    // Process var-* arguments directly from config object passed in
    const cliVars = {};
    for (const key of Object.keys(config)) {
        if (key.startsWith('var-')) {
            cliVars[key.replace('var-', '').toUpperCase()] = config[key];
        }
    }
    // Uppercase base variable keys, then let CLI variables override them
    const upperCased = {};
    for (const key of Object.keys(baseVariables)) {
        upperCased[key.toUpperCase()] = baseVariables[key];
    }
    baseVariables = { ...upperCased, ...cliVars };
    let outputPathTemplate;
    if (config.output) {
        const outputPath = pathResolve(config.output);
        let isExplicitDir;
        try {
            isExplicitDir = statSync(outputPath).isDirectory();
        }
        catch (e) {
            // The path cannot be stat'ed (typically it does not exist yet):
            // a trailing separator marks it as a directory, anything else is a pattern.
            isExplicitDir = config.output.endsWith(sep) || config.output.endsWith("/");
        }
        if (isExplicitDir) {
            baseVariables["OUT_DIR"] = outputPath;
            outputPathTemplate = "${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";
            logger.info(`Output directory specified: ${outputPath}`);
        }
        else {
            outputPathTemplate = config.output;
            logger.info(`Using output path pattern: ${outputPathTemplate}`);
        }
    }
    else {
        // Use default pattern directly from constant
        outputPathTemplate = DEFAULT_OUTPUT_TEMPLATE;
        logger.info(`Using default output path pattern: ${outputPathTemplate}`);
    }
    // --- Read PDF and call the conversion engine ---
    logger.info(`Reading PDF: ${config.input}`);
    const pdfData = await readFile(inputPath);
    logger.info(`Starting conversion process...`);
    return convertPdfToImages(pdfData, {
        baseVariables,
        outputPathTemplate,
        dpi: config.dpi,
        format: config.format,
        startPage: config.startPage,
        endPage: config.endPage,
        logger
    });
}

View File

@ -1,6 +1,7 @@
import * as mupdf from 'mupdf';
import { Logger } from 'tslog';
import { writeFile } from 'node:fs/promises';
import { resolveVariables } from '@polymech/commons';
import { sync as write } from '@polymech/fs/write';
export async function convertPdfToImages(pdfData, options) {
const logger = options.logger || new Logger();
const outputFiles = [];
@ -24,13 +25,19 @@ export async function convertPdfToImages(pdfData, options) {
logger.info(`Processing pages ${start + 1} to ${end + 1} (${numPagesToProcess} pages) of ${pageCount} total`);
for (let i = start; i <= end; i++) {
const pageNumber = i + 1; // User-facing page number (1-based)
// Create page-specific variables
const pageVariables = {
...options.baseVariables,
PAGE: pageNumber.toString()
};
// Resolve the output path using the template and page-specific variables
const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables);
const page = doc.loadPage(i);
const pixmap = page.toPixmap([1, 0, 0, 1, 0, 0], mupdf.ColorSpace.DeviceRGB, false);
const outputPath = `${options.outputPathPrefix}_${pageNumber}.${options.format}`;
const imageData = options.format === 'png'
? pixmap.asPNG()
: pixmap.asJPEG(100, false);
await writeFile(outputPath, imageData);
write(outputPath, imageData);
outputFiles.push(outputPath);
logger.info(`Converted page ${pageNumber} to ${outputPath}`);
}

View File

@ -1,7 +1,7 @@
import { z } from 'zod';
export const ConvertCommandSchema = z.object({
input: z.string(),
output: z.string(),
output: z.string().optional(),
dpi: z.number().int().positive().default(300),
format: z.enum(['png', 'jpg']).default('png'),
startPage: z.number().int().positive().optional(),

View File

@ -9,13 +9,16 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@polymech/commons": "file:../../../commons",
"@polymech/fs": "file:../../../fs",
"@types/yargs": "^17.0.33",
"mupdf": "^1.3.3",
"p-map": "^7.0.3",
"tslog": "^4.9.3",
"typescript": "^5.8.2",
"vitest": "^3.1.1",
"yargs": "^17.7.2",
"zod": "^3.24.2"
"zod": "^3.24.3"
},
"bin": {
"pdf-to-images": "dist/index.js"
@ -24,6 +27,64 @@
"@types/node": "^22.13.10"
}
},
"../../../commons": {
"name": "@polymech/commons",
"version": "0.2.6",
"license": "BSD",
"dependencies": {
"@polymech/core": "file:../core",
"@polymech/fs": "file:../fs",
"@repo/typescript-config": "file:../typescript-config",
"@schemastore/package": "^0.0.10",
"env-var": "^7.5.0",
"glob": "^10.4.5",
"js-yaml": "^4.1.0",
"jsonpath-plus": "^10.3.0",
"normalize-url": "^8.0.1",
"p-map": "^7.0.3",
"p-throttle": "^4.1.1",
"tslog": "^3.3.3",
"tsup": "^2.0.3",
"yargs": "^17.7.2",
"zod": "^3.24.2",
"zod-to-json-schema": "^3.24.1",
"zod-to-ts": "^1.2.0"
},
"devDependencies": {
"@types/node": "^22.12.0",
"typescript": "^5.7.3"
}
},
"../../../fs": {
"name": "@polymech/fs",
"version": "0.13.41",
"license": "BSD-3-Clause",
"dependencies": {
"@polymech/core": "file:../core",
"@repo/typescript-config": "file:../typescript-config",
"denodeify": "^1.2.1",
"glob": "^10.4.1",
"mime": "^2.0.3",
"minimatch": "^10.0.1",
"mkdirp": "^3.0.1",
"q": "^1.4.1",
"rimraf": "^6.0.1",
"write-file-atomic": "^6.0.0",
"yargs": "^17.7.2"
},
"devDependencies": {
"@types/denodeify": "^1.2.31",
"@types/mime": "^2.0.0",
"@types/node": "^22.10.2",
"fs-extra": "^4.0.2",
"globals": "^15.14.0",
"ts-node": "^10.9.1",
"typescript": "^5.7.2"
},
"engines": {
"node": ">= 8.0.0"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.25.2",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.2.tgz",
@ -430,6 +491,14 @@
"integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==",
"license": "MIT"
},
"node_modules/@polymech/commons": {
"resolved": "../../../commons",
"link": true
},
"node_modules/@polymech/fs": {
"resolved": "../../../fs",
"link": true
},
"node_modules/@rollup/rollup-android-arm-eabi": {
"version": "4.40.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.40.0.tgz",
@ -1122,6 +1191,18 @@
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
}
},
"node_modules/p-map": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.3.tgz",
"integrity": "sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==",
"license": "MIT",
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/pathe": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz",
@ -1606,9 +1687,9 @@
}
},
"node_modules/zod": {
"version": "3.24.2",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz",
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
"version": "3.24.3",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz",
"integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"

View File

@ -11,7 +11,8 @@
"build": "tsc",
"start": "node dist/index.js",
"test:pdf": "node dist/index.js convert -i tests/e5dc.pdf -o tests/out/e5dc/ --startPage 3 --endPage 5",
"test:basic": "vitest run"
"test:basic": "vitest run",
"test:variables": "vitest run tests/cli/variables.test.ts"
},
"keywords": [
"pdf",
@ -23,13 +24,16 @@
"license": "ISC",
"type": "module",
"dependencies": {
"@polymech/commons": "file:../../../commons",
"@polymech/fs": "file:../../../fs",
"@types/yargs": "^17.0.33",
"mupdf": "^1.3.3",
"p-map": "^7.0.3",
"tslog": "^4.9.3",
"typescript": "^5.8.2",
"vitest": "^3.1.1",
"yargs": "^17.7.2",
"zod": "^3.24.2"
"zod": "^3.24.3"
},
"devDependencies": {
"@types/node": "^22.13.10"

View File

@ -0,0 +1,97 @@
# PDF to Markdown Integration
This directory contains the necessary setup and guidance for integrating the `pdf2markdown` tool from the [opendatalab/PDF-Extract-Kit](https://github.com/opendatalab/PDF-Extract-Kit/tree/main/project/pdf2markdown) repository.
## Setup Instructions
1. **Clone the Repository:** Clone the `PDF-Extract-Kit` repository into a suitable location (e.g., a `vendor` directory or similar within this project, or manage it as a git submodule).
```bash
# Example: Cloning into a vendor directory
git clone https://github.com/opendatalab/PDF-Extract-Kit.git ../../vendor/PDF-Extract-Kit
# Or using a submodule
# git submodule add https://github.com/opendatalab/PDF-Extract-Kit.git vendor/PDF-Extract-Kit
```
2. **Install Python Dependencies:** The `pdf2markdown` tool relies on several Python libraries. You need to have Python installed (check the repository for specific version requirements, likely Python 3.x). Navigate to the cloned repository directory, set up a virtual environment, and install the required packages. The repository doesn't seem to have a top-level `requirements.txt`, so you will need to install dependencies based on the components used (YOLOv8, UniMERNet, StructEqTable, PaddleOCR), piecing the requirements together from the individual components or from setup instructions in the `PDF-Extract-Kit` documentation, if available.
```bash
# Navigate to the cloned repo (adjust path as needed)
cd ../../vendor/PDF-Extract-Kit
# Create a virtual environment (recommended)
python -m venv venv
source venv/bin/activate # On Windows use `venv\Scripts\activate`
# Install common dependencies (this is a guess, refer to PDF-Extract-Kit docs for specifics)
# You'll likely need libraries for YOLO, OCR (PaddleOCR), etc.
# pip install -r requirements.txt # Look for requirements files in subdirectories if they exist
# Example: Install PaddleOCR (check their docs for CPU/GPU versions)
# pip install paddlepaddle paddleocr
# You will need to research and install the specific dependencies for YOLOv8,
# UniMERNet, and StructEqTable as used by this project.
```
3. **Configuration:** The tool uses a YAML configuration file (`project/pdf2markdown/configs/pdf2markdown.yaml`). You might need to adjust paths or settings within this file, especially if models need to be downloaded or paths to resources are specific to your environment.
## Usage from TypeScript CLI
You can execute the Python script from your TypeScript code using Node.js's `child_process` module.
```typescript
import { execFile } from 'child_process';
import path from 'path';
/**
 * Runs the PDF-Extract-Kit `pdf2markdown` script in a child process.
 *
 * The interpreter is started with `execFile` and an argument array (no shell),
 * so paths containing spaces or shell metacharacters are passed through safely.
 *
 * @param pdfFilePath        Path of the PDF to convert.
 * @param outputMarkdownPath Path of the Markdown file to produce.
 * @returns A promise that resolves when the script exits successfully and
 *          rejects with the child-process error otherwise.
 */
async function convertPdfToMarkdown(pdfFilePath: string, outputMarkdownPath: string): Promise<void> {
  // Adjust these paths based on where you cloned the repo and the location of this script
  const repoRoot = path.resolve(__dirname, '../../vendor/PDF-Extract-Kit'); // Example path
  const scriptPath = path.join(repoRoot, 'project/pdf2markdown/scripts/run_project.py');
  const configPath = path.join(repoRoot, 'project/pdf2markdown/configs/pdf2markdown.yaml');
  const pythonExecutable = path.join(repoRoot, 'venv/bin/python'); // Or venv\Scripts\python.exe on Windows, or just 'python' if in PATH
  // Construct the argument list
  // IMPORTANT: You'll need to modify the run_project.py script or its config
  // to accept input PDF path and output MD path as arguments, or handle
  // input/output in a way that suits your CLI (e.g., reading config, environment variables).
  // The current script seems to rely solely on the config file.
  // For now, let's assume you modify the config file or the script handles it.
  // You might need to dynamically update the config file before running.
  // Placeholder arguments - need refinement based on how run_project.py handles I/O
  const args = [scriptPath, '--config', configPath, '--input', pdfFilePath, '--output', outputMarkdownPath]; // Hypothetical arguments
  console.log(`Executing: ${pythonExecutable} ${args.join(' ')}`);
  return new Promise((resolve, reject) => {
    execFile(pythonExecutable, args, (error, stdout, stderr) => {
      if (error) {
        console.error(`Error executing pdf2markdown: ${error.message}`);
        console.error(`Stderr: ${stderr}`);
        reject(error);
        return;
      }
      console.log(`Stdout: ${stdout}`);
      if (stderr) {
        console.warn(`Stderr: ${stderr}`); // Log stderr even on success, as it might contain warnings
      }
      resolve();
    });
  });
}
// Example usage in your CLI command:
// const inputPdf = 'path/to/your/input.pdf';
// const outputMd = 'path/to/your/output.md';
// convertPdfToMarkdown(inputPdf, outputMd)
// .then(() => console.log('PDF converted to Markdown successfully.'))
// .catch(err => console.error('Conversion failed:', err));
```
## Important Considerations
* **Dependency Management:** Managing Python dependencies within a TypeScript project can be complex. Consider using Docker to encapsulate the Python environment or ensuring clear setup steps for developers.
* **Script Modification:** The provided `run_project.py` script seems tailored to use its YAML config file directly. You will likely need to modify this Python script (or the way it's called) to accept input PDF file paths and desired output Markdown file paths as command-line arguments for seamless integration into your CLI.
* **Error Handling:** Robust error handling is crucial. The Python script might fail for various reasons (invalid PDF, missing dependencies, model errors). Ensure your TypeScript wrapper handles errors from the child process gracefully.
* **Performance:** Executing a Python process involves overhead. For high-throughput scenarios, explore potential optimizations or alternative libraries.
* **Model Downloads:** The underlying models (YOLO, etc.) might require downloading large files during the first run or setup. Account for this in your setup instructions and potentially during the first execution from your CLI.

View File

@ -0,0 +1,142 @@
import * as path from 'path'
import { sync as exists } from "@polymech/fs/exists"
import { sync as read } from "@polymech/fs/read"
import { resolve, isFile } from '@polymech/commons'
import { substitute } from '@polymech/commons'
import { IResizeOptions } from '../types'
/**
 * Reads the file at `path` through `read(path, 'buffer')` and returns the result
 * typed as a Buffer. When `read` yields a falsy value, a placeholder buffer
 * containing the single character "-" is returned instead.
 *
 * Note: the `path` parameter shadows the `path` module import inside this function.
 * NOTE(review): presumably `read` returns a falsy value for missing/unreadable
 * files - confirm against @polymech/fs.
 */
export const fileAsBuffer = (path: string) => read(path, 'buffer') as Buffer || Buffer.from("-")
/**
 * Creates a shallow copy of `obj`.
 *
 * Primitives, `null` and `undefined` are returned unchanged. Arrays are copied
 * into a new array; any other object is copied into a new object that shares
 * the original's prototype. Only own enumerable string-keyed properties are
 * copied, and nested values are shared with the original (not deep-cloned).
 *
 * The copy is created from the prototype instead of calling `obj.constructor()`,
 * so null-prototype objects and class instances no longer throw.
 */
const clone = (obj) => {
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }
    const copy = Array.isArray(obj) ? [] : Object.create(Object.getPrototypeOf(obj));
    for (const key of Object.keys(obj)) {
        copy[key] = obj[key];
    }
    return copy;
}
/**
 * Computes the absolute output path(s) for a single source file.
 *
 * - Glob destination (`options.dstInfo.IS_GLOB`): one path per entry in
 *   `options.dstInfo.GLOB_EXTENSIONS`, built from the substituted `DST_PATH`
 *   with the glob part and destination extension stripped.
 * - Otherwise: exactly one path, either the source path itself (no `DST_PATH`)
 *   or the substituted `DST_PATH`, extended with the source file name when the
 *   result has no extension.
 *
 * @param f       Source file path.
 * @param options Resize options providing `variables`, `srcInfo`, `dstInfo` and `alt`.
 * @returns Absolute target paths.
 */
export const targets = (f: string, options: IResizeOptions) => {
    const srcParts = path.parse(f)
    const variables = clone(options.variables || {})
    const resolved: string[] = []
    const rel = path.relative(options.srcInfo.DIR, srcParts.dir)

    if (!options.dstInfo.IS_GLOB) {
        let single = ''
        if (variables.DST_PATH) {
            variables.SRC_NAME = srcParts.name
            variables.SRC_DIR = srcParts.dir
            single = substitute(options.alt, variables.DST_PATH, variables)
            // Only an existing source file keeps the substituted path as-is
            if (!(isFile(f) && exists(f))) {
                single = path.join(single, srcParts.base)
            }
            if (!path.parse(single).ext) {
                single = path.join(single, srcParts.base)
            }
        } else {
            single = path.join(srcParts.dir, srcParts.base)
        }
        resolved.push(path.resolve(resolve(single, options.alt, variables)))
        return resolved
    }

    options.dstInfo.GLOB_EXTENSIONS.forEach((ext) => {
        const suffix = '.' + ext
        variables.SRC_NAME = srcParts.name
        variables.SRC_DIR = srcParts.dir
        let out: string = substitute(options.alt, variables.DST_PATH, variables)
        out = out.replace(variables.DST_GLOB, '')
        if (variables.DST_FILE_EXT) {
            out = out.replace('.' + variables.DST_FILE_EXT, '')
        }
        const outParts = path.parse(out)
        // back compat: a bare '*' destination name contributes nothing
        if (variables.DST_NAME === '*') {
            variables.DST_NAME = ''
        }
        if (!outParts.ext) {
            const mirrorSourceTree = variables.DST_PATH.indexOf(`{SRC_NAME}`) === -1
            const tail = variables.DST_NAME.replace(variables.DST_GLOB, '') + suffix
            out = mirrorSourceTree
                ? path.join(out, rel, srcParts.name + tail)
                : out + tail
        }
        // src.base contains dots, so the extension may still be missing
        if (!out.endsWith(suffix)) {
            out += suffix
        }
        resolved.push(path.resolve(out))
    })
    return resolved
}
/**
 * Computes the absolute output path(s) for a single source file.
 *
 * This function was a line-for-line duplicate of `targets` (plus one redundantly
 * repeated `SRC_DIR` assignment), so it now delegates to it; both entry points
 * therefore always stay in agreement.
 *
 * @param f       Source file path.
 * @param options Resize options providing `variables`, `srcInfo`, `dstInfo` and `alt`.
 * @returns Absolute target paths, exactly as returned by `targets`.
 */
export const targetsNext = (f: string, options: IResizeOptions) => targets(f, options)

View File

@ -0,0 +1,167 @@
import * as path from 'path'
import * as pMap from 'p-map'
import * as sharp from 'sharp'
import { sync as exists } from "@polymech/fs/exists"
import { async as move } from "@polymech/fs/move"
import { sync as dir } from "@polymech/fs/dir"
import { createItem as toNode } from "@polymech/fs/inspect"
import {
logger,
ERR_PERM_RETRY_DELAY,
ERR_PERM_RETRY_MAX,
IOptions,
IResizeOptions
} from '../../../index'
import {
meta
} from './lib'
import {
targets,
targetsNext
} from '../..'
/**
 * Resizes one image with sharp and writes the result to `target`.
 *
 * When `source` and `target` resolve to the same path, the output is first
 * written to a `<name>_tmp<ext>` sibling and then moved over the original.
 * The move is retried on `EPERM` up to `ERR_PERM_RETRY_MAX` times, waiting
 * `ERR_PERM_RETRY_DELAY * retry` ms between attempts.
 *
 * @param source  Path of the image to read.
 * @param target  Path of the image to write.
 * @param onNode  Callback invoked with the sharp instance right after it is created.
 * @param options Resize options (width / height / percent, min* thresholds, fit, ...).
 * @returns The sharp instance (returned untouched when a min* threshold skips the
 *          resize or no size option is given), or `undefined` when reading,
 *          validating, writing or moving the file failed.
 */
export const resizeFile = async (source: string, target: string, onNode: (data: sharp.Sharp) => void = () => { }, options: IResizeOptions): Promise<sharp.Sharp | undefined> => {
    const targetOri = '' + target
    let inPlace = false
    if (path.resolve(source) === path.resolve(target)) {
        // Same file: write to a temporary sibling and move it into place afterwards
        const parts = path.parse(target)
        target = path.join(parts.dir, parts.name + '_tmp' + parts.ext)
        inPlace = true
    }
    let image: sharp.Sharp
    try {
        image = sharp(source)
    } catch (e) {
        logger.error(`Error reading file, ${source}`, e)
        return
    }
    onNode(image)
    const metaData: any = await meta(source, image) || {}
    const percent = options.percent
    const dstParts = path.parse(target)
    const node = toNode(source, {
        size: true,
        mime: true
    })
    if (!exists(dstParts.dir)) {
        dir(dstParts.dir)
    }
    if (options.width && options.minWidth && options.width <= options.minWidth) {
        logger.error(`Error resizing : options.width <= options.minWidth`)
        return
    }
    // Skip images that are already at or below the configured minimum dimensions / size
    if (metaData.width && options.width && options.minWidth) {
        if (metaData.width <= options.minWidth) {
            return image
        }
    }
    if (metaData.height && options.height && options.minHeight) {
        if (metaData.height <= options.minHeight) {
            return image
        }
    }
    if (options.minSize && node.size && options.minSize >= node.size) {
        return image
    }
    const resizeOptions = {
        height: options.height,
        fastShrinkOnLoad: options.fastShrinkOnLoad,
        withoutEnlargement: options.withoutEnlargement,
        withoutReduction: options.withoutReduction,
        fit: options.fit,
        position: options.position,
        background: options.background || 'white'
    }
    if (percent && metaData.width) {
        image = image.resize({
            width: Math.round(metaData.width * (percent / 100)),
            ...resizeOptions
        })
    } else if (options.width || options.height) {
        image = image.resize({
            width: options.width,
            ...resizeOptions
        })
    } else {
        logger.error(`Error resizing, invalid options for ${source} - no width, height or percent`)
        return image
    }
    if (dstParts.ext.toLowerCase() === '.webp' ||
        dstParts.ext.toLowerCase() === '.png') {
        image = image.rotate()
    }
    if (metaData.width) {
        // NOTE(review): unlike the branch below, a write failure here propagates to the caller.
        await image.withMetadata().toFile(target)
    } else {
        try {
            await image.toFile(target)
        } catch (e) {
            logger.error(`Error writing file out, ${source}`, e)
            return
        }
    }
    if (inPlace) {
        // Move the temporary file over the original. Every attempt is awaited so the
        // function never returns while a retry is still pending.
        let moved = false
        for (let retry = 0; retry <= ERR_PERM_RETRY_MAX; retry++) {
            try {
                await move(target, targetOri)
                moved = true
                break
            } catch (e) {
                if (e.code !== 'EPERM') {
                    logger.error(`Error moving file ${target} to ${targetOri}`, e)
                    return
                }
                logger.warn(`Error moving file out, retry ${source}`, e)
                await new Promise((done) => setTimeout(done, ERR_PERM_RETRY_DELAY * retry))
            }
        }
        if (!moved) {
            logger.error(`Error moving file failed, max retries reached ${source}`)
            return
        }
    }
    logger.debug(`Resized Image ${source} to ${targetOri}`)
    return image
}
/**
 * Resizes `file` into every path listed in `targets`, one target at a time.
 * In dry mode (`options.dry`) each target is only logged and yields `undefined`.
 *
 * @returns The per-target results of `resizeFile`, in the order of `targets`.
 */
export const _resize = async (file, targets: string[], onNode: (data: any) => void = () => { }, options: IOptions) => {
    const resizeOne = async (target: string) => {
        logger.debug(`Resizing ${file} to ${target}`)
        if (!options.dry) {
            return resizeFile(file, target, onNode, options);
        }
    }
    return pMap(targets, resizeOne, { concurrency: 1 })
}
/**
 * Entry point: resizes every file listed in `options.srcInfo.FILES`, sequentially.
 *
 * The logger's minimum level is set from `options.logLevel` (default 'info').
 *
 * @returns The nested per-file / per-target results when `options.srcInfo` is set;
 *          otherwise an error is logged and the (empty) reports array is returned.
 */
export const resize = async (options: IResizeOptions) => {
    const reports: any = []
    logger.setSettings({ minLevel: options.logLevel || 'info' as any })
    const onNode = (data: any) => reports.push(data)

    if (!options.srcInfo) {
        logger.error(`Invalid source info`)
        return reports
    }

    logger.debug(`Convert ${options.srcInfo.FILES.length} files`)
    const results = await pMap(options.srcInfo.FILES, async (f) => {
        const outputs = targetsNext(f, options)
        logger.debug(`Convert ${f} to `, outputs)
        return _resize(f, outputs, onNode, options)
    }, { concurrency: 1 })
    return results
}

View File

@ -0,0 +1,64 @@
import * as path from 'node:path'
import { pathInfoEx } from '@polymech/commons'
import { DEFAULT_ROOTS, DEFAULT_VARS } from '@polymech/commons'
/**
 * Builds the template-variable map for a task.
 *
 * The map starts from `model`, `router`, `baseURL`, `DEFAULT_ROOTS` and
 * `DEFAULT_VARS({})`. When exactly one `include` entry is given, SRC_* variables
 * derived from that path are added: SRC_NAME, SRC_DIR, SRC_EXT, SRC_REL (relative
 * to ROOT, when the defaults provide one) and one `SRC_NAME<delim><index>` entry per
 * segment of the name for each of the delimiters `-`, `.` and `_`.
 * All of these keys are upper-cased; `var-*` options are then merged on top with
 * the `var-` prefix removed and their case left untouched.
 *
 * @param options Task options; `var-*` keys are treated as CLI-supplied variables.
 * @returns The merged variable map.
 */
export const variables = (options: IKBotTask) => {
    const { model, router,baseURL } = options
    let ret = {
        model,
        router,
        baseURL,
        ...DEFAULT_ROOTS,
        ...DEFAULT_VARS({})
    }

    if (options?.include?.length === 1) {
        const [include] = options.include
        // NOTE(review): the result is unused; the call is kept in case callers rely on
        // pathInfoEx throwing for invalid input - confirm and remove if not.
        const { } = pathInfoEx(include)
        const srcParts = path.parse(include)
        const srcVariables: Record<string, string> = {
            SRC_NAME: srcParts.name,
            SRC_DIR: srcParts.dir,
            SRC_EXT: srcParts.ext
        }
        // ROOT comes from the merged defaults; the freshly created srcVariables never has one.
        const root = (ret as Record<string, any>).ROOT
        if (typeof root === 'string' && root) {
            srcVariables.SRC_REL = path.relative(root, srcParts.dir)
        }
        for (const delimiter of ['-', '.', '_']) {
            const segments = srcParts.name.split(delimiter)
            if (segments.length > 1) {
                segments.forEach((segment, i) => {
                    srcVariables[`SRC_NAME${delimiter}${i}`] = segment
                })
            }
        }
        ret = { ...ret, ...srcVariables }
    }

    // CLI argv variables
    const cliVariables = Object.assign({}, ...Object.keys(options).filter((k) => k.startsWith('var-')).map((k) => {
        return {
            [k.replace('var-', '')]: options[k]
        }
    }))

    ret = Object.keys(ret).reduce((acc, key) => {
        acc[key.toUpperCase()] = ret[key];
        return acc;
    }, {});

    return { ...ret, ...cliVariables }
}

View File

@ -1,12 +1,11 @@
import { Arguments } from 'yargs';
import { Logger } from 'tslog';
import { ConvertCommandSchema, ConvertCommandConfig } from '../types.js';
import { convertPdfToImages } from '../lib/pdf.js';
import { existsSync } from 'node:fs';
import { dirname, sep, extname, basename } from 'node:path';
import { mkdir, readFile } from 'node:fs/promises';
import { resolve as pathResolve } from 'node:path';
import * as z from 'zod';
import type { Options } from 'yargs';
import { runConversion, IRunConversionOptions } from '../lib/convert.js';
export const command = 'convert';
export const desc = 'Convert PDF to images';
@ -21,8 +20,7 @@ export const builder: { [key: string]: Options } = {
output: {
alias: 'o',
type: 'string',
description: 'Output directory prefix for images',
demandOption: true
description: 'Output path pattern or directory. Variables like ${SRC_DIR}, ${PAGE} etc. are supported. Uses a default pattern if omitted.',
},
dpi: {
type: 'number',
@ -54,46 +52,22 @@ export async function handler(argv: Arguments<ConvertCommandConfig>): Promise<vo
try {
const config = ConvertCommandSchema.parse(argv);
if (!existsSync(config.input)) {
throw new Error(`Input file ${config.input} does not exist`);
}
// Ensure the full output directory path exists
// config.output is the prefix, e.g., "tests/e5dc/image"
// We need to create the directory part, e.g., "tests/e5dc/"
const outputDir = dirname(config.output);
// Check if output path itself ends with a separator or if the base name contains no extension
// This helps determine if the output path is intended as a directory.
const isOutputDir = config.output.endsWith(sep) || config.output.endsWith('/') || !extname(basename(config.output));
const dirToCreate = isOutputDir ? config.output : outputDir;
logger.info("Calling conversion library function...");
const outputFiles = await runConversion(config as IRunConversionOptions);
// Check if dirToCreate is not empty and not the root directory before creating
if (dirToCreate && dirToCreate !== '.' && dirToCreate !== '/' && dirToCreate !== sep) {
await mkdir(dirToCreate, { recursive: true });
logger.info(`Ensured output directory exists: ${dirToCreate}`);
}
logger.info(`Converting PDF ${config.input} to images...`);
const pdfData = await readFile(config.input);
const outputFiles = await convertPdfToImages(pdfData, {
outputPathPrefix: config.output,
dpi: config.dpi,
format: config.format,
startPage: config.startPage,
endPage: config.endPage,
logger
});
logger.info('Conversion completed successfully');
logger.info(`Conversion completed successfully`);
logger.info(`Generated ${outputFiles.length} images`);
} catch (error) {
if (error instanceof z.ZodError) {
logger.error('Invalid arguments:', error.flatten());
} else {
const message = error instanceof Error ? error.message : String(error);
logger.error('Error during conversion:', message, error);
logger.error('Error during conversion command:', message, error);
}
process.exit(1);
}

View File

@ -0,0 +1,5 @@
/**
 * Default output path template when no output is specified.
 * Variables: ${SRC_DIR}, ${SRC_NAME}, ${PAGE}, ${FORMAT}
 *
 * This is a plain double-quoted string, not a JavaScript template literal, so
 * the `${...}` placeholders are kept verbatim here and only substituted later
 * by the variable resolver, once per page. SRC_DIR and SRC_NAME come from the
 * parsed input path, FORMAT from the requested image format, and PAGE is the
 * 1-based page number set by the conversion loop.
 */
export const DEFAULT_OUTPUT_TEMPLATE = "${SRC_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";

View File

@ -14,6 +14,5 @@ const commandModule: CommandModule<{}, ConvertCommandConfig> = {
yargs(hideBin(process.argv))
.command(commandModule)
.demandCommand(1, 'You need to specify a command')
.strict()
.help()
.parse();

View File

@ -0,0 +1,141 @@
import { Logger } from "tslog";
import { statSync } from "node:fs";
import { sep, resolve as pathResolve, parse as pathParse, relative as pathRelative } from "node:path";
import { readFile } from "node:fs/promises";
import { DEFAULT_ROOTS, DEFAULT_VARS, pathInfoEx } from "@polymech/commons";
import { convertPdfToImages } from "./pdf.js"; // Import the actual PDF conversion function
import { DEFAULT_OUTPUT_TEMPLATE } from "../constants.js"; // Import the constant
/**
 * Configuration accepted by `runConversion`.
 *
 * Similar to the CLI's ConvertCommandConfig, but tailored for library use:
 * `output` is optional and arbitrary extra keys are tolerated so that
 * `var-*` options can be forwarded untouched.
 */
export interface IRunConversionOptions {
/** Path of the PDF to convert; resolved against the current working directory to derive SRC_* variables. */
input: string;
/**
 * Output directory or output path template. An existing directory, or a value
 * ending in a path separator, is treated as a directory; any other value is
 * used verbatim as the template. When omitted, the default template is used.
 */
output?: string;
/** Rendering resolution; also exposed to templates as the DPI variable. */
dpi: number;
/** Image format to write; also exposed to templates as the FORMAT variable. */
format: "png" | "jpg";
/** Forwarded unchanged to the PDF converter. NOTE(review): presumably a 1-based first page — confirm against lib/pdf. */
startPage?: number;
/** Forwarded unchanged to the PDF converter. NOTE(review): presumably a 1-based last page — confirm against lib/pdf. */
endPage?: number;
/** Logger to report progress on; a fresh tslog Logger is created when omitted. */
logger?: Logger<any>;
/** Extra keys; those starting with `var-` become template variables (prefix removed, name upper-cased). */
[key: string]: any; // Allow other properties like var-*
}
/**
 * Runs the PDF to images conversion process.
 *
 * Steps, in order:
 *  1. Build the template variables: DEFAULT_ROOTS, DEFAULT_VARS({}), the
 *     source-path info (pathInfoEx merged with path.parse), DPI and FORMAT.
 *     Derived entries are added for SRC_REL and for each fragment of the
 *     source name split on '-', '.' and '_' (e.g. `SRC_NAME-0`, `SRC_NAME_1`).
 *     All keys are upper-cased, then `var-*` options are layered on top.
 *  2. Decide the output template: an existing directory (or a value ending in
 *     a path separator) yields `${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}`; any
 *     other `output` value is used verbatim; no `output` selects
 *     DEFAULT_OUTPUT_TEMPLATE.
 *  3. Read the PDF and delegate rendering to `convertPdfToImages`.
 *
 * @param config - The conversion configuration options.
 * @returns A promise that resolves with an array of generated image file paths.
 * @throws Propagates errors from reading the input file and from `convertPdfToImages`.
 */
export async function runConversion(config: IRunConversionOptions): Promise<string[]> {
  const logger = config.logger || new Logger<any>();
  const inputPath = pathResolve(config.input);

  // path.parse is computed up front, outside the try, so the fallback announced
  // in the warning below really happens: SRC_DIR / SRC_NAME / SRC_EXT are
  // available even when pathInfoEx throws.
  const parsed = pathParse(inputPath);
  const parsedInfo: Record<string, any> = {
    SRC_DIR: parsed.dir,
    SRC_NAME: parsed.name,
    SRC_EXT: parsed.ext,
  };
  let srcInfo: Record<string, any> = parsedInfo;
  try {
    // path.parse values win over whatever pathInfoEx reports for the same keys.
    srcInfo = { ...pathInfoEx(inputPath), ...parsedInfo };
  } catch (e) {
    logger.warn("pathInfoEx not found or failed, using basic path.parse");
  }

  let baseVariables: Record<string, any> = {
    ...DEFAULT_ROOTS,
    ...DEFAULT_VARS({}),
    ...srcInfo,
    DPI: config.dpi,
    FORMAT: config.format,
  };

  if (baseVariables.ROOT && baseVariables.SRC_DIR) {
    baseVariables.SRC_REL = pathRelative(baseVariables.ROOT, baseVariables.SRC_DIR);
  }

  // Expose the fragments of the source name, e.g. "a-b" -> SRC_NAME-0 = "a",
  // SRC_NAME-1 = "b". Nothing is added for a separator that does not occur.
  const srcName = baseVariables.SRC_NAME || '';
  for (const separator of ['-', '.', '_']) {
    const fragments: string[] = srcName.split(separator);
    if (fragments.length > 1) {
      for (let i = 0; i < fragments.length; i++) {
        baseVariables[`SRC_NAME${separator}${i}`] = fragments[i];
      }
    }
  }

  // Process var-* arguments directly from config object passed in
  const cliVars: Record<string, any> = {};
  for (const key of Object.keys(config)) {
    if (key.startsWith('var-')) {
      cliVars[key.replace('var-', '').toUpperCase()] = config[key];
    }
  }

  // Uppercase base variable keys, then let CLI-provided variables override them.
  const upperCased: Record<string, any> = {};
  for (const key of Object.keys(baseVariables)) {
    upperCased[key.toUpperCase()] = baseVariables[key];
  }
  baseVariables = { ...upperCased, ...cliVars };

  let outputPathTemplate: string;
  if (config.output) {
    const outputPath = pathResolve(config.output);
    let isExplicitDir: boolean;
    try {
      isExplicitDir = statSync(outputPath).isDirectory();
    } catch (e: any) {
      // The path cannot be stat'ed (typically it does not exist yet): a trailing
      // separator is the only remaining hint that a directory was meant.
      isExplicitDir = config.output.endsWith(sep) || config.output.endsWith("/");
    }

    if (isExplicitDir) {
      baseVariables["OUT_DIR"] = outputPath;
      outputPathTemplate = "${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";
      logger.info(`Output directory specified: ${outputPath}`);
    } else {
      // Used verbatim (not resolved) so that ${...} placeholders survive untouched.
      outputPathTemplate = config.output;
      logger.info(`Using output path pattern: ${outputPathTemplate}`);
    }
  } else {
    // Use default pattern directly from constant
    outputPathTemplate = DEFAULT_OUTPUT_TEMPLATE;
    logger.info(`Using default output path pattern: ${outputPathTemplate}`);
  }

  // --- Read PDF and Call Conversion (moved from commands/convert.ts) ---
  logger.info(`Reading PDF: ${config.input}`);
  const pdfData = await readFile(config.input);

  logger.info(`Starting conversion process...`);
  return convertPdfToImages(pdfData, {
    baseVariables,
    outputPathTemplate,
    dpi: config.dpi,
    format: config.format,
    startPage: config.startPage,
    endPage: config.endPage,
    logger
  });
}

View File

@ -1,11 +1,15 @@
import * as mupdf from 'mupdf';
import { Logger } from 'tslog';
import { writeFile } from 'node:fs/promises';
import * as mupdf from 'mupdf'
import { Logger } from 'tslog'
import { writeFile, mkdir } from 'node:fs/promises'
import { dirname } from 'node:path'
import { resolveVariables, pathInfoEx } from '@polymech/commons'
import { sync as write } from '@polymech/fs/write'
export type ImageFormat = 'png' | 'jpg';
export interface PdfToImageOptions {
outputPathPrefix: string;
baseVariables: Record<string, any>;
outputPathTemplate: string;
dpi: number;
format: ImageFormat;
startPage?: number;
@ -44,6 +48,16 @@ export async function convertPdfToImages(
for (let i = start; i <= end; i++) {
const pageNumber = i + 1; // User-facing page number (1-based)
// Create page-specific variables
const pageVariables: Record<string, string> = {
...options.baseVariables,
PAGE: pageNumber.toString()
};
// Resolve the output path using the template and page-specific variables
const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables);
const page = doc.loadPage(i);
const pixmap = page.toPixmap(
[1, 0, 0, 1, 0, 0],
@ -51,12 +65,11 @@ export async function convertPdfToImages(
false
);
const outputPath = `${options.outputPathPrefix}_${pageNumber}.${options.format}`;
const imageData = options.format === 'png'
? pixmap.asPNG()
: pixmap.asJPEG(100, false);
await writeFile(outputPath, imageData);
write(outputPath, imageData)
outputFiles.push(outputPath);
logger.info(`Converted page ${pageNumber} to ${outputPath}`);
}

View File

@ -3,7 +3,7 @@ import type { ImageFormat } from './lib/pdf.js';
export const ConvertCommandSchema = z.object({
input: z.string(),
output: z.string(),
output: z.string().optional(),
dpi: z.number().int().positive().default(300),
format: z.enum(['png', 'jpg']).default('png'),
startPage: z.number().int().positive().optional(),

Binary file not shown.

View File

@ -4,19 +4,30 @@ import { describe, it, expect, vi, beforeEach, Mock, beforeAll } from 'vitest';
import type { ConvertCommandConfig } from '../../src/types.js';
import type { Arguments } from 'yargs';
import { Buffer } from 'node:buffer';
import path from 'path';
// --- Define Mock Functions ---
const mockConvertPdfToImagesFn = vi.fn();
const mockExistsSync = vi.fn();
const mockStatSync = vi.fn();
const mockReadFile = vi.fn();
const mockMkdir = vi.fn();
const mockDirname = vi.fn();
const mockBasename = vi.fn();
const mockExtname = vi.fn();
const mockResolve = vi.fn();
const mockParse = vi.fn();
const mockRelative = vi.fn();
const mockLoggerInfo = vi.fn();
const mockLoggerError = vi.fn();
const mockProcessExit = vi.spyOn(process, 'exit').mockImplementation((() => {}) as any);
// Mocks for @polymech/commons
const mockResolveVariables = vi.fn();
const mockPathInfoEx = vi.fn();
const mockDEFAULT_ROOTS = { CWD: '/test/cwd', SCRIPT_DIR: '/test/script' };
const mockDEFAULT_VARS = vi.fn().mockReturnValue({ SOME_DEFAULT: 'value' });
// Use beforeAll for mocks
beforeAll(() => {
// Mock dependencies using vi.doMock
@ -25,6 +36,7 @@ beforeAll(() => {
}));
vi.doMock('node:fs', () => ({
existsSync: mockExistsSync,
statSync: mockStatSync,
}));
vi.doMock('node:fs/promises', () => ({
readFile: mockReadFile,
@ -34,6 +46,9 @@ beforeAll(() => {
dirname: mockDirname,
basename: mockBasename,
extname: mockExtname,
resolve: mockResolve,
parse: mockParse,
relative: mockRelative,
sep: '/',
}));
vi.doMock('tslog', () => ({
@ -42,6 +57,13 @@ beforeAll(() => {
error: mockLoggerError,
})),
}));
// Mock @polymech/commons
vi.doMock('@polymech/commons', () => ({
resolveVariables: mockResolveVariables,
pathInfoEx: mockPathInfoEx,
DEFAULT_ROOTS: mockDEFAULT_ROOTS,
DEFAULT_VARS: mockDEFAULT_VARS,
}));
});
// --- Test Suite ---
@ -56,14 +78,14 @@ describe('Convert Command CLI Handler', () => {
});
// --- Helper Function to Run Handler ---
async function runHandlerHelper(args: Partial<ConvertCommandConfig & { _: (string | number)[], $0: string }>) {
async function runHandlerHelper(args: Partial<ConvertCommandConfig & { _: (string | number)[], $0: string, output?: string }>) {
const fullArgs = {
_: ['convert'],
$0: 'test',
dpi: 300,
format: 'png',
...args,
} as Arguments<ConvertCommandConfig>;
} as Arguments<ConvertCommandConfig & {output?: string}>;
// Make sure handler is loaded before calling
if (!convertHandler) throw new Error('Handler not loaded');
await convertHandler(fullArgs);
@ -76,29 +98,219 @@ describe('Convert Command CLI Handler', () => {
mockExistsSync.mockReturnValue(true);
mockReadFile.mockResolvedValue(Buffer.from('fake-pdf-data'));
mockMkdir.mockResolvedValue(undefined);
mockDirname.mockImplementation((p) => p.substring(0, p.lastIndexOf('/') > 0 ? p.lastIndexOf('/') : p.length));
// Mock path functions more robustly
mockDirname.mockImplementation((p) => {
if (!p || p === '/') return '/';
const lastSlash = p.lastIndexOf('/');
if (lastSlash === -1) return '.'; // No slash, return current dir indicator
if (lastSlash === 0) return '/'; // Root directory
return p.substring(0, lastSlash);
});
mockBasename.mockImplementation((p) => p.substring(p.lastIndexOf('/') > 0 ? p.lastIndexOf('/') + 1 : 0));
mockExtname.mockImplementation((p) => {
const lastSlash = p.lastIndexOf('/');
const dotIndex = p.lastIndexOf('.');
return dotIndex > 0 ? p.substring(dotIndex) : '';
return dotIndex > (lastSlash > -1 ? lastSlash : -1) ? p.substring(dotIndex) : '';
});
// Improved mockResolve to handle absolute/relative paths based on /test/cwd
mockResolve.mockImplementation((...paths) => {
let currentPath = '/test/cwd'; // Assume CWD
for (const p of paths) {
if (path.isAbsolute(p)) { // Use actual path.isAbsolute for check
currentPath = p;
} else {
currentPath = path.join(currentPath, p); // Use actual path.join
}
}
// Normalize (e.g., remove //, resolve ..)
return path.normalize(currentPath).replace(/\\/g, '/');
});
mockParse.mockImplementation((p) => ({
root: '/',
dir: mockDirname(p),
base: mockBasename(p),
ext: mockExtname(p),
name: mockBasename(p, mockExtname(p)),
}));
mockRelative.mockImplementation((from, to) => to.startsWith(from) ? to.substring(from.length + 1) : to);
// Reset @polymech/commons mocks
mockResolveVariables.mockImplementation(async (template, _bool, vars) => template.replace(/\${(.*?)}/g, (_, key) => vars[key] ?? 'UNDEFINED'));
mockPathInfoEx.mockImplementation((p) => ({
ROOT: '/test/cwd',
SRC_DIR: mockDirname(p),
SRC_NAME: mockBasename(p, mockExtname(p)),
SRC_EXT: mockExtname(p),
}));
mockStatSync.mockImplementation((p) => { throw new Error('File not found'); });
mockProcessExit.mockClear();
});
// --- Test cases ---
it('should call convertPdfToImages with correct args', async () => {
it('should call convertPdfToImages with correct default args when output is omitted', async () => {
const args = {
input: 'pdfs/document.pdf',
};
// Setup mocks for this case
mockExistsSync.mockReturnValueOnce(true); // Explicitly mock for this input
mockResolve.mockImplementation((p) => p.startsWith('/') ? p : `/test/cwd/${p}`);
const expectedSrcDir = '/test/cwd/pdfs';
const expectedSrcName = 'document';
mockPathInfoEx.mockReturnValue({
ROOT: '/test/cwd',
SRC_DIR: expectedSrcDir,
SRC_NAME: expectedSrcName,
SRC_EXT: '.pdf'
});
await runHandlerHelper(args);
expect(mockExistsSync).toHaveBeenCalledWith(args.input);
expect(mockReadFile).toHaveBeenCalledWith(args.input);
expect(mockMkdir).toHaveBeenCalledWith(expectedSrcDir, { recursive: true });
expect(mockConvertPdfToImagesFn).toHaveBeenCalledTimes(1);
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), {
baseVariables: expect.objectContaining({
SRC_DIR: expectedSrcDir,
SRC_NAME: expectedSrcName,
FORMAT: 'png',
DPI: 300,
SOME_DEFAULT: 'value',
CWD: mockDEFAULT_ROOTS.CWD
}),
outputPathTemplate: `${'${SRC_DIR}'}/${'${SRC_NAME}'}_${'${PAGE}'}.${'${FORMAT}'}`,
dpi: 300,
format: 'png',
startPage: undefined,
endPage: undefined,
logger: expect.anything(),
});
expect(mockProcessExit).not.toHaveBeenCalled();
});
it('should use custom output path template when provided', async () => {
const args = {
input: 'in.pdf',
output: 'images/custom_${SRC_NAME}_page${PAGE}.${FORMAT}',
};
mockExistsSync.mockReturnValueOnce(true); // Explicitly mock for this input
mockResolve.mockImplementation((p) => p.startsWith('/') ? p : `/test/cwd/${p}`);
mockPathInfoEx.mockReturnValue({
ROOT: '/test/cwd',
SRC_DIR: '/test/cwd',
SRC_NAME: 'in',
SRC_EXT: '.pdf'
});
const expectedPatternDir = '/test/cwd/images';
// Ensure dirname mock works for the expected resolved path
// mockDirname.mockImplementation((p) => p === '/test/cwd/images/custom_in_pageUNDEFINED.png' ? expectedPatternDir : '/'); // Old complex mock removed, rely on general mock
await runHandlerHelper(args);
expect(mockMkdir).toHaveBeenCalledWith(expectedPatternDir, { recursive: true });
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), expect.objectContaining({
outputPathTemplate: args.output,
baseVariables: expect.objectContaining({ SRC_NAME: 'in' }),
}));
expect(mockProcessExit).not.toHaveBeenCalled();
});
it('should handle output path as a directory', async () => {
const args = {
input: 'some/path/doc.pdf',
output: 'output/images/',
};
const resolvedOutputDir = '/test/cwd/output/images';
mockResolve.mockImplementation((p) => p === args.output ? resolvedOutputDir : p );
mockStatSync.mockImplementation((p) => {
if (p === resolvedOutputDir) {
return { isDirectory: () => true };
}
throw new Error('Not found');
});
mockPathInfoEx.mockReturnValue({
ROOT: '/test/cwd',
SRC_DIR: '/test/cwd/some/path',
SRC_NAME: 'doc',
SRC_EXT: '.pdf'
});
await runHandlerHelper(args);
expect(mockMkdir).toHaveBeenCalledWith(resolvedOutputDir, { recursive: true });
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), expect.objectContaining({
outputPathTemplate: `${'${OUT_DIR}'}/${'${SRC_NAME}'}_${'${PAGE}'}.${'${FORMAT}'}`,
baseVariables: expect.objectContaining({
OUT_DIR: resolvedOutputDir,
SRC_NAME: 'doc',
}),
}));
expect(mockProcessExit).not.toHaveBeenCalled();
});
it('should handle output path that looks like a directory (ends with /) but doesnt exist yet', async () => {
const args = {
input: 'other.pdf',
output: 'new_dir/',
};
const resolvedOutputDir = '/test/cwd/new_dir';
mockResolve.mockImplementation((p) => p === args.output ? resolvedOutputDir : p );
mockStatSync.mockImplementation((p) => { throw new Error('Not found'); });
mockPathInfoEx.mockReturnValue({
ROOT: '/test/cwd',
SRC_DIR: '/test/cwd',
SRC_NAME: 'other',
SRC_EXT: '.pdf'
});
await runHandlerHelper(args);
expect(mockMkdir).toHaveBeenCalledWith(resolvedOutputDir, { recursive: true });
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), expect.objectContaining({
outputPathTemplate: `${'${OUT_DIR}'}/${'${SRC_NAME}'}_${'${PAGE}'}.${'${FORMAT}'}`,
baseVariables: expect.objectContaining({
OUT_DIR: resolvedOutputDir,
SRC_NAME: 'other',
}),
}));
expect(mockProcessExit).not.toHaveBeenCalled();
});
it('should call convertPdfToImages with specific args', async () => {
const args = {
input: 'input.pdf',
output: 'output/prefix',
dpi: 150,
format: 'jpg',
format: 'jpg' as const,
startPage: 2,
endPage: 5,
} as const;
};
mockResolve.mockImplementation((p) => p.startsWith('/') ? p : `/test/cwd/${p}`);
const expectedPatternDir = '/test/cwd/output';
mockDirname.mockImplementation((p) => p.startsWith(expectedPatternDir) ? expectedPatternDir : '/');
mockPathInfoEx.mockReturnValue({ ROOT: '/test/cwd', SRC_DIR: '/test/cwd', SRC_NAME: 'input', SRC_EXT: '.pdf' });
await runHandlerHelper(args);
expect(mockExistsSync).toHaveBeenCalledWith(args.input);
expect(mockReadFile).toHaveBeenCalledWith(args.input);
// ... rest of assertions ...
expect(mockMkdir).toHaveBeenCalledWith(expectedPatternDir, { recursive: true });
expect(mockConvertPdfToImagesFn).toHaveBeenCalledTimes(1);
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), {
baseVariables: expect.objectContaining({
SRC_NAME: 'input',
FORMAT: 'jpg',
DPI: 150
}),
outputPathTemplate: args.output,
dpi: args.dpi,
format: args.format,
startPage: args.startPage,
endPage: args.endPage,
logger: expect.anything(),
});
expect(mockProcessExit).not.toHaveBeenCalled();
});
@ -128,16 +340,4 @@ describe('Convert Command CLI Handler', () => {
);
expect(mockProcessExit).toHaveBeenCalledWith(1);
});
it('should create output directory correctly when output is a directory path', async () => {
const args = { input: 'in.pdf', output: 'output/subdir/' };
await runHandlerHelper(args);
// ... assertions ...
});
it('should create parent directory when output is a file prefix', async () => {
const args = { input: 'in.pdf', output: 'output/subdir/file_prefix' };
await runHandlerHelper(args);
// ... assertions ...
});
});

View File

@ -0,0 +1,61 @@
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { execFileSync, execSync } from 'node:child_process';
import { existsSync, rmSync, readdirSync } from 'node:fs';
import * as path from 'node:path';
// --- Fixture constants for the CLI integration test ---
// Output locations below are derived from the working directory.
const packageRoot = process.cwd(); // Assumes test runs from package root
// Kept relative on purpose: it is handed to the CLI as-is.
const inputPdf = path.join('tests', 'RS485-780.pdf');
// Directory the pattern below is expected to resolve to for inputPdf.
const outputDir = path.join(packageRoot, 'tests', 'out', 'RS485-780');
// Single-quoted, so the ${...} placeholders reach the CLI literally instead of
// being interpolated by JavaScript.
const outputPattern = '${SRC_DIR}/out/${SRC_NAME}/${SRC_NAME}-${PAGE}.${FORMAT}';
// Expected number of pages for RS485-780.pdf
const expectedPageCount = 29;
// Source file name without its extension; prefix of every generated image.
const expectedBaseName = 'RS485-780';
const expectedFormat = 'png'; // Default format
/**
 * End-to-end check of the built CLI (dist/index.js): converts the fixture PDF
 * with a variable-based output pattern and verifies that one image per page is
 * written to the directory the pattern resolves to.
 *
 * Generated images are left on disk after the run; the original optional
 * cleanup hook was disabled, so none is registered here.
 */
describe('CLI Integration Test - Variable Output Path', () => {
  beforeAll(() => {
    // Start clean so stale images from an earlier run cannot satisfy the
    // assertions. `force: true` makes this a no-op when the directory is absent.
    rmSync(outputDir, { recursive: true, force: true });
  });

  it('should create images in the correct directory with the correct filenames using variable substitution', () => {
    // Arguments are passed as an array so no shell is involved. Inside double
    // quotes a POSIX shell would expand ${SRC_DIR}, ${PAGE}, ... to empty
    // strings before the CLI ever saw the pattern.
    const cliArgs = ['dist/index.js', 'convert', '-i', inputPdf, '-o', outputPattern];

    try {
      // stdio: 'pipe' captures output so it can be reported on failure.
      const commandOutput = execFileSync(process.execPath, cliArgs, { encoding: 'utf8', stdio: 'pipe' });
      console.log('Command execution output:', commandOutput);
    } catch (error: any) {
      // If the command fails, log the error and fail the test
      console.error('Command execution failed:', error.stderr || error.stdout || error.message);
      expect.fail(`Command execution failed: ${error.message}`);
    }

    // 1. Check if the output directory exists
    expect(existsSync(outputDir), `Output directory "${outputDir}" should exist`).toBe(true);

    // 2. Check the number of files created
    const files = readdirSync(outputDir);
    expect(files.length, `Should have created ${expectedPageCount} files`).toBe(expectedPageCount);

    // 3. Check filenames
    for (let page = 1; page <= expectedPageCount; page++) {
      const expectedFilename = `${expectedBaseName}-${page}.${expectedFormat}`;
      expect(files, `File list should include "${expectedFilename}"`).toContain(expectedFilename);
    }
  });
});

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB