sacktreten pro
This commit is contained in:
parent
289130448d
commit
0762a888fd
21
packages/content/ref/pdf-to-images/.vscode/launch.json
vendored
Normal file
21
packages/content/ref/pdf-to-images/.vscode/launch.json
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "Launch Program",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"program": "${workspaceFolder}\\dist\\index.js",
|
||||
"preLaunchTask": "tsc: build - tsconfig.json",
|
||||
"outFiles": [
|
||||
"${workspaceFolder}/dist/**/*.js"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,10 +1,8 @@
|
||||
import { Logger } from 'tslog';
|
||||
import { ConvertCommandSchema } from '../types.js';
|
||||
import { convertPdfToImages } from '../lib/pdf.js';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { dirname, sep, extname, basename } from 'node:path';
|
||||
import { mkdir, readFile } from 'node:fs/promises';
|
||||
import * as z from 'zod';
|
||||
import { runConversion } from '../lib/convert.js';
|
||||
export const command = 'convert';
|
||||
export const desc = 'Convert PDF to images';
|
||||
export const builder = {
|
||||
@ -17,8 +15,7 @@ export const builder = {
|
||||
output: {
|
||||
alias: 'o',
|
||||
type: 'string',
|
||||
description: 'Output directory prefix for images',
|
||||
demandOption: true
|
||||
description: 'Output path pattern or directory. Variables like ${SRC_DIR}, ${PAGE} etc. are supported. Uses a default pattern if omitted.',
|
||||
},
|
||||
dpi: {
|
||||
type: 'number',
|
||||
@ -51,30 +48,9 @@ export async function handler(argv) {
|
||||
if (!existsSync(config.input)) {
|
||||
throw new Error(`Input file ${config.input} does not exist`);
|
||||
}
|
||||
// Ensure the full output directory path exists
|
||||
// config.output is the prefix, e.g., "tests/e5dc/image"
|
||||
// We need to create the directory part, e.g., "tests/e5dc/"
|
||||
const outputDir = dirname(config.output);
|
||||
// Check if output path itself ends with a separator or if the base name contains no extension
|
||||
// This helps determine if the output path is intended as a directory.
|
||||
const isOutputDir = config.output.endsWith(sep) || config.output.endsWith('/') || !extname(basename(config.output));
|
||||
const dirToCreate = isOutputDir ? config.output : outputDir;
|
||||
// Check if dirToCreate is not empty and not the root directory before creating
|
||||
if (dirToCreate && dirToCreate !== '.' && dirToCreate !== '/' && dirToCreate !== sep) {
|
||||
await mkdir(dirToCreate, { recursive: true });
|
||||
logger.info(`Ensured output directory exists: ${dirToCreate}`);
|
||||
}
|
||||
logger.info(`Converting PDF ${config.input} to images...`);
|
||||
const pdfData = await readFile(config.input);
|
||||
const outputFiles = await convertPdfToImages(pdfData, {
|
||||
outputPathPrefix: config.output,
|
||||
dpi: config.dpi,
|
||||
format: config.format,
|
||||
startPage: config.startPage,
|
||||
endPage: config.endPage,
|
||||
logger
|
||||
});
|
||||
logger.info('Conversion completed successfully');
|
||||
logger.info("Calling conversion library function...");
|
||||
const outputFiles = await runConversion(config);
|
||||
logger.info(`Conversion completed successfully`);
|
||||
logger.info(`Generated ${outputFiles.length} images`);
|
||||
}
|
||||
catch (error) {
|
||||
@ -83,7 +59,7 @@ export async function handler(argv) {
|
||||
}
|
||||
else {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error('Error during conversion:', message, error);
|
||||
logger.error('Error during conversion command:', message, error);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
5
packages/content/ref/pdf-to-images/dist/constants.js
vendored
Normal file
5
packages/content/ref/pdf-to-images/dist/constants.js
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Default output path template when no output is specified.
|
||||
* Variables: ${SRC_DIR}, ${SRC_NAME}, ${PAGE}, ${FORMAT}
|
||||
*/
|
||||
export const DEFAULT_OUTPUT_TEMPLATE = "${SRC_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";
|
||||
@ -10,6 +10,5 @@ const commandModule = {
|
||||
yargs(hideBin(process.argv))
|
||||
.command(commandModule)
|
||||
.demandCommand(1, 'You need to specify a command')
|
||||
.strict()
|
||||
.help()
|
||||
.parse();
|
||||
|
||||
118
packages/content/ref/pdf-to-images/dist/lib/convert.js
vendored
Normal file
118
packages/content/ref/pdf-to-images/dist/lib/convert.js
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
import { Logger } from "tslog";
|
||||
import { statSync } from "node:fs";
|
||||
import { sep, resolve as pathResolve, parse as pathParse, relative as pathRelative } from "node:path";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { DEFAULT_ROOTS, DEFAULT_VARS, pathInfoEx } from "@polymech/commons";
|
||||
import { convertPdfToImages } from "./pdf.js"; // Import the actual PDF conversion function
|
||||
import { DEFAULT_OUTPUT_TEMPLATE } from "../constants.js"; // Import the constant
|
||||
/**
 * Runs the PDF to images conversion process.
 *
 * Builds the variable map used for output path templating (defaults, source
 * path parts, DPI/FORMAT and any `var-*` entries found on `config`), decides
 * which output path template to use, reads the PDF and hands everything to
 * `convertPdfToImages`.
 *
 * @param config - The conversion configuration options. Reads `input`,
 *   `output`, `dpi`, `format`, `startPage`, `endPage`, `logger` and any
 *   keys prefixed with `var-`.
 * @returns A promise that resolves with the value returned by
 *   `convertPdfToImages` (the generated image file paths).
 */
export async function runConversion(config) {
    const logger = config.logger || new Logger();
    const inputPath = pathResolve(config.input);

    // The SRC_* basics come from path.parse and must be available even when
    // pathInfoEx fails; previously a pathInfoEx failure skipped them as well,
    // although the warning claimed path.parse was used as the fallback.
    const parsed = pathParse(inputPath);
    let srcInfo = {};
    try {
        srcInfo = pathInfoEx(inputPath);
    }
    catch (e) {
        logger.warn("pathInfoEx not found or failed, using basic path.parse");
    }
    srcInfo = {
        ...srcInfo,
        SRC_DIR: parsed.dir,
        SRC_NAME: parsed.name,
        SRC_EXT: parsed.ext,
    };

    let baseVariables = {
        ...DEFAULT_ROOTS,
        ...DEFAULT_VARS({}),
        ...srcInfo,
        DPI: config.dpi,
        FORMAT: config.format,
    };
    if (baseVariables.ROOT && baseVariables.SRC_DIR) {
        baseVariables.SRC_REL = pathRelative(baseVariables.ROOT, baseVariables.SRC_DIR);
    }

    // Expose the pieces of the source name split on '-', '.' and '_' as
    // SRC_NAME-0, SRC_NAME.1, SRC_NAME_2, ... (only when the separator occurs).
    const srcName = baseVariables.SRC_NAME || '';
    for (const delimiter of ['-', '.', '_']) {
        const pieces = srcName.split(delimiter);
        if (pieces.length > 1) {
            pieces.forEach((piece, i) => {
                baseVariables[`SRC_NAME${delimiter}${i}`] = piece;
            });
        }
    }

    // Process var-* arguments directly from config object passed in
    const cliVars = Object.fromEntries(
        Object.keys(config)
            .filter((key) => key.startsWith('var-'))
            .map((key) => [key.slice('var-'.length).toUpperCase(), config[key]])
    );

    // Uppercase base variable keys; CLI variables are merged last so they win.
    baseVariables = {
        ...Object.fromEntries(
            Object.entries(baseVariables).map(([key, value]) => [key.toUpperCase(), value])
        ),
        ...cliVars,
    };

    let outputPathTemplate;
    if (config.output) {
        const outputPath = pathResolve(config.output);
        let isExplicitDir;
        try {
            isExplicitDir = statSync(outputPath).isDirectory();
        }
        catch {
            // The path cannot be stat'ed (typically it does not exist yet):
            // treat it as a directory only when it ends with a path separator.
            isExplicitDir = config.output.endsWith(sep) || config.output.endsWith("/");
        }
        if (isExplicitDir) {
            baseVariables["OUT_DIR"] = outputPath;
            outputPathTemplate = "${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";
            logger.info(`Output directory specified: ${outputPath}`);
        }
        else {
            outputPathTemplate = config.output;
            logger.info(`Using output path pattern: ${outputPathTemplate}`);
        }
    }
    else {
        // Use default pattern directly from constant
        outputPathTemplate = DEFAULT_OUTPUT_TEMPLATE;
        logger.info(`Using default output path pattern: ${outputPathTemplate}`);
    }

    // --- Read PDF and Call Conversion (moved from commands/convert.ts) ---
    logger.info(`Reading PDF: ${config.input}`);
    const pdfData = await readFile(inputPath);
    logger.info(`Starting conversion process...`);
    return convertPdfToImages(pdfData, {
        baseVariables,
        outputPathTemplate,
        dpi: config.dpi,
        format: config.format,
        startPage: config.startPage,
        endPage: config.endPage,
        logger
    });
}
|
||||
@ -1,6 +1,7 @@
|
||||
import * as mupdf from 'mupdf';
|
||||
import { Logger } from 'tslog';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { resolveVariables } from '@polymech/commons';
|
||||
import { sync as write } from '@polymech/fs/write';
|
||||
export async function convertPdfToImages(pdfData, options) {
|
||||
const logger = options.logger || new Logger();
|
||||
const outputFiles = [];
|
||||
@ -24,13 +25,19 @@ export async function convertPdfToImages(pdfData, options) {
|
||||
logger.info(`Processing pages ${start + 1} to ${end + 1} (${numPagesToProcess} pages) of ${pageCount} total`);
|
||||
for (let i = start; i <= end; i++) {
|
||||
const pageNumber = i + 1; // User-facing page number (1-based)
|
||||
// Create page-specific variables
|
||||
const pageVariables = {
|
||||
...options.baseVariables,
|
||||
PAGE: pageNumber.toString()
|
||||
};
|
||||
// Resolve the output path using the template and page-specific variables
|
||||
const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables);
|
||||
const page = doc.loadPage(i);
|
||||
const pixmap = page.toPixmap([1, 0, 0, 1, 0, 0], mupdf.ColorSpace.DeviceRGB, false);
|
||||
const outputPath = `${options.outputPathPrefix}_${pageNumber}.${options.format}`;
|
||||
const imageData = options.format === 'png'
|
||||
? pixmap.asPNG()
|
||||
: pixmap.asJPEG(100, false);
|
||||
await writeFile(outputPath, imageData);
|
||||
write(outputPath, imageData);
|
||||
outputFiles.push(outputPath);
|
||||
logger.info(`Converted page ${pageNumber} to ${outputPath}`);
|
||||
}
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
export const ConvertCommandSchema = z.object({
|
||||
input: z.string(),
|
||||
output: z.string(),
|
||||
output: z.string().optional(),
|
||||
dpi: z.number().int().positive().default(300),
|
||||
format: z.enum(['png', 'jpg']).default('png'),
|
||||
startPage: z.number().int().positive().optional(),
|
||||
|
||||
89
packages/content/ref/pdf-to-images/package-lock.json
generated
89
packages/content/ref/pdf-to-images/package-lock.json
generated
@ -9,13 +9,16 @@
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@polymech/commons": "file:../../../commons",
|
||||
"@polymech/fs": "file:../../../fs",
|
||||
"@types/yargs": "^17.0.33",
|
||||
"mupdf": "^1.3.3",
|
||||
"p-map": "^7.0.3",
|
||||
"tslog": "^4.9.3",
|
||||
"typescript": "^5.8.2",
|
||||
"vitest": "^3.1.1",
|
||||
"yargs": "^17.7.2",
|
||||
"zod": "^3.24.2"
|
||||
"zod": "^3.24.3"
|
||||
},
|
||||
"bin": {
|
||||
"pdf-to-images": "dist/index.js"
|
||||
@ -24,6 +27,64 @@
|
||||
"@types/node": "^22.13.10"
|
||||
}
|
||||
},
|
||||
"../../../commons": {
|
||||
"name": "@polymech/commons",
|
||||
"version": "0.2.6",
|
||||
"license": "BSD",
|
||||
"dependencies": {
|
||||
"@polymech/core": "file:../core",
|
||||
"@polymech/fs": "file:../fs",
|
||||
"@repo/typescript-config": "file:../typescript-config",
|
||||
"@schemastore/package": "^0.0.10",
|
||||
"env-var": "^7.5.0",
|
||||
"glob": "^10.4.5",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonpath-plus": "^10.3.0",
|
||||
"normalize-url": "^8.0.1",
|
||||
"p-map": "^7.0.3",
|
||||
"p-throttle": "^4.1.1",
|
||||
"tslog": "^3.3.3",
|
||||
"tsup": "^2.0.3",
|
||||
"yargs": "^17.7.2",
|
||||
"zod": "^3.24.2",
|
||||
"zod-to-json-schema": "^3.24.1",
|
||||
"zod-to-ts": "^1.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.12.0",
|
||||
"typescript": "^5.7.3"
|
||||
}
|
||||
},
|
||||
"../../../fs": {
|
||||
"name": "@polymech/fs",
|
||||
"version": "0.13.41",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@polymech/core": "file:../core",
|
||||
"@repo/typescript-config": "file:../typescript-config",
|
||||
"denodeify": "^1.2.1",
|
||||
"glob": "^10.4.1",
|
||||
"mime": "^2.0.3",
|
||||
"minimatch": "^10.0.1",
|
||||
"mkdirp": "^3.0.1",
|
||||
"q": "^1.4.1",
|
||||
"rimraf": "^6.0.1",
|
||||
"write-file-atomic": "^6.0.0",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/denodeify": "^1.2.31",
|
||||
"@types/mime": "^2.0.0",
|
||||
"@types/node": "^22.10.2",
|
||||
"fs-extra": "^4.0.2",
|
||||
"globals": "^15.14.0",
|
||||
"ts-node": "^10.9.1",
|
||||
"typescript": "^5.7.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.25.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.2.tgz",
|
||||
@ -430,6 +491,14 @@
|
||||
"integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@polymech/commons": {
|
||||
"resolved": "../../../commons",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@polymech/fs": {
|
||||
"resolved": "../../../fs",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@rollup/rollup-android-arm-eabi": {
|
||||
"version": "4.40.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.40.0.tgz",
|
||||
@ -1122,6 +1191,18 @@
|
||||
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/p-map": {
|
||||
"version": "7.0.3",
|
||||
"resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.3.tgz",
|
||||
"integrity": "sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/pathe": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz",
|
||||
@ -1606,9 +1687,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.24.2",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz",
|
||||
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
|
||||
"version": "3.24.3",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz",
|
||||
"integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
|
||||
@ -11,7 +11,8 @@
|
||||
"build": "tsc",
|
||||
"start": "node dist/index.js",
|
||||
"test:pdf": "node dist/index.js convert -i tests/e5dc.pdf -o tests/out/e5dc/ --startPage 3 --endPage 5",
|
||||
"test:basic": "vitest run"
|
||||
"test:basic": "vitest run",
|
||||
"test:variables": "vitest run tests/cli/variables.test.ts"
|
||||
},
|
||||
"keywords": [
|
||||
"pdf",
|
||||
@ -23,13 +24,16 @@
|
||||
"license": "ISC",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"@polymech/commons": "file:../../../commons",
|
||||
"@polymech/fs": "file:../../../fs",
|
||||
"@types/yargs": "^17.0.33",
|
||||
"mupdf": "^1.3.3",
|
||||
"p-map": "^7.0.3",
|
||||
"tslog": "^4.9.3",
|
||||
"typescript": "^5.8.2",
|
||||
"vitest": "^3.1.1",
|
||||
"yargs": "^17.7.2",
|
||||
"zod": "^3.24.2"
|
||||
"zod": "^3.24.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.13.10"
|
||||
|
||||
97
packages/content/ref/pdf-to-images/parser/markdown/README.md
Normal file
97
packages/content/ref/pdf-to-images/parser/markdown/README.md
Normal file
@ -0,0 +1,97 @@
|
||||
# PDF to Markdown Integration
|
||||
|
||||
This directory contains the necessary setup and guidance for integrating the `pdf2markdown` tool from the [opendatalab/PDF-Extract-Kit](https://github.com/opendatalab/PDF-Extract-Kit/tree/main/project/pdf2markdown) repository.
|
||||
|
||||
## Setup Instructions
|
||||
|
||||
1. **Clone the Repository:** Clone the `PDF-Extract-Kit` repository into a suitable location (e.g., a `vendor` directory or similar within this project, or manage it as a git submodule).
|
||||
|
||||
```bash
|
||||
# Example: Cloning into a vendor directory
|
||||
git clone https://github.com/opendatalab/PDF-Extract-Kit.git ../../vendor/PDF-Extract-Kit
|
||||
# Or using a submodule
|
||||
# git submodule add https://github.com/opendatalab/PDF-Extract-Kit.git vendor/PDF-Extract-Kit
|
||||
```
|
||||
|
||||
2. **Install Python Dependencies:** The `pdf2markdown` tool relies on several Python libraries. You need to have Python installed (check the repository for specific version requirements, likely Python 3.x). Navigate to the cloned repository directory, set up a virtual environment, and install the required packages. Because the repository doesn't seem to have a top-level `requirements.txt`, you will likely need to install dependencies based on the components used (YOLOv8, UniMERNet, StructEqTable, PaddleOCR), piecing together the requirements from the individual components or following the setup instructions in the `PDF-Extract-Kit` documentation, if available.
|
||||
|
||||
```bash
|
||||
# Navigate to the cloned repo (adjust path as needed)
|
||||
cd ../../vendor/PDF-Extract-Kit
|
||||
|
||||
# Create a virtual environment (recommended)
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows use `venv\Scripts\activate`
|
||||
|
||||
# Install common dependencies (this is a guess, refer to PDF-Extract-Kit docs for specifics)
|
||||
# You'll likely need libraries for YOLO, OCR (PaddleOCR), etc.
|
||||
# pip install -r requirements.txt # Look for requirements files in subdirectories if they exist
|
||||
|
||||
# Example: Install PaddleOCR (check their docs for CPU/GPU versions)
|
||||
# pip install paddlepaddle paddleocr
|
||||
|
||||
# You will need to research and install the specific dependencies for YOLOv8,
|
||||
# UniMERNet, and StructEqTable as used by this project.
|
||||
```
|
||||
|
||||
3. **Configuration:** The tool uses a YAML configuration file (`project/pdf2markdown/configs/pdf2markdown.yaml`). You might need to adjust paths or settings within this file, especially if models need to be downloaded or paths to resources are specific to your environment.
|
||||
|
||||
## Usage from TypeScript CLI
|
||||
|
||||
You can execute the Python script from your TypeScript code using Node.js's `child_process` module.
|
||||
|
||||
```typescript
|
||||
import { execFile } from 'child_process';
import path from 'path';

/**
 * Runs the pdf2markdown Python script as a child process.
 *
 * The script is started with `execFile` and an argument array, so paths that
 * contain spaces or shell metacharacters are passed through verbatim instead
 * of being interpreted by a shell.
 *
 * @param pdfFilePath - Path of the PDF to convert.
 * @param outputMarkdownPath - Path the Markdown output should be written to.
 * @returns A promise that resolves when the process exits successfully and
 *   rejects with the child-process error otherwise.
 */
async function convertPdfToMarkdown(pdfFilePath: string, outputMarkdownPath: string): Promise<void> {
    // Adjust these paths based on where you cloned the repo and the location of this script
    // NOTE: `__dirname` only exists in CommonJS modules; in an ES module derive the
    // directory from `import.meta.url` instead.
    const repoRoot = path.resolve(__dirname, '../../vendor/PDF-Extract-Kit'); // Example path
    const scriptPath = path.join(repoRoot, 'project/pdf2markdown/scripts/run_project.py');
    const configPath = path.join(repoRoot, 'project/pdf2markdown/configs/pdf2markdown.yaml');
    const pythonExecutable = path.join(repoRoot, 'venv/bin/python'); // Or venv\Scripts\python.exe on Windows, or just 'python' if in PATH

    // Construct the argument list
    // IMPORTANT: You'll need to modify the run_project.py script or its config
    // to accept input PDF path and output MD path as arguments, or handle
    // input/output in a way that suits your CLI (e.g., reading config, environment variables).
    // The current script seems to rely solely on the config file.
    // For now, let's assume you modify the config file or the script handles it.
    // You might need to dynamically update the config file before running.

    // Placeholder arguments - need refinement based on how run_project.py handles I/O
    const args = [
        scriptPath,
        '--config', configPath,
        '--input', pdfFilePath,
        '--output', outputMarkdownPath,
    ]; // Hypothetical arguments

    console.log(`Executing: ${pythonExecutable} ${args.join(' ')}`);

    return new Promise((resolve, reject) => {
        execFile(pythonExecutable, args, (error, stdout, stderr) => {
            if (error) {
                console.error(`Error executing pdf2markdown: ${error.message}`);
                console.error(`Stderr: ${stderr}`);
                reject(error);
                return;
            }
            console.log(`Stdout: ${stdout}`);
            console.warn(`Stderr: ${stderr}`); // Log stderr even on success, as it might contain warnings
            resolve();
        });
    });
}
|
||||
|
||||
// Example usage in your CLI command:
|
||||
// const inputPdf = 'path/to/your/input.pdf';
|
||||
// const outputMd = 'path/to/your/output.md';
|
||||
// convertPdfToMarkdown(inputPdf, outputMd)
|
||||
// .then(() => console.log('PDF converted to Markdown successfully.'))
|
||||
// .catch(err => console.error('Conversion failed:', err));
|
||||
|
||||
```
|
||||
|
||||
## Important Considerations
|
||||
|
||||
* **Dependency Management:** Managing Python dependencies within a TypeScript project can be complex. Consider using Docker to encapsulate the Python environment or ensuring clear setup steps for developers.
|
||||
* **Script Modification:** The provided `run_project.py` script seems tailored to use its YAML config file directly. You will likely need to modify this Python script (or the way it's called) to accept input PDF file paths and desired output Markdown file paths as command-line arguments for seamless integration into your CLI.
|
||||
* **Error Handling:** Robust error handling is crucial. The Python script might fail for various reasons (invalid PDF, missing dependencies, model errors). Ensure your TypeScript wrapper handles errors from the child process gracefully.
|
||||
* **Performance:** Executing a Python process involves overhead. For high-throughput scenarios, explore potential optimizations or alternative libraries.
|
||||
* **Model Downloads:** The underlying models (YOLO, etc.) might require downloading large files during the first run or setup. Account for this in your setup instructions and potentially during the first execution from your CLI.
|
||||
142
packages/content/ref/pdf-to-images/ref/index.ts
Normal file
142
packages/content/ref/pdf-to-images/ref/index.ts
Normal file
@ -0,0 +1,142 @@
|
||||
import * as path from 'path'
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { sync as read } from "@polymech/fs/read"
|
||||
import { resolve, isFile } from '@polymech/commons'
|
||||
|
||||
import { substitute } from '@polymech/commons'
|
||||
import { IResizeOptions } from '../types'
|
||||
|
||||
/**
 * Reads a file through `read(..., 'buffer')` and returns the result as a Buffer.
 * When the read yields a falsy value, a one-byte placeholder buffer ("-") is
 * returned instead.
 *
 * @param filePath - Path of the file to read (named so it no longer shadows the `path` module import).
 */
export const fileAsBuffer = (filePath: string) => (read(filePath, 'buffer') as Buffer) || Buffer.from("-")
|
||||
|
||||
/**
 * Creates a shallow copy of an object or array.
 *
 * Primitives, `null` and `undefined` are returned unchanged. Objects are copied
 * by spreading their own enumerable properties into a plain object (the
 * prototype is not preserved); arrays are copied into a new array. Nested
 * values are shared with the original.
 *
 * The copy is made without calling `obj.constructor()`, which threw for class
 * instances and null-prototype objects.
 */
const clone = (obj) => {
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }
    return Array.isArray(obj) ? [...obj] : { ...obj };
}
|
||||
|
||||
/**
 * Computes the absolute destination path(s) for the source file `f`.
 *
 * - When `options.dstInfo.IS_GLOB` is set, one path is produced per entry of
 *   `options.dstInfo.GLOB_EXTENSIONS`.
 * - Otherwise a single path is produced, either next to the source (no
 *   `DST_PATH` variable) or derived from `DST_PATH`.
 *
 * Works on a shallow copy of `options.variables`, so the caller's variables
 * object is not modified.
 *
 * @param f - Source file path.
 * @param options - Reads `variables`, `srcInfo.DIR`, `dstInfo` and `alt`.
 * @returns The resolved destination paths.
 */
export const targets = (f: string, options: IResizeOptions) => {
    const srcParts = path.parse(f)
    const variables = clone(options.variables || {})
    const results: string[] = []
    const rel = path.relative(options.srcInfo.DIR, srcParts.dir)

    if (options.dstInfo.IS_GLOB) {
        options.dstInfo.GLOB_EXTENSIONS.forEach((ext) => {
            variables.SRC_NAME = srcParts.name
            variables.SRC_DIR = srcParts.dir

            let targetPath: string = substitute(options.alt, variables.DST_PATH, variables)
            targetPath = targetPath.replace(variables.DST_GLOB, '')
            if (variables.DST_FILE_EXT) {
                targetPath = targetPath.replace('.' + variables.DST_FILE_EXT, '')
            }

            const parsedTarget = path.parse(targetPath)
            // back compat: a bare '*' destination name means "no name suffix".
            // NOTE(review): this reset happens after substitute(), so the first
            // extension is substituted with DST_NAME === '*' — order kept as is.
            if (variables.DST_NAME === '*') {
                variables.DST_NAME = ''
            }
            if (!parsedTarget.ext) {
                if (!variables.DST_PATH.includes(`{SRC_NAME}`)) {
                    const fileName = srcParts.name + variables.DST_NAME.replace(variables.DST_GLOB, '') + '.' + ext
                    targetPath = path.join(targetPath, rel, fileName)
                } else {
                    targetPath = targetPath + variables.DST_NAME.replace(variables.DST_GLOB, '') + '.' + ext
                }
            }
            // src.base contains dots
            if (!targetPath.endsWith('.' + ext)) {
                targetPath += '.' + ext
            }

            results.push(path.resolve(targetPath))
        })
        return results
    }

    let targetPath = ''
    if (variables.DST_PATH) {
        variables.SRC_NAME = srcParts.name
        variables.SRC_DIR = srcParts.dir
        targetPath = substitute(options.alt, variables.DST_PATH, variables)
        // Append the source file name unless `f` is an existing file.
        if (!(isFile(f) && exists(f))) {
            targetPath = path.join(targetPath, srcParts.base)
        }
        // A target without an extension is treated as a directory.
        if (!path.parse(targetPath).ext) {
            targetPath = path.join(targetPath, srcParts.base)
        }
    } else {
        targetPath = path.join(srcParts.dir, srcParts.base)
    }
    results.push(path.resolve(resolve(targetPath, options.alt, variables)));
    return results;
}
|
||||
|
||||
/**
 * Computes the absolute destination path(s) for the source file `f`.
 *
 * - When `options.dstInfo.IS_GLOB` is set, one path is produced per entry of
 *   `options.dstInfo.GLOB_EXTENSIONS`.
 * - Otherwise a single path is produced, either next to the source (no
 *   `DST_PATH` variable) or derived from `DST_PATH`.
 *
 * Works on a shallow copy of `options.variables`, so the caller's variables
 * object is not modified.
 *
 * @param f - Source file path.
 * @param options - Reads `variables`, `srcInfo.DIR`, `dstInfo` and `alt`.
 * @returns The resolved destination paths.
 */
export const targetsNext = (f: string, options: IResizeOptions) => {
    const srcParts = path.parse(f)
    const variables = clone(options.variables || {})
    // Named `results` so it does not shadow the exported `targets` function.
    const results: string[] = []
    const rel = path.relative(options.srcInfo.DIR, srcParts.dir)

    if (options.dstInfo.IS_GLOB) {
        options.dstInfo.GLOB_EXTENSIONS.forEach((ext) => {
            variables.SRC_NAME = srcParts.name
            variables.SRC_DIR = srcParts.dir

            let targetPath: string = substitute(options.alt, variables.DST_PATH, variables)
            targetPath = targetPath.replace(variables.DST_GLOB, '')
            if (variables.DST_FILE_EXT) {
                targetPath = targetPath.replace('.' + variables.DST_FILE_EXT, '')
            }

            const parsedTarget = path.parse(targetPath)
            // back compat: a bare '*' destination name means "no name suffix".
            // NOTE(review): this reset happens after substitute(), so the first
            // extension is substituted with DST_NAME === '*' — order kept as is.
            if (variables.DST_NAME === '*') {
                variables.DST_NAME = ''
            }
            if (!parsedTarget.ext) {
                if (!variables.DST_PATH.includes(`{SRC_NAME}`)) {
                    const fileName = srcParts.name + variables.DST_NAME.replace(variables.DST_GLOB, '') + '.' + ext
                    targetPath = path.join(targetPath, rel, fileName)
                } else {
                    targetPath = targetPath + variables.DST_NAME.replace(variables.DST_GLOB, '') + '.' + ext
                }
            }
            // src.base contains dots
            if (!targetPath.endsWith('.' + ext)) {
                targetPath += '.' + ext
            }

            results.push(path.resolve(targetPath))
        })
        return results
    }

    let targetPath = ''
    if (variables.DST_PATH) {
        variables.SRC_NAME = srcParts.name
        variables.SRC_DIR = srcParts.dir
        targetPath = substitute(options.alt, variables.DST_PATH, variables)
        // Append the source file name unless `f` is an existing file.
        if (!(isFile(f) && exists(f))) {
            targetPath = path.join(targetPath, srcParts.base)
        }
        // A target without an extension is treated as a directory.
        if (!path.parse(targetPath).ext) {
            targetPath = path.join(targetPath, srcParts.base)
        }
    } else {
        targetPath = path.join(srcParts.dir, srcParts.base)
    }
    results.push(path.resolve(resolve(targetPath, options.alt, variables)));
    return results
}
|
||||
167
packages/content/ref/pdf-to-images/ref/resize.ts
Normal file
167
packages/content/ref/pdf-to-images/ref/resize.ts
Normal file
@ -0,0 +1,167 @@
|
||||
import * as path from 'path'
|
||||
import * as pMap from 'p-map'
|
||||
import * as sharp from 'sharp'
|
||||
|
||||
import { sync as exists } from "@polymech/fs/exists"
|
||||
import { async as move } from "@polymech/fs/move"
|
||||
import { sync as dir } from "@polymech/fs/dir"
|
||||
import { createItem as toNode } from "@polymech/fs/inspect"
|
||||
|
||||
import {
|
||||
logger,
|
||||
ERR_PERM_RETRY_DELAY,
|
||||
ERR_PERM_RETRY_MAX,
|
||||
IOptions,
|
||||
IResizeOptions
|
||||
} from '../../../index'
|
||||
|
||||
import {
|
||||
meta
|
||||
} from './lib'
|
||||
|
||||
import {
|
||||
targets,
|
||||
targetsNext
|
||||
} from '../..'
|
||||
|
||||
|
||||
/**
 * Resizes the image at `source` and writes the result to `target`.
 *
 * When `source` and `target` resolve to the same path, the output is first
 * written to a sibling `<name>_tmp<ext>` file and then moved over the original,
 * retrying the move on EPERM errors.
 *
 * @param source - Path of the image to read.
 * @param target - Path the resized image is written to.
 * @param onNode - Callback invoked with the sharp instance before resizing.
 * @param options - Resize options; reads `percent`, `width`, `height`,
 *   `minWidth`, `minHeight`, `minSize` and the sharp resize settings.
 * @returns The sharp instance (untouched when a minimum-size guard skipped the
 *   resize or no width/height/percent was given), or `undefined` when reading
 *   failed, `width <= minWidth`, or writing an image without width metadata failed.
 */
export const resizeFile = async (source: string, target: string, onNode: (data: sharp.Sharp) => void = () => { }, options: IResizeOptions): Promise<sharp.Sharp | undefined> => {
    const targetOri = `${target}`
    let inPlace = false
    if (path.resolve(source) === path.resolve(target)) {
        // Never write onto the file that is being read: go through a temp file.
        const parts = path.parse(target)
        target = path.join(parts.dir, parts.name + '_tmp' + parts.ext)
        inPlace = true
    }

    let image: sharp.Sharp
    try {
        image = sharp(source)
    } catch (e) {
        logger.error(`Error reading file, ${source}`, e)
        return
    }

    onNode(image)
    const metaData: any = await meta(source, image) || {}
    const percent = options.percent

    const dstParts = path.parse(target)
    const node = toNode(source, {
        size: true,
        mime: true
    })
    if (!exists(dstParts.dir)) {
        dir(dstParts.dir)
    }

    if (options.width && options.minWidth && options.width <= options.minWidth) {
        logger.error(`Error resizing : options.width <= options.minWidth`)
        return
    }
    // Skip images that are already at or below the configured minimums.
    if (metaData.width && options.width && options.minWidth && metaData.width <= options.minWidth) {
        return image
    }
    if (metaData.height && options.height && options.minHeight && metaData.height <= options.minHeight) {
        return image
    }
    if (options.minSize && node.size && options.minSize >= node.size) {
        return image
    }

    const resizeOptions = {
        height: options.height,
        fastShrinkOnLoad: options.fastShrinkOnLoad,
        withoutEnlargement: options.withoutEnlargement,
        withoutReduction: options.withoutReduction,
        fit: options.fit,
        position: options.position,
        background: options.background || 'white'
    }

    if (percent && metaData.width) {
        image = image.resize({
            width: Math.round(metaData.width * (percent / 100)),
            ...resizeOptions
        })
    } else if (options.width || options.height) {
        image = image.resize({
            width: options.width,
            ...resizeOptions
        })
    } else {
        logger.error(`Error resizing, invalid options for ${source} - no width, height or percent`)
        return image
    }

    const dstExt = dstParts.ext.toLowerCase()
    if (dstExt === '.webp' || dstExt === '.png') {
        image = image.rotate()
    }

    if (metaData.width) {
        // NOTE(review): a write failure here rejects the returned promise, while
        // the branch below logs and returns undefined — confirm whether intended.
        await image.withMetadata().toFile(target)
    } else {
        try {
            await image.toFile(target)
        } catch (e) {
            logger.error(`Error writing file out, ${source}`, e)
            return
        }
    }

    if (inPlace) {
        const pause = (retry: number) => new Promise((done) =>
            setTimeout(done, ERR_PERM_RETRY_DELAY * retry)
        )
        // Moves `src` onto `dst`, retrying on EPERM. Every attempt is awaited so
        // the function does not return while a retry is still pending.
        const moveWithRetry = async (src: string, dst: string) => {
            for (let retry = 0; retry <= ERR_PERM_RETRY_MAX; retry++) {
                try {
                    await move(src, dst)
                    return
                } catch (e) {
                    if (e.code !== 'EPERM') {
                        logger.error(`Error moving file ${src} to ${dst}`, e)
                        return
                    }
                    logger.warn(`Error moving file out, retry ${source}`, e)
                    await pause(retry)
                }
            }
            logger.error(`Error moving file failed, max retries reached ${src}`)
        }
        await moveWithRetry(target, targetOri)
    }
    logger.debug(`Resized Image ${source} to ${targetOri}`)
    return image
}
|
||||
/**
 * Resizes a single source file into every entry of `targets`.
 *
 * Targets are processed strictly one after another (pMap concurrency of 1).
 * When `options.dry` is set, each target is only logged and no resize is
 * performed; the corresponding result entry is `undefined`.
 *
 * @param file    Source image path.
 * @param targets Destination paths to produce from `file`.
 * @param onNode  Callback forwarded unchanged to `resizeFile`.
 * @param options Resize options, forwarded unchanged to `resizeFile`.
 * @returns The pMap result: one entry per target, in target order.
 */
export const _resize = async (file, targets: string[], onNode: (data: any) => void = () => { }, options: IOptions) => {
    const resizeTarget = async (target) => {
        logger.debug(`Resizing ${file} to ${target}`)
        // Dry runs stop after logging; an async function returning undefined
        // resolves exactly like the explicit empty promise would.
        return options.dry ? undefined : resizeFile(file, target, onNode, options)
    }
    return pMap(targets, resizeTarget, { concurrency: 1 })
}
|
||||
/**
 * Entry point for batch resizing: resizes every file listed in
 * `options.srcInfo.FILES` into the targets computed by `targetsNext`.
 *
 * Files are handled sequentially (pMap concurrency of 1).
 *
 * @param options Resize options; `logLevel` configures the shared logger
 *                (defaults to 'info') and `srcInfo.FILES` lists the inputs.
 * @returns With valid `srcInfo`: the pMap result, i.e. one `_resize` result
 *          per source file. Without `srcInfo`: the (empty) `reports` array,
 *          after logging an error.
 */
export const resize = async (options: IResizeOptions) => {
    const reports: any = []
    const onNode = (data: any) => reports.push(data)
    logger.setSettings({ minLevel: options.logLevel || 'info' as any })

    // Guard: nothing to convert without source info.
    if (!options.srcInfo) {
        logger.error(`Invalid source info`)
        return reports
    }

    const { FILES } = options.srcInfo
    logger.debug(`Convert ${FILES.length} files`)
    // NOTE(review): entries collected through onNode end up in `reports`, but
    // this path returns the pMap results rather than `reports` — confirm intended.
    return await pMap(FILES, async (f) => {
        const outputs = targetsNext(f, options)
        logger.debug(`Convert ${f} to `, outputs)
        return _resize(f, outputs, onNode, options)
    }, { concurrency: 1 })
}
|
||||
64
packages/content/ref/pdf-to-images/ref/variables.ts
Normal file
64
packages/content/ref/pdf-to-images/ref/variables.ts
Normal file
@ -0,0 +1,64 @@
|
||||
import * as path from 'node:path'
|
||||
import { pathInfoEx } from '@polymech/commons'
|
||||
import { DEFAULT_ROOTS, DEFAULT_VARS } from '@polymech/commons'
|
||||
|
||||
/**
 * Builds the variable map used for template substitution from task options.
 *
 * The result is assembled from, in increasing precedence:
 *   1. `model`, `router`, `baseURL` from `options`, plus `DEFAULT_ROOTS` and
 *      `DEFAULT_VARS({})`;
 *   2. source-file variables (`SRC_NAME`, `SRC_DIR`, `SRC_EXT`, `SRC_REL` and the
 *      indexed `SRC_NAME<sep><i>` parts) — only when exactly one `include` is given;
 *   3. CLI variables: every `var-<name>` option, exposed as `<name>`.
 *
 * Keys from (1) and (2) are upper-cased; keys from (3) keep the case they were
 * passed with.
 *
 * @param options Task options (model/router/baseURL, optional `include`, `var-*` keys).
 * @returns A flat key/value object of substitution variables.
 */
export const variables = (options: IKBotTask) => {
    const { model, router, baseURL } = options
    let ret: Record<string, any> = {
        model,
        router,
        baseURL,
        ...DEFAULT_ROOTS,
        ...DEFAULT_VARS({})
    }

    if (options?.include?.length === 1) {
        const [include] = options.include
        // NOTE(review): the result of pathInfoEx is not used here; the call is kept
        // only to preserve existing behavior — confirm whether it can be dropped.
        pathInfoEx(include)

        const srcParts = path.parse(include)
        const srcVariables: Record<string, string> = {
            SRC_NAME: srcParts.name,
            SRC_DIR: srcParts.dir,
            SRC_EXT: srcParts.ext
        }

        // ROOT comes from the defaults collected in `ret`. The previous check read
        // it from the freshly created `srcVariables`, where it can never exist,
        // so SRC_REL was never set.
        if (ret.ROOT) {
            srcVariables.SRC_REL = path.relative(ret.ROOT, srcParts.dir)
        }

        // Expose each part of the file name split on '-', '.' and '_' as
        // SRC_NAME-<i>, SRC_NAME.<i> and SRC_NAME_<i>; a separator that does not
        // occur in the name contributes nothing.
        const addNameParts = (separator: string) => {
            const parts = srcParts.name.split(separator)
            if (parts.length > 1) {
                parts.forEach((part, i) => {
                    srcVariables[`SRC_NAME${separator}${i}`] = part
                })
            }
        }
        addNameParts('-')
        addNameParts('.')
        addNameParts('_')

        ret = { ...ret, ...srcVariables }
    }

    // CLI argv variables: `var-foo` becomes `foo` (case preserved).
    const cliVariables = Object.keys(options)
        .filter((k) => k.startsWith('var-'))
        .reduce((acc, k) => {
            acc[k.replace('var-', '')] = options[k]
            return acc
        }, {} as Record<string, any>)

    // Upper-case every default / source key so templates can use ${SRC_NAME} etc.
    ret = Object.keys(ret).reduce((acc, key) => {
        acc[key.toUpperCase()] = ret[key]
        return acc
    }, {} as Record<string, any>)

    return { ...ret, ...cliVariables }
}
|
||||
@ -1,12 +1,11 @@
|
||||
import { Arguments } from 'yargs';
|
||||
import { Logger } from 'tslog';
|
||||
import { ConvertCommandSchema, ConvertCommandConfig } from '../types.js';
|
||||
import { convertPdfToImages } from '../lib/pdf.js';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { dirname, sep, extname, basename } from 'node:path';
|
||||
import { mkdir, readFile } from 'node:fs/promises';
|
||||
import { resolve as pathResolve } from 'node:path';
|
||||
import * as z from 'zod';
|
||||
import type { Options } from 'yargs';
|
||||
import { runConversion, IRunConversionOptions } from '../lib/convert.js';
|
||||
|
||||
export const command = 'convert';
|
||||
export const desc = 'Convert PDF to images';
|
||||
@ -21,8 +20,7 @@ export const builder: { [key: string]: Options } = {
|
||||
output: {
|
||||
alias: 'o',
|
||||
type: 'string',
|
||||
description: 'Output directory prefix for images',
|
||||
demandOption: true
|
||||
description: 'Output path pattern or directory. Variables like ${SRC_DIR}, ${PAGE} etc. are supported. Uses a default pattern if omitted.',
|
||||
},
|
||||
dpi: {
|
||||
type: 'number',
|
||||
@ -59,41 +57,17 @@ export async function handler(argv: Arguments<ConvertCommandConfig>): Promise<vo
|
||||
throw new Error(`Input file ${config.input} does not exist`);
|
||||
}
|
||||
|
||||
// Ensure the full output directory path exists
|
||||
// config.output is the prefix, e.g., "tests/e5dc/image"
|
||||
// We need to create the directory part, e.g., "tests/e5dc/"
|
||||
const outputDir = dirname(config.output);
|
||||
// Check if output path itself ends with a separator or if the base name contains no extension
|
||||
// This helps determine if the output path is intended as a directory.
|
||||
const isOutputDir = config.output.endsWith(sep) || config.output.endsWith('/') || !extname(basename(config.output));
|
||||
const dirToCreate = isOutputDir ? config.output : outputDir;
|
||||
logger.info("Calling conversion library function...");
|
||||
const outputFiles = await runConversion(config as IRunConversionOptions);
|
||||
|
||||
// Check if dirToCreate is not empty and not the root directory before creating
|
||||
if (dirToCreate && dirToCreate !== '.' && dirToCreate !== '/' && dirToCreate !== sep) {
|
||||
await mkdir(dirToCreate, { recursive: true });
|
||||
logger.info(`Ensured output directory exists: ${dirToCreate}`);
|
||||
}
|
||||
|
||||
logger.info(`Converting PDF ${config.input} to images...`);
|
||||
|
||||
const pdfData = await readFile(config.input);
|
||||
const outputFiles = await convertPdfToImages(pdfData, {
|
||||
outputPathPrefix: config.output,
|
||||
dpi: config.dpi,
|
||||
format: config.format,
|
||||
startPage: config.startPage,
|
||||
endPage: config.endPage,
|
||||
logger
|
||||
});
|
||||
|
||||
logger.info('Conversion completed successfully');
|
||||
logger.info(`Conversion completed successfully`);
|
||||
logger.info(`Generated ${outputFiles.length} images`);
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
logger.error('Invalid arguments:', error.flatten());
|
||||
} else {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error('Error during conversion:', message, error);
|
||||
logger.error('Error during conversion command:', message, error);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
5
packages/content/ref/pdf-to-images/src/constants.ts
Normal file
5
packages/content/ref/pdf-to-images/src/constants.ts
Normal file
@ -0,0 +1,5 @@
|
||||
/**
 * Default output path template, used when no output is specified.
 * Supported variables: ${SRC_DIR}, ${SRC_NAME}, ${PAGE}, ${FORMAT}
 */
|
||||
export const DEFAULT_OUTPUT_TEMPLATE = "${SRC_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";
|
||||
@ -14,6 +14,5 @@ const commandModule: CommandModule<{}, ConvertCommandConfig> = {
|
||||
yargs(hideBin(process.argv))
|
||||
.command(commandModule)
|
||||
.demandCommand(1, 'You need to specify a command')
|
||||
.strict()
|
||||
.help()
|
||||
.parse();
|
||||
|
||||
141
packages/content/ref/pdf-to-images/src/lib/convert.ts
Normal file
141
packages/content/ref/pdf-to-images/src/lib/convert.ts
Normal file
@ -0,0 +1,141 @@
|
||||
import { Logger } from "tslog";
|
||||
import { statSync } from "node:fs";
|
||||
import { sep, resolve as pathResolve, parse as pathParse, relative as pathRelative } from "node:path";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { DEFAULT_ROOTS, DEFAULT_VARS, pathInfoEx } from "@polymech/commons";
|
||||
import { convertPdfToImages } from "./pdf.js"; // Import the actual PDF conversion function
|
||||
import { DEFAULT_OUTPUT_TEMPLATE } from "../constants.js"; // Import the constant
|
||||
|
||||
// Configuration accepted by the library-level conversion entry point.
// Close to the CLI's ConvertCommandConfig, but tailored for library use.
export interface IRunConversionOptions {
    /** Path of the PDF to convert. */
    input: string;
    /**
     * Output directory or output path template (may contain ${...} variables).
     * When omitted, DEFAULT_OUTPUT_TEMPLATE is used.
     */
    output?: string;
    /** Rendering resolution; also exposed to templates as ${DPI}. */
    dpi: number;
    /** Image format; also exposed to templates as ${FORMAT}. */
    format: "png" | "jpg";
    /** Forwarded unchanged to convertPdfToImages. */
    startPage?: number;
    /** Forwarded unchanged to convertPdfToImages. */
    endPage?: number;
    /** Logger to use; a new tslog Logger is created when omitted. */
    logger?: Logger<any>;
    [key: string]: any; // Allow other properties like var-*
}

/**
 * Adds `SRC_NAME<separator><index>` entries to `vars` for every part of `name`
 * split on `separator`. Nothing is added when the separator does not occur.
 */
function addNameParts(vars: Record<string, any>, name: string, separator: string): void {
    const parts = name.split(separator);
    if (parts.length < 2) {
        return;
    }
    parts.forEach((part, index) => {
        vars[`SRC_NAME${separator}${index}`] = part;
    });
}

/**
 * Runs the PDF to images conversion process.
 * Generates variables, determines output path, reads PDF, and calls the conversion engine.
 *
 * Variable precedence (lowest to highest): DEFAULT_ROOTS, DEFAULT_VARS({}),
 * source path info, DPI/FORMAT, then `var-*` entries of `config`. All keys are
 * upper-cased before use.
 *
 * Output handling:
 *  - `config.output` is an existing directory, or does not exist and ends with a
 *    path separator: files go to `${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}`.
 *  - any other `config.output`: used verbatim as the path template.
 *  - no `config.output`: DEFAULT_OUTPUT_TEMPLATE is used.
 *
 * @param config - The conversion configuration options.
 * @returns A promise that resolves with an array of generated image file paths.
 */
export async function runConversion(config: IRunConversionOptions): Promise<string[]> {
    const logger = config.logger || new Logger<any>();

    const inputPath = pathResolve(config.input);

    // Parse the path up front so SRC_DIR / SRC_NAME / SRC_EXT are available even
    // when pathInfoEx fails. Previously the fallback announced in the warning
    // below was never actually applied.
    const parsed = pathParse(inputPath);
    const parsedInfo = {
        SRC_DIR: parsed.dir,
        SRC_NAME: parsed.name,
        SRC_EXT: parsed.ext,
    };
    let srcInfo: Record<string, any> = parsedInfo;
    try {
        // Values from path.parse take precedence over what pathInfoEx reports.
        srcInfo = { ...pathInfoEx(inputPath), ...parsedInfo };
    } catch (e) {
        logger.warn("pathInfoEx not found or failed, using basic path.parse", e);
    }

    let baseVariables: Record<string, any> = {
        ...DEFAULT_ROOTS,
        ...DEFAULT_VARS({}),
        ...srcInfo,
        DPI: config.dpi,
        FORMAT: config.format,
    };

    if (baseVariables.ROOT && baseVariables.SRC_DIR) {
        baseVariables.SRC_REL = pathRelative(baseVariables.ROOT, baseVariables.SRC_DIR);
    }

    // Expose the parts of the source name split on '-', '.' and '_'.
    const srcName = baseVariables.SRC_NAME || '';
    addNameParts(baseVariables, srcName, '-');
    addNameParts(baseVariables, srcName, '.');
    addNameParts(baseVariables, srcName, '_');

    // Process var-* arguments directly from config object passed in
    const cliVars = Object.keys(config)
        .filter((k) => k.startsWith('var-'))
        .reduce((acc, k) => {
            acc[k.replace('var-', '').toUpperCase()] = config[k];
            return acc;
        }, {} as Record<string, any>);

    // Uppercase base variable keys
    const upperCased = Object.keys(baseVariables).reduce((acc, key) => {
        acc[key.toUpperCase()] = baseVariables[key];
        return acc;
    }, {} as Record<string, any>);

    // CLI variables win over everything derived above.
    baseVariables = { ...upperCased, ...cliVars };

    let outputPathTemplate: string;

    if (config.output) {
        const outputPath = pathResolve(config.output);

        let isExplicitDir: boolean;
        try {
            isExplicitDir = statSync(outputPath).isDirectory();
        } catch (e) {
            // Best effort: the path cannot be stat'ed (typically it does not exist
            // yet), so a trailing separator decides whether a directory is meant.
            isExplicitDir = config.output.endsWith(sep) || config.output.endsWith("/");
        }

        if (isExplicitDir) {
            baseVariables["OUT_DIR"] = outputPath;
            outputPathTemplate = "${OUT_DIR}/${SRC_NAME}_${PAGE}.${FORMAT}";
            logger.info(`Output directory specified: ${outputPath}`);
        } else {
            outputPathTemplate = config.output;
            logger.info(`Using output path pattern: ${outputPathTemplate}`);
        }
    } else {
        // Use default pattern directly from constant
        outputPathTemplate = DEFAULT_OUTPUT_TEMPLATE;
        logger.info(`Using default output path pattern: ${outputPathTemplate}`);
    }

    // --- Read PDF and Call Conversion (moved from commands/convert.ts) ---
    logger.info(`Reading PDF: ${config.input}`);
    const pdfData = await readFile(config.input);

    logger.info(`Starting conversion process...`);
    const outputFiles = await convertPdfToImages(pdfData, {
        baseVariables,
        outputPathTemplate,
        dpi: config.dpi,
        format: config.format,
        startPage: config.startPage,
        endPage: config.endPage,
        logger
    });

    return outputFiles;
}
|
||||
@ -1,11 +1,15 @@
|
||||
import * as mupdf from 'mupdf';
|
||||
import { Logger } from 'tslog';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import * as mupdf from 'mupdf'
|
||||
import { Logger } from 'tslog'
|
||||
import { writeFile, mkdir } from 'node:fs/promises'
|
||||
import { dirname } from 'node:path'
|
||||
import { resolveVariables, pathInfoEx } from '@polymech/commons'
|
||||
import { sync as write } from '@polymech/fs/write'
|
||||
|
||||
export type ImageFormat = 'png' | 'jpg';
|
||||
|
||||
export interface PdfToImageOptions {
|
||||
outputPathPrefix: string;
|
||||
baseVariables: Record<string, any>;
|
||||
outputPathTemplate: string;
|
||||
dpi: number;
|
||||
format: ImageFormat;
|
||||
startPage?: number;
|
||||
@ -44,6 +48,16 @@ export async function convertPdfToImages(
|
||||
|
||||
for (let i = start; i <= end; i++) {
|
||||
const pageNumber = i + 1; // User-facing page number (1-based)
|
||||
|
||||
// Create page-specific variables
|
||||
const pageVariables: Record<string, string> = {
|
||||
...options.baseVariables,
|
||||
PAGE: pageNumber.toString()
|
||||
};
|
||||
|
||||
// Resolve the output path using the template and page-specific variables
|
||||
const outputPath = await resolveVariables(options.outputPathTemplate, false, pageVariables);
|
||||
|
||||
const page = doc.loadPage(i);
|
||||
const pixmap = page.toPixmap(
|
||||
[1, 0, 0, 1, 0, 0],
|
||||
@ -51,12 +65,11 @@ export async function convertPdfToImages(
|
||||
false
|
||||
);
|
||||
|
||||
const outputPath = `${options.outputPathPrefix}_${pageNumber}.${options.format}`;
|
||||
const imageData = options.format === 'png'
|
||||
? pixmap.asPNG()
|
||||
: pixmap.asJPEG(100, false);
|
||||
|
||||
await writeFile(outputPath, imageData);
|
||||
write(outputPath, imageData)
|
||||
outputFiles.push(outputPath);
|
||||
logger.info(`Converted page ${pageNumber} to ${outputPath}`);
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ import type { ImageFormat } from './lib/pdf.js';
|
||||
|
||||
export const ConvertCommandSchema = z.object({
|
||||
input: z.string(),
|
||||
output: z.string(),
|
||||
output: z.string().optional(),
|
||||
dpi: z.number().int().positive().default(300),
|
||||
format: z.enum(['png', 'jpg']).default('png'),
|
||||
startPage: z.number().int().positive().optional(),
|
||||
|
||||
BIN
packages/content/ref/pdf-to-images/tests/RS485-780.pdf
Normal file
BIN
packages/content/ref/pdf-to-images/tests/RS485-780.pdf
Normal file
Binary file not shown.
@ -4,19 +4,30 @@ import { describe, it, expect, vi, beforeEach, Mock, beforeAll } from 'vitest';
|
||||
import type { ConvertCommandConfig } from '../../src/types.js';
|
||||
import type { Arguments } from 'yargs';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import path from 'path';
|
||||
|
||||
// --- Define Mock Functions ---
|
||||
const mockConvertPdfToImagesFn = vi.fn();
|
||||
const mockExistsSync = vi.fn();
|
||||
const mockStatSync = vi.fn();
|
||||
const mockReadFile = vi.fn();
|
||||
const mockMkdir = vi.fn();
|
||||
const mockDirname = vi.fn();
|
||||
const mockBasename = vi.fn();
|
||||
const mockExtname = vi.fn();
|
||||
const mockResolve = vi.fn();
|
||||
const mockParse = vi.fn();
|
||||
const mockRelative = vi.fn();
|
||||
const mockLoggerInfo = vi.fn();
|
||||
const mockLoggerError = vi.fn();
|
||||
const mockProcessExit = vi.spyOn(process, 'exit').mockImplementation((() => {}) as any);
|
||||
|
||||
// Mocks for @polymech/commons
|
||||
const mockResolveVariables = vi.fn();
|
||||
const mockPathInfoEx = vi.fn();
|
||||
const mockDEFAULT_ROOTS = { CWD: '/test/cwd', SCRIPT_DIR: '/test/script' };
|
||||
const mockDEFAULT_VARS = vi.fn().mockReturnValue({ SOME_DEFAULT: 'value' });
|
||||
|
||||
// Use beforeAll for mocks
|
||||
beforeAll(() => {
|
||||
// Mock dependencies using vi.doMock
|
||||
@ -25,6 +36,7 @@ beforeAll(() => {
|
||||
}));
|
||||
vi.doMock('node:fs', () => ({
|
||||
existsSync: mockExistsSync,
|
||||
statSync: mockStatSync,
|
||||
}));
|
||||
vi.doMock('node:fs/promises', () => ({
|
||||
readFile: mockReadFile,
|
||||
@ -34,6 +46,9 @@ beforeAll(() => {
|
||||
dirname: mockDirname,
|
||||
basename: mockBasename,
|
||||
extname: mockExtname,
|
||||
resolve: mockResolve,
|
||||
parse: mockParse,
|
||||
relative: mockRelative,
|
||||
sep: '/',
|
||||
}));
|
||||
vi.doMock('tslog', () => ({
|
||||
@ -42,6 +57,13 @@ beforeAll(() => {
|
||||
error: mockLoggerError,
|
||||
})),
|
||||
}));
|
||||
// Mock @polymech/commons
|
||||
vi.doMock('@polymech/commons', () => ({
|
||||
resolveVariables: mockResolveVariables,
|
||||
pathInfoEx: mockPathInfoEx,
|
||||
DEFAULT_ROOTS: mockDEFAULT_ROOTS,
|
||||
DEFAULT_VARS: mockDEFAULT_VARS,
|
||||
}));
|
||||
});
|
||||
|
||||
// --- Test Suite ---
|
||||
@ -56,14 +78,14 @@ describe('Convert Command CLI Handler', () => {
|
||||
});
|
||||
|
||||
// --- Helper Function to Run Handler ---
|
||||
async function runHandlerHelper(args: Partial<ConvertCommandConfig & { _: (string | number)[], $0: string }>) {
|
||||
async function runHandlerHelper(args: Partial<ConvertCommandConfig & { _: (string | number)[], $0: string, output?: string }>) {
|
||||
const fullArgs = {
|
||||
_: ['convert'],
|
||||
$0: 'test',
|
||||
dpi: 300,
|
||||
format: 'png',
|
||||
...args,
|
||||
} as Arguments<ConvertCommandConfig>;
|
||||
} as Arguments<ConvertCommandConfig & {output?: string}>;
|
||||
// Make sure handler is loaded before calling
|
||||
if (!convertHandler) throw new Error('Handler not loaded');
|
||||
await convertHandler(fullArgs);
|
||||
@ -76,29 +98,219 @@ describe('Convert Command CLI Handler', () => {
|
||||
mockExistsSync.mockReturnValue(true);
|
||||
mockReadFile.mockResolvedValue(Buffer.from('fake-pdf-data'));
|
||||
mockMkdir.mockResolvedValue(undefined);
|
||||
mockDirname.mockImplementation((p) => p.substring(0, p.lastIndexOf('/') > 0 ? p.lastIndexOf('/') : p.length));
|
||||
|
||||
// Mock path functions more robustly
|
||||
mockDirname.mockImplementation((p) => {
|
||||
if (!p || p === '/') return '/';
|
||||
const lastSlash = p.lastIndexOf('/');
|
||||
if (lastSlash === -1) return '.'; // No slash, return current dir indicator
|
||||
if (lastSlash === 0) return '/'; // Root directory
|
||||
return p.substring(0, lastSlash);
|
||||
});
|
||||
mockBasename.mockImplementation((p) => p.substring(p.lastIndexOf('/') > 0 ? p.lastIndexOf('/') + 1 : 0));
|
||||
mockExtname.mockImplementation((p) => {
|
||||
const lastSlash = p.lastIndexOf('/');
|
||||
const dotIndex = p.lastIndexOf('.');
|
||||
return dotIndex > 0 ? p.substring(dotIndex) : '';
|
||||
return dotIndex > (lastSlash > -1 ? lastSlash : -1) ? p.substring(dotIndex) : '';
|
||||
});
|
||||
// Improved mockResolve to handle absolute/relative paths based on /test/cwd
|
||||
mockResolve.mockImplementation((...paths) => {
|
||||
let currentPath = '/test/cwd'; // Assume CWD
|
||||
for (const p of paths) {
|
||||
if (path.isAbsolute(p)) { // Use actual path.isAbsolute for check
|
||||
currentPath = p;
|
||||
} else {
|
||||
currentPath = path.join(currentPath, p); // Use actual path.join
|
||||
}
|
||||
}
|
||||
// Normalize (e.g., remove //, resolve ..)
|
||||
return path.normalize(currentPath).replace(/\\/g, '/');
|
||||
});
|
||||
mockParse.mockImplementation((p) => ({
|
||||
root: '/',
|
||||
dir: mockDirname(p),
|
||||
base: mockBasename(p),
|
||||
ext: mockExtname(p),
|
||||
name: mockBasename(p, mockExtname(p)),
|
||||
}));
|
||||
mockRelative.mockImplementation((from, to) => to.startsWith(from) ? to.substring(from.length + 1) : to);
|
||||
|
||||
// Reset @polymech/commons mocks
|
||||
mockResolveVariables.mockImplementation(async (template, _bool, vars) => template.replace(/\${(.*?)}/g, (_, key) => vars[key] ?? 'UNDEFINED'));
|
||||
mockPathInfoEx.mockImplementation((p) => ({
|
||||
ROOT: '/test/cwd',
|
||||
SRC_DIR: mockDirname(p),
|
||||
SRC_NAME: mockBasename(p, mockExtname(p)),
|
||||
SRC_EXT: mockExtname(p),
|
||||
}));
|
||||
mockStatSync.mockImplementation((p) => { throw new Error('File not found'); });
|
||||
|
||||
mockProcessExit.mockClear();
|
||||
});
|
||||
|
||||
// --- Test cases ---
|
||||
it('should call convertPdfToImages with correct args', async () => {
|
||||
it('should call convertPdfToImages with correct default args when output is omitted', async () => {
|
||||
const args = {
|
||||
input: 'pdfs/document.pdf',
|
||||
};
|
||||
// Setup mocks for this case
|
||||
mockExistsSync.mockReturnValueOnce(true); // Explicitly mock for this input
|
||||
mockResolve.mockImplementation((p) => p.startsWith('/') ? p : `/test/cwd/${p}`);
|
||||
const expectedSrcDir = '/test/cwd/pdfs';
|
||||
const expectedSrcName = 'document';
|
||||
mockPathInfoEx.mockReturnValue({
|
||||
ROOT: '/test/cwd',
|
||||
SRC_DIR: expectedSrcDir,
|
||||
SRC_NAME: expectedSrcName,
|
||||
SRC_EXT: '.pdf'
|
||||
});
|
||||
|
||||
await runHandlerHelper(args);
|
||||
|
||||
expect(mockExistsSync).toHaveBeenCalledWith(args.input);
|
||||
expect(mockReadFile).toHaveBeenCalledWith(args.input);
|
||||
expect(mockMkdir).toHaveBeenCalledWith(expectedSrcDir, { recursive: true });
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledTimes(1);
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), {
|
||||
baseVariables: expect.objectContaining({
|
||||
SRC_DIR: expectedSrcDir,
|
||||
SRC_NAME: expectedSrcName,
|
||||
FORMAT: 'png',
|
||||
DPI: 300,
|
||||
SOME_DEFAULT: 'value',
|
||||
CWD: mockDEFAULT_ROOTS.CWD
|
||||
}),
|
||||
outputPathTemplate: `${'${SRC_DIR}'}/${'${SRC_NAME}'}_${'${PAGE}'}.${'${FORMAT}'}`,
|
||||
dpi: 300,
|
||||
format: 'png',
|
||||
startPage: undefined,
|
||||
endPage: undefined,
|
||||
logger: expect.anything(),
|
||||
});
|
||||
expect(mockProcessExit).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use custom output path template when provided', async () => {
|
||||
const args = {
|
||||
input: 'in.pdf',
|
||||
output: 'images/custom_${SRC_NAME}_page${PAGE}.${FORMAT}',
|
||||
};
|
||||
mockExistsSync.mockReturnValueOnce(true); // Explicitly mock for this input
|
||||
mockResolve.mockImplementation((p) => p.startsWith('/') ? p : `/test/cwd/${p}`);
|
||||
mockPathInfoEx.mockReturnValue({
|
||||
ROOT: '/test/cwd',
|
||||
SRC_DIR: '/test/cwd',
|
||||
SRC_NAME: 'in',
|
||||
SRC_EXT: '.pdf'
|
||||
});
|
||||
const expectedPatternDir = '/test/cwd/images';
|
||||
// Ensure dirname mock works for the expected resolved path
|
||||
// mockDirname.mockImplementation((p) => p === '/test/cwd/images/custom_in_pageUNDEFINED.png' ? expectedPatternDir : '/'); // Old complex mock removed, rely on general mock
|
||||
|
||||
await runHandlerHelper(args);
|
||||
|
||||
expect(mockMkdir).toHaveBeenCalledWith(expectedPatternDir, { recursive: true });
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), expect.objectContaining({
|
||||
outputPathTemplate: args.output,
|
||||
baseVariables: expect.objectContaining({ SRC_NAME: 'in' }),
|
||||
}));
|
||||
expect(mockProcessExit).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle output path as a directory', async () => {
|
||||
const args = {
|
||||
input: 'some/path/doc.pdf',
|
||||
output: 'output/images/',
|
||||
};
|
||||
const resolvedOutputDir = '/test/cwd/output/images';
|
||||
mockResolve.mockImplementation((p) => p === args.output ? resolvedOutputDir : p );
|
||||
mockStatSync.mockImplementation((p) => {
|
||||
if (p === resolvedOutputDir) {
|
||||
return { isDirectory: () => true };
|
||||
}
|
||||
throw new Error('Not found');
|
||||
});
|
||||
mockPathInfoEx.mockReturnValue({
|
||||
ROOT: '/test/cwd',
|
||||
SRC_DIR: '/test/cwd/some/path',
|
||||
SRC_NAME: 'doc',
|
||||
SRC_EXT: '.pdf'
|
||||
});
|
||||
|
||||
await runHandlerHelper(args);
|
||||
|
||||
expect(mockMkdir).toHaveBeenCalledWith(resolvedOutputDir, { recursive: true });
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), expect.objectContaining({
|
||||
outputPathTemplate: `${'${OUT_DIR}'}/${'${SRC_NAME}'}_${'${PAGE}'}.${'${FORMAT}'}`,
|
||||
baseVariables: expect.objectContaining({
|
||||
OUT_DIR: resolvedOutputDir,
|
||||
SRC_NAME: 'doc',
|
||||
}),
|
||||
}));
|
||||
expect(mockProcessExit).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle output path that looks like a directory (ends with /) but doesnt exist yet', async () => {
|
||||
const args = {
|
||||
input: 'other.pdf',
|
||||
output: 'new_dir/',
|
||||
};
|
||||
const resolvedOutputDir = '/test/cwd/new_dir';
|
||||
mockResolve.mockImplementation((p) => p === args.output ? resolvedOutputDir : p );
|
||||
mockStatSync.mockImplementation((p) => { throw new Error('Not found'); });
|
||||
mockPathInfoEx.mockReturnValue({
|
||||
ROOT: '/test/cwd',
|
||||
SRC_DIR: '/test/cwd',
|
||||
SRC_NAME: 'other',
|
||||
SRC_EXT: '.pdf'
|
||||
});
|
||||
|
||||
await runHandlerHelper(args);
|
||||
|
||||
expect(mockMkdir).toHaveBeenCalledWith(resolvedOutputDir, { recursive: true });
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), expect.objectContaining({
|
||||
outputPathTemplate: `${'${OUT_DIR}'}/${'${SRC_NAME}'}_${'${PAGE}'}.${'${FORMAT}'}`,
|
||||
baseVariables: expect.objectContaining({
|
||||
OUT_DIR: resolvedOutputDir,
|
||||
SRC_NAME: 'other',
|
||||
}),
|
||||
}));
|
||||
expect(mockProcessExit).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should call convertPdfToImages with specific args', async () => {
|
||||
const args = {
|
||||
input: 'input.pdf',
|
||||
output: 'output/prefix',
|
||||
dpi: 150,
|
||||
format: 'jpg',
|
||||
format: 'jpg' as const,
|
||||
startPage: 2,
|
||||
endPage: 5,
|
||||
} as const;
|
||||
};
|
||||
mockResolve.mockImplementation((p) => p.startsWith('/') ? p : `/test/cwd/${p}`);
|
||||
const expectedPatternDir = '/test/cwd/output';
|
||||
mockDirname.mockImplementation((p) => p.startsWith(expectedPatternDir) ? expectedPatternDir : '/');
|
||||
mockPathInfoEx.mockReturnValue({ ROOT: '/test/cwd', SRC_DIR: '/test/cwd', SRC_NAME: 'input', SRC_EXT: '.pdf' });
|
||||
|
||||
await runHandlerHelper(args);
|
||||
|
||||
expect(mockExistsSync).toHaveBeenCalledWith(args.input);
|
||||
expect(mockReadFile).toHaveBeenCalledWith(args.input);
|
||||
// ... rest of assertions ...
|
||||
expect(mockMkdir).toHaveBeenCalledWith(expectedPatternDir, { recursive: true });
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledTimes(1);
|
||||
expect(mockConvertPdfToImagesFn).toHaveBeenCalledWith(expect.any(Buffer), {
|
||||
baseVariables: expect.objectContaining({
|
||||
SRC_NAME: 'input',
|
||||
FORMAT: 'jpg',
|
||||
DPI: 150
|
||||
}),
|
||||
outputPathTemplate: args.output,
|
||||
dpi: args.dpi,
|
||||
format: args.format,
|
||||
startPage: args.startPage,
|
||||
endPage: args.endPage,
|
||||
logger: expect.anything(),
|
||||
});
|
||||
expect(mockProcessExit).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@ -128,16 +340,4 @@ describe('Convert Command CLI Handler', () => {
|
||||
);
|
||||
expect(mockProcessExit).toHaveBeenCalledWith(1);
|
||||
});
|
||||
|
||||
it('should create output directory correctly when output is a directory path', async () => {
|
||||
const args = { input: 'in.pdf', output: 'output/subdir/' };
|
||||
await runHandlerHelper(args);
|
||||
// ... assertions ...
|
||||
});
|
||||
|
||||
it('should create parent directory when output is a file prefix', async () => {
|
||||
const args = { input: 'in.pdf', output: 'output/subdir/file_prefix' };
|
||||
await runHandlerHelper(args);
|
||||
// ... assertions ...
|
||||
});
|
||||
});
|
||||
@ -0,0 +1,61 @@
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { existsSync, rmSync, readdirSync } from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
|
||||
const packageRoot = process.cwd(); // Assumes test runs from package root
const inputPdf = path.join('tests', 'RS485-780.pdf');
const outputDir = path.join(packageRoot, 'tests', 'out', 'RS485-780');
// Plain string on purpose: the ${...} placeholders are resolved by the CLI, not by JS.
const outputPattern = '${SRC_DIR}/out/${SRC_NAME}/${SRC_NAME}-${PAGE}.${FORMAT}';

// Expected number of pages for RS485-780.pdf
const expectedPageCount = 29;
const expectedBaseName = 'RS485-780';
const expectedFormat = 'png'; // Default format

/**
 * Quotes a CLI argument so the shell hands it to the program verbatim.
 * POSIX shells expand ${VAR} inside double quotes, which would blank out the
 * placeholders of the output pattern before the CLI sees them, so single
 * quotes are used there. cmd.exe does not treat single quotes as quoting
 * characters, so double quotes are used on Windows.
 */
const shellQuote = (arg: string): string =>
    process.platform === 'win32'
        ? `"${arg}"`
        : `'${arg.replace(/'/g, `'\\''`)}'`;

describe('CLI Integration Test - Variable Output Path', () => {
    beforeAll(() => {
        // Start from a clean slate so stale files cannot satisfy the assertions.
        if (existsSync(outputDir)) {
            rmSync(outputDir, { recursive: true, force: true });
        }
    });

    afterAll(() => {
        // Generated images are intentionally left in place for manual inspection.
    });

    it('should create images in the correct directory with the correct filenames using variable substitution', () => {
        // inputPdf is relative to the package root; the output pattern relies on
        // the CLI's own variable resolution.
        const command = `node dist/index.js convert -i ${shellQuote(inputPdf)} -o ${shellQuote(outputPattern)}`;

        // Execute the command
        let commandOutput = '';
        try {
            // stdio: 'pipe' captures stdout/stderr so failures can be reported below.
            commandOutput = execSync(command, { cwd: packageRoot, encoding: 'utf8', stdio: 'pipe' });
            console.log('Command execution output:', commandOutput);
        } catch (error: any) {
            // If the command fails, log the error and fail the test
            console.error('Command execution failed:', error.stderr || error.stdout || error.message);
            expect.fail(`Command execution failed: ${error.message}`);
        }

        // 1. Check if the output directory exists
        expect(existsSync(outputDir), `Output directory "${outputDir}" should exist`).toBe(true);

        // 2. Check the number of files created
        const files = readdirSync(outputDir);
        expect(files.length, `Should have created ${expectedPageCount} files`).toBe(expectedPageCount);

        // 3. Check filenames
        for (let page = 1; page <= expectedPageCount; page++) {
            const expectedFilename = `${expectedBaseName}-${page}.${expectedFormat}`;
            expect(files, `File list should include "${expectedFilename}"`).toContain(expectedFilename);
        }
    });
});
|
||||
47448
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-1.png
Normal file
47448
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-1.png
Normal file
File diff suppressed because it is too large
Load Diff
55647
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-10.png
Normal file
55647
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-10.png
Normal file
File diff suppressed because it is too large
Load Diff
62187
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-11.png
Normal file
62187
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-11.png
Normal file
File diff suppressed because it is too large
Load Diff
58606
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-12.png
Normal file
58606
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-12.png
Normal file
File diff suppressed because it is too large
Load Diff
53385
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-13.png
Normal file
53385
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-13.png
Normal file
File diff suppressed because it is too large
Load Diff
48789
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-14.png
Normal file
48789
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-14.png
Normal file
File diff suppressed because it is too large
Load Diff
63873
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-15.png
Normal file
63873
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-15.png
Normal file
File diff suppressed because it is too large
Load Diff
51373
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-16.png
Normal file
51373
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-16.png
Normal file
File diff suppressed because it is too large
Load Diff
54583
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-17.png
Normal file
54583
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-17.png
Normal file
File diff suppressed because it is too large
Load Diff
45332
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-18.png
Normal file
45332
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-18.png
Normal file
File diff suppressed because it is too large
Load Diff
41495
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-19.png
Normal file
41495
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-19.png
Normal file
File diff suppressed because it is too large
Load Diff
51153
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-2.png
Normal file
51153
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-2.png
Normal file
File diff suppressed because it is too large
Load Diff
41218
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-20.png
Normal file
41218
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-20.png
Normal file
File diff suppressed because it is too large
Load Diff
47932
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-21.png
Normal file
47932
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-21.png
Normal file
File diff suppressed because it is too large
Load Diff
35965
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-22.png
Normal file
35965
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-22.png
Normal file
File diff suppressed because it is too large
Load Diff
37573
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-23.png
Normal file
37573
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-23.png
Normal file
File diff suppressed because it is too large
Load Diff
52130
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-24.png
Normal file
52130
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-24.png
Normal file
File diff suppressed because it is too large
Load Diff
47166
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-25.png
Normal file
47166
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-25.png
Normal file
File diff suppressed because it is too large
Load Diff
48886
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-26.png
Normal file
48886
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-26.png
Normal file
File diff suppressed because it is too large
Load Diff
52206
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-27.png
Normal file
52206
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-27.png
Normal file
File diff suppressed because it is too large
Load Diff
49947
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-28.png
Normal file
49947
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-28.png
Normal file
File diff suppressed because it is too large
Load Diff
40713
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-29.png
Normal file
40713
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-29.png
Normal file
File diff suppressed because it is too large
Load Diff
45387
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-3.png
Normal file
45387
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-3.png
Normal file
File diff suppressed because it is too large
Load Diff
35293
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-4.png
Normal file
35293
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-4.png
Normal file
File diff suppressed because it is too large
Load Diff
56880
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-5.png
Normal file
56880
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-5.png
Normal file
File diff suppressed because it is too large
Load Diff
41134
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-6.png
Normal file
41134
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-6.png
Normal file
File diff suppressed because it is too large
Load Diff
49332
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-7.png
Normal file
49332
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-7.png
Normal file
File diff suppressed because it is too large
Load Diff
43323
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-8.png
Normal file
43323
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-8.png
Normal file
File diff suppressed because it is too large
Load Diff
47078
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-9.png
Normal file
47078
packages/content/ref/pdf-to-images/tests/out/RS485-780/RS485-780-9.png
Normal file
File diff suppressed because it is too large
Load Diff
BIN
packages/content/ref/pdf-to-images/tests/out/e5dc/_3.png
Normal file
BIN
packages/content/ref/pdf-to-images/tests/out/e5dc/_3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 37 KiB |
BIN
packages/content/ref/pdf-to-images/tests/out/e5dc/_4.png
Normal file
BIN
packages/content/ref/pdf-to-images/tests/out/e5dc/_4.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 88 KiB |
BIN
packages/content/ref/pdf-to-images/tests/out/e5dc/_5.png
Normal file
BIN
packages/content/ref/pdf-to-images/tests/out/e5dc/_5.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 83 KiB |
BIN
packages/content/ref/pdf-to-images/tests/out/simple_test.png
Normal file
BIN
packages/content/ref/pdf-to-images/tests/out/simple_test.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 40 KiB |
Loading…
Reference in New Issue
Block a user