This commit is contained in:
lovebird 2025-04-21 21:03:58 +02:00
parent 59673bb1a2
commit 289130448d
6 changed files with 16 additions and 13 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,13 +1,11 @@
import * as path from 'path'
import { RunnableToolFunction } from 'openai/lib/RunnableFunction'
import { sync as rm } from '@polymech/fs/remove'
//import { filesEx as glob } from '@polymech/commons/_glob'
import { isString } from '@polymech/core/primitives'
import { sync as write } from '@polymech/fs/write'
import { sync as read } from '@polymech/fs/read'
import { sync as rename } from '@polymech/fs/rename'
import { sync as exists } from '@polymech/fs/exists'
import { sanitizeFilename } from "@polymech/fs/utils"
import { sanitize } from "@polymech/fs/utils"
import { filesEx } from '@polymech/commons'

View File

@ -148,4 +148,8 @@ examplePageRange();
* `convertPdfToImages(pdfData: Buffer, options: PdfToImageOptions): Promise<string[]>`: Converts a PDF buffer to images.
* `ImageFormat`: Type alias for `'png' | 'jpg'`.
* `PdfToImageOptions`: Interface for conversion options (`outputPathPrefix`, `dpi`, `format`, optional `startPage`, optional `endPage`, optional `logger`).
```
### References
- https://github.com/opendatalab/PDF-Extract-Kit/tree/main/project/pdf2markdown (Using Yolo)

@ -1 +1 @@
Subproject commit be7024a44d3904082a1b2cbaa2ed4ee229f7628a
Subproject commit 63a9f0d93b4388c2cb76e7596b60cfc59dd742d7

View File

@ -38,11 +38,14 @@ echo "Using host volume: $DOCS_DIR_HOST"
# Use exec to replace the shell process with the docker command
# This ensures signals (like Ctrl+C) are passed correctly to the container
# Added --rm to automatically remove the container on exit
exec docker run -it --rm --name gpt-researcher \
-p 8000:8000 \
--env-file "$ENV_FILE" \
-v "$DOCS_DIR_HOST":"$DOCS_DIR_CONTAINER" \
gpt-researcher "$@" # Pass any extra script arguments to the container command
cd "$PROJECT_DIR" || exit 1
docker-compose up --build
#pwd
#exec docker run -it --rm --name gpt-researcher \
# -p 8000:8000 \
# --env-file "$ENV_FILE" \
# -v "$DOCS_DIR_HOST":"$DOCS_DIR_CONTAINER" \
# gpt-researcher "$@" # Pass any extra script arguments to the container command
# --- End Docker Launch Command ---

View File

@ -54,7 +54,6 @@ describe('crawlAndExtract Integration Tests', () => {
});
afterAll(async () => {
// Cleanup temp files after all tests run
const filesToClean = [tempSchemaPath, tempBrowserConfigPath, tempCrawlerConfigPath];
for (const filePath of filesToClean) {
if (filePath) {