mono/packages/kbot/src/source.ts
2025-02-20 18:22:14 +01:00

127 lines
3.6 KiB
TypeScript

import * as path from 'path'
import * as fs from 'fs'
import { sync as read } from '@polymech/fs/read'
import { sync as dir } from '@polymech/fs/dir'
import { createItem as toNode } from '@polymech/fs/inspect'
import { INode } from '@polymech/fs/interfaces'
import { sync as exists } from '@polymech/fs/exists'
import { isFile, forward_slash } from '@polymech/osr-commons'
import { logger } from './index'
import { lookup } from 'mime-types'
import { globSync } from 'glob'
import { EXCLUDE_GLOB, MAX_FILE_SIZE } from './constants'
import { defaultMimeRegistry, IHandlerResult } from './mime-handlers'
import { ChatCompletionContentPartImage } from 'openai/resources/index.mjs'
/**
 * Predicates a candidate path must satisfy before `glob` returns it.
 *
 * `glob` walks the keys of this object in declaration order and stops at the
 * first predicate that fails, so the cheap type/existence checks stay ahead of
 * the size check.
 */
export const default_filters = {
  isFile,
  exists,
  /** Accepts only files whose inspected size is strictly below MAX_FILE_SIZE. */
  size: (filePath: string) => {
    const { size } = toNode(filePath);
    return size < MAX_FILE_SIZE;
  },
};
/**
 * Reports whether `childPath` lies strictly inside `parentPath`.
 *
 * The comparison is purely lexical (based on `path.relative`); neither path
 * has to exist and symlinks are not resolved.
 *
 * @param childPath  Path that is tested for containment.
 * @param parentPath Directory that should contain `childPath`.
 * @returns `true` only when `childPath` is a descendant of `parentPath`;
 *          identical paths yield `false`.
 */
const isPathInside = (childPath: string, parentPath: string): boolean => {
  const relation = path.relative(parentPath, childPath);
  return Boolean(
    // An empty relation means both paths are the same location.
    relation &&
    // Only a leading `..` *segment* escapes the parent; a name that merely
    // begins with two dots (e.g. `..cache`) is still a legitimate child.
    relation !== '..' &&
    !relation.startsWith('..' + path.sep) &&
    // `path.relative` falls back to an absolute path when no relative route
    // exists (different drives on Windows) - that is never "inside".
    !path.isAbsolute(relation)
  );
};
/**
 * Symlink-aware check that `pathA` is NOT strictly inside `pathB`.
 *
 * Both arguments are canonicalised with `fs.realpathSync` before being
 * compared, so they must exist on disk; otherwise `realpathSync` throws.
 * Identical locations count as "outside" because `isPathInside` only accepts
 * strict descendants.
 *
 * @param pathA Path to test.
 * @param pathB Directory that `pathA` is tested against.
 * @returns `true` when the resolved `pathA` is not a descendant of the
 *          resolved `pathB`.
 */
export const isPathOutsideSafe = (pathA: string, pathB: string): boolean => {
  const canonical = (target: string): string => fs.realpathSync(target);
  const contained = isPathInside(canonical(pathA), canonical(pathB));
  return contained === false;
};
/**
 * Encodes a file as a `data:` URL (`data:<mime>;base64,<payload>`).
 *
 * The MIME type is derived from the file path via `lookup`. Any failure -
 * unreadable file or unknown MIME type - is logged and reported as `null`
 * instead of being thrown.
 *
 * @param filePath Path of the file to encode.
 * @returns The data URL, or `null` when the file could not be encoded.
 */
export const base64 = (filePath: string): string | null => {
  try {
    const contents = fs.readFileSync(filePath);
    const mimeType = lookup(filePath);
    if (mimeType) {
      const payload = contents.toString('base64');
      return `data:${mimeType};base64,${payload}`;
    }
    // Routed through the catch below so the failure is logged like any other.
    throw new Error('Unable to determine MIME type.');
  } catch (error) {
    logger.error('fileToBase64 : Error reading file:', error);
    return null;
  }
};
/**
 * Converts image files into chat-completion `image_url` content parts.
 *
 * Each file is embedded as a base64 data URL. Files that cannot be encoded
 * (`base64` returns `null` and has already logged the reason) are skipped, so
 * the result never carries a `null` URL and may be shorter than `files`.
 *
 * @param files Paths of the image files to embed.
 * @returns One content part per successfully encoded file, in input order.
 */
export const images = (files: string[]): ChatCompletionContentPartImage[] => {
  const parts: ChatCompletionContentPartImage[] = []
  for (const file of files) {
    const url = base64(file)
    if (url === null) {
      // `image_url.url` must be a string; an unreadable file has no usable URL.
      continue
    }
    parts.push({ type: 'image_url', image_url: { url } })
  }
  return parts
}
/**
 * Collects the files selected by `include` for a project directory.
 *
 * Relative patterns, and absolute patterns that point inside `projectPath`,
 * are expanded with `globSync` relative to `projectPath`. Absolute entries
 * outside the project are taken verbatim as file paths. Every candidate must
 * then pass all predicates in `default_filters`.
 *
 * Side effect: when `projectPath` does not exist it is created and an empty
 * list is returned.
 *
 * @param projectPath Directory used as the glob working directory.
 * @param include     Glob patterns and/or absolute file paths to collect.
 * @returns De-duplicated list of file paths that passed the filters.
 */
export const glob = (
  projectPath: string,
  include: string[] = []
) => {
  if (!exists(projectPath)) {
    dir(projectPath)
    return []
  }

  const patterns = new Set<string>()
  const externalFiles = new Set<string>()

  // NOTE(review): the exclude patterns are seeded into the *include* set and
  // are also passed as `ignore` below - presumably redundant; confirm intent.
  EXCLUDE_GLOB.forEach((excluded) => patterns.add(excluded))

  for (const entry of include) {
    // Only absolute entries that escape the project bypass globbing.
    const isExternal = path.isAbsolute(entry) && !isPathInside(entry, projectPath)
    const bucket = isExternal ? externalFiles : patterns
    bucket.add(entry)
  }

  const matches = globSync([...patterns], {
    cwd: projectPath,
    absolute: false,
    ignore: EXCLUDE_GLOB
  })

  // A Set keeps first-seen order while dropping duplicates.
  const candidates = new Set<string>()
  for (const match of matches) {
    candidates.add(path.join(projectPath, match))
  }
  for (const external of externalFiles) {
    candidates.add(external)
  }

  const checks = Object.values(default_filters)
  return [...candidates].filter((candidate) =>
    checks.every((check) => check(candidate))
  )
}
/**
 * Loads every file selected by `glob` through the MIME handler registry.
 *
 * For each file the handler registered for its MIME type is used (the type
 * defaults to `text/plain` when it cannot be determined), falling back to the
 * `text/*` handler. Files without a handler, files that vanished since
 * globbing, and files whose handler fails or yields nothing are omitted;
 * failures are logged.
 *
 * NOTE(review): handler results are awaited so that both synchronous and
 * promise-returning handlers are supported - confirm against the handler
 * contract in `./mime-handlers`.
 *
 * @param projectPath Directory the relative paths are computed against.
 * @param include     Glob patterns and/or absolute file paths to load.
 * @returns Handler results in the order of the globbed files.
 */
export async function get(
  projectPath: string,
  include: string[] = []
): Promise<Array<IHandlerResult>> {
  const files = glob(projectPath, include)

  const results = await Promise.all(
    files.map(async (fullPath) => {
      try {
        const relativePath = forward_slash(path.relative(projectPath, fullPath))
        // Re-checked here because the file may have changed since globbing.
        if (!isFile(fullPath) || !exists(fullPath)) {
          return null
        }
        const mimeType = lookup(fullPath) || 'text/plain'
        const handler =
          defaultMimeRegistry.getHandler(mimeType) ??
          defaultMimeRegistry.getHandler('text/*')
        if (!handler) {
          return null
        }
        // Awaiting inside the try block routes handler rejections to the
        // catch below instead of rejecting the whole batch.
        return (await handler.handle(fullPath, relativePath)) ?? null
      } catch (error) {
        logger.error(`Error reading file ${fullPath}:`, error)
        return null
      }
    })
  )

  return results.filter((r): r is IHandlerResult => r !== null)
}