145 lines
12 KiB
JavaScript
145 lines
12 KiB
JavaScript
import * as path from 'node:path';
|
|
import * as fs from 'node:fs';
|
|
import { sync as dir } from '@polymech/fs/dir';
|
|
import { createItem as toNode } from '@polymech/fs/inspect';
|
|
import { sync as exists } from '@polymech/fs/exists';
|
|
import { isFile, forward_slash } from '@polymech/commons';
|
|
import { logger } from './index.js';
|
|
import { lookup } from 'mime-types';
|
|
import { globSync } from 'glob';
|
|
import { EXCLUDE_GLOB, MAX_FILE_SIZE } from './constants.js';
|
|
import { defaultMimeRegistry } from './mime-handlers.js';
|
|
import { supported } from './commands/run-assistant.js';
|
|
/**
 * @todo Add support for vector stores:
 * https://platform.openai.com/docs/assistants/tools/file-search?lang=node.js
 */
|
|
/**
 * Predicates that `glob` applies to every candidate path. Each entry is called
 * with a file path, and a path is kept only when every entry returns a truthy
 * value. The keys are evaluated in declaration order.
 */
export const default_filters = {
    isFile,
    exists,
    // Keep only files whose reported size is strictly below MAX_FILE_SIZE.
    size(filePath) {
        const { size: byteCount } = toNode(filePath);
        return byteCount < MAX_FILE_SIZE;
    },
};
|
|
/**
 * Reports whether `childPath` lies strictly inside `parentPath`.
 *
 * The check is lexical: it is based on `path.relative` and does not touch the
 * file system. A path is never considered to be inside itself.
 *
 * @param {string} childPath - Path expected to be nested below `parentPath`.
 * @param {string} parentPath - Directory that should contain `childPath`.
 * @returns {boolean} `true` when `childPath` is a descendant of `parentPath`.
 */
const isPathInside = (childPath, parentPath) => {
    const relation = path.relative(parentPath, childPath);
    // An empty relation means both paths point at the same location.
    if (relation === '') {
        return false;
    }
    // Only a real `..` segment leaves the parent. Names such as `..cache`
    // merely begin with two dots and are still inside.
    if (relation === '..' || relation.startsWith(`..${path.sep}`)) {
        return false;
    }
    // When no relative route exists (e.g. another Windows drive) the result is
    // an absolute path rather than a `..`-prefixed one.
    return !path.isAbsolute(relation);
};
|
|
/**
 * Reports whether `pathA` is NOT located inside `pathB`, comparing the results
 * of `fs.realpathSync` for both arguments rather than the raw strings.
 *
 * Because `isPathInside` does not treat a path as being inside itself, this
 * also returns `true` when both arguments resolve to the same location.
 * Errors thrown by `fs.realpathSync` are not caught here.
 *
 * @param {string} pathA - Path to test.
 * @param {string} pathB - Candidate containing directory.
 * @returns {boolean} `true` when the resolved `pathA` is not inside the resolved `pathB`.
 */
export const isPathOutsideSafe = (pathA, pathB) => {
    const [resolvedChild, resolvedParent] = [pathA, pathB].map((entry) => fs.realpathSync(entry));
    return !isPathInside(resolvedChild, resolvedParent);
};
|
|
/**
 * Reads a file and returns it as a `data:` URL with a base64 payload.
 *
 * The MIME type is obtained from `lookup(filePath)`. Any failure (unreadable
 * file, no MIME type found) is logged through `logger.error` and reported to
 * the caller as `null` instead of being thrown.
 *
 * @param {string} filePath - Path of the file to encode.
 * @returns {string|null} `data:<mime>;base64,<payload>` or `null` on failure.
 */
export const base64 = (filePath) => {
    try {
        const raw = fs.readFileSync(filePath);
        const mimeType = lookup(filePath);
        if (mimeType) {
            return `data:${mimeType};base64,${raw.toString('base64')}`;
        }
        // Routed through the catch below so both failure kinds are logged alike.
        throw new Error('Unable to determine MIME type.');
    }
    catch (error) {
        logger.error('fileToBase64 : Error reading file:', error);
        return null;
    }
};
|
|
/**
 * Converts image file paths into `image_url` content parts whose URL is the
 * base64 `data:` URL produced by `base64`.
 *
 * Files that `base64` could not encode (it returns `null` after logging the
 * error) are skipped, so no part with a `null` URL is ever emitted.
 *
 * @param {string[]} files - Paths of the image files to embed.
 * @returns {Array<{type: string, image_url: {url: string}}>} One part per
 *   successfully encoded file, in input order.
 */
export const images = (files) => {
    const parts = [];
    for (const file of files) {
        const url = base64(file);
        if (url === null) {
            // Encoding failed and was already logged by base64().
            continue;
        }
        parts.push({
            type: "image_url",
            image_url: { url }
        });
    }
    return parts;
};
|
|
/**
 * Collects the files of a project that match the given include entries.
 *
 * Relative entries, and absolute entries located inside `projectPath`, are
 * expanded with `globSync` using `projectPath` as `cwd` and `EXCLUDE_GLOB` as
 * the `ignore` list. Absolute entries outside `projectPath` are taken as
 * literal paths. Every candidate must then pass all `default_filters`.
 *
 * Side effect: when `projectPath` does not exist, `dir(projectPath)` is called
 * and an empty list is returned.
 * NOTE(review): `dir` presumably creates the directory — confirm against `@polymech/fs/dir`.
 *
 * @param {string} projectPath - Root directory of the project.
 * @param {string[]} [include=[]] - Glob patterns and/or absolute paths.
 * @returns {string[]} De-duplicated paths that passed every default filter.
 */
export const glob = (projectPath, include = []) => {
    if (!exists(projectPath)) {
        dir(projectPath);
        return [];
    }
    const patterns = new Set();
    const externalPaths = new Set();
    for (const entry of include) {
        const isExternal = path.isAbsolute(entry) && !isPathInside(entry, projectPath);
        (isExternal ? externalPaths : patterns).add(entry);
    }
    // EXCLUDE_GLOB is applied through `ignore` only. Listing the same patterns
    // as search patterns would make glob match those files just to drop them.
    const matched = patterns.size > 0
        ? globSync([...patterns], {
            cwd: projectPath,
            absolute: false,
            ignore: EXCLUDE_GLOB
        })
        : [];
    const candidates = new Set([
        ...matched.map((file) => path.join(projectPath, file)),
        ...externalPaths
    ]);
    return [...candidates].filter((file) => Object.keys(default_filters).every((key) => default_filters[key](file)));
};
|
|
/**
 * Loads every file selected by `glob(projectPath, include)` and converts it
 * with the handler that `defaultMimeRegistry` returns for its MIME type.
 *
 * The MIME type comes from `lookup(fullPath)` and falls back to `text/plain`.
 * When no handler is registered for that type, the `text/*` handler is tried.
 * Handler results are awaited, so handlers may be synchronous or return a
 * promise. A file whose handling throws or rejects is logged and skipped.
 *
 * @param {string} projectPath - Root directory of the project.
 * @param {string[]} [include=[]] - Glob patterns and/or absolute paths.
 * @param {object} [options] - Not used by this function; kept so existing callers keep working.
 * @returns {Promise<Array>} Handler results with `null` entries removed.
 */
export async function get(projectPath, include = [], options) {
    const files = glob(projectPath, include);
    const results = await Promise.all(files.map(async (fullPath) => {
        try {
            const relativePath = forward_slash(path.relative(projectPath, fullPath));
            if (!isFile(fullPath) || !exists(fullPath)) {
                return null;
            }
            const mimeType = lookup(fullPath) || 'text/plain';
            const handler = defaultMimeRegistry.getHandler(mimeType);
            if (handler) {
                // Awaited inside the try so a rejected handler is logged below.
                return await handler.handle(fullPath, relativePath);
            }
            const fallback = defaultMimeRegistry.getHandler('text/*');
            return (await fallback?.handle(fullPath, relativePath)) || null;
        }
        catch (error) {
            logger.error(`Error reading file ${fullPath}:`, error);
            return null;
        }
    }));
    return results.filter((entry) => entry !== null);
}
|
|
/**
 * Uploads a file into a newly created vector store and writes a sidecar
 * `<file>.meta.json` describing the result.
 *
 * @param {string} file - Path of the file to upload. Its lower-cased extension
 *   must be a key of `supported`.
 * @param {object} options - Must provide `client`; its `vectorStores` API is
 *   used to create the store and upload the file.
 * @returns {Promise<string>} Id of the created vector store.
 * @throws {Error} When no client is supplied or the extension is unsupported.
 *   Errors from store creation, upload or the metadata write are logged and rethrown.
 */
export async function vectorize(file, options) {
    // Optional chaining so a missing `options` yields the intended message
    // instead of a bare TypeError.
    if (!options?.client) {
        throw new Error('OpenAI client is required for vectorization');
    }
    const { client } = options;
    const ext = path.extname(file).toLowerCase();
    if (!(ext in supported)) {
        throw new Error(`Unsupported file format: ${ext}. Supported formats: ${Object.keys(supported).join(', ')}`);
    }
    let fileStream;
    try {
        // Create a vector store named after the file
        const vectorStore = await client.vectorStores.create({
            name: path.basename(file)
        });
        // Upload file to vector store
        fileStream = fs.createReadStream(file);
        await client.vectorStores.fileBatches.uploadAndPoll(vectorStore.id, {
            files: [fileStream]
        });
        // The meta file sits next to the original: <file>.meta.json
        const metaPath = `${file}.meta.json`;
        const metaData = {
            vectorStoreId: vectorStore.id,
            vectorizedAt: new Date().toISOString(),
            originalPath: file,
            mimeType: supported[ext]
        };
        fs.writeFileSync(metaPath, JSON.stringify(metaData, null, 2));
        return vectorStore.id;
    }
    catch (error) {
        logger.error(`Failed to vectorize file ${file}:`, error);
        throw error;
    }
    finally {
        // Release the file descriptor even when the upload fails part-way.
        // Destroying an already finished stream is a no-op.
        fileStream?.destroy();
    }
}
|
|
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic291cmNlLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vc3JjL3NvdXJjZS50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLEtBQUssSUFBSSxNQUFNLFdBQVcsQ0FBQTtBQUNqQyxPQUFPLEtBQUssRUFBRSxNQUFNLFNBQVMsQ0FBQTtBQUU3QixPQUFPLEVBQUUsSUFBSSxJQUFJLEdBQUcsRUFBRSxNQUFNLGtCQUFrQixDQUFBO0FBRTlDLE9BQU8sRUFBRSxVQUFVLElBQUksTUFBTSxFQUFFLE1BQU0sc0JBQXNCLENBQUE7QUFDM0QsT0FBTyxFQUFFLElBQUksSUFBSSxNQUFNLEVBQUUsTUFBTSxxQkFBcUIsQ0FBQTtBQUNwRCxPQUFPLEVBQUUsTUFBTSxFQUFFLGFBQWEsRUFBRSxNQUFNLG1CQUFtQixDQUFBO0FBQ3pELE9BQU8sRUFBRSxNQUFNLEVBQUUsTUFBTSxZQUFZLENBQUE7QUFDbkMsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLFlBQVksQ0FBQTtBQUNuQyxPQUFPLEVBQUUsUUFBUSxFQUFFLE1BQU0sTUFBTSxDQUFBO0FBQy9CLE9BQU8sRUFBRSxZQUFZLEVBQUUsYUFBYSxFQUFFLE1BQU0sZ0JBQWdCLENBQUE7QUFDNUQsT0FBTyxFQUFFLG1CQUFtQixFQUFrQixNQUFNLG9CQUFvQixDQUFBO0FBR3hFLE9BQU8sRUFBRSxTQUFTLEVBQUUsTUFBTSw2QkFBNkIsQ0FBQTtBQUV2RDs7O0dBR0c7QUFFSCxNQUFNLENBQUMsTUFBTSxlQUFlLEdBQUc7SUFDN0IsTUFBTTtJQUNOLE1BQU07SUFDTixJQUFJLEVBQUUsQ0FBQyxRQUFnQixFQUFFLEVBQUUsQ0FBQyxNQUFNLENBQUMsUUFBUSxDQUFDLENBQUMsSUFBSSxHQUFHLGFBQWE7Q0FDbEUsQ0FBQztBQUVGLE1BQU0sWUFBWSxHQUFHLENBQUMsU0FBaUIsRUFBRSxVQUFrQixFQUFXLEVBQUU7SUFDdEUsTUFBTSxRQUFRLEdBQUcsSUFBSSxDQUFDLFFBQVEsQ0FBQyxVQUFVLEVBQUUsU0FBUyxDQUFDLENBQUM7SUFDdEQsT0FBTyxPQUFPLENBQ1osUUFBUTtRQUNSLENBQUMsUUFBUSxDQUFDLFVBQVUsQ0FBQyxJQUFJLENBQUM7UUFDMUIsQ0FBQyxRQUFRLENBQUMsVUFBVSxDQUFDLElBQUksR0FBRyxJQUFJLENBQUMsR0FBRyxDQUFDLENBQ3RDLENBQUM7QUFDSixDQUFDLENBQUM7QUFFRixNQUFNLENBQUMsTUFBTSxpQkFBaUIsR0FBRyxDQUFDLEtBQWEsRUFBRSxLQUFhLEVBQVcsRUFBRTtJQUN6RSxNQUFNLEtBQUssR0FBRyxFQUFFLENBQUMsWUFBWSxDQUFDLEtBQUssQ0FBQyxDQUFDO0lBQ3JDLE1BQU0sS0FBSyxHQUFHLEVBQUUsQ0FBQyxZQUFZLENBQUMsS0FBSyxDQUFDLENBQUM7SUFDckMsT0FBTyxDQUFDLFlBQVksQ0FBQyxLQUFLLEVBQUUsS0FBSyxDQUFDLENBQUM7QUFDckMsQ0FBQyxDQUFDO0FBRUYsTUFBTSxDQUFDLE1BQU0sTUFBTSxHQUFHLENBQUMsUUFBZ0IsRUFBaUIsRUFBRTtJQUN4RCxJQUFJLENBQUM7UUFDSCxNQUFNLFVBQVUsR0FBRyxFQUFFLENBQUMsWUFBWSxDQUFDLFFBQVEsQ0FBQyxDQUFDO1FBQzdDLE1BQU0sUUFBUSxHQUFHLE
1BQU0sQ0FBQyxRQUFRLENBQUMsQ0FBQztRQUNsQyxJQUFJLENBQUMsUUFBUSxFQUFFLENBQUM7WUFDZCxNQUFNLElBQUksS0FBSyxDQUFDLGdDQUFnQyxDQUFDLENBQUM7UUFDcEQsQ0FBQztRQUNELE1BQU0sVUFBVSxHQUFHLFVBQVUsQ0FBQyxRQUFRLENBQUMsUUFBUSxDQUFDLENBQUM7UUFDakQsT0FBTyxRQUFRLFFBQVEsV0FBVyxVQUFVLEVBQUUsQ0FBQztJQUNqRCxDQUFDO0lBQUMsT0FBTyxLQUFLLEVBQUUsQ0FBQztRQUNmLE1BQU0sQ0FBQyxLQUFLLENBQUMsb0NBQW9DLEVBQUUsS0FBSyxDQUFDLENBQUM7UUFDMUQsT0FBTyxJQUFJLENBQUM7SUFDZCxDQUFDO0FBQ0gsQ0FBQyxDQUFDO0FBRUYsTUFBTSxDQUFDLE1BQU0sTUFBTSxHQUFHLENBQUMsS0FBZSxFQUFvQyxFQUFFO0lBQzFFLE9BQU8sS0FBSyxDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUMsRUFBRSxFQUFFLENBQUMsQ0FBQztRQUN2QixJQUFJLEVBQUUsV0FBVztRQUNqQixTQUFTLEVBQUUsRUFBRSxHQUFHLEVBQUUsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFO0tBQzlCLENBQUMsQ0FBQyxDQUFBO0FBQ0wsQ0FBQyxDQUFBO0FBRUQsTUFBTSxDQUFDLE1BQU0sSUFBSSxHQUFHLENBQ2xCLFdBQW1CLEVBQ25CLFVBQW9CLEVBQUUsRUFDdEIsRUFBRTtJQUNGLElBQUksQ0FBQyxNQUFNLENBQUMsV0FBVyxDQUFDLEVBQUUsQ0FBQztRQUN6QixHQUFHLENBQUMsV0FBVyxDQUFDLENBQUE7UUFDaEIsT0FBTyxFQUFFLENBQUE7SUFDWCxDQUFDO0lBRUQsTUFBTSxPQUFPLEdBQUcsSUFBSSxHQUFHLEVBQVUsQ0FBQTtJQUNqQyxNQUFNLGFBQWEsR0FBRyxJQUFJLEdBQUcsRUFBVSxDQUFBO0lBRXZDLFlBQVksQ0FBQyxPQUFPLENBQUMsT0FBTyxDQUFDLEVBQUUsQ0FBQyxPQUFPLENBQUMsR0FBRyxDQUFDLE9BQU8sQ0FBQyxDQUFDLENBQUE7SUFFckQsT0FBTyxDQUFDLE9BQU8sQ0FBQyxPQUFPLENBQUMsRUFBRTtRQUN4QixJQUFJLElBQUksQ0FBQyxVQUFVLENBQUMsT0FBTyxDQUFDLEVBQUUsQ0FBQztZQUM3QixJQUFJLFlBQVksQ0FBQyxPQUFPLEVBQUUsV0FBVyxDQUFDLEVBQUUsQ0FBQztnQkFDdkMsT0FBTyxDQUFDLEdBQUcsQ0FBQyxPQUFPLENBQUMsQ0FBQTtZQUN0QixDQUFDO2lCQUFNLENBQUM7Z0JBQ04sYUFBYSxDQUFDLEdBQUcsQ0FBQyxPQUFPLENBQUMsQ0FBQTtZQUM1QixDQUFDO1FBQ0gsQ0FBQzthQUFNLENBQUM7WUFDTixPQUFPLENBQUMsR0FBRyxDQUFDLE9BQU8sQ0FBQyxDQUFBO1FBQ3RCLENBQUM7SUFDSCxDQUFDLENBQUMsQ0FBQTtJQUVGLE1BQU0sU0FBUyxHQUFHLFFBQVEsQ0FBQyxDQUFDLEdBQUcsT0FBTyxDQUFDLEVBQUU7UUFDdkMsR0FBRyxFQUFFLFdBQVc7UUFDaEIsUUFBUSxFQUFFLEtBQUs7UUFDZixNQUFNLEVBQUUsWUFBWTtLQUNyQixDQUFDLENBQUE7SUFFRixNQUFNLFFBQVEsR0FBRyxLQUFLLENBQUMsSUFBSSxDQUFDLElBQUksR0FBRyxDQUFDO1FBQ2xDLEdBQUcsU0FBUyxDQUFDLEdBQUcsQ0FBQyxJQUFJLENBQUMsRUFBRSxDQUFDLElBQU
ksQ0FBQyxJQUFJLENBQUMsV0FBVyxFQUFFLElBQUksQ0FBQyxDQUFDO1FBQ3RELEdBQUcsS0FBSyxDQUFDLElBQUksQ0FBQyxhQUFhLENBQUM7S0FDN0IsQ0FBQyxDQUFDLENBQUE7SUFFSCxJQUFJLEtBQUssR0FBRyxRQUFRLENBQUMsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FDaEMsTUFBTSxDQUFDLElBQUksQ0FBQyxlQUFlLENBQUMsQ0FBQyxLQUFLLENBQUMsQ0FBQyxHQUFHLEVBQUUsRUFBRSxDQUFDLGVBQWUsQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUNyRSxDQUFBO0lBQ0QsT0FBTyxLQUFLLENBQUE7QUFDZCxDQUFDLENBQUE7QUFFRCxNQUFNLENBQUMsS0FBSyxVQUFVLEdBQUcsQ0FDdkIsV0FBbUIsRUFDbkIsVUFBb0IsRUFBRSxFQUN0QixPQUFrQjtJQUVsQixJQUFJLEtBQUssR0FBRyxJQUFJLENBQUMsV0FBVyxFQUFFLE9BQU8sQ0FBQyxDQUFBO0lBQ3RDLElBQUksR0FBRyxHQUFHLEtBQUssQ0FBQyxHQUFHLENBQUMsQ0FBQyxRQUFRLEVBQUUsRUFBRTtRQUMvQixJQUFJLENBQUM7WUFDSCxNQUFNLFlBQVksR0FBRyxhQUFhLENBQUMsSUFBSSxDQUFDLFFBQVEsQ0FBQyxXQUFXLEVBQUUsUUFBUSxDQUFDLENBQUMsQ0FBQTtZQUN4RSxJQUFJLE1BQU0sQ0FBQyxRQUFRLENBQUMsSUFBSSxNQUFNLENBQUMsUUFBUSxDQUFDLEVBQUUsQ0FBQztnQkFDekMsTUFBTSxRQUFRLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQyxJQUFJLFlBQVksQ0FBQTtnQkFDakQsTUFBTSxPQUFPLEdBQUcsbUJBQW1CLENBQUMsVUFBVSxDQUFDLFFBQVEsQ0FBQyxDQUFBO2dCQUN4RCxJQUFJLE9BQU8sRUFBRSxDQUFDO29CQUNaLE9BQU8sT0FBTyxDQUFDLE1BQU0sQ0FBQyxRQUFRLEVBQUUsWUFBWSxDQUFDLENBQUE7Z0JBQy9DLENBQUM7Z0JBQ0QsT0FBTyxtQkFBbUIsQ0FBQyxVQUFVLENBQUMsUUFBUSxDQUFDLEVBQUUsTUFBTSxDQUFDLFFBQVEsRUFBRSxZQUFZLENBQUMsSUFBSSxJQUFJLENBQUE7WUFDekYsQ0FBQztZQUNELE9BQU8sSUFBSSxDQUFBO1FBQ2IsQ0FBQztRQUFDLE9BQU8sS0FBSyxFQUFFLENBQUM7WUFDZixNQUFNLENBQUMsS0FBSyxDQUFDLHNCQUFzQixRQUFRLEdBQUcsRUFBRSxLQUFLLENBQUMsQ0FBQTtZQUN0RCxPQUFPLElBQUksQ0FBQTtRQUNiLENBQUM7SUFDSCxDQUFDLENBQUMsQ0FBQTtJQUNGLEdBQUcsR0FBRyxNQUFNLEdBQUcsQ0FBQyxNQUFNLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRSxDQUFDLENBQUMsS0FBSyxJQUFJLENBQUMsQ0FBQTtJQUN6QyxPQUFPLEdBQUcsQ0FBQTtBQUNaLENBQUM7QUFFRCxNQUFNLENBQUMsS0FBSyxVQUFVLFNBQVMsQ0FBQyxJQUFZLEVBQUUsT0FBa0I7SUFDOUQsSUFBSSxDQUFDLE9BQU8sQ0FBQyxNQUFNLEVBQUUsQ0FBQztRQUNwQixNQUFNLElBQUksS0FBSyxDQUFDLDZDQUE2QyxDQUFDLENBQUE7SUFDaEUsQ0FBQztJQUVELE1BQU0sR0FBRyxHQUFHLElBQUksQ0FBQyxPQUFPLENBQUMsSUFBSSxDQUFDLENBQUMsV0FBVyxFQUFFLENBQUE7SUFDNU
MsSUFBSSxDQUFDLENBQUMsR0FBRyxJQUFJLFNBQVMsQ0FBQyxFQUFFLENBQUM7UUFDeEIsTUFBTSxJQUFJLEtBQUssQ0FBQyw0QkFBNEIsR0FBRyx3QkFBd0IsTUFBTSxDQUFDLElBQUksQ0FBQyxTQUFTLENBQUMsQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLEVBQUUsQ0FBQyxDQUFBO0lBQzdHLENBQUM7SUFFRCxJQUFJLENBQUM7UUFDSCx3QkFBd0I7UUFDeEIsTUFBTSxXQUFXLEdBQUcsTUFBTSxPQUFPLENBQUMsTUFBTSxDQUFDLFlBQVksQ0FBQyxNQUFNLENBQUM7WUFDM0QsSUFBSSxFQUFFLElBQUksQ0FBQyxRQUFRLENBQUMsSUFBSSxDQUFDO1NBQzFCLENBQUMsQ0FBQTtRQUVGLDhCQUE4QjtRQUM5QixNQUFNLFVBQVUsR0FBRyxFQUFFLENBQUMsZ0JBQWdCLENBQUMsSUFBSSxDQUFDLENBQUE7UUFDNUMsTUFBTSxPQUFPLENBQUMsTUFBTSxDQUFDLFlBQVksQ0FBQyxXQUFXLENBQUMsYUFBYSxDQUFDLFdBQVcsQ0FBQyxFQUFFLEVBQUU7WUFDMUUsS0FBSyxFQUFFLENBQUMsVUFBVSxDQUFDO1NBQ3BCLENBQUMsQ0FBQTtRQUVGLDBFQUEwRTtRQUMxRSxNQUFNLFFBQVEsR0FBRyxHQUFHLElBQUksWUFBWSxDQUFBO1FBQ3BDLE1BQU0sUUFBUSxHQUFHO1lBQ2YsYUFBYSxFQUFFLFdBQVcsQ0FBQyxFQUFFO1lBQzdCLFlBQVksRUFBRSxJQUFJLElBQUksRUFBRSxDQUFDLFdBQVcsRUFBRTtZQUN0QyxZQUFZLEVBQUUsSUFBSTtZQUNsQixRQUFRLEVBQUUsU0FBUyxDQUFDLEdBQUcsQ0FBQztTQUN6QixDQUFBO1FBRUQsMEJBQTBCO1FBQzFCLEVBQUUsQ0FBQyxhQUFhLENBQUMsUUFBUSxFQUFFLElBQUksQ0FBQyxTQUFTLENBQUMsUUFBUSxFQUFFLElBQUksRUFBRSxDQUFDLENBQUMsQ0FBQyxDQUFBO1FBRTdELE9BQU8sV0FBVyxDQUFDLEVBQUUsQ0FBQTtJQUN2QixDQUFDO0lBQUMsT0FBTyxLQUFLLEVBQUUsQ0FBQztRQUNmLE1BQU0sQ0FBQyxLQUFLLENBQUMsNEJBQTRCLElBQUksR0FBRyxFQUFFLEtBQUssQ0FBQyxDQUFBO1FBQ3hELE1BQU0sS0FBSyxDQUFBO0lBQ2IsQ0FBQztBQUNILENBQUMifQ==
|