125 lines
11 KiB
JavaScript
125 lines
11 KiB
JavaScript
import * as path from 'node:path';
|
|
import * as fs from 'node:fs';
|
|
// import { sync as dir } from '@polymech/fs/dir' // Moved to glob.ts if only used there
|
|
// import { createItem as toNode } from '@polymech/fs/inspect' // Moved to glob.ts
|
|
import { sync as exists } from '@polymech/fs/exists'; // Still needed for vectorize
|
|
import { isFile, forward_slash } from '@polymech/commons'; // isFile potentially still needed for vectorize
|
|
import { logger } from './index.js';
|
|
import { lookup } from 'mime-types';
|
|
// import { globSync } from 'glob' // Moved to glob.ts
|
|
// import { EXCLUDE_GLOB, MAX_FILE_SIZE } from './constants.js' // Moved to glob.ts
|
|
import { defaultMimeRegistry } from './mime-handlers.js';
|
|
import { supported } from './commands/run-assistant.js';
|
|
import { handleWebUrl } from './http.js';
|
|
import { glob } from './glob.js'; // Import glob from glob.ts
|
|
/**
 * @todo Add support for vector stores:
 *   https://platform.openai.com/docs/assistants/tools/file-search?lang=node.js
 */
|
|
// default_filters moved to glob.ts
|
|
// isPathInside moved to glob.ts
|
|
// isWebUrl moved to glob.ts (or handled by handleWebUrl directly)
|
|
/**
 * Reports whether `pathA` resolves to a location strictly below `pathB`.
 *
 * Both arguments are canonicalised with `fs.realpathSync` first, so symlinks
 * are followed and both paths have to exist on disk.
 *
 * NOTE(review): despite the "Outside" in the name, this returns `true` when
 * `pathA` is contained in `pathB`. That polarity is kept as-is so existing
 * callers see the same results - confirm which meaning is intended.
 *
 * @param {string} pathA - Candidate (child) path.
 * @param {string} pathB - Reference (parent) directory.
 * @returns {boolean} `true` when `pathA` is a descendant of `pathB`; `false`
 *   when both resolve to the same location, when `pathA` lies outside
 *   `pathB`, or when the two share no common root.
 * @throws {Error} Whatever `fs.realpathSync` throws, e.g. for a missing path.
 */
export const isPathOutsideSafe = (pathA, pathB) => {
  const realA = fs.realpathSync(pathA);
  const realB = fs.realpathSync(pathB);
  const relation = path.relative(realB, realA);

  // Empty relation: both arguments resolve to the very same location.
  if (relation === '') {
    return false;
  }
  // path.relative() yields an absolute path when there is no common root
  // (e.g. different Windows drives); that can never be "inside".
  if (path.isAbsolute(relation)) {
    return false;
  }
  // Only a whole leading ".." segment means the path escapes the parent.
  // A plain startsWith('..') would also reject entries such as "..hidden".
  return relation !== '..' && !relation.startsWith(`..${path.sep}`);
};
|
|
/**
 * Encodes a file on disk as a `data:` URL.
 *
 * The file is read first and its MIME type is then resolved from the path via
 * `lookup`. Any failure (unreadable file, unknown MIME type) is logged and
 * reported as `null` instead of being thrown.
 *
 * @param {string} filePath - Path of the file to encode.
 * @returns {string|null} `data:<mime>;base64,<payload>` or `null` on failure.
 */
export const base64 = (filePath) => {
  try {
    const contents = fs.readFileSync(filePath);
    const detectedType = lookup(filePath);
    if (detectedType) {
      const payload = contents.toString('base64');
      return `data:${detectedType};base64,${payload}`;
    }
    // Raised inside the try so it takes the same log-and-null path as I/O errors.
    throw new Error('Unable to determine MIME type.');
  } catch (err) {
    logger.error('fileToBase64 : Error reading file:', err);
    return null;
  }
};
|
|
/**
 * Wraps each file path in an `image_url` content part whose URL is the
 * file's `base64` data URL.
 *
 * NOTE(review): `base64` returns `null` on failure, so an entry may carry
 * `url: null`; entries are not filtered here.
 *
 * @param {string[]} files - Paths of the image files.
 * @returns {Array<{type: string, image_url: {url: (string|null)}}>} One entry
 *   per input path, in input order.
 */
export const images = (files) => {
  const toImagePart = (imagePath) => {
    const url = base64(imagePath);
    return { type: 'image_url', image_url: { url } };
  };
  return files.map((imagePath) => toImagePart(imagePath));
};
|
|
// glob function definition removed from here
|
|
/**
 * Collects content for every file and web URL matched by `glob`.
 *
 * Files are passed to the handler registered for their MIME type
 * (`text/plain` when the type cannot be looked up), falling back to the
 * `text/*` handler when no specific handler exists. Web URLs are resolved
 * through `handleWebUrl`. Failures are logged per item and the item is
 * dropped; one bad item never fails the whole call.
 *
 * NOTE(review): handler results are collected without `await`; this assumes
 * `handle` is synchronous - confirm against the MIME handler registry.
 *
 * @param {string} projectPath - Base directory; file paths are made relative to it.
 * @param {string[]} [include=[]] - Patterns / URLs forwarded to `glob`.
 * @param {object} options - Forwarded to `glob`; `options.exclude` is passed
 *   as the exclude argument. Required: it is dereferenced unconditionally.
 * @returns {Promise<Array>} File results followed by web results, with `null`
 *   entries removed.
 */
export async function get(projectPath, include = [], options) {
  const { files, webUrls } = glob(projectPath, include, options.exclude, options);

  // Produces the handler output for one matched path, or null when skipped.
  const loadFile = (fullPath) => {
    try {
      const relativePath = forward_slash(path.relative(projectPath, fullPath));
      if (!isFile(fullPath) || !exists(fullPath)) {
        return null;
      }
      const mimeType = lookup(fullPath) || 'text/plain';
      const registered = defaultMimeRegistry.getHandler(mimeType);
      return registered
        ? registered.handle(fullPath, relativePath)
        : (defaultMimeRegistry.getHandler('text/*')?.handle(fullPath, relativePath) || null);
    } catch (error) {
      logger.error(`Error reading file ${fullPath}:`, error);
      return null;
    }
  };

  // Resolves one web URL; a failure is logged and reported as null.
  const loadUrl = async (url) => {
    try {
      return await handleWebUrl(url);
    } catch (error) {
      logger.error(`Error processing web URL ${url}:`, error);
      return null;
    }
  };

  const fileResults = files.map((fullPath) => loadFile(fullPath));
  const pendingWeb = Array.from(webUrls).map((url) => loadUrl(url));
  const webResults = await Promise.all(pendingWeb);

  return [...fileResults, ...webResults].filter((entry) => entry !== null);
}
|
|
/**
 * Uploads a file into a newly created vector store and records the result
 * in a sidecar `<file>.meta.json` next to the original file.
 *
 * The sidecar contains `vectorStoreId`, `vectorizedAt` (ISO timestamp),
 * `originalPath` and `mimeType` (the `supported` entry for the extension).
 *
 * NOTE(review): when the upload fails, the vector store that was already
 * created is left in place; cleaning it up is not attempted here.
 *
 * @param {string} file - Path of the file to vectorize; its lower-cased
 *   extension must be a key of `supported`.
 * @param {object} options - Must carry `client`, an object exposing
 *   `vectorStores.create` and `vectorStores.fileBatches.uploadAndPoll`.
 * @returns {Promise<string>} The id of the created vector store.
 * @throws {Error} When `options.client` is missing, when the extension is
 *   unsupported, or when creating/uploading/writing fails (the original
 *   error is logged and rethrown).
 */
export async function vectorize(file, options) {
  if (!options.client) {
    throw new Error('OpenAI client is required for vectorization');
  }
  const ext = path.extname(file).toLowerCase();
  if (!(ext in supported)) {
    throw new Error(`Unsupported file format: ${ext}. Supported formats: ${Object.keys(supported).join(', ')}`);
  }

  // Declared outside the try so the finally clause can release it.
  let fileStream;
  try {
    // Create a vector store named after the file.
    const vectorStore = await options.client.vectorStores.create({
      name: path.basename(file),
    });

    // Upload the file to the vector store and wait for processing.
    fileStream = fs.createReadStream(file);
    await options.client.vectorStores.fileBatches.uploadAndPoll(vectorStore.id, {
      files: [fileStream],
    });

    // The sidecar lives next to the original file: "<file>.meta.json".
    const metaPath = `${file}.meta.json`;
    const metaData = {
      vectorStoreId: vectorStore.id,
      vectorizedAt: new Date().toISOString(),
      originalPath: file,
      mimeType: supported[ext],
    };
    fs.writeFileSync(metaPath, JSON.stringify(metaData, null, 2));

    return vectorStore.id;
  } catch (error) {
    logger.error(`Failed to vectorize file ${file}:`, error);
    throw error;
  } finally {
    // Release the file descriptor even when the upload never consumed the
    // stream; destroying an already finished stream is a no-op.
    fileStream?.destroy();
  }
}
|
|
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic291cmNlLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vc3JjL3NvdXJjZS50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLEtBQUssSUFBSSxNQUFNLFdBQVcsQ0FBQTtBQUNqQyxPQUFPLEtBQUssRUFBRSxNQUFNLFNBQVMsQ0FBQTtBQUU3Qix3RkFBd0Y7QUFFeEYsa0ZBQWtGO0FBQ2xGLE9BQU8sRUFBRSxJQUFJLElBQUksTUFBTSxFQUFFLE1BQU0scUJBQXFCLENBQUEsQ0FBQyw2QkFBNkI7QUFDbEYsT0FBTyxFQUFFLE1BQU0sRUFBRSxhQUFhLEVBQUUsTUFBTSxtQkFBbUIsQ0FBQSxDQUFDLGdEQUFnRDtBQUMxRyxPQUFPLEVBQUUsTUFBTSxFQUFFLE1BQU0sWUFBWSxDQUFBO0FBQ25DLE9BQU8sRUFBRSxNQUFNLEVBQUUsTUFBTSxZQUFZLENBQUE7QUFDbkMsc0RBQXNEO0FBQ3RELG1GQUFtRjtBQUNuRixPQUFPLEVBQUUsbUJBQW1CLEVBQWtCLE1BQU0sb0JBQW9CLENBQUE7QUFHeEUsT0FBTyxFQUFFLFNBQVMsRUFBRSxNQUFNLDZCQUE2QixDQUFBO0FBQ3ZELE9BQU8sRUFBRSxZQUFZLEVBQUUsTUFBTSxXQUFXLENBQUE7QUFDeEMsT0FBTyxFQUFFLElBQUksRUFBRSxNQUFNLFdBQVcsQ0FBQSxDQUFDLDJCQUEyQjtBQUU1RDs7O0dBR0c7QUFFSCxtQ0FBbUM7QUFDbkMsZ0NBQWdDO0FBQ2hDLGtFQUFrRTtBQUVsRSxNQUFNLENBQUMsTUFBTSxpQkFBaUIsR0FBRyxDQUFDLEtBQWEsRUFBRSxLQUFhLEVBQVcsRUFBRTtJQUN6RSxNQUFNLEtBQUssR0FBRyxFQUFFLENBQUMsWUFBWSxDQUFDLEtBQUssQ0FBQyxDQUFDO0lBQ3JDLE1BQU0sS0FBSyxHQUFHLEVBQUUsQ0FBQyxZQUFZLENBQUMsS0FBSyxDQUFDLENBQUM7SUFDckMsc0dBQXNHO0lBQ3RHLGdFQUFnRTtJQUNoRSx5RkFBeUY7SUFDekYsTUFBTSxRQUFRLEdBQUcsSUFBSSxDQUFDLFFBQVEsQ0FBQyxLQUFLLEVBQUUsS0FBSyxDQUFDLENBQUMsQ0FBQyxvQ0FBb0M7SUFDbEYsT0FBTyxPQUFPLENBQ1osUUFBUTtRQUNSLENBQUMsUUFBUSxDQUFDLFVBQVUsQ0FBQyxJQUFJLENBQUM7UUFDMUIsQ0FBQyxRQUFRLENBQUMsVUFBVSxDQUFDLElBQUksR0FBRyxJQUFJLENBQUMsR0FBRyxDQUFDLENBQ3RDLENBQUM7QUFDSixDQUFDLENBQUM7QUFFRixNQUFNLENBQUMsTUFBTSxNQUFNLEdBQUcsQ0FBQyxRQUFnQixFQUFpQixFQUFFO0lBQ3hELElBQUksQ0FBQztRQUNILE1BQU0sVUFBVSxHQUFHLEVBQUUsQ0FBQyxZQUFZLENBQUMsUUFBUSxDQUFDLENBQUM7UUFDN0MsTUFBTSxRQUFRLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQyxDQUFDO1FBQ2xDLElBQUksQ0FBQyxRQUFRLEVBQUUsQ0FBQztZQUNkLE1BQU0sSUFBSSxLQUFLLENBQUMsZ0NBQWdDLENBQUMsQ0FBQztRQUNwRCxDQUFDO1FBQ0QsTUFBTSxVQUFVLEdBQUcsVUFBVSxDQUFDLFFBQVEsQ0FBQyxRQUFRLENBQUMsQ0FBQztRQUNqRCxPQUFPLFFBQVEsUUFBUSxXQUFXLFVBQVUsRUFBRSxDQU
FDO0lBQ2pELENBQUM7SUFBQyxPQUFPLEtBQUssRUFBRSxDQUFDO1FBQ2YsTUFBTSxDQUFDLEtBQUssQ0FBQyxvQ0FBb0MsRUFBRSxLQUFLLENBQUMsQ0FBQztRQUMxRCxPQUFPLElBQUksQ0FBQztJQUNkLENBQUM7QUFDSCxDQUFDLENBQUM7QUFFRixNQUFNLENBQUMsTUFBTSxNQUFNLEdBQUcsQ0FBQyxLQUFlLEVBQW9DLEVBQUU7SUFDMUUsT0FBTyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FBQyxDQUFDO1FBQ3ZCLElBQUksRUFBRSxXQUFXO1FBQ2pCLFNBQVMsRUFBRSxFQUFFLEdBQUcsRUFBRSxNQUFNLENBQUMsQ0FBQyxDQUFDLEVBQUU7S0FDOUIsQ0FBQyxDQUFDLENBQUE7QUFDTCxDQUFDLENBQUE7QUFFRCw2Q0FBNkM7QUFFN0MsTUFBTSxDQUFDLEtBQUssVUFBVSxHQUFHLENBQ3ZCLFdBQW1CLEVBQ25CLFVBQW9CLEVBQUUsRUFDdEIsT0FBa0I7SUFFbEIsTUFBTSxFQUFFLEtBQUssRUFBRSxPQUFPLEVBQUUsR0FBRyxJQUFJLENBQUMsV0FBVyxFQUFFLE9BQU8sRUFBRSxPQUFPLENBQUMsT0FBTyxFQUFFLE9BQU8sQ0FBQyxDQUFBO0lBRS9FLE1BQU0sV0FBVyxHQUFHLEtBQUssQ0FBQyxHQUFHLENBQUMsQ0FBQyxRQUFRLEVBQUUsRUFBRTtRQUN6QyxJQUFJLENBQUM7WUFDSCxNQUFNLFlBQVksR0FBRyxhQUFhLENBQUMsSUFBSSxDQUFDLFFBQVEsQ0FBQyxXQUFXLEVBQUUsUUFBUSxDQUFDLENBQUMsQ0FBQTtZQUN4RSxJQUFJLE1BQU0sQ0FBQyxRQUFRLENBQUMsSUFBSSxNQUFNLENBQUMsUUFBUSxDQUFDLEVBQUUsQ0FBQztnQkFDekMsTUFBTSxRQUFRLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQyxJQUFJLFlBQVksQ0FBQTtnQkFDakQsTUFBTSxPQUFPLEdBQUcsbUJBQW1CLENBQUMsVUFBVSxDQUFDLFFBQVEsQ0FBQyxDQUFBO2dCQUN4RCxJQUFJLE9BQU8sRUFBRSxDQUFDO29CQUNaLE9BQU8sT0FBTyxDQUFDLE1BQU0sQ0FBQyxRQUFRLEVBQUUsWUFBWSxDQUFDLENBQUE7Z0JBQy9DLENBQUM7Z0JBQ0QsT0FBTyxtQkFBbUIsQ0FBQyxVQUFVLENBQUMsUUFBUSxDQUFDLEVBQUUsTUFBTSxDQUFDLFFBQVEsRUFBRSxZQUFZLENBQUMsSUFBSSxJQUFJLENBQUE7WUFDekYsQ0FBQztZQUNELE9BQU8sSUFBSSxDQUFBO1FBQ2IsQ0FBQztRQUFDLE9BQU8sS0FBSyxFQUFFLENBQUM7WUFDZixNQUFNLENBQUMsS0FBSyxDQUFDLHNCQUFzQixRQUFRLEdBQUcsRUFBRSxLQUFLLENBQUMsQ0FBQTtZQUN0RCxPQUFPLElBQUksQ0FBQTtRQUNiLENBQUM7SUFDSCxDQUFDLENBQUMsQ0FBQTtJQUVGLG1DQUFtQztJQUNuQyxNQUFNLGNBQWMsR0FBRyxLQUFLLENBQUMsSUFBSSxDQUFDLE9BQU8sQ0FBQyxDQUFDLEdBQUcsQ0FBQyxLQUFLLEVBQUUsR0FBVyxFQUFFLEVBQUU7UUFDbkUsSUFBSSxDQUFDO1lBQ0gsT0FBTyxNQUFNLFlBQVksQ0FBQyxHQUFHLENBQUMsQ0FBQztRQUNqQyxDQUFDO1FBQUMsT0FBTyxLQUFLLEVBQUUsQ0FBQztZQUNmLE1BQU0sQ0FBQyxLQUFLLENBQUMsNEJBQTRCLEdBQUcsR0FBRyxFQUFFLE
tBQUssQ0FBQyxDQUFDO1lBQ3hELE9BQU8sSUFBSSxDQUFDO1FBQ2QsQ0FBQztJQUNILENBQUMsQ0FBQyxDQUFDO0lBRUgsTUFBTSxVQUFVLEdBQUcsTUFBTSxPQUFPLENBQUMsR0FBRyxDQUFDLGNBQWMsQ0FBQyxDQUFDO0lBRXJELDZCQUE2QjtJQUM3QixNQUFNLE9BQU8sR0FBRyxDQUFDLEdBQUcsV0FBVyxFQUFFLEdBQUcsVUFBVSxDQUFDLENBQUMsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FBQyxDQUFDLEtBQUssSUFBSSxDQUFDLENBQUM7SUFDMUUsT0FBTyxPQUFPLENBQUM7QUFDakIsQ0FBQztBQUVELE1BQU0sQ0FBQyxLQUFLLFVBQVUsU0FBUyxDQUFDLElBQVksRUFBRSxPQUFrQjtJQUM5RCxJQUFJLENBQUMsT0FBTyxDQUFDLE1BQU0sRUFBRSxDQUFDO1FBQ3BCLE1BQU0sSUFBSSxLQUFLLENBQUMsNkNBQTZDLENBQUMsQ0FBQTtJQUNoRSxDQUFDO0lBRUQsTUFBTSxHQUFHLEdBQUcsSUFBSSxDQUFDLE9BQU8sQ0FBQyxJQUFJLENBQUMsQ0FBQyxXQUFXLEVBQUUsQ0FBQTtJQUM1QyxJQUFJLENBQUMsQ0FBQyxHQUFHLElBQUksU0FBUyxDQUFDLEVBQUUsQ0FBQztRQUN4QixNQUFNLElBQUksS0FBSyxDQUFDLDRCQUE0QixHQUFHLHdCQUF3QixNQUFNLENBQUMsSUFBSSxDQUFDLFNBQVMsQ0FBQyxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsRUFBRSxDQUFDLENBQUE7SUFDN0csQ0FBQztJQUVELElBQUksQ0FBQztRQUNILHdCQUF3QjtRQUN4QixNQUFNLFdBQVcsR0FBRyxNQUFNLE9BQU8sQ0FBQyxNQUFNLENBQUMsWUFBWSxDQUFDLE1BQU0sQ0FBQztZQUMzRCxJQUFJLEVBQUUsSUFBSSxDQUFDLFFBQVEsQ0FBQyxJQUFJLENBQUM7U0FDMUIsQ0FBQyxDQUFBO1FBRUYsOEJBQThCO1FBQzlCLE1BQU0sVUFBVSxHQUFHLEVBQUUsQ0FBQyxnQkFBZ0IsQ0FBQyxJQUFJLENBQUMsQ0FBQTtRQUM1QyxNQUFNLE9BQU8sQ0FBQyxNQUFNLENBQUMsWUFBWSxDQUFDLFdBQVcsQ0FBQyxhQUFhLENBQUMsV0FBVyxDQUFDLEVBQUUsRUFBRTtZQUMxRSxLQUFLLEVBQUUsQ0FBQyxVQUFVLENBQUM7U0FDcEIsQ0FBQyxDQUFBO1FBRUYsMEVBQTBFO1FBQzFFLE1BQU0sUUFBUSxHQUFHLEdBQUcsSUFBSSxZQUFZLENBQUE7UUFDcEMsTUFBTSxRQUFRLEdBQUc7WUFDZixhQUFhLEVBQUUsV0FBVyxDQUFDLEVBQUU7WUFDN0IsWUFBWSxFQUFFLElBQUksSUFBSSxFQUFFLENBQUMsV0FBVyxFQUFFO1lBQ3RDLFlBQVksRUFBRSxJQUFJO1lBQ2xCLFFBQVEsRUFBRSxTQUFTLENBQUMsR0FBRyxDQUFDO1NBQ3pCLENBQUE7UUFFRCwwQkFBMEI7UUFDMUIsRUFBRSxDQUFDLGFBQWEsQ0FBQyxRQUFRLEVBQUUsSUFBSSxDQUFDLFNBQVMsQ0FBQyxRQUFRLEVBQUUsSUFBSSxFQUFFLENBQUMsQ0FBQyxDQUFDLENBQUE7UUFFN0QsT0FBTyxXQUFXLENBQUMsRUFBRSxDQUFBO0lBQ3ZCLENBQUM7SUFBQyxPQUFPLEtBQUssRUFBRSxDQUFDO1FBQ2YsTUFBTSxDQUFDLEtBQUssQ0FBQyw0QkFBNEIsSUFBSSxHQUFHLEVBQUUsS0FBSyxDQUFDLENBQUE7UUFDeEQsTU
FBTSxLQUFLLENBQUE7SUFDYixDQUFDO0FBQ0gsQ0FBQyJ9
|