157 lines
2.6 KiB
TypeScript
157 lines
2.6 KiB
TypeScript
/**
 * Binary file extensions to skip for text-based operations.
 * These files can't be meaningfully compared as text and are often large.
 *
 * Every entry is lowercase and includes the leading dot, so lookups must
 * normalize a candidate extension to that form before calling `has`.
 */
export const BINARY_EXTENSIONS = new Set<string>([
  // Images
  '.png',
  '.jpg',
  '.jpeg',
  '.gif',
  '.bmp',
  '.ico',
  '.webp',
  '.tiff',
  '.tif',
  // Videos
  '.mp4',
  '.mov',
  '.avi',
  '.mkv',
  '.webm',
  '.wmv',
  '.flv',
  '.m4v',
  '.mpeg',
  '.mpg',
  // Audio
  '.mp3',
  '.wav',
  '.ogg',
  '.flac',
  '.aac',
  '.m4a',
  '.wma',
  '.aiff',
  '.opus',
  // Archives
  '.zip',
  '.tar',
  '.gz',
  '.bz2',
  '.7z',
  '.rar',
  '.xz',
  '.z',
  '.tgz',
  '.iso',
  // Executables/binaries
  '.exe',
  '.dll',
  '.so',
  '.dylib',
  '.bin',
  '.o',
  '.a',
  '.obj',
  '.lib',
  '.app',
  '.msi',
  '.deb',
  '.rpm',
  // Documents (PDF is here; FileReadTool excludes it at the call site)
  '.pdf',
  '.doc',
  '.docx',
  '.xls',
  '.xlsx',
  '.ppt',
  '.pptx',
  '.odt',
  '.ods',
  '.odp',
  // Fonts
  '.ttf',
  '.otf',
  '.woff',
  '.woff2',
  '.eot',
  // Bytecode / VM artifacts
  '.pyc',
  '.pyo',
  '.class',
  '.jar',
  '.war',
  '.ear',
  '.node',
  '.wasm',
  '.rlib',
  // Database files
  '.sqlite',
  '.sqlite3',
  '.db',
  '.mdb',
  '.idx',
  // Design / 3D
  '.psd',
  '.ai',
  '.eps',
  '.sketch',
  '.fig',
  '.xd',
  '.blend',
  '.3ds',
  '.max',
  // Flash
  '.swf',
  '.fla',
  // Lock/profiling data
  '.lockb',
  '.dat',
  '.data',
])
/**
 * Check if a file path has a binary extension.
 *
 * The extension is taken as everything from the last `.` in the path to the
 * end, lowercased, and looked up in `BINARY_EXTENSIONS`.
 *
 * @param filePath - Path or bare file name to inspect.
 * @returns `true` when the text from the last `.` onward (case-insensitive)
 *   is a known binary extension; `false` otherwise, including when the path
 *   contains no `.` at all.
 */
export function hasBinaryExtension(filePath: string): boolean {
  const lastDot = filePath.lastIndexOf('.')
  // Without this guard, slice(-1) would return the final character of the
  // path; make the "no extension" case explicit instead of relying on that
  // single character never being a set member.
  if (lastDot === -1) {
    return false
  }
  const extension = filePath.slice(lastDot).toLowerCase()
  return BINARY_EXTENSIONS.has(extension)
}
/**
 * Number of bytes to read for binary content detection.
 */
const BINARY_CHECK_SIZE = 8192

/**
 * Fraction of sampled bytes that must be non-printable control bytes before
 * the content is considered binary (strictly greater than this value).
 */
const BINARY_NON_PRINTABLE_RATIO = 0.1

/**
 * Check if a buffer contains binary content by looking for null bytes
 * or a high proportion of non-printable characters.
 *
 * Only the first `BINARY_CHECK_SIZE` bytes are inspected (or the whole
 * buffer if it is smaller); anything past that window is ignored.
 *
 * @param buffer - Raw file bytes to classify.
 * @returns `true` if the inspected window contains a null byte, or if more
 *   than 10% of its bytes are control bytes other than tab, newline and
 *   carriage return. An empty buffer returns `false`.
 */
export function isBinaryContent(buffer: Buffer): boolean {
  // Check first BINARY_CHECK_SIZE bytes (or full buffer if smaller)
  const checkSize = Math.min(buffer.length, BINARY_CHECK_SIZE)

  // Nothing to inspect; answer directly rather than evaluating 0 / 0 below.
  if (checkSize === 0) {
    return false
  }

  let nonPrintable = 0
  // subarray is a view over the same memory, so no bytes are copied.
  for (const byte of buffer.subarray(0, checkSize)) {
    // Null byte is a strong indicator of binary
    if (byte === 0) {
      return true
    }
    // Count control bytes below 32, excluding common whitespace (9, 10, 13).
    // Bytes of 127 and above are never counted, so multi-byte UTF-8 text is
    // not penalized.
    if (
      byte < 32 &&
      byte !== 9 && // tab
      byte !== 10 && // newline
      byte !== 13 // carriage return
    ) {
      nonPrintable++
    }
  }

  // If more than 10% non-printable, likely binary
  return nonPrintable / checkSize > BINARY_NON_PRINTABLE_RATIO
}