fs:path & name sanitize

This commit is contained in:
lovebird 2025-03-17 16:14:36 +01:00
parent 133e251676
commit 53e82267ea
9 changed files with 223 additions and 109 deletions

3
packages/fs/dist/constants.d.ts vendored Normal file
View File

@ -0,0 +1,3 @@
// Emoji-matching patterns exported by constants.js. sanitizeFilename applies all
// three (STD, then MIN, then ALL) when the REMOVE_EMOJIS flag is set.
// NOTE(review): the pattern bodies are not visible here, so how MIN, STD and ALL
// differ in coverage is an open question — confirm against constants.js.
export declare const EMOJIES_MIN: RegExp;
export declare const EMOJIES_STD: RegExp;
export declare const EMOJIES_ALL: RegExp;

3
packages/fs/dist/constants.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -6,9 +6,11 @@ export declare enum E_FilenameError {
LEADING_TRAILING_SPACE = 8,// Starts/ends with space
ONLY_DOTS = 16
}
export interface I_SanitizeOptions {
lowercase?: boolean;
whitespace?: boolean;
export declare enum E_Sanitize {
NONE = 0,
LOWERCASE = 1,// Convert to lowercase
REPLACE_WHITESPACE = 2,// Replace spaces with underscores
REMOVE_EMOJIS = 4
}
export interface I_ValidationResult {
isValid: boolean;
@ -21,7 +23,7 @@ export interface I_ValidationResult {
* @param options - Configuration options
* @returns Sanitized filename
*/
export declare function sanitizeFilename(filename?: string, options?: I_SanitizeOptions): string;
export declare function sanitizeFilename(filename?: string, flags?: E_Sanitize): string;
/**
* Validates a filename and returns a flag-based error representation.
*

View File

@ -1,3 +1,4 @@
import { EMOJIES_ALL, EMOJIES_MIN, EMOJIES_STD } from '../constants.js';
const RESERVED_NAMES = new Set([
"con", "prn", "aux", "nul",
"com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
@ -13,6 +14,13 @@ export var E_FilenameError;
E_FilenameError[E_FilenameError["LEADING_TRAILING_SPACE"] = 8] = "LEADING_TRAILING_SPACE";
E_FilenameError[E_FilenameError["ONLY_DOTS"] = 16] = "ONLY_DOTS"; // Filename is only "." or ".."
})(E_FilenameError || (E_FilenameError = {}));
/**
 * Bit flags selecting which sanitizeFilename steps run; values can be OR-ed together.
 * The object maps each name to its bit and each bit back to its name.
 */
export var E_Sanitize;
(function (flagTable) {
    const entries = [
        ["NONE", 0],
        ["LOWERCASE", 1], // Convert to lowercase
        ["REPLACE_WHITESPACE", 2], // Replace spaces with underscores
        ["REMOVE_EMOJIS", 4] // Remove emoji characters
    ];
    for (const [label, bit] of entries) {
        // Forward entry first, then the reverse lookup, matching the enum emit order.
        flagTable[label] = bit;
        flagTable[bit] = label;
    }
})(E_Sanitize || (E_Sanitize = {}));
/**
* Sanitizes a filename by removing invalid characters and normalizing it.
*
@ -20,22 +28,28 @@ export var E_FilenameError;
* @param options - Configuration options
* @returns Sanitized filename
*/
export function sanitizeFilename(filename = "", options = { lowercase: false, whitespace: false }) {
const { lowercase = false, whitespace = true } = options;
// Normalize Unicode (removes diacritics)
export function sanitizeFilename(filename = "", flags = E_Sanitize.LOWERCASE | E_Sanitize.REPLACE_WHITESPACE | E_Sanitize.REMOVE_EMOJIS) {
let sanitized = filename
.normalize("NFD")
.replace(/[\u0300-\u036f]/g, "") // Strip accents
.replace(/[^\w.\- ]/g, "") // Keep only alphanumeric, dot, hyphen, underscore, and space
.trim(); // Remove leading/trailing spaces
// Replace spaces with underscores if enabled
if (whitespace) {
// Remove emojis if flag is set
if (flags & E_Sanitize.REMOVE_EMOJIS) {
sanitized = sanitized.replace(/[\p{Emoji}]/gu, "");
}
// Replace spaces with underscores if flag is set
if (flags & E_Sanitize.REPLACE_WHITESPACE) {
sanitized = sanitized.replace(/\s+/g, "_");
}
// Convert to lowercase if enabled
if (lowercase) {
// Convert to lowercase if flag is set
if (flags & E_Sanitize.LOWERCASE) {
sanitized = sanitized.toLowerCase();
}
// Prevent empty filenames
if (!sanitized || sanitized === "." || sanitized === "..") {
return "untitled";
}
// Prevent reserved names (Windows)
if (RESERVED_NAMES.has(sanitized.toLowerCase())) {
return sanitized + "_safe";
@ -44,6 +58,13 @@ export function sanitizeFilename(filename = "", options = { lowercase: false, wh
if (!sanitized || sanitized === "." || sanitized === "..") {
return "untitled";
}
// Remove emojis if flag is set
if (flags & E_Sanitize.REMOVE_EMOJIS) {
sanitized = sanitized.replace(/[\p{Emoji}]/gu, "")
.replace(EMOJIES_STD, "") // Remove emojis
.replace(EMOJIES_MIN, "") // Remove emojis
.replace(EMOJIES_ALL, ""); // Remove emojis
}
return sanitized;
}
/**

View File

@ -1,6 +1,15 @@
export declare const substitute: (alt: boolean, template: string, vars: Record<string, string>) => any;
export declare const resolve: (_path: string, alt?: boolean, vars?: Record<string, string>) => any;
export declare const sep = "/";
/**
* The native path separator depending on the OS.
*/
import { E_Sanitize } from "./name.js";
// Bit flags describing problems found in a path; validatePath ORs them into
// the errorFlags field of its result.
export declare enum E_PathError {
// No problem detected
NONE = 0,
INVALID_CHAR = 1,// Invalid characters in path segments
RESERVED_NAME = 2,// Contains a Windows reserved filename
LEADING_TRAILING_SPACE = 4,// Segment has leading/trailing spaces
// Path exceeds Windows MAX_PATH limit
PATH_TOO_LONG = 8
}
/**
 * Normalizes filePath and sanitizes its segments with sanitizeFilename,
 * rejoining them with the platform path separator.
 *
 * @param filePath - Path to sanitize
 * @param flags - Sanitize flags forwarded to sanitizeFilename for each segment
 * @returns The sanitized path
 */
export declare function sanitize(filePath: string, flags: any): string;
/**
 * Outcome of validatePath.
 */
export interface I_PathValidationResult {
// True only when errorFlags equals E_PathError.NONE
isValid: boolean;
// Bitwise OR of the E_PathError values that were detected
errorFlags: number;
}
/**
 * Validates a path segment by segment and reports the problems as E_PathError flags.
 * On win32 a path longer than 260 characters is additionally flagged as PATH_TOO_LONG.
 *
 * @param filePath - Path to validate (the implementation defaults it to "")
 * @returns Validation result carrying the combined error flags
 */
export declare function validatePath(filePath?: string): I_PathValidationResult;
/**
 * Renames an existing file when sanitizeFilename changes its base name.
 *
 * @param filePath - Path of the file to check
 * @param flags - Sanitize flags forwarded to sanitizeFilename
 * @returns The new path if the file was moved; otherwise filePath unchanged
 *          (file missing, name already clean, or the move threw)
 */
export declare function renameFileIfNeeded(filePath: string, flags: E_Sanitize): string;

View File

@ -1,41 +1,63 @@
import { substitute as _substitute, substituteAlt as _substituteAlt } from "@polymech/core/strings";
export const substitute = (alt, template, vars) => alt ? _substituteAlt(template, vars) : _substitute(template, vars);
export const resolve = (_path, alt = false, vars = {}) => substitute(alt, _path, {
...vars
});
export const sep = '/';
/**
* The native path separator depending on the OS.
*/
/*
export const nativeSep = isWindows ? '\\' : '/';
export function relative(from: string, to: string): string {
// ignore trailing slashes
const originalNormalizedFrom = rtrim(normalize(from), sep);
const originalNormalizedTo = rtrim(normalize(to), sep);
// we're assuming here that any non=linux OS is case insensitive
// so we must compare each part in its lowercase form
const normalizedFrom = isLinux ? originalNormalizedFrom : originalNormalizedFrom.toLowerCase();
const normalizedTo = isLinux ? originalNormalizedTo : originalNormalizedTo.toLowerCase();
const fromParts = normalizedFrom.split(sep);
const toParts = normalizedTo.split(sep);
let i = 0, max = Math.min(fromParts.length, toParts.length);
for (; i < max; i++) {
if (fromParts[i] !== toParts[i]) {
break;
import path from "node:path";
import os from "node:os";
import { sanitizeFilename, validateFilename, E_FilenameError } from "./name.js";
import { sync as move } from "../move.js";
import { sync as exists } from "../exists.js";
/**
 * Bit flags describing problems found in a path; validatePath ORs them into errorFlags.
 * The object maps each name to its bit and each bit back to its name.
 */
export var E_PathError;
(function (flagTable) {
    const entries = [
        ["NONE", 0],
        ["INVALID_CHAR", 1],
        ["RESERVED_NAME", 2],
        ["LEADING_TRAILING_SPACE", 4],
        ["PATH_TOO_LONG", 8] // Path exceeds Windows MAX_PATH limit
    ];
    for (const [label, bit] of entries) {
        // Forward entry first, then the reverse lookup, matching the enum emit order.
        flagTable[label] = bit;
        flagTable[bit] = label;
    }
})(E_PathError || (E_PathError = {}));
/**
 * Sanitizes the named segments of a path while keeping its structure intact.
 *
 * The path is normalized first. Its root (e.g. "/" or a Windows drive/UNC prefix)
 * is kept verbatim, and empty, "." and ".." segments are passed through unchanged:
 * sanitizeFilename would otherwise turn them into "untitled" and strip the colon
 * from a drive prefix, changing which location the path points to.
 *
 * @param filePath - Path to sanitize
 * @param flags - Sanitize flags forwarded to sanitizeFilename for each segment
 * @returns The sanitized path, joined with the platform separator
 */
export function sanitize(filePath, flags) {
    const normalized = path.normalize(filePath);
    // path.parse(...).root is always a prefix of the string it was given.
    const { root } = path.parse(normalized);
    const sanitizedSegments = normalized
        .slice(root.length)
        .split(path.sep)
        .map(segment => (segment === "" || segment === "." || segment === "..")
        ? segment
        : sanitizeFilename(segment, flags));
    return root + sanitizedSegments.join(path.sep);
}
export function validatePath(filePath = "") {
let errorFlags = E_PathError.NONE;
// Check for Windows MAX_PATH limit
if (os.platform() === "win32" && filePath.length > 260) {
errorFlags |= E_PathError.PATH_TOO_LONG;
}
const segments = path.normalize(filePath).split(path.sep);
for (const segment of segments) {
if (!segment)
continue;
const validation = validateFilename(segment);
if (validation.errorFlags & E_FilenameError.INVALID_CHAR) {
errorFlags |= E_PathError.INVALID_CHAR;
}
if (validation.errorFlags & E_FilenameError.RESERVED_NAME) {
errorFlags |= E_PathError.RESERVED_NAME;
}
if (validation.errorFlags & E_FilenameError.LEADING_TRAILING_SPACE) {
errorFlags |= E_PathError.LEADING_TRAILING_SPACE;
}
}
const result = [
...fill(fromParts.length - i, () => '..'),
...originalNormalizedTo.split(sep).slice(i)
];
return result.join(sep);
return {
isValid: errorFlags === E_PathError.NONE,
errorFlags
};
}
/**
 * Renames a file on disk when sanitizeFilename would change its base name.
 *
 * @param filePath - Path of the file to check
 * @param flags - Sanitize flags forwarded to sanitizeFilename
 * @returns The new path when the move succeeded; otherwise filePath unchanged
 *          (exists() reported nothing there, the name is already clean, or move() threw)
 */
export function renameFileIfNeeded(filePath, flags) {
    if (exists(filePath)) {
        const currentName = path.basename(filePath);
        const cleanName = sanitizeFilename(currentName, flags);
        if (cleanName !== currentName) {
            const target = path.join(path.dirname(filePath), cleanName);
            try {
                move(filePath, target);
                return target;
            }
            catch (_moveError) {
                // Best effort: a failed move leaves the caller with the original path.
            }
        }
    }
    return filePath;
}
*/

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,5 @@
import { EMOJIES_ALL, EMOJIES_MIN, EMOJIES_STD } from '../constants.js'
const RESERVED_NAMES = new Set([
"con", "prn", "aux", "nul",
"com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
@ -14,9 +16,11 @@ export enum E_FilenameError {
ONLY_DOTS = 1 << 4 // Filename is only "." or ".."
}
export interface I_SanitizeOptions {
lowercase?: boolean; // Convert to lowercase (default: false)
whitespace?: boolean; // Replace spaces with underscores (default: true)
export enum E_Sanitize {
NONE = 0,
LOWERCASE = 1 << 0, // Convert to lowercase
REPLACE_WHITESPACE = 1 << 1, // Replace spaces with underscores
REMOVE_EMOJIS = 1 << 2 // Remove emoji characters
}
export interface I_ValidationResult {
@ -31,38 +35,54 @@ export interface I_ValidationResult {
* @param options - Configuration options
* @returns Sanitized filename
*/
export function sanitizeFilename(filename: string = "", options: I_SanitizeOptions = { lowercase: false, whitespace: false }): string {
export function sanitizeFilename(filename: string = "", flags: E_Sanitize = E_Sanitize.LOWERCASE | E_Sanitize.REPLACE_WHITESPACE | E_Sanitize.REMOVE_EMOJIS): string {
const { lowercase = false, whitespace = true } = options;
// Normalize Unicode (removes diacritics)
let sanitized = filename
.normalize("NFD")
.replace(/[\u0300-\u036f]/g, "") // Strip accents
.replace(/[^\w.\- ]/g, "") // Keep only alphanumeric, dot, hyphen, underscore, and space
.trim(); // Remove leading/trailing spaces
// Replace spaces with underscores if enabled
if (whitespace) {
sanitized = sanitized.replace(/\s+/g, "_");
// Remove emojis if flag is set
if (flags & E_Sanitize.REMOVE_EMOJIS) {
sanitized = sanitized.replace(/[\p{Emoji}]/gu, "");
}
// Convert to lowercase if enabled
if (lowercase) {
sanitized = sanitized.toLowerCase();
// Replace spaces with underscores if flag is set
if (flags & E_Sanitize.REPLACE_WHITESPACE) {
sanitized = sanitized.replace(/\s+/g, "_")
}
// Convert to lowercase if flag is set
if (flags & E_Sanitize.LOWERCASE) {
sanitized = sanitized.toLowerCase()
}
// Prevent empty filenames
if (!sanitized || sanitized === "." || sanitized === "..") {
return "untitled"
}
// Prevent reserved names (Windows)
if (RESERVED_NAMES.has(sanitized.toLowerCase())) {
return sanitized + "_safe";
return sanitized + "_safe"
}
// Prevent filenames that are just dots or empty
if (!sanitized || sanitized === "." || sanitized === "..") {
return "untitled";
return "untitled"
}
return sanitized;
// Remove emojis if flag is set
if (flags & E_Sanitize.REMOVE_EMOJIS) {
sanitized = sanitized.replace(/[\p{Emoji}]/gu, "")
.replace(EMOJIES_STD, "") // Remove emojis
.replace(EMOJIES_MIN, "") // Remove emojis
.replace(EMOJIES_ALL, "") // Remove emojis
}
return sanitized
}
/**

View File

@ -1,47 +1,76 @@
import { substitute as _substitute, substituteAlt as _substituteAlt } from "@polymech/core/strings"
import path from "node:path"
import os from "node:os"
export const substitute = (alt: boolean, template: string, vars: Record<string, string>) =>
alt ? _substituteAlt(template, vars) : _substitute(template, vars)
import { sanitizeFilename, validateFilename, E_FilenameError, E_Sanitize } from "./name.js"
import { sync as move } from "../move.js"
import { sync as exists } from "../exists.js"
export const resolve = (_path: string, alt = false, vars: Record<string, string> = {}) =>
substitute(alt, _path, {
...vars
})
/**
 * Bit flags describing problems found in a path.
 * Values are distinct bits so several problems can be OR-ed into one number.
 */
export enum E_PathError {
    /** No problem detected */
    NONE = 0,
    /** Invalid characters in path segments */
    INVALID_CHAR = 0b0001,
    /** Contains a Windows reserved filename */
    RESERVED_NAME = 0b0010,
    /** Segment has leading/trailing spaces */
    LEADING_TRAILING_SPACE = 0b0100,
    /** Path exceeds Windows MAX_PATH limit */
    PATH_TOO_LONG = 0b1000
}
export const sep = '/';
/**
 * Sanitizes the named segments of a path while keeping its structure intact.
 *
 * The path is normalized first. Its root (e.g. "/" or a Windows drive/UNC prefix)
 * is kept verbatim, and empty, "." and ".." segments are passed through unchanged:
 * sanitizeFilename would otherwise turn them into "untitled" and strip the colon
 * from a drive prefix, changing which location the path points to.
 *
 * @param filePath - Path to sanitize
 * @param flags - Sanitize flags forwarded to sanitizeFilename for each segment;
 *                when omitted, sanitizeFilename applies its own default
 * @returns The sanitized path, joined with the platform separator
 */
export function sanitize(
    filePath: string,
    // Typed from the callee so both signatures stay in lockstep.
    flags?: Parameters<typeof sanitizeFilename>[1]
): string {
    const normalized = path.normalize(filePath)
    // path.parse(...).root is always a prefix of the string it was given.
    const { root } = path.parse(normalized)
    const sanitizedSegments = normalized
        .slice(root.length)
        .split(path.sep)
        .map(segment =>
            segment === "" || segment === "." || segment === ".."
                ? segment
                : sanitizeFilename(segment, flags)
        )
    return root + sanitizedSegments.join(path.sep)
}
/**
* The native path separator depending on the OS.
*/
/*
export const nativeSep = isWindows ? '\\' : '/';
export interface I_PathValidationResult {
isValid: boolean;
errorFlags: number;
}
export function relative(from: string, to: string): string {
// ignore trailing slashes
const originalNormalizedFrom = rtrim(normalize(from), sep);
const originalNormalizedTo = rtrim(normalize(to), sep);
// we're assuming here that any non=linux OS is case insensitive
// so we must compare each part in its lowercase form
const normalizedFrom = isLinux ? originalNormalizedFrom : originalNormalizedFrom.toLowerCase();
const normalizedTo = isLinux ? originalNormalizedTo : originalNormalizedTo.toLowerCase();
const fromParts = normalizedFrom.split(sep);
const toParts = normalizedTo.split(sep);
let i = 0, max = Math.min(fromParts.length, toParts.length);
for (; i < max; i++) {
if (fromParts[i] !== toParts[i]) {
break;
}
export function validatePath(filePath: string = ""): I_PathValidationResult {
let errorFlags = E_PathError.NONE;
// Check for Windows MAX_PATH limit
if (os.platform() === "win32" && filePath.length > 260) {
errorFlags |= E_PathError.PATH_TOO_LONG;
}
const result = [
...fill(fromParts.length - i, () => '..'),
...originalNormalizedTo.split(sep).slice(i)
];
const segments = path.normalize(filePath).split(path.sep);
return result.join(sep);
for (const segment of segments) {
if (!segment) continue
const validation = validateFilename(segment)
if (validation.errorFlags & E_FilenameError.INVALID_CHAR) {
errorFlags |= E_PathError.INVALID_CHAR;
}
if (validation.errorFlags & E_FilenameError.RESERVED_NAME) {
errorFlags |= E_PathError.RESERVED_NAME;
}
if (validation.errorFlags & E_FilenameError.LEADING_TRAILING_SPACE) {
errorFlags |= E_PathError.LEADING_TRAILING_SPACE;
}
}
return {
isValid: errorFlags === E_PathError.NONE,
errorFlags
}
}
*/
/**
 * Renames a file on disk when sanitizeFilename would change its base name.
 *
 * @param filePath - Path of the file to check
 * @param flags - Sanitize flags forwarded to sanitizeFilename
 * @returns The new path when the move succeeded; otherwise filePath unchanged
 *          (exists() reported nothing there, the name is already clean, or move() threw)
 */
export function renameFileIfNeeded(filePath: string, flags: E_Sanitize): string {
    if (exists(filePath)) {
        const currentName = path.basename(filePath)
        const cleanName = sanitizeFilename(currentName, flags)
        if (cleanName !== currentName) {
            const target = path.join(path.dirname(filePath), cleanName)
            try {
                move(filePath, target)
                return target
            } catch {
                // Best effort: a failed move leaves the caller with the original path.
            }
        }
    }
    return filePath
}