293 lines
11 KiB
TypeScript
293 lines
11 KiB
TypeScript
import { lstat, realpath } from 'fs/promises'
|
||
import { dirname, join, resolve, sep } from 'path'
|
||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
|
||
import { getErrnoCode } from '../utils/errors.js'
|
||
import { getAutoMemPath, isAutoMemoryEnabled } from './paths.js'
|
||
|
||
/**
|
||
* Error thrown when a path validation detects a traversal or injection attempt.
|
||
*/
|
||
export class PathTraversalError extends Error {
|
||
constructor(message: string) {
|
||
super(message)
|
||
this.name = 'PathTraversalError'
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Sanitize a file path key by rejecting dangerous patterns.
|
||
* Checks for null bytes, URL-encoded traversals, and other injection vectors.
|
||
* Returns the sanitized string or throws PathTraversalError.
|
||
*/
|
||
function sanitizePathKey(key: string): string {
|
||
// Null bytes can truncate paths in C-based syscalls
|
||
if (key.includes('\0')) {
|
||
throw new PathTraversalError(`Null byte in path key: "${key}"`)
|
||
}
|
||
// URL-encoded traversals (e.g. %2e%2e%2f = ../)
|
||
let decoded: string
|
||
try {
|
||
decoded = decodeURIComponent(key)
|
||
} catch {
|
||
// Malformed percent-encoding (e.g. %ZZ, lone %) — not valid URL-encoding,
|
||
// so no URL-encoded traversal is possible
|
||
decoded = key
|
||
}
|
||
if (decoded !== key && (decoded.includes('..') || decoded.includes('/'))) {
|
||
throw new PathTraversalError(`URL-encoded traversal in path key: "${key}"`)
|
||
}
|
||
// Unicode normalization attacks: fullwidth ../ (U+FF0E U+FF0F) normalize
|
||
// to ASCII ../ under NFKC. While path.resolve/fs.writeFile treat these as
|
||
// literal bytes (not separators), downstream layers or filesystems may
|
||
// normalize — reject for defense-in-depth (PSR M22187 vector 4).
|
||
const normalized = key.normalize('NFKC')
|
||
if (
|
||
normalized !== key &&
|
||
(normalized.includes('..') ||
|
||
normalized.includes('/') ||
|
||
normalized.includes('\\') ||
|
||
normalized.includes('\0'))
|
||
) {
|
||
throw new PathTraversalError(
|
||
`Unicode-normalized traversal in path key: "${key}"`,
|
||
)
|
||
}
|
||
// Reject backslashes (Windows path separator used as traversal vector)
|
||
if (key.includes('\\')) {
|
||
throw new PathTraversalError(`Backslash in path key: "${key}"`)
|
||
}
|
||
// Reject absolute paths
|
||
if (key.startsWith('/')) {
|
||
throw new PathTraversalError(`Absolute path key: "${key}"`)
|
||
}
|
||
return key
|
||
}
|
||
|
||
/**
|
||
* Whether team memory features are enabled.
|
||
* Team memory is a subdirectory of auto memory, so it requires auto memory
|
||
* to be enabled. This keeps all team-memory consumers (prompt, content
|
||
* injection, sync watcher, file detection) consistent when auto memory is
|
||
* disabled via env var or settings.
|
||
*/
|
||
export function isTeamMemoryEnabled(): boolean {
|
||
if (!isAutoMemoryEnabled()) {
|
||
return false
|
||
}
|
||
return getFeatureValue_CACHED_MAY_BE_STALE('tengu_herring_clock', false)
|
||
}
|
||
|
||
/**
|
||
* Returns the team memory path: <memoryBase>/projects/<sanitized-project-root>/memory/team/
|
||
* Lives as a subdirectory of the auto-memory directory, scoped per-project.
|
||
*/
|
||
export function getTeamMemPath(): string {
|
||
return (join(getAutoMemPath(), 'team') + sep).normalize('NFC')
|
||
}
|
||
|
||
/**
|
||
* Returns the team memory entrypoint: <memoryBase>/projects/<sanitized-project-root>/memory/team/MEMORY.md
|
||
* Lives as a subdirectory of the auto-memory directory, scoped per-project.
|
||
*/
|
||
export function getTeamMemEntrypoint(): string {
|
||
return join(getAutoMemPath(), 'team', 'MEMORY.md')
|
||
}
|
||
|
||
/**
|
||
* Resolve symlinks for the deepest existing ancestor of a path.
|
||
* The target file may not exist yet (we may be about to create it), so we
|
||
* walk up the directory tree until realpath() succeeds, then rejoin the
|
||
* non-existing tail onto the resolved ancestor.
|
||
*
|
||
* SECURITY (PSR M22186): path.resolve() does NOT resolve symlinks. An attacker
|
||
* who can place a symlink inside teamDir pointing outside (e.g. to
|
||
* ~/.ssh/authorized_keys) would pass a resolve()-based containment check.
|
||
* Using realpath() on the deepest existing ancestor ensures we compare the
|
||
* actual filesystem location, not the symbolic path.
|
||
*
|
||
*/
|
||
async function realpathDeepestExisting(absolutePath: string): Promise<string> {
|
||
const tail: string[] = []
|
||
let current = absolutePath
|
||
// Walk up until realpath succeeds. ENOENT means this segment doesn't exist
|
||
// yet; pop it onto the tail and try the parent. ENOTDIR means a non-directory
|
||
// component sits in the middle of the path; pop and retry so we can realpath
|
||
// the ancestor to detect symlink escapes.
|
||
// Loop terminates when we reach the filesystem root (dirname('/') === '/').
|
||
for (
|
||
let parent = dirname(current);
|
||
current !== parent;
|
||
parent = dirname(current)
|
||
) {
|
||
try {
|
||
const realCurrent = await realpath(current)
|
||
// Rejoin the non-existing tail in reverse order (deepest popped first)
|
||
return tail.length === 0
|
||
? realCurrent
|
||
: join(realCurrent, ...tail.reverse())
|
||
} catch (e: unknown) {
|
||
const code = getErrnoCode(e)
|
||
if (code === 'ENOENT') {
|
||
// Could be truly non-existent (safe to walk up) OR a dangling symlink
|
||
// whose target doesn't exist. Dangling symlinks are an attack vector:
|
||
// writeFile would follow the link and create the target outside teamDir.
|
||
// lstat distinguishes: it succeeds for dangling symlinks (the link entry
|
||
// itself exists), fails with ENOENT for truly non-existent paths.
|
||
try {
|
||
const st = await lstat(current)
|
||
if (st.isSymbolicLink()) {
|
||
throw new PathTraversalError(
|
||
`Dangling symlink detected (target does not exist): "${current}"`,
|
||
)
|
||
}
|
||
// lstat succeeded but isn't a symlink — ENOENT from realpath was
|
||
// caused by a dangling symlink in an ancestor. Walk up to find it.
|
||
} catch (lstatErr: unknown) {
|
||
if (lstatErr instanceof PathTraversalError) {
|
||
throw lstatErr
|
||
}
|
||
// lstat also failed (truly non-existent or inaccessible) — safe to walk up.
|
||
}
|
||
} else if (code === 'ELOOP') {
|
||
// Symlink loop — corrupted or malicious filesystem state.
|
||
throw new PathTraversalError(
|
||
`Symlink loop detected in path: "${current}"`,
|
||
)
|
||
} else if (code !== 'ENOTDIR' && code !== 'ENAMETOOLONG') {
|
||
// EACCES, EIO, etc. — cannot verify containment. Fail closed by wrapping
|
||
// as PathTraversalError so the caller can skip this entry gracefully
|
||
// instead of aborting the entire batch.
|
||
throw new PathTraversalError(
|
||
`Cannot verify path containment (${code}): "${current}"`,
|
||
)
|
||
}
|
||
tail.push(current.slice(parent.length + sep.length))
|
||
current = parent
|
||
}
|
||
}
|
||
// Reached filesystem root without finding an existing ancestor (rare —
|
||
// root normally exists). Fall back to the input; containment check will reject.
|
||
return absolutePath
|
||
}
|
||
|
||
/**
|
||
* Check whether a real (symlink-resolved) path is within the real team
|
||
* memory directory. Both sides are realpath'd so the comparison is between
|
||
* canonical filesystem locations.
|
||
*
|
||
* If teamDir does not exist, returns true (skips the check). This is safe:
|
||
* a symlink escape requires a pre-existing symlink inside teamDir, which
|
||
* requires teamDir to exist. If there's no directory, there's no symlink,
|
||
* and the first-pass string-level containment check is sufficient.
|
||
*/
|
||
async function isRealPathWithinTeamDir(
|
||
realCandidate: string,
|
||
): Promise<boolean> {
|
||
let realTeamDir: string
|
||
try {
|
||
// getTeamMemPath() includes a trailing separator; strip it because
|
||
// realpath() rejects trailing separators on some platforms.
|
||
realTeamDir = await realpath(getTeamMemPath().replace(/[/\\]+$/, ''))
|
||
} catch (e: unknown) {
|
||
const code = getErrnoCode(e)
|
||
if (code === 'ENOENT' || code === 'ENOTDIR') {
|
||
// Team dir doesn't exist — symlink escape impossible, skip check.
|
||
return true
|
||
}
|
||
// Unexpected error (EACCES, EIO) — fail closed.
|
||
return false
|
||
}
|
||
if (realCandidate === realTeamDir) {
|
||
return true
|
||
}
|
||
// Prefix-attack protection: require separator after the prefix so that
|
||
// "/foo/team-evil" doesn't match "/foo/team".
|
||
return realCandidate.startsWith(realTeamDir + sep)
|
||
}
|
||
|
||
/**
|
||
* Check if a resolved absolute path is within the team memory directory.
|
||
* Uses path.resolve() to convert relative paths and eliminate traversal segments.
|
||
* Does NOT resolve symlinks — for write validation use validateTeamMemWritePath()
|
||
* or validateTeamMemKey() which include symlink resolution.
|
||
*/
|
||
export function isTeamMemPath(filePath: string): boolean {
|
||
// SECURITY: resolve() converts to absolute and eliminates .. segments,
|
||
// preventing path traversal attacks (e.g. "team/../../etc/passwd")
|
||
const resolvedPath = resolve(filePath)
|
||
const teamDir = getTeamMemPath()
|
||
return resolvedPath.startsWith(teamDir)
|
||
}
|
||
|
||
/**
|
||
* Validate that an absolute file path is safe for writing to the team memory directory.
|
||
* Returns the resolved absolute path if valid.
|
||
* Throws PathTraversalError if the path contains injection vectors, escapes the
|
||
* directory via .. segments, or escapes via a symlink (PSR M22186).
|
||
*/
|
||
export async function validateTeamMemWritePath(
|
||
filePath: string,
|
||
): Promise<string> {
|
||
if (filePath.includes('\0')) {
|
||
throw new PathTraversalError(`Null byte in path: "${filePath}"`)
|
||
}
|
||
// First pass: normalize .. segments and check string-level containment.
|
||
// This is a fast rejection for obvious traversal attempts before we touch
|
||
// the filesystem.
|
||
const resolvedPath = resolve(filePath)
|
||
const teamDir = getTeamMemPath()
|
||
// Prefix attack protection: teamDir already ends with sep (from getTeamMemPath),
|
||
// so "team-evil/" won't match "team/"
|
||
if (!resolvedPath.startsWith(teamDir)) {
|
||
throw new PathTraversalError(
|
||
`Path escapes team memory directory: "${filePath}"`,
|
||
)
|
||
}
|
||
// Second pass: resolve symlinks on the deepest existing ancestor and verify
|
||
// the real path is still within the real team dir. This catches symlink-based
|
||
// escapes that path.resolve() alone cannot detect.
|
||
const realPath = await realpathDeepestExisting(resolvedPath)
|
||
if (!(await isRealPathWithinTeamDir(realPath))) {
|
||
throw new PathTraversalError(
|
||
`Path escapes team memory directory via symlink: "${filePath}"`,
|
||
)
|
||
}
|
||
return resolvedPath
|
||
}
|
||
|
||
/**
|
||
* Validate a relative path key from the server against the team memory directory.
|
||
* Sanitizes the key, joins with the team dir, resolves symlinks on the deepest
|
||
* existing ancestor, and verifies containment against the real team dir.
|
||
* Returns the resolved absolute path.
|
||
* Throws PathTraversalError if the key is malicious (PSR M22186).
|
||
*/
|
||
export async function validateTeamMemKey(relativeKey: string): Promise<string> {
|
||
sanitizePathKey(relativeKey)
|
||
const teamDir = getTeamMemPath()
|
||
const fullPath = join(teamDir, relativeKey)
|
||
// First pass: normalize .. segments and check string-level containment.
|
||
const resolvedPath = resolve(fullPath)
|
||
if (!resolvedPath.startsWith(teamDir)) {
|
||
throw new PathTraversalError(
|
||
`Key escapes team memory directory: "${relativeKey}"`,
|
||
)
|
||
}
|
||
// Second pass: resolve symlinks and verify real containment.
|
||
const realPath = await realpathDeepestExisting(resolvedPath)
|
||
if (!(await isRealPathWithinTeamDir(realPath))) {
|
||
throw new PathTraversalError(
|
||
`Key escapes team memory directory via symlink: "${relativeKey}"`,
|
||
)
|
||
}
|
||
return resolvedPath
|
||
}
|
||
|
||
/**
|
||
* Check if a file path is within the team memory directory
|
||
* and team memory is enabled.
|
||
*/
|
||
export function isTeamMemFile(filePath: string): boolean {
|
||
return isTeamMemoryEnabled() && isTeamMemPath(filePath)
|
||
}
|