import { logEvent } from 'src/services/analytics/index.js'
import { extractHeredocs } from '../../utils/bash/heredoc.js'
import { ParsedCommand } from '../../utils/bash/ParsedCommand.js'
import {
  hasMalformedTokens,
  hasShellQuoteSingleQuoteBug,
  tryParseShellCommand,
} from '../../utils/bash/shellQuote.js'
import type { TreeSitterAnalysis } from '../../utils/bash/treeSitterAnalysis.js'
import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'

/**
 * Cheap pre-filter for "heredoc inside a command substitution": a `$(`
 * followed, later on the same line (`.` does not match newlines), by `<<`.
 * A match only means the command is worth inspecting further; it says
 * nothing about whether the heredoc is safe.
 */
const HEREDOC_IN_SUBSTITUTION = /\$\(.*<</

// NOTE(review): the text between the regex above and the `>()` entry below was
// lost to tag-stripping in the copy this was restored from. The array name and
// the `<()` entry are reconstructed — confirm both against the consumer of
// this table before merging.
const COMMAND_SUBSTITUTION_PATTERNS = [
  { pattern: /<\(/, message: 'process substitution <()' },
  { pattern: />\(/, message: 'process substitution >()' },
  { pattern: /=\(/, message: 'Zsh process substitution =()' },
  // Zsh EQUALS expansion: =cmd at word start expands to $(which cmd).
  // `=curl evil.com` → `/usr/bin/curl evil.com`, bypassing Bash(curl:*) deny
  // rules since the parser sees `=curl` as the base command, not `curl`.
  // Only matches word-initial = followed by a command-name char (not VAR=val).
  {
    pattern: /(?:^|[\s;&|])=[a-zA-Z_]/,
    message: 'Zsh equals expansion (=cmd)',
  },
  { pattern: /\$\(/, message: '$() command substitution' },
  { pattern: /\$\{/, message: '${} parameter substitution' },
  { pattern: /\$\[/, message: '$[] legacy arithmetic expansion' },
  { pattern: /~\[/, message: 'Zsh-style parameter expansion' },
  { pattern: /\(e:/, message: 'Zsh-style glob qualifiers' },
  { pattern: /\(\+/, message: 'Zsh glob qualifier with command execution' },
  {
    pattern: /\}\s*always\s*\{/,
    message: 'Zsh always block (try/always construct)',
  },
  // Defense in depth: Block PowerShell comment syntax even though we don't execute in PowerShell
  // Added as protection against future changes that might introduce PowerShell execution
  { pattern: /<#/, message: 'PowerShell comment syntax' },
]

// Zsh-specific dangerous commands that can bypass security checks.
// These are checked against the base command (first word) of each command segment.
const ZSH_DANGEROUS_COMMANDS = new Set([ // zmodload is the gateway to many dangerous module-based attacks: // zsh/mapfile (invisible file I/O via array assignment), // zsh/system (sysopen/syswrite two-step file access), // zsh/zpty (pseudo-terminal command execution), // zsh/net/tcp (network exfiltration via ztcp), // zsh/files (builtin rm/mv/ln/chmod that bypass binary checks) 'zmodload', // emulate with -c flag is an eval-equivalent that executes arbitrary code 'emulate', // Zsh module builtins that enable dangerous operations. // These require zmodload first, but we block them as defense-in-depth // in case zmodload is somehow bypassed or the module is pre-loaded. 'sysopen', // Opens files with fine-grained control (zsh/system) 'sysread', // Reads from file descriptors (zsh/system) 'syswrite', // Writes to file descriptors (zsh/system) 'sysseek', // Seeks on file descriptors (zsh/system) 'zpty', // Executes commands on pseudo-terminals (zsh/zpty) 'ztcp', // Creates TCP connections for exfiltration (zsh/net/tcp) 'zsocket', // Creates Unix/TCP sockets (zsh/net/socket) 'mapfile', // Not actually a command, but the associative array is set via zmodload 'zf_rm', // Builtin rm from zsh/files 'zf_mv', // Builtin mv from zsh/files 'zf_ln', // Builtin ln from zsh/files 'zf_chmod', // Builtin chmod from zsh/files 'zf_chown', // Builtin chown from zsh/files 'zf_mkdir', // Builtin mkdir from zsh/files 'zf_rmdir', // Builtin rmdir from zsh/files 'zf_chgrp', // Builtin chgrp from zsh/files ]) // Numeric identifiers for bash security checks (to avoid logging strings) const BASH_SECURITY_CHECK_IDS = { INCOMPLETE_COMMANDS: 1, JQ_SYSTEM_FUNCTION: 2, JQ_FILE_ARGUMENTS: 3, OBFUSCATED_FLAGS: 4, SHELL_METACHARACTERS: 5, DANGEROUS_VARIABLES: 6, NEWLINES: 7, DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION: 8, DANGEROUS_PATTERNS_INPUT_REDIRECTION: 9, DANGEROUS_PATTERNS_OUTPUT_REDIRECTION: 10, IFS_INJECTION: 11, GIT_COMMIT_SUBSTITUTION: 12, PROC_ENVIRON_ACCESS: 13, MALFORMED_TOKEN_INJECTION: 
14, BACKSLASH_ESCAPED_WHITESPACE: 15, BRACE_EXPANSION: 16, CONTROL_CHARACTERS: 17, UNICODE_WHITESPACE: 18, MID_WORD_HASH: 19, ZSH_DANGEROUS_COMMANDS: 20, BACKSLASH_ESCAPED_OPERATORS: 21, COMMENT_QUOTE_DESYNC: 22, QUOTED_NEWLINE: 23, } as const type ValidationContext = { originalCommand: string baseCommand: string unquotedContent: string fullyUnquotedContent: string /** fullyUnquoted before stripSafeRedirections — used by validateBraceExpansion * to avoid false negatives from redirection stripping creating backslash adjacencies */ fullyUnquotedPreStrip: string /** Like fullyUnquotedPreStrip but preserves quote characters ('/"): e.g., * echo 'x'# → echo ''# (the quote chars remain, revealing adjacency to #) */ unquotedKeepQuoteChars: string /** Tree-sitter analysis data, if available. Validators can use this for * more accurate analysis when present, falling back to regex otherwise. */ treeSitter?: TreeSitterAnalysis | null } type QuoteExtraction = { withDoubleQuotes: string fullyUnquoted: string /** Like fullyUnquoted but preserves quote characters ('/"): strips quoted * content while keeping the delimiters. Used by validateMidWordHash to detect * quote-adjacent # (e.g., 'x'# where quote stripping would hide adjacency). 
*/ unquotedKeepQuoteChars: string } function extractQuotedContent(command: string, isJq = false): QuoteExtraction { let withDoubleQuotes = '' let fullyUnquoted = '' let unquotedKeepQuoteChars = '' let inSingleQuote = false let inDoubleQuote = false let escaped = false for (let i = 0; i < command.length; i++) { const char = command[i] if (escaped) { escaped = false if (!inSingleQuote) withDoubleQuotes += char if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char continue } if (char === '\\' && !inSingleQuote) { escaped = true if (!inSingleQuote) withDoubleQuotes += char if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char continue } if (char === "'" && !inDoubleQuote) { inSingleQuote = !inSingleQuote unquotedKeepQuoteChars += char continue } if (char === '"' && !inSingleQuote) { inDoubleQuote = !inDoubleQuote unquotedKeepQuoteChars += char // For jq, include quotes in extraction to ensure content is properly analyzed if (!isJq) continue } if (!inSingleQuote) withDoubleQuotes += char if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char } return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } } function stripSafeRedirections(content: string): string { // SECURITY: All three patterns MUST have a trailing boundary (?=\s|$). // Without it, `> /dev/nullo` matches `/dev/null` as a PREFIX, strips // `> /dev/null` leaving `o`, so `echo hi > /dev/nullo` becomes `echo hi o`. // validateRedirections then sees no `>` and passes. The file write to // /dev/nullo is auto-allowed via the read-only path (checkReadOnlyConstraints). // Main bashPermissions flow is protected (checkPathConstraints validates the // original command), but speculation.ts uses checkReadOnlyConstraints alone. 
return content .replace(/\s+2\s*>&\s*1(?=\s|$)/g, '') .replace(/[012]?\s*>\s*\/dev\/null(?=\s|$)/g, '') .replace(/\s*<\s*\/dev\/null(?=\s|$)/g, '') } /** * Checks if content contains an unescaped occurrence of a single character. * Handles bash escape sequences correctly where a backslash escapes the following character. * * IMPORTANT: This function only handles single characters, not strings. If you need to extend * this to handle multi-character strings, be EXTREMELY CAREFUL about shell ANSI-C quoting * (e.g., $'\n', $'\x41', $'\u0041') which can encode arbitrary characters and strings in ways * that are very difficult to parse correctly. Incorrect handling could introduce security * vulnerabilities by allowing attackers to bypass security checks. * * @param content - The string to search (typically from extractQuotedContent) * @param char - Single character to search for (e.g., '`') * @returns true if unescaped occurrence found, false otherwise * * Examples: * hasUnescapedChar("test \`safe\`", '`') → false (escaped backticks) * hasUnescapedChar("test `dangerous`", '`') → true (unescaped backticks) * hasUnescapedChar("test\\`date`", '`') → true (escaped backslash + unescaped backtick) */ function hasUnescapedChar(content: string, char: string): boolean { if (char.length !== 1) { throw new Error('hasUnescapedChar only works with single characters') } let i = 0 while (i < content.length) { // If we see a backslash, skip it and the next character (they form an escape sequence) if (content[i] === '\\' && i + 1 < content.length) { i += 2 // Skip backslash and escaped character continue } // Check if current character matches if (content[i] === char) { return true // Found unescaped occurrence } i++ } return false // No unescaped occurrences found } function validateEmpty(context: ValidationContext): PermissionResult { if (!context.originalCommand.trim()) { return { behavior: 'allow', updatedInput: { command: context.originalCommand }, decisionReason: { type: 'other', 
reason: 'Empty command is safe' }, } } return { behavior: 'passthrough', message: 'Command is not empty' } } function validateIncompleteCommands( context: ValidationContext, ): PermissionResult { const { originalCommand } = context const trimmed = originalCommand.trim() if (/^\s*\t/.test(originalCommand)) { logEvent('tengu_bash_security_check_triggered', { checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS, subId: 1, }) return { behavior: 'ask', message: 'Command appears to be an incomplete fragment (starts with tab)', } } if (trimmed.startsWith('-')) { logEvent('tengu_bash_security_check_triggered', { checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS, subId: 2, }) return { behavior: 'ask', message: 'Command appears to be an incomplete fragment (starts with flags)', } } if (/^\s*(&&|\|\||;|>>?|<)/.test(originalCommand)) { logEvent('tengu_bash_security_check_triggered', { checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS, subId: 3, }) return { behavior: 'ask', message: 'Command appears to be a continuation line (starts with operator)', } } return { behavior: 'passthrough', message: 'Command appears complete' } } /** * Checks if a command is a "safe" heredoc-in-substitution pattern that can * bypass the generic $() validator. * * This is an EARLY-ALLOW path: returning `true` causes bashCommandIsSafe to * return `passthrough`, bypassing ALL subsequent validators. Given this * authority, the check must be PROVABLY safe, not "probably safe". 
* * The only pattern we allow is: * [prefix] $(cat <<'DELIM'\n * [body lines]\n * DELIM\n * ) [suffix] * * Where: * - The delimiter must be single-quoted ('DELIM') or escaped (\DELIM) so the * body is literal text with no expansion * - The closing delimiter must be on a line BY ITSELF (or with only trailing * whitespace + `)` for the $(cat <<'EOF'\n...\nEOF)` inline form) * - The closing delimiter must be the FIRST such line — matching bash's * behavior exactly (no skipping past early delimiters to find EOF)) * - There must be non-whitespace text BEFORE the $( (i.e., the substitution * is used in argument position, not as a command name). Otherwise the * heredoc body becomes an arbitrary command name with [suffix] as args. * - The remaining text (with the heredoc stripped) must pass all validators * * This implementation uses LINE-BASED matching, not regex [\s\S]*?, to * precisely replicate bash's heredoc-closing behavior. */ function isSafeHeredoc(command: string): boolean { if (!HEREDOC_IN_SUBSTITUTION.test(command)) return false // SECURITY: Use [ \t] (not \s) between << and the delimiter. \s matches // newlines, but bash requires the delimiter word on the same line as <<. // Matching across newlines could accept malformed syntax that bash rejects. // Handle quote variations: 'EOF', ''EOF'' (splitCommand may mangle quotes). 
const heredocPattern = /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g let match type HeredocMatch = { start: number operatorEnd: number delimiter: string isDash: boolean } const safeHeredocs: HeredocMatch[] = [] while ((match = heredocPattern.exec(command)) !== null) { const delimiter = match[2] || match[3] if (delimiter) { safeHeredocs.push({ start: match.index, operatorEnd: match.index + match[0].length, delimiter, isDash: match[1] === '-', }) } } // If no safe heredoc patterns found, it's not safe if (safeHeredocs.length === 0) return false // SECURITY: For each heredoc, find the closing delimiter using LINE-BASED // matching that exactly replicates bash's behavior. Bash closes a heredoc // at the FIRST line that exactly matches the delimiter. Any subsequent // occurrence of the delimiter is just content (or a new command). Regex // [\s\S]*? can skip past the first delimiter to find a later `DELIM)` // pattern, hiding injected commands between the two delimiters. type VerifiedHeredoc = { start: number; end: number } const verified: VerifiedHeredoc[] = [] for (const { start, operatorEnd, delimiter, isDash } of safeHeredocs) { // The opening line must end immediately after the delimiter (only // horizontal whitespace allowed before the newline). If there's other // content (like `; rm -rf /`), this is not a simple safe heredoc. const afterOperator = command.slice(operatorEnd) const openLineEnd = afterOperator.indexOf('\n') if (openLineEnd === -1) return false // No content at all const openLineTail = afterOperator.slice(0, openLineEnd) if (!/^[ \t]*$/.test(openLineTail)) return false // Extra content on open line // Body starts after the newline const bodyStart = operatorEnd + openLineEnd + 1 const body = command.slice(bodyStart) const bodyLines = body.split('\n') // Find the FIRST line that closes the heredoc. There are two valid forms: // 1. 
`DELIM` alone on a line (bash-standard), followed by `)` on the // next line (with only whitespace before it) // 2. `DELIM)` on a line (the inline $(cat <<'EOF'\n...\nEOF) form, // where bash's PST_EOFTOKEN closes both heredoc and substitution) // For <<-, leading tabs are stripped before matching. let closingLineIdx = -1 let closeParenLineIdx = -1 // Line index where `)` appears let closeParenColIdx = -1 // Column index of `)` on that line for (let i = 0; i < bodyLines.length; i++) { const rawLine = bodyLines[i]! const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine // Form 1: delimiter alone on a line if (line === delimiter) { closingLineIdx = i // The `)` must be on the NEXT line with only whitespace before it const nextLine = bodyLines[i + 1] if (nextLine === undefined) return false // No closing `)` const parenMatch = nextLine.match(/^([ \t]*)\)/) if (!parenMatch) return false // `)` not at start of next line closeParenLineIdx = i + 1 closeParenColIdx = parenMatch[1]!.length // Position of `)` break } // Form 2: delimiter immediately followed by `)` (PST_EOFTOKEN form) // Only whitespace allowed between delimiter and `)`. if (line.startsWith(delimiter)) { const afterDelim = line.slice(delimiter.length) const parenMatch = afterDelim.match(/^([ \t]*)\)/) if (parenMatch) { closingLineIdx = i closeParenLineIdx = i // Column is in rawLine (pre-tab-strip), so recompute const tabPrefix = isDash ? (rawLine.match(/^\t*/)?.[0] ?? '') : '' closeParenColIdx = tabPrefix.length + delimiter.length + parenMatch[1]!.length break } // Line starts with delimiter but has other trailing content — // this is NOT the closing line (bash requires exact match or EOF`)`). // But it's also a red flag: if this were inside $(), bash might // close early via PST_EOFTOKEN with other shell metacharacters. // We already handle that case in extractHeredocs — here we just // reject it as not matching our safe pattern. 
if (/^[)}`|&;(<>]/.test(afterDelim)) { return false // Ambiguous early-closure pattern } } } if (closingLineIdx === -1) return false // No closing delimiter found // Compute the absolute end position (one past the `)` character) let endPos = bodyStart for (let i = 0; i < closeParenLineIdx; i++) { endPos += bodyLines[i]!.length + 1 // +1 for newline } endPos += closeParenColIdx + 1 // +1 to include the `)` itself verified.push({ start, end: endPos }) } // SECURITY: Reject nested matches. The regex finds $(cat <<'X' patterns // in RAW TEXT without understanding quoted-heredoc semantics. When the // outer heredoc has a quoted delimiter (<<'A'), its body is LITERAL text // in bash — any inner $(cat <<'B' is just characters, not a real heredoc. // But our regex matches both, producing NESTED ranges. Stripping nested // ranges corrupts indices: after stripping the inner range, the outer // range's `end` is stale (points past the shrunken string), causing // `remaining.slice(end)` to return '' and silently drop any suffix // (e.g., `; rm -rf /`). Since all our matched heredocs have quoted/escaped // delimiters, a nested match inside the body is ALWAYS literal text — // no legitimate user writes this pattern. Bail to safe fallback. for (const outer of verified) { for (const inner of verified) { if (inner === outer) continue if (inner.start > outer.start && inner.start < outer.end) { return false } } } // Strip all verified heredocs from the command, building `remaining`. // Process in reverse order so earlier indices stay valid. const sortedVerified = [...verified].sort((a, b) => b.start - a.start) let remaining = command for (const { start, end } of sortedVerified) { remaining = remaining.slice(0, start) + remaining.slice(end) } // SECURITY: The remaining text must NOT start with only whitespace before // the (now-stripped) heredoc position IF there's non-whitespace after it. 
// If the $() is in COMMAND-NAME position (no prefix), its output becomes // the command to execute, with any suffix text as arguments: // $(cat <<'EOF'\nchmod\nEOF\n) 777 /etc/shadow // → runs `chmod 777 /etc/shadow` // We only allow the substitution in ARGUMENT position: there must be a // command word before the $(. // After stripping, `remaining` should look like `cmd args... [more args]`. // If remaining starts with only whitespace (or is empty), the $() WAS the // command — that's only safe if there are no trailing arguments. const trimmedRemaining = remaining.trim() if (trimmedRemaining.length > 0) { // There's a prefix command — good. But verify the original command // also had a non-whitespace prefix before the FIRST $( (the heredoc // could be one of several; we need the first one's prefix). const firstHeredocStart = Math.min(...verified.map(v => v.start)) const prefix = command.slice(0, firstHeredocStart) if (prefix.trim().length === 0) { // $() is in command-name position but there's trailing text — UNSAFE. // The heredoc body becomes the command name, trailing text becomes args. return false } } // Check that remaining text contains only safe characters. // After stripping safe heredocs, the remaining text should only be command // names, arguments, quotes, and whitespace. Reject ANY shell metacharacter // to prevent operators (|, &, &&, ||, ;) or expansions ($, `, {, <, >) from // being used to chain dangerous commands after a safe heredoc. // SECURITY: Use explicit ASCII space/tab only — \s matches unicode whitespace // like \u00A0 which can be used to hide content. Newlines are also blocked // (they would indicate multi-line commands outside the heredoc body). if (!/^[a-zA-Z0-9 \t"'.\-/_@=,:+~]*$/.test(remaining)) return false // SECURITY: The remaining text (command with heredocs stripped) must also // pass all security validators. 
Without this, appending a safe heredoc to a // dangerous command (e.g., `zmodload zsh/system $(cat <<'EOF'\nx\nEOF\n)`) // causes this early-allow path to return passthrough, bypassing // validateZshDangerousCommands, validateProcEnvironAccess, and any other // main validator that checks allowlist-safe character patterns. // No recursion risk: `remaining` has no `$(... <<` pattern, so the recursive // call's validateSafeCommandSubstitution returns passthrough immediately. if (bashCommandIsSafe_DEPRECATED(remaining).behavior !== 'passthrough') return false return true } /** * Detects well-formed $(cat <<'DELIM'...DELIM) heredoc substitution patterns. * Returns the command with matched heredocs stripped, or null if none found. * Used by the pre-split gate to strip safe heredocs and re-check the remainder. */ export function stripSafeHeredocSubstitutions(command: string): string | null { if (!HEREDOC_IN_SUBSTITUTION.test(command)) return null const heredocPattern = /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g let result = command let found = false let match const ranges: Array<{ start: number; end: number }> = [] while ((match = heredocPattern.exec(command)) !== null) { if (match.index > 0 && command[match.index - 1] === '\\') continue const delimiter = match[2] || match[3] if (!delimiter) continue const isDash = match[1] === '-' const operatorEnd = match.index + match[0].length const afterOperator = command.slice(operatorEnd) const openLineEnd = afterOperator.indexOf('\n') if (openLineEnd === -1) continue if (!/^[ \t]*$/.test(afterOperator.slice(0, openLineEnd))) continue const bodyStart = operatorEnd + openLineEnd + 1 const bodyLines = command.slice(bodyStart).split('\n') for (let i = 0; i < bodyLines.length; i++) { const rawLine = bodyLines[i]! const line = isDash ? 
rawLine.replace(/^\t*/, '') : rawLine if (line.startsWith(delimiter)) { const after = line.slice(delimiter.length) let closePos = -1 if (/^[ \t]*\)/.test(after)) { const lineStart = bodyStart + bodyLines.slice(0, i).join('\n').length + (i > 0 ? 1 : 0) closePos = command.indexOf(')', lineStart) } else if (after === '') { const nextLine = bodyLines[i + 1] if (nextLine !== undefined && /^[ \t]*\)/.test(nextLine)) { const nextLineStart = bodyStart + bodyLines.slice(0, i + 1).join('\n').length + 1 closePos = command.indexOf(')', nextLineStart) } } if (closePos !== -1) { ranges.push({ start: match.index, end: closePos + 1 }) found = true } break } } } if (!found) return null for (let i = ranges.length - 1; i >= 0; i--) { const r = ranges[i]! result = result.slice(0, r.start) + result.slice(r.end) } return result } /** Detection-only check: does the command contain a safe heredoc substitution? */ export function hasSafeHeredocSubstitution(command: string): boolean { return stripSafeHeredocSubstitutions(command) !== null } function validateSafeCommandSubstitution( context: ValidationContext, ): PermissionResult { const { originalCommand } = context if (!HEREDOC_IN_SUBSTITUTION.test(originalCommand)) { return { behavior: 'passthrough', message: 'No heredoc in substitution' } } if (isSafeHeredoc(originalCommand)) { return { behavior: 'allow', updatedInput: { command: originalCommand }, decisionReason: { type: 'other', reason: 'Safe command substitution: cat with quoted/escaped heredoc delimiter', }, } } return { behavior: 'passthrough', message: 'Command substitution needs validation', } } function validateGitCommit(context: ValidationContext): PermissionResult { const { originalCommand, baseCommand } = context if (baseCommand !== 'git' || !/^git\s+commit\s+/.test(originalCommand)) { return { behavior: 'passthrough', message: 'Not a git commit' } } // SECURITY: Backslashes can cause our regex to mis-identify quote boundaries // (e.g., `git commit -m "test\"msg" && evil`). 
Legitimate commit messages // virtually never contain backslashes, so bail to the full validator chain. if (originalCommand.includes('\\')) { return { behavior: 'passthrough', message: 'Git commit contains backslash, needs full validation', } } // SECURITY: The `.*?` before `-m` must NOT match shell operators. Previously // `.*?` matched anything except `\n`, including `;`, `&`, `|`, `` ` ``, `$(`. // For `git commit ; curl evil.com -m 'x'`, `.*?` swallowed `; curl evil.com ` // leaving remainder=`` (falsy → remainder check skipped) → returned `allow` // for a compound command. Early-allow skips ALL main validators (line ~1908), // nullifying validateQuotedNewline, validateBackslashEscapedOperators, etc. // While splitCommand currently catches this downstream, early-allow is a // POSITIVE ASSERTION that the FULL command is safe — which it is NOT. // // Also: `\s+` between `git` and `commit` must NOT match `\n`/`\r` (command // separators in bash). Use `[ \t]+` for horizontal-only whitespace. // // The `[^;&|`$<>()\n\r]*?` class excludes shell metacharacters. We also // exclude `<` and `>` here (redirects) — they're allowed in the REMAINDER // for `--author="Name "` but must not appear BEFORE `-m`. const messageMatch = originalCommand.match( /^git[ \t]+commit[ \t]+[^;&|`$<>()\n\r]*?-m[ \t]+(["'])([\s\S]*?)\1(.*)$/, ) if (messageMatch) { const [, quote, messageContent, remainder] = messageMatch if (quote === '"' && messageContent && /\$\(|`|\$\{/.test(messageContent)) { logEvent('tengu_bash_security_check_triggered', { checkId: BASH_SECURITY_CHECK_IDS.GIT_COMMIT_SUBSTITUTION, subId: 1, }) return { behavior: 'ask', message: 'Git commit message contains command substitution patterns', } } // SECURITY: Check remainder for shell operators that could chain commands // or redirect output. The `.*` before `-m` in the regex can swallow flags // like `--amend`, leaving `&& evil` or `> ~/.bashrc` in the remainder. 
// Previously we only checked for $() / `` / ${} here, missing operators // like ; | & && || < >. // // `<` and `>` can legitimately appear INSIDE quotes in --author values // like `--author="Name "`. An UNQUOTED `>` is a shell redirect // operator. Because validateGitCommit is an EARLY validator, returning // `allow` here short-circuits bashCommandIsSafe and SKIPS // validateRedirections. So we must bail to passthrough on unquoted `<>` // to let the main validators handle it. // // Attack: `git commit --allow-empty -m 'payload' > ~/.bashrc` // validateGitCommit returns allow → bashCommandIsSafe short-circuits → // validateRedirections NEVER runs → ~/.bashrc overwritten with git // stdout containing `payload` → RCE on next shell login. if (remainder && /[;|&()`]|\$\(|\$\{/.test(remainder)) { return { behavior: 'passthrough', message: 'Git commit remainder contains shell metacharacters', } } if (remainder) { // Strip quoted content, then check for `<` or `>`. Quoted `<>` (email // brackets in --author) are safe; unquoted `<>` are shell redirects. // NOTE: This simple quote tracker has NO backslash handling. `\'`/`\"` // outside quotes would desync it (bash: \' = literal ', tracker: toggles // SQ). BUT line 584 already bailed on ANY backslash in originalCommand, // so we never reach here with backslashes. For backslash-free input, // simple quote toggling is correct (no way to escape quotes without \\). 
let unquoted = ''
      let inSQ = false
      let inDQ = false
      for (let i = 0; i < remainder.length; i++) {
        const c = remainder[i]
        if (c === "'" && !inDQ) {
          inSQ = !inSQ
          continue
        }
        if (c === '"' && !inSQ) {
          inDQ = !inDQ
          continue
        }
        if (!inSQ && !inDQ) unquoted += c
      }
      if (/[<>]/.test(unquoted)) {
        return {
          behavior: 'passthrough',
          message: 'Git commit remainder contains unquoted redirect operator',
        }
      }
    }

    // Security hardening: block messages starting with dash
    // This catches potential obfuscation patterns like git commit -m "---"
    if (messageContent && messageContent.startsWith('-')) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
        subId: 5,
      })
      return {
        behavior: 'ask',
        message: 'Command contains quoted characters in flag names',
      }
    }

    return {
      behavior: 'allow',
      updatedInput: { command: originalCommand },
      decisionReason: {
        type: 'other',
        reason: 'Git commit with simple quoted message is allowed',
      },
    }
  }

  return { behavior: 'passthrough', message: 'Git commit needs validation' }
}

/**
 * Guards `jq` invocations.
 *
 * Returns `passthrough` when the base command is not `jq`. Otherwise asks for
 * confirmation when the command text contains a `system(` call or one of the
 * flags `-f`, `--from-file`, `--rawfile`, `--slurpfile`, `-L`,
 * `--library-path`; any other jq command is passed through.
 */
function validateJqCommand(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'jq') {
    return { behavior: 'passthrough', message: 'Not jq' }
  }

  if (/\bsystem\s*\(/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_SYSTEM_FUNCTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains system() function which executes arbitrary commands',
    }
  }

  // File arguments are now allowed - they will be validated by path validation in readOnlyValidation.ts
  // Only block dangerous flags that could read files into jq variables
  // substring(3) drops the first three characters of the command text before
  // the flag scan; the regex below accepts a flag at the start or after
  // whitespace.
  const afterJq = originalCommand.substring(3).trim()
  if (
    /(?:^|\s)(?:-f\b|--from-file|--rawfile|--slurpfile|-L\b|--library-path)/.test(
      afterJq,
    )
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_FILE_ARGUMENTS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains dangerous flags that could execute code or read arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'jq command is safe' }
}

/**
 * Asks for confirmation when `unquotedContent` still shows a quote-delimited
 * argument containing a shell metacharacter.
 *
 * Three shapes are checked, each logged with its own subId:
 *   1. a standalone quoted word containing `;` or `&`
 *   2. a quoted value after `-name`, `-path` or `-iname` containing `;`, `|` or `&`
 *   3. a quoted value after `-regex` containing `;` or `&`
 */
function validateShellMetacharacters(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context
  const message =
    'Command contains shell metacharacters (;, |, or &) in arguments'

  if (/(?:^|\s)["'][^"']*[;&][^"']*["'](?:\s|$)/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 1,
    })
    return { behavior: 'ask', message }
  }

  const globPatterns = [
    /-name\s+["'][^"']*[;|&][^"']*["']/,
    /-path\s+["'][^"']*[;|&][^"']*["']/,
    /-iname\s+["'][^"']*[;|&][^"']*["']/,
  ]
  if (globPatterns.some(p => p.test(unquotedContent))) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 2,
    })
    return { behavior: 'ask', message }
  }

  if (/-regex\s+["'][^"']*[;&][^"']*["']/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 3,
    })
    return { behavior: 'ask', message }
  }

  return { behavior: 'passthrough', message: 'No metacharacters' }
}

/**
 * Asks for confirmation when `fullyUnquotedContent` has a `$NAME` variable
 * directly next to a pipe or redirection operator: either `<`, `>` or `|`
 * followed by `$name`, or `$name` followed by `|`, `<` or `>` (optional
 * whitespace in between in both directions).
 */
function validateDangerousVariables(
  context: ValidationContext,
): PermissionResult {
  const { fullyUnquotedContent } = context

  if (
    /[<>|]\s*\$[A-Za-z_]/.test(fullyUnquotedContent) ||
    /\$[A-Za-z_][A-Za-z0-9_]*\s*[|<>]/.test(fullyUnquotedContent)
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_VARIABLES,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains variables in dangerous contexts (redirections or pipes)',
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous variables' }
}

/**
 * Asks for confirmation when `unquotedContent` contains an unescaped backtick
 * or matches any entry of COMMAND_SUBSTITUTION_PATTERNS. The first matching
 * pattern supplies the text appended to the returned message.
 *
 * Note: the backtick branch returns without calling logEvent; only the
 * pattern-table branch is logged.
 */
function validateDangerousPatterns(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context

  // Special handling for backticks - check for UNESCAPED backticks only
  // Escaped backticks (e.g., \`) are safe and commonly used in SQL commands
  if (hasUnescapedChar(unquotedContent, '`')) {
    return {
      behavior: 'ask',
      message: 'Command contains backticks (`) for command substitution',
    }
  }

  // Other command substitution checks (include double-quoted content)
  for (const { pattern, message } of COMMAND_SUBSTITUTION_PATTERNS) {
    if (pattern.test(unquotedContent)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId:
          BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION,
        subId: 1,
      })
      return { behavior: 'ask', message: `Command contains ${message}` }
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous patterns' }
}

/**
 * Asks for confirmation when `fullyUnquotedContent` contains a redirection
 * operator: `<` (input) is checked first, then `>` (output). Each branch is
 * logged under its own check id.
 *
 * NOTE(review): the regex literals in this function had been reduced to an
 * empty `//` (everything between `<` and `>` was lost), which is a comment
 * start rather than a pattern. The `>` test is restored from the surviving
 * output-redirection branch. The `<` branch is reconstructed from the
 * DANGEROUS_PATTERNS_INPUT_REDIRECTION check id — confirm its message text
 * against version control.
 */
function validateRedirections(context: ValidationContext): PermissionResult {
  const { fullyUnquotedContent } = context

  if (/</.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_INPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains input redirection (<) which could read sensitive files',
    }
  }

  if (/>/.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_OUTPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains output redirection (>) which could write to arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'No redirections' }
}

function validateNewlines(context: ValidationContext): PermissionResult {
  // Use fullyUnquotedPreStrip (before stripSafeRedirections) to prevent bypasses
  // where stripping `>/dev/null` creates a phantom backslash-newline continuation.
  // E.g., `cmd \>/dev/null\nwhoami` → after stripping becomes `cmd \\nwhoami`
  // which looks like a safe continuation but actually hides a second command.
  const { fullyUnquotedPreStrip } = context

  // Check for newlines in unquoted content
  if (!/[\n\r]/.test(fullyUnquotedPreStrip)) {
    return { behavior: 'passthrough', message: 'No newlines' }
  }

  // Flag any newline/CR followed by non-whitespace, EXCEPT backslash-newline
  // continuations at word boundaries. In bash, `\<newline>` is a line
  // continuation (both chars removed), which is safe when the backslash
  // follows whitespace (e.g., `cmd \<newline>--flag`).
// Mid-word continuations
  // like `tr\<newline>aceroute` are still flagged because they can hide
  // dangerous command names from allowlist checks.
  // NOTE(review): the statements from here to the closing brace of the next
  // function are damaged — the text between a `<` and a later `>` was lost, so
  // the `looksLikeCommand` regex, the rest of validateNewlines and the start of
  // the malformed-token validator are missing. The surviving tokens are kept
  // unchanged; restore this span from version control.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() + gated by /[\n\r]/.test() above
  const looksLikeCommand = /(? typeof entry === 'object' && entry !== null && 'op' in entry && (entry.op === ';' || entry.op === '&&' || entry.op === '||'), )

  if (!hasCommandSeparator) {
    return { behavior: 'passthrough', message: 'No command separators' }
  }

  // Check for malformed tokens (unbalanced delimiters)
  if (hasMalformedTokens(originalCommand, parsed)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.MALFORMED_TOKEN_INJECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains ambiguous syntax with command separators that could be misinterpreted',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No malformed token injection detected',
  }
}

/**
 * Detects flags that are hidden behind shell quoting tricks.
 *
 * The checks run in this order, and the first hit returns `ask`:
 *   - whole-command regexes for ANSI-C quoting (`$'...'`), locale quoting
 *     (`$"..."`), empty quote pairs before a dash, an empty pair adjacent to a
 *     quoted dash, and three or more quote characters at word start;
 *   - a character scan of `originalCommand` that tracks quote/escape state and,
 *     outside quotes, inspects (a) whitespace followed by a quoted segment that
 *     forms or starts a flag and (b) whitespace followed by `-` whose flag text
 *     contains a quote character;
 *   - two final regexes against `context.fullyUnquotedContent`.
 *
 * A command whose base command is `echo` and that contains none of `|`, `&`,
 * `;` is passed through before any check runs. Returns `passthrough` when
 * nothing matches.
 */
function validateObfuscatedFlags(context: ValidationContext): PermissionResult {
  // Block shell quoting bypass patterns used to circumvent negative lookaheads we use in our regexes to block known dangerous flags

  const { originalCommand, baseCommand } = context

  // Echo is safe for obfuscated flags, BUT only for simple echo commands.
  // For compound commands (with |, &, ;), we need to check the whole command
  // because the dangerous ANSI-C quoting might be after the operator.
  const hasShellOperators = /[|&;]/.test(originalCommand)
  if (baseCommand === 'echo' && !hasShellOperators) {
    return {
      behavior: 'passthrough',
      message: 'echo command is safe and has no dangerous flags',
    }
  }

  // COMPREHENSIVE OBFUSCATION DETECTION
  // These checks catch various ways to hide flags using shell quoting

  // 1. Block ANSI-C quoting ($'...') - can encode any character via escape sequences
  // Simple pattern that matches $'...' anywhere. This correctly handles:
  // - grep '$' file => no match ($ is regex anchor inside quotes, no $'...' structure)
  // - 'test'$'-exec' => match (quote concatenation with ANSI-C)
  // - Zero-width space and other invisible chars => match
  // The pattern requires $' followed by content (can be empty) followed by closing '
  if (/\$'[^']*'/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 5,
    })
    return {
      behavior: 'ask',
      message: 'Command contains ANSI-C quoting which can hide characters',
    }
  }

  // 2. Block locale quoting ($"...") - can also use escape sequences
  // Same simple pattern as ANSI-C quoting above
  if (/\$"[^"]*"/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 6,
    })
    return {
      behavior: 'ask',
      message: 'Command contains locale quoting which can hide characters',
    }
  }

  // 3. Block empty ANSI-C or locale quotes followed by dash
  // $''-exec or $""-exec
  if (/\$['"]{2}\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 9,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty special quotes before dash (potential bypass)',
    }
  }

  // 4. Block ANY sequence of empty quotes followed by dash
  // This catches: ''- ""- ''""- ""''- ''""''- etc.
  // The pattern looks for one or more empty quote pairs followed by optional whitespace and dash
  if (/(?:^|\s)(?:''|"")+\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 7,
    })
    return {
      behavior: 'ask',
      message: 'Command contains empty quotes before dash (potential bypass)',
    }
  }

  // 4b. SECURITY: Block homogeneous empty quote pair(s) immediately adjacent
  // to a quoted dash. Patterns like `"""-f"` (empty `""` + quoted `"-f"`)
  // concatenate in bash to `-f` but slip past all the above checks:
  //   - Regex (4) above: `(?:''|"")+\s*-` matches `""` pair, then expects
  //     optional space and dash — but finds a third `"` instead. No match.
  //   - Quote-content scanner (below): Sees the first `""` pair with empty
  //     content (doesn't start with dash). The third `"` opens a new quoted
  //     region handled by the main quote-state tracker.
  //   - Quote-state tracker: `""` toggles inDoubleQuote on/off; third `"`
  //     opens it again. The `-` inside `"-f"` is INSIDE quotes → skipped.
  //   - Flag scanner: Looks for `\s` before `-`. The `-` is preceded by `"`.
  //   - fullyUnquotedContent: Both `""` and `"-f"` get stripped.
  //
  // In bash, `"""-f"` = empty string + string "-f" = `-f`. This bypass works
  // for ANY dangerous-flag check (jq -f, find -exec, fc -e) with a matching
  // prefix permission (Bash(jq:*), Bash(find:*)).
  //
  // The regex `(?:""|'')+['"]-` matches:
  //   - One or more HOMOGENEOUS empty pairs (`""` or `''`) — the concatenation
  //     point where bash joins the empty string to the flag.
  //   - Immediately followed by ANY quote char — opens the flag-quoted region.
  //   - Immediately followed by `-` — the obfuscated flag.
  //
  // POSITION-AGNOSTIC: We do NOT require word-start (`(?:^|\s)`) because
  // prefixes like `$x"""-f"` (unset/empty variable) concatenate the same way.
  // The homogeneous-empty-pair requirement filters out the `'"'"'` idiom
  // (no homogeneous empty pair — it's close, double-quoted-content, open).
  //
  // FALSE POSITIVE: Matches `echo '"""-f" text'` (pattern inside single-quoted
  // string). Extremely rare (requires echoing the literal attack). Acceptable.
  if (/(?:""|'')+['"]-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 10,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty quote pair adjacent to quoted dash (potential flag obfuscation)',
    }
  }

  // 4c. SECURITY: Also block 3+ consecutive quotes at word start even without
  // an immediate dash. Broader safety net for multi-quote obfuscation patterns
  // not enumerated above (e.g., `"""x"-f` where content between quotes shifts
  // the dash position). Legitimate commands never need `"""x"` when `"x"` works.
  if (/(?:^|\s)['"]{3,}/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 11,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains consecutive quote characters at word start (potential obfuscation)',
    }
  }

  // Track quote state to avoid false positives for flags inside quoted strings
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  // The loop stops one short of the end because every check below needs a
  // (currentChar, nextChar) pair.
  for (let i = 0; i < originalCommand.length - 1; i++) {
    const currentChar = originalCommand[i]
    const nextChar = originalCommand[i + 1]

    // Update quote state
    if (escaped) {
      escaped = false
      continue
    }

    // SECURITY: Only treat backslash as escape OUTSIDE single quotes. In bash,
    // `\` inside `'...'` is LITERAL. Without this guard, `'\'` desyncs the
    // quote tracker: `\` sets escaped=true, closing `'` is consumed by the
    // escaped-skip above instead of toggling inSingleQuote. Parser stays in
    // single-quote mode, and the `if (inSingleQuote || inDoubleQuote) continue`
    // at line ~1121 skips ALL subsequent flag detection for the rest of the
    // command. Example: `jq '\' "-f" evil` — bash gets `-f` arg, but desynced
    // parser thinks ` "-f" evil` is inside quotes → flag detection bypassed.
    // Defense-in-depth: hasShellQuoteSingleQuoteBug catches `'\'` patterns at
    // line ~1856 before this runs. But we fix the tracker for consistency with
    // the CORRECT implementations elsewhere in this file (hasBackslashEscaped*,
    // extractQuotedContent) which all guard with `!inSingleQuote`.
    if (currentChar === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }

    if (currentChar === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }

    if (currentChar === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // Only look for flags when not inside quoted strings
    // This prevents false positives like: make test TEST="file.py -v"
    if (inSingleQuote || inDoubleQuote) {
      continue
    }

    // Look for whitespace followed by quote that contains a dash (potential flag obfuscation)
    // SECURITY: Block ANY quoted content starting with dash - err on side of safety
    // Catches: "-"exec, "-file", "--flag", '-'output, etc.
    // Users can approve manually if legitimate (e.g., find . -name "-file")
    if (
      currentChar &&
      nextChar &&
      /\s/.test(currentChar) &&
      /['"`]/.test(nextChar)
    ) {
      const quoteChar = nextChar
      let j = i + 2 // Start after the opening quote
      let insideQuote = '' // Collect content inside the quote

      // Plain scan to the next identical quote char; no escape handling here.
      while (j < originalCommand.length && originalCommand[j] !== quoteChar) {
        insideQuote += originalCommand[j]!
        j++
      }

      // If we found a closing quote and the content looks like an obfuscated flag, block it.
      // Three attack patterns to catch:
      //   1. Flag name inside quotes: "--flag", "-exec", "-X" (dashes + letters inside)
      //   2. Split-quote flag: "-"exec, "--"output (dashes inside, letters continue after quote)
      //   3. Chained quotes: "-""exec" (dashes in first quote, second quote contains letters)
      // Pure-dash strings like "---" or "--" followed by whitespace/separator are separators,
      // not flags, and should not trigger this check.
      const charAfterQuote = originalCommand[j + 1]
      // Inside double quotes, $VAR and `cmd` expand at runtime, so "-$VAR" can
      // become -exec. Blocking $ and ` here over-blocks single-quoted literals
      // like grep '-$' (where $ is literal), but main's startsWith('-') already
      // blocked those — this restores status quo, not a new false positive.
      // Brace expansion ({) does NOT happen inside quotes, so { is not needed here.
      const hasFlagCharsInside = /^-+[a-zA-Z0-9$`]/.test(insideQuote)
      // Characters that can continue a flag after a closing quote. This catches:
      //   a-zA-Z0-9: "-"exec → -exec (direct concatenation)
      //   \\:        "-"\exec → -exec (backslash escape is stripped)
      //   -:         "-"-output → --output (extra dashes)
      //   {:         "-"{exec,delete} → -exec -delete (brace expansion)
      //   $:         "-"$VAR → -exec when VAR=exec (variable expansion)
      //   `:         "-"`echo exec` → -exec (command substitution)
      // Note: glob chars (*?[) are omitted — they require attacker-controlled
      // filenames in CWD to exploit, and blocking them would break patterns
      // like `ls -- "-"*` for listing files that start with dash.
      const FLAG_CONTINUATION_CHARS = /[a-zA-Z0-9\\${`-]/
      const hasFlagCharsContinuing =
        /^-+$/.test(insideQuote) &&
        charAfterQuote !== undefined &&
        FLAG_CONTINUATION_CHARS.test(charAfterQuote)
      // Handle adjacent quote chaining: "-""exec" or "-""-"exec or """-"exec concatenates
      // to -exec in shell. Follow the chain of adjacent quoted segments until
      // we find one containing an alphanumeric char or hit a non-quote boundary.
      // Also handles empty prefix quotes: """-"exec where "" is followed by "-"exec
      // The combined segments form a flag if they contain dash(es) followed by alphanumerics.
      const hasFlagCharsInNextQuote =
        // Trigger when: first segment is only dashes OR empty (could be prefix for flag)
        (insideQuote === '' || /^-+$/.test(insideQuote)) &&
        charAfterQuote !== undefined &&
        /['"`]/.test(charAfterQuote) &&
        (() => {
          let pos = j + 1 // Start at charAfterQuote (an opening quote)
          let combinedContent = insideQuote // Track what the shell will see

          while (
            pos < originalCommand.length &&
            /['"`]/.test(originalCommand[pos]!)
          ) {
            const segQuote = originalCommand[pos]!
            let end = pos + 1
            while (
              end < originalCommand.length &&
              originalCommand[end] !== segQuote
            ) {
              end++
            }
            const segment = originalCommand.slice(pos + 1, end)
            combinedContent += segment

            // Check if combined content so far forms a flag pattern.
            // Include $ and ` for in-quote expansion: "-""$VAR" → -exec
            if (/^-+[a-zA-Z0-9$`]/.test(combinedContent)) return true

            // If this segment has alphanumeric/expansion and we already have dashes,
            // it's a flag. Catches "-""$*" where segment='$*' has no alnum but
            // expands to positional params at runtime.
            // Guard against segment.length === 0: slice(0, -0) → slice(0, 0) → ''.
            const priorContent =
              segment.length > 0
                ? combinedContent.slice(0, -segment.length)
                : combinedContent
            if (/^-+$/.test(priorContent)) {
              if (/[a-zA-Z0-9$`]/.test(segment)) return true
            }

            if (end >= originalCommand.length) break // Unclosed quote
            pos = end + 1 // Move past closing quote to check next segment
          }

          // Also check the unquoted char at the end of the chain
          if (
            pos < originalCommand.length &&
            FLAG_CONTINUATION_CHARS.test(originalCommand[pos]!)
          ) {
            // If we have dashes in combined content, the trailing char completes a flag
            if (/^-+$/.test(combinedContent) || combinedContent === '') {
              // Check if we're about to form a flag with the following content
              // (this local shadows the loop-level nextChar; it is the char at `pos`)
              const nextChar = originalCommand[pos]!
              if (nextChar === '-') {
                // More dashes, could still form a flag
                return true
              }
              if (/[a-zA-Z0-9\\${`]/.test(nextChar) && combinedContent !== '') {
                // We have dashes and now alphanumeric/expansion follows
                return true
              }
            }
            // Original check for dashes followed by alphanumeric
            if (/^-/.test(combinedContent)) {
              return true
            }
          }
          return false
        })()

      // Only report when the first quoted segment was actually closed
      // (position j holds the matching quote char).
      if (
        j < originalCommand.length &&
        originalCommand[j] === quoteChar &&
        (hasFlagCharsInside ||
          hasFlagCharsContinuing ||
          hasFlagCharsInNextQuote)
      ) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 4,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }

    // Look for whitespace followed by dash - this starts a flag
    if (currentChar && nextChar && /\s/.test(currentChar) && nextChar === '-') {
      let j = i + 1 // Start at the dash
      let flagContent = '' // Collect flag content

      while (j < originalCommand.length) {
        const flagChar = originalCommand[j]
        if (!flagChar) break

        // End flag content once we hit whitespace or an equals sign
        if (/[\s=]/.test(flagChar)) {
          break
        }
        // End flag collection if we hit quote followed by non-flag character. This is needed to handle cases like -d"," which should be parsed as just -d
        if (/['"`]/.test(flagChar)) {
          // Special case for cut -d flag: the delimiter value can be quoted
          // Example: cut -d'"' should parse as flag name: -d, value: '"'
          // Note: We only apply this exception to cut -d specifically to avoid bypasses.
          // Without this restriction, a command like `find -e"xec"` could be parsed as
          // flag name: -e, bypassing our blocklist for -exec. By restricting to cut -d,
          // we allow the legitimate use case while preventing obfuscation attacks on other
          // commands where quoted flag values could hide dangerous flag names.
          // (The third condition repeats the enclosing quote test and is always true here.)
          if (
            baseCommand === 'cut' &&
            flagContent === '-d' &&
            /['"`]/.test(flagChar)
          ) {
            // This is cut -d followed by a quoted delimiter - flagContent is already '-d'
            break
          }

          // Look ahead to see what follows the quote
          if (j + 1 < originalCommand.length) {
            const nextFlagChar = originalCommand[j + 1]
            if (nextFlagChar && !/[a-zA-Z0-9_'"-]/.test(nextFlagChar)) {
              // Quote followed by something that is clearly not part of a flag, end the parsing
              break
            }
          }
        }
        flagContent += flagChar
        j++
      }

      // Only `"` and `'` are tested here; a backtick collected into flagContent
      // does not trigger this branch on its own.
      if (flagContent.includes('"') || flagContent.includes("'")) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 1,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }
  }

  // Also handle flags that start with quotes: "--"output, '-'-output, etc.
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content like echo "---"
  if (/\s['"`]-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  // Also handles cases like ""--output
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content
  if (/['"`]{2}-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 3,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  return { behavior: 'passthrough', message: 'No obfuscated flags detected' }
}

/**
 * Detects backslash-escaped whitespace characters (space, tab) outside of quotes.
 *
 * In bash, `echo\ test` is a single token (command named "echo test"), but
 * shell-quote decodes the escape and produces `echo test` (two separate tokens).
* This discrepancy allows path traversal attacks like:
 *   echo\ test/../../../usr/bin/touch /tmp/file
 * which the parser sees as `echo test/.../touch /tmp/file` (an echo command)
 * but bash resolves as `/usr/bin/touch /tmp/file` (via directory "echo test").
 */
function hasBackslashEscapedWhitespace(command: string): boolean {
  let insideSingle = false
  let insideDouble = false
  let idx = 0

  while (idx < command.length) {
    const ch = command[idx]

    // A backslash is an escape everywhere except inside single quotes.
    if (ch === '\\' && !insideSingle) {
      // Escaped space/tab only matters outside double quotes.
      if (!insideDouble) {
        const escapedChar = command[idx + 1]
        if (escapedChar === ' ' || escapedChar === '\t') {
          return true
        }
      }
      // Step over the backslash AND the character it escapes. This also
      // applies inside double quotes, where \\, \", \$ and \` are valid
      // escape sequences, so an escaped quote never toggles the state below.
      idx += 2
      continue
    }

    if (ch === '"' && !insideSingle) {
      insideDouble = !insideDouble
    } else if (ch === "'" && !insideDouble) {
      insideSingle = !insideSingle
    }
    idx++
  }

  return false
}

/**
 * Validator wrapper around hasBackslashEscapedWhitespace: logs and asks for
 * confirmation when the original command contains a backslash-escaped space
 * or tab outside quotes, otherwise passes through.
 */
function validateBackslashEscapedWhitespace(
  context: ValidationContext,
): PermissionResult {
  if (!hasBackslashEscapedWhitespace(context.originalCommand)) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped whitespace',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains backslash-escaped whitespace that could alter command parsing',
  }
}

/**
 * Detects a backslash immediately preceding a shell operator outside of quotes.
 *
 * SECURITY: splitCommand normalizes `\;` to a bare `;` in its output string.
 * When downstream code (checkReadOnlyConstraints, checkPathConstraints, etc.)
 * re-parses that normalized string, the bare `;` is seen as an operator and
 * causes a false split. This enables arbitrary file read bypassing path checks:
 *
 *   cat safe.txt \; echo ~/.ssh/id_rsa
 *
 * In bash: ONE cat command reading safe.txt, ;, echo, ~/.ssh/id_rsa as files.
* After splitCommand normalizes: "cat safe.txt ; echo ~/.ssh/id_rsa"
 * Nested re-parse: ["cat safe.txt", "echo ~/.ssh/id_rsa"] — both segments
 * pass isCommandReadOnly, sensitive path hidden in echo segment is never
 * validated by path constraints. Auto-allowed. Private key leaked.
 *
 * Backslash parity: the scanner consumes backslashes in pairs, so only an
 * UNPAIRED backslash directly before an operator is flagged (odd counts such
 * as `\;`). Those are safe in bash but trigger the double-parse bug above.
 * Even counts (`\\;`) are NOT flagged: bash reads `\\` as a literal `\` and
 * the `;` as a real separator, which splitCommand handles normally.
 *
 * Known false positive: `find . -exec cmd {} \;` — users will be prompted once.
 *
 * Note: `(` and `)` are NOT in this set — splitCommand preserves `\(` and `\)`
 * in its output (round-trip safe), so they don't trigger the double-parse bug.
 * This allows `find . \( -name x -o -name y \)` to pass without false positives.
 */
const SHELL_OPERATORS = new Set([';', '|', '&', '<', '>'])

function hasBackslashEscapedOperator(command: string): boolean {
  let insideSingle = false
  let insideDouble = false
  let idx = 0

  while (idx < command.length) {
    const ch = command[idx]

    // SECURITY: the backslash branch must run BEFORE the quote toggles. Inside
    // double quotes `\"` is an escaped quote, not a closing one; toggling on
    // it would desync the tracker and hide a later `\;` outside the quotes.
    // Exploit shape: `tac "x\"y" \; echo ~/.ssh/id_rsa`.
    // Inside single quotes a backslash is literal, hence the !insideSingle gate.
    if (ch === '\\' && !insideSingle) {
      const escapedChar = command[idx + 1]
      // Report only outside double quotes: within "..." the operators
      // ;|&<> are not special, so a backslash before them is harmless.
      if (!insideDouble && escapedChar && SHELL_OPERATORS.has(escapedChar)) {
        return true
      }
      // Always step over the escaped character as well. Inside double quotes
      // this consumes `\\` as a pair, so in `"x\\"` the final `"` still closes
      // the quote. Without it, `cat "x\\" \; echo /etc/passwd` would leave the
      // tracker stuck in double-quote mode and miss the `\;`.
      idx += 2
      continue
    }

    // Reached only for characters that were not consumed as an escape target,
    // so these toggles fire on unescaped quotes only.
    if (ch === "'" && !insideDouble) {
      insideSingle = !insideSingle
    } else if (ch === '"' && !insideSingle) {
      insideDouble = !insideDouble
    }
    idx++
  }

  return false
}

/**
 * Validator wrapper around hasBackslashEscapedOperator.
 *
 * When tree-sitter analysis is present and reports no actual operator nodes,
 * any `\;` is just an escaped character in a word argument (e.g.
 * `find . -exec cmd {} \;`), so the character scan is skipped. Otherwise a
 * backslash-escaped operator in the original command is logged and results in
 * `ask`.
 */
function validateBackslashEscapedOperators(
  context: ValidationContext,
): PermissionResult {
  const astShowsNoOperators =
    context.treeSitter && !context.treeSitter.hasActualOperatorNodes
  if (astShowsNoOperators) {
    return { behavior: 'passthrough', message: 'No operator nodes in AST' }
  }

  if (!hasBackslashEscapedOperator(context.originalCommand)) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped operators',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_OPERATORS,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains a backslash before a shell operator (;, |, &, <, >) which can hide command structure',
  }
}

/**
 * Reports whether the character at `pos` in `content` is backslash-escaped.
 * The run of consecutive backslashes ending just before `pos` is counted;
 * an odd run length means the character is escaped.
 */
function isEscapedAtPosition(content: string, pos: number): boolean {
  let run = 0
  for (let k = pos - 1; k >= 0 && content[k] === '\\'; k--) {
    run++
  }
  return run % 2 === 1
}

/**
 * Detects unquoted brace expansion syntax that Bash expands but shell-quote/tree-sitter
 * treat as literal strings. This parsing discrepancy allows permission bypass:
 *   git ls-remote {--upload-pack="touch /tmp/test",test}
 * Parser sees one literal arg, but Bash expands to: --upload-pack="touch /tmp/test" test
 *
 * Brace expansion has two forms:
 *   1. Comma-separated: {a,b,c} → a b c
 *   2. Sequence: {1..5} → 1 2 3 4 5
 *
 * Both single and double quotes suppress brace expansion in Bash, so we use
 * fullyUnquotedContent which has both quote types stripped.
* Backslash-escaped braces (\{, \}) also suppress expansion.
 *
 * Returns `ask` on the first of: more unescaped `}` than `{` (subId 2), a
 * quoted single brace in the original command while an unescaped `{` exists
 * (subId 3), or an unescaped `{...}` pair with `,` or `..` at its outermost
 * level (subId 1). Returns `passthrough` otherwise.
 */
function validateBraceExpansion(context: ValidationContext): PermissionResult {
  // Use pre-strip content to avoid false negatives from stripSafeRedirections
  // creating backslash adjacencies (e.g., `\>/dev/null{a,b}` → `\{a,b}` after
  // stripping, making isEscapedAtPosition think the brace is escaped).
  const content = context.fullyUnquotedPreStrip

  // SECURITY: Check for MISMATCHED brace counts in fullyUnquoted content.
  // A mismatch indicates that quoted braces (e.g., `'{'` or `"{"`) were
  // stripped by extractQuotedContent, leaving unbalanced braces in the content
  // we analyze. Our depth-matching algorithm below assumes balanced braces —
  // with a mismatch, it closes at the WRONG position, missing commas that
  // bash's algorithm WOULD find.
  //
  // Exploit: `git diff {@'{'0},--output=/tmp/pwned}`
  //   - Original: 2 `{`, 2 `}` (quoted `'{'` counts as content, not operator)
  //   - fullyUnquoted: `git diff {@0},--output=/tmp/pwned}` — 1 `{`, 2 `}`!
  //   - Our depth-matcher: closes at first `}` (after `0`), inner=`@0`, no `,`
  //   - Bash (on original): quoted `{` is content; first unquoted `}` has no
  //     `,` yet → bash treats as literal content, keeps scanning → finds `,`
  //     → final `}` closes → expands to `@{0} --output=/tmp/pwned`
  //   - git writes diff to /tmp/pwned. ARBITRARY FILE WRITE, ZERO PERMISSIONS.
  //
  // We count ONLY unescaped braces (backslash-escaped braces are literal in
  // bash). The counts feed the excess-close check and the quoted-brace check
  // that follow.
  let unescapedOpenBraces = 0
  let unescapedCloseBraces = 0
  for (let i = 0; i < content.length; i++) {
    if (content[i] === '{' && !isEscapedAtPosition(content, i)) {
      unescapedOpenBraces++
    } else if (content[i] === '}' && !isEscapedAtPosition(content, i)) {
      unescapedCloseBraces++
    }
  }
  // Only block when CLOSE count EXCEEDS open count — this is the specific
  // attack signature. More `}` than `{` means a quoted `{` was stripped
  // (bash saw it as content, we see extra `}` unaccounted for). The inverse
  // (more `{` than `}`) is usually legitimate unclosed/escaped braces like
  // `{foo` or `{a,b\}` where bash doesn't expand anyway.
  if (unescapedOpenBraces > 0 && unescapedCloseBraces > unescapedOpenBraces) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        'Command has excess closing braces after quote stripping, indicating possible brace expansion obfuscation',
    }
  }

  // SECURITY: Additionally, check the ORIGINAL command (before quote stripping)
  // for `'{'` or `"{"` INSIDE an unquoted brace context — this is the specific
  // attack primitive. A quoted brace inside an outer unquoted `{...}` is
  // essentially always an obfuscation attempt; legitimate commands don't nest
  // quoted braces inside brace expansion (awk/find patterns are fully quoted,
  // like `awk '{print $1}'` where the OUTER brace is inside quotes too).
  //
  // This catches the attack even if an attacker crafts a payload with balanced
  // stripped braces (defense-in-depth). We use a simple heuristic: if the
  // original command has `'{'` or `'}'` or `"{"` or `"}"` (quoted single brace)
  // AND also has an unquoted `{`, that's suspicious.
  // The heuristic is position-independent: the quoted brace may appear
  // anywhere in the original command, not only inside the unquoted `{...}`.
  if (unescapedOpenBraces > 0) {
    const orig = context.originalCommand
    // Look for quoted single-brace patterns: '{', '}', "{", "}"
    // These are the attack primitive — a brace char wrapped in quotes.
    // (The character classes also accept mismatched delimiters such as `'{"`.)
    if (/['"][{}]['"]/.test(orig)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
        subId: 3,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains quoted brace character inside brace context (potential brace expansion obfuscation)',
      }
    }
  }

  // Scan for unescaped `{` characters, then check if they form brace expansion.
  // We use a manual scan rather than a simple regex lookbehind because
  // lookbehinds can't handle double-escaped backslashes (\\{ is unescaped `{`).
  for (let i = 0; i < content.length; i++) {
    if (content[i] !== '{') continue
    if (isEscapedAtPosition(content, i)) continue

    // Find matching unescaped `}` by tracking nesting depth.
    // Previous approach broke on nested `{`, missing commas between the outer
    // `{` and the nested one (e.g., `{--upload-pack="evil",{test}}`).
    let depth = 1
    let matchingClose = -1
    for (let j = i + 1; j < content.length; j++) {
      const ch = content[j]
      if (ch === '{' && !isEscapedAtPosition(content, j)) {
        depth++
      } else if (ch === '}' && !isEscapedAtPosition(content, j)) {
        depth--
        if (depth === 0) {
          matchingClose = j
          break
        }
      }
    }

    // Unclosed `{`: nothing to inspect for this opener.
    if (matchingClose === -1) continue

    // Check for `,` or `..` at the outermost nesting level between this
    // `{` and its matching `}`. Only depth-0 triggers matter — bash splits
    // brace expansion at outer-level commas/sequences.
    let innerDepth = 0
    for (let k = i + 1; k < matchingClose; k++) {
      const ch = content[k]
      if (ch === '{' && !isEscapedAtPosition(content, k)) {
        innerDepth++
      } else if (ch === '}' && !isEscapedAtPosition(content, k)) {
        innerDepth--
      } else if (innerDepth === 0) {
        // `..` only counts when both dots lie before the matching `}`.
        if (
          ch === ',' ||
          (ch === '.' && k + 1 < matchingClose && content[k + 1] === '.')
        ) {
          logEvent('tengu_bash_security_check_triggered', {
            checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
            subId: 1,
          })
          return {
            behavior: 'ask',
            message:
              'Command contains brace expansion that could alter command parsing',
          }
        }
      }
    }
    // No expansion at this level — don't skip past; inner pairs will be
    // caught by subsequent iterations of the outer loop.
  }

  return {
    behavior: 'passthrough',
    message: 'No brace expansion detected',
  }
}

// Matches Unicode whitespace characters that shell-quote treats as word
// separators but bash treats as literal word content.
// While this differential
// is defense-favorable (shell-quote over-splits), blocking these proactively
// prevents future edge cases.
// eslint-disable-next-line no-misleading-character-class
const UNICODE_WS_RE = /[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]/

/**
 * Asks for confirmation when the raw command contains any character matched
 * by UNICODE_WS_RE; otherwise passes through.
 */
function validateUnicodeWhitespace(
  context: ValidationContext,
): PermissionResult {
  // Clean commands fall straight through without logging.
  if (!UNICODE_WS_RE.test(context.originalCommand)) {
    return { behavior: 'passthrough', message: 'No Unicode whitespace' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.UNICODE_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains Unicode whitespace characters that could cause parsing inconsistencies',
  }
}

function validateMidWordHash(context: ValidationContext): PermissionResult {
  const { unquotedKeepQuoteChars } = context
  // Match # preceded by a non-whitespace character (mid-word hash).
  // shell-quote treats mid-word # as comment-start but bash treats it as a
  // literal character, creating a parser differential.
  //
  // Uses unquotedKeepQuoteChars (which preserves quote delimiters but strips
  // quoted content) to catch quote-adjacent # like 'x'# — fullyUnquotedPreStrip
  // would strip both quotes and content, turning 'x'# into just # (word-start).
  //
  // SECURITY: Also check the CONTINUATION-JOINED version. The context is built
  // from the original command (pre-continuation-join). For `foo\#bar`,
  // pre-join the `#` is preceded by `\n` (whitespace → `/\S#/` doesn't match),
  // but post-join it's preceded by `o` (non-whitespace → matches). shell-quote
  // operates on the post-join text (line continuations are joined in
  // splitCommand), so the parser differential manifests on the joined text.
  // While not directly exploitable (the `#...` fragment still prompts as its
  // own subcommand), this is a defense-in-depth gap — shell-quote would drop
  // post-`#` content from path extraction.
// // Exclude ${# which is bash string-length syntax (e.g., ${#var}). // Note: the lookbehind must be placed immediately before # (not before \S) // so that it checks the correct 2-char window. const joined = unquotedKeepQuoteChars.replace(/\\+\n/g, match => { const backslashCount = match.length - 1 return backslashCount % 2 === 1 ? '\\'.repeat(backslashCount - 1) : match }) if ( // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() with atom search: fast when # absent /\S(?#' ~/.ssh/id_rsa ./exfil_dir * Bash: moves ./decoy AND ~/.ssh/id_rsa into ./exfil_dir/ (errors on `\n#`). * stripSafeWrappers: line 2 starts with `#` → stripped → "mv ./decoy '". * shell-quote: drops unbalanced trailing quote → ["mv", "./decoy"]. * checkPathConstraints: only sees ./decoy (in cwd) → passthrough. * acceptEdits mode: mv with all-cwd paths → ALLOW. Zero clicks, no warning. * * Also works with cp (exfil), rm/rm -rf (delete arbitrary files/dirs). * * Defense: block ONLY the specific stripCommentLines trigger — a newline inside * quotes where the next line starts with `#` after trim. This is the minimal * check that catches the parser differential while preserving legitimate * multi-line quoted arguments (echo 'line1\nline2', grep patterns, etc.). * Safe heredocs ($(cat <<'EOF'...)) and git commit -m "..." are handled by * early validators and never reach this check. * * This validator is NOT in nonMisparsingValidators — its ask result gets * isBashSecurityCheckForMisparsing: true, causing an early block in the * permission flow at bashPermissions.ts before any line-based processing runs. */ function validateQuotedNewline(context: ValidationContext): PermissionResult { const { originalCommand } = context // Fast path: must have both a newline byte AND a # character somewhere. // stripCommentLines only strips lines where trim().startsWith('#'), so // no # means no possible trigger. 
if (!originalCommand.includes('\n') || !originalCommand.includes('#')) { return { behavior: 'passthrough', message: 'No newline or no hash' } } // Track quote state. Mirrors extractQuotedContent / validateCommentQuoteDesync: // - single quotes don't toggle inside double quotes // - backslash escapes the next char (but not inside single quotes) // stripCommentLines splits on '\n' (not \r), so we only treat \n as a line // separator. \r inside a line is removed by trim() and doesn't change the // trimmed-starts-with-# check. let inSingleQuote = false let inDoubleQuote = false let escaped = false for (let i = 0; i < originalCommand.length; i++) { const char = originalCommand[i] if (escaped) { escaped = false continue } if (char === '\\' && !inSingleQuote) { escaped = true continue } if (char === "'" && !inDoubleQuote) { inSingleQuote = !inSingleQuote continue } if (char === '"' && !inSingleQuote) { inDoubleQuote = !inDoubleQuote continue } // A newline inside quotes: the NEXT line (from bash's perspective) starts // inside a quoted string. Check if that line would be stripped by // stripCommentLines — i.e., after trim(), does it start with `#`? // This exactly mirrors: lines.filter(l => !l.trim().startsWith('#')) if (char === '\n' && (inSingleQuote || inDoubleQuote)) { const lineStart = i + 1 const nextNewline = originalCommand.indexOf('\n', lineStart) const lineEnd = nextNewline === -1 ? 
originalCommand.length : nextNewline
      const nextLine = originalCommand.slice(lineStart, lineEnd)
      if (nextLine.trim().startsWith('#')) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.QUOTED_NEWLINE,
        })
        return {
          behavior: 'ask',
          message:
            'Command contains a quoted newline followed by a #-prefixed line, which can hide arguments from line-based permission checks',
        }
      }
    }
  }

  return { behavior: 'passthrough', message: 'No quoted newline-hash pattern' }
}

/**
 * Asks for confirmation when the command's base command is a Zsh-specific
 * builtin that can bypass the other security checks. The blocked names live
 * in ZSH_DANGEROUS_COMMANDS and cover loading Zsh modules (`zmodload`), the
 * eval-equivalent `emulate -c`, and module builtins for raw file I/O, network
 * access, and pseudo-terminal execution.
 *
 * Also asks for `fc` combined with an `-e`-style flag, since `fc -e` runs an
 * arbitrary editor on command history.
 */
function validateZshDangerousCommands(
  context: ValidationContext,
): PermissionResult {
  const commandText = context.originalCommand.trim()

  // Zsh precommand modifiers do not change which command ends up running,
  // so they are skipped when looking for the base command.
  const precommandModifiers = new Set([
    'command',
    'builtin',
    'noglob',
    'nocorrect',
  ])
  // Leading environment assignments (VAR=value) are skipped as well.
  const envAssignment = /^[A-Za-z_]\w*=/

  // The base command is the first whitespace-separated word that is neither
  // an assignment nor a modifier,
  // e.g. "FOO=bar command builtin zmodload" -> "zmodload".
  const baseCmd =
    commandText
      .split(/\s+/)
      .find(word => !envAssignment.test(word) && !precommandModifiers.has(word)) ??
    ''

  if (ZSH_DANGEROUS_COMMANDS.has(baseCmd)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message: `Command uses Zsh-specific '${baseCmd}' which can bypass security checks`,
    }
  }

  // Plain `fc` only lists history. With -e it hands the command to an
  // editor of the caller's choosing, which is effectively an eval. The
  // pattern matches any flag word containing `e`, so combined flags are
  // caught too.
  const invokesFcEditor = baseCmd === 'fc' && /\s-\S*e/.test(commandText)
  if (invokesFcEditor) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        "Command uses 'fc -e' which can execute arbitrary commands via editor",
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No Zsh dangerous commands',
  }
}

// Matches non-printable control characters that have no legitimate use in shell
// commands: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F. Excludes tab (0x09),
// newline (0x0A), and carriage return (0x0D) which are handled by other
// validators. Bash silently drops null bytes and ignores most control chars,
// so an attacker can use them to slip metacharacters past our checks while
// bash still executes them (e.g., "echo safe\x00; rm -rf /").
// eslint-disable-next-line no-control-regex
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable.
The primary gate is parseForSecurity (ast.ts). */ export function bashCommandIsSafe_DEPRECATED( command: string, ): PermissionResult { // SECURITY: Block control characters before any other processing. Null bytes // and other non-printable chars are silently dropped by bash but confuse our // validators, allowing metacharacters adjacent to them to slip through. if (CONTROL_CHAR_RE.test(command)) { logEvent('tengu_bash_security_check_triggered', { checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS, }) return { behavior: 'ask', message: 'Command contains non-printable control characters that could be used to bypass security checks', isBashSecurityCheckForMisparsing: true, } } // SECURITY: Detect '\' patterns that exploit shell-quote's incorrect handling // of backslashes inside single quotes. Must run before shell-quote parsing. if (hasShellQuoteSingleQuoteBug(command)) { return { behavior: 'ask', message: 'Command contains single-quoted backslash pattern that could bypass security checks', isBashSecurityCheckForMisparsing: true, } } // SECURITY: Strip heredoc bodies before running security validators. // Only strip bodies for quoted/escaped delimiters (<<'EOF', <<\EOF) where // the body is literal text — $(), backticks, and ${} are NOT expanded. // Unquoted heredocs (<`, it // returns ask-without-flag — but validateBackslashEscapedOperators (index 12, // misparsing) would have caught `\;` WITH the flag. Short-circuiting lets a // payload like `cat safe.txt \; echo /etc/passwd > ./out` slip through. // // Fix: defer non-misparsing ask results. Continue running validators; if any // misparsing validator fires, return THAT (with the flag). Only if we reach // the end without a misparsing ask, return the deferred non-misparsing ask. 
let deferredNonMisparsingResult: PermissionResult | null = null
  for (const validator of validators) {
    const result = validator(context)
    if (result.behavior === 'ask') {
      if (nonMisparsingValidators.has(validator)) {
        if (deferredNonMisparsingResult === null) {
          deferredNonMisparsingResult = result
        }
        continue
      }
      return { ...result, isBashSecurityCheckForMisparsing: true as const }
    }
  }
  if (deferredNonMisparsingResult !== null) {
    return deferredNonMisparsingResult
  }

  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Async version of bashCommandIsSafe that uses tree-sitter when available
 * for more accurate parsing. Falls back to the sync regex version when
 * tree-sitter is not available.
 *
 * This should be used by async callers (bashPermissions.ts, bashCommandHelpers.ts).
 * Sync callers (readOnlyValidation.ts) should continue using bashCommandIsSafe().
 *
 * @param command - The raw shell command to validate.
 * @param onDivergence - Optional callback invoked, instead of emitting a
 *   `tengu_tree_sitter_security_divergence` event, when the tree-sitter and
 *   regex quote extractions disagree. Lets a fan-out caller batch the logging.
 * @returns `passthrough` when no validator objects (including when an early
 *   validator returns `allow`), otherwise the first objecting result. An `ask`
 *   from a misparsing validator carries `isBashSecurityCheckForMisparsing: true`.
 */
export async function bashCommandIsSafeAsync_DEPRECATED(
  command: string,
  onDivergence?: () => void,
): Promise<PermissionResult> {
  // Try to get tree-sitter analysis
  const parsed = await ParsedCommand.parse(command)
  const tsAnalysis = parsed?.getTreeSitterAnalysis() ?? null

  // If no tree-sitter, fall back to sync version
  if (!tsAnalysis) {
    return bashCommandIsSafe_DEPRECATED(command)
  }

  // Run the same security checks but with tree-sitter enriched context.
  // The early checks (control chars, shell-quote bug) don't benefit from
  // tree-sitter, so we run them identically.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains non-printable control characters that could be used to bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  if (hasShellQuoteSingleQuoteBug(command)) {
    return {
      behavior: 'ask',
      message:
        'Command contains single-quoted backslash pattern that could bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  const baseCommand = command.split(' ')[0] || ''

  // Use tree-sitter quote context for more accurate analysis
  const tsQuote = tsAnalysis.quoteContext
  const regexQuote = extractQuotedContent(
    processedCommand,
    baseCommand === 'jq',
  )

  // Use tree-sitter quote context as primary, but keep regex as reference
  // for divergence logging
  const withDoubleQuotes = tsQuote.withDoubleQuotes
  const fullyUnquoted = tsQuote.fullyUnquoted
  const unquotedKeepQuoteChars = tsQuote.unquotedKeepQuoteChars

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
    treeSitter: tsAnalysis,
  }

  // Log divergence between tree-sitter and regex quote extraction.
  // Skip for heredoc commands: tree-sitter strips (quoted) heredoc bodies
  // to nothing while the regex path replaces them with placeholder strings
  // (via extractHeredocs), so the two outputs can never match. Logging
  // divergence for every heredoc command would poison the signal.
  //
  // onDivergence callback: when called in a fanout loop (bashPermissions.ts
  // Promise.all over subcommands), the caller batches divergences into a
  // single logEvent instead of N separate calls. Each logEvent triggers
  // getEventMetadata() → buildProcessMetrics() → process.memoryUsage() →
  // /proc/self/stat read; with memoized metadata these resolve as microtasks
  // and starve the event loop (CC-643). Single-command callers omit the
  // callback and get the original per-call logEvent behavior.
  if (!tsAnalysis.dangerousPatterns.hasHeredoc) {
    const hasDivergence =
      tsQuote.fullyUnquoted !== regexQuote.fullyUnquoted ||
      tsQuote.withDoubleQuotes !== regexQuote.withDoubleQuotes
    if (hasDivergence) {
      if (onDivergence) {
        onDivergence()
      } else {
        logEvent('tengu_tree_sitter_security_divergence', {
          quoteContextDivergence: true,
        })
      }
    }
  }

  // Early validators may settle the outcome outright: an `allow` is reported
  // to the caller as `passthrough`, and any other non-passthrough result is
  // returned immediately (an `ask` gets the misparsing flag).
  const earlyValidators = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]

  for (const validator of earlyValidators) {
    const result = validator(context)
    if (result.behavior === 'allow') {
      return {
        behavior: 'passthrough',
        message:
          result.decisionReason?.type === 'other' ||
          result.decisionReason?.type === 'safetyCheck'
            ? result.decisionReason.reason
            : 'Command allowed',
      }
    }
    if (result.behavior !== 'passthrough') {
      return result.behavior === 'ask'
        ? { ...result, isBashSecurityCheckForMisparsing: true as const }
        : result
    }
  }

  // Validators whose `ask` result is returned WITHOUT the misparsing flag.
  const nonMisparsingValidators = new Set([
    validateNewlines,
    validateRedirections,
  ])

  // Order matters: validators run in sequence and the first misparsing `ask`
  // is returned. Do not reorder.
  const validators = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    validateCommentQuoteDesync,
    validateQuotedNewline,
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    validateMalformedTokenInjection,
  ]

  // A non-misparsing `ask` is held back rather than returned at once, so a
  // later misparsing validator can still fire and return WITH the flag. Only
  // the first held-back result is kept.
  let deferredNonMisparsingResult: PermissionResult | null = null
  for (const validator of validators) {
    const result = validator(context)
    if (result.behavior === 'ask') {
      if (nonMisparsingValidators.has(validator)) {
        if (deferredNonMisparsingResult === null) {
          deferredNonMisparsingResult = result
        }
        continue
      }
      return { ...result, isBashSecurityCheckForMisparsing: true as const }
    }
  }
  if (deferredNonMisparsingResult !== null) {
    return deferredNonMisparsingResult
  }

  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}