1805 lines
65 KiB
TypeScript
1805 lines
65 KiB
TypeScript
import { execa } from 'execa'
|
||
import { logForDebugging } from '../debug.js'
|
||
import { memoizeWithLRU } from '../memoize.js'
|
||
import { getCachedPowerShellPath } from '../shell/powershellDetection.js'
|
||
import { jsonParse } from '../slowOperations.js'
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Public types describing the parsed output returned to callers.
|
||
// These map to System.Management.Automation.Language AST classes.
|
||
// Raw internal types (RawParsedOutput etc.) are defined further below.
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/**
 * Kind of PowerShell AST node that a pipeline element was parsed as.
 * Each value is the name of a class in
 * System.Management.Automation.Language.
 */
type PipelineElementType = 'CommandAst' | 'CommandExpressionAst' | 'ParenExpressionAst'
|
||
|
||
/**
 * Classification of a single command element (an argument or expression).
 * Assigned to every element while walking the AST so that the TypeScript side
 * can derive security flags without issuing additional Find-AstNodes calls in
 * PowerShell.
 */
type CommandElementType =
  | 'ScriptBlock'
  | 'SubExpression'
  | 'ExpandableString'
  | 'MemberInvocation'
  | 'Variable'
  | 'StringConstant'
  | 'Parameter'
  | 'Other'
|
||
|
||
/**
 * Child node of a command element, captured one level deep.
 *
 * Filled in for CommandParameterAst → .Argument, i.e. colon-bound parameters
 * such as `-InputObject:$env:SECRET`. Consumers inspect `type` to classify
 * the bound value (Variable, StringConstant, Other) rather than parsing text.
 */
export type CommandElementChild = {
  /** Classification of the bound value. */
  type: CommandElementType
  /** Source text of the bound value. */
  text: string
}
|
||
|
||
/**
 * Kind of PowerShell statement.
 * Values are names of StatementAst derivatives in
 * System.Management.Automation.Language, plus 'UnknownStatementAst' as the
 * catch-all.
 */
type StatementType =
  // Pipelines and assignments
  | 'PipelineAst'
  | 'PipelineChainAst'
  | 'AssignmentStatementAst'
  // Control flow
  | 'IfStatementAst'
  | 'ForStatementAst'
  | 'ForEachStatementAst'
  | 'WhileStatementAst'
  | 'DoWhileStatementAst'
  | 'DoUntilStatementAst'
  | 'SwitchStatementAst'
  | 'TryStatementAst'
  | 'TrapStatementAst'
  // Definitions and data sections
  | 'FunctionDefinitionAst'
  | 'DataStatementAst'
  // Anything not listed above
  | 'UnknownStatementAst'
|
||
|
||
/**
 * One command invocation inside a pipeline segment.
 */
export type ParsedCommandElement = {
  /** Name of the command or cmdlet, e.g. "Get-ChildItem" or "git". */
  name: string
  /** Classification of the name: cmdlet, application (exe), or unknown. */
  nameType: 'cmdlet' | 'application' | 'unknown'
  /** AST element type reported by PowerShell's parser. */
  elementType: PipelineElementType
  /** Every argument as a string; flags such as "-Recurse" are included. */
  args: string[]
  /** Complete source text of this command element. */
  text: string
  /** AST node type of each element of this command (arguments, expressions, etc.). */
  elementTypes?: CommandElementType[]
  /**
   * Child nodes per argument, index-aligned with `args[]`, so that
   * `children[i]` ↔ `args[i]` ↔ `elementTypes[i+1]`. Only Parameter elements
   * carrying a colon-bound argument have an entry; elements without children
   * are `undefined`. This allows checks such as
   * `children[i].some(c => c.type !== 'StringConstant')` in place of scanning
   * the argument text for `:` followed by `$`.
   */
  children?: Array<CommandElementChild[] | undefined>
  /** Redirections attached to this command element (from nested commands in && / || chains). */
  redirections?: ParsedRedirection[]
}
|
||
|
||
/**
 * A single redirection that appears in the command.
 */
type ParsedRedirection = {
  /** Redirection operator as written. */
  operator: '>' | '>>' | '2>' | '2>>' | '*>' | '*>>' | '2>&1'
  /** Redirection target: a file path or a stream number. */
  target: string
  /** True for merging redirections such as 2>&1. */
  isMerging: boolean
}
|
||
|
||
/**
 * One statement parsed from PowerShell input: a pipeline, an assignment, a
 * control-flow statement, and so on.
 */
type ParsedStatement = {
  /** Statement kind reported by PowerShell's parser. */
  statementType: StatementType
  /** Commands that make up this statement (for pipelines). */
  commands: ParsedCommandElement[]
  /** Redirections attached to this statement. */
  redirections: ParsedRedirection[]
  /** Complete source text of the statement. */
  text: string
  /**
   * Commands found recursively inside the body blocks of control-flow
   * statements (if, for, foreach, while, try, etc.). Collected with FindAll(),
   * so nested CommandAst nodes at ANY depth are included.
   */
  nestedCommands?: ParsedCommandElement[]
  /**
   * Security-relevant AST patterns detected by running FindAll() over the
   * whole statement, whatever its type. Covers cases `elementTypes` can miss,
   * e.g. member invocations inside assignments or subexpressions in
   * non-pipeline statements. Computed inside the PS1 script via instanceof
   * checks against the PowerShell AST type system.
   */
  securityPatterns?: {
    hasMemberInvocations?: boolean
    hasSubExpressions?: boolean
    hasExpandableStrings?: boolean
    hasScriptBlocks?: boolean
  }
}
|
||
|
||
/**
 * A reference to a variable that occurs in the command.
 */
type ParsedVariable = {
  /** Variable path, e.g. "HOME", "env:PATH", "global:x". */
  path: string
  /** True when the reference is splatted (@var rather than $var). */
  isSplatted: boolean
}
|
||
|
||
/**
 * An error reported by PowerShell's parser.
 */
type ParseError = {
  /** Error message text. */
  message: string
  /** Error identifier. */
  errorId: string
}
|
||
|
||
/**
 * Full result produced by the PowerShell AST parser for one command string.
 */
export type ParsedPowerShellCommand = {
  /** True when the command parsed without syntax errors. */
  valid: boolean
  /** Parse errors; empty when there are none. */
  errors: ParseError[]
  /** Top-level statements (separated by `;` or newlines in the input). */
  statements: ParsedStatement[]
  /** Every variable reference found. */
  variables: ParsedVariable[]
  /** True when the token stream contains a stop-parsing (--%) token. */
  hasStopParsing: boolean
  /** The command text that was parsed. */
  originalCommand: string
  /**
   * Every .NET type literal found anywhere in the AST (TypeExpressionAst and
   * TypeConstraintAst). Values are TypeName.FullName, i.e. the text as
   * written and NOT the resolved .NET type ([int] → "int", not
   * "System.Int32"). Consumed by the CLM-allowlist check in
   * powershellSecurity.ts.
   */
  typeLiterals?: string[]
  /**
   * True when the command contains a `using module` or `using assembly`
   * statement. Those load external code (modules/assemblies) and run its
   * top-level script body or module initializers. On ScriptBlockAst the using
   * statements are siblings of the named blocks rather than children, so
   * neither Process-BlockStatements nor any downstream command walker sees
   * them.
   */
  hasUsingStatements?: boolean
  /**
   * True when the command contains `#Requires` directives
   * (ScriptRequirements). `#Requires -Modules <name>` triggers module loading
   * from PSModulePath.
   */
  hasScriptRequirements?: boolean
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
|
||
// Default 5s is fine for interactive use (warm pwsh spawn is ~450ms). Windows
|
||
// CI under Defender/AMSI load can exceed 5s on consecutive spawns even after
|
||
// CAN_SPAWN_PARSE_SCRIPT() warms the JIT (run 23574701241 windows-shard-5:
|
||
// attackVectors F1 hit 2×5s timeout → valid:false → 'ask' instead of 'deny').
|
||
// Override via env for tests. Read inside parsePowerShellCommandImpl, not
|
||
// top-level, per CLAUDE.md (globalSettings.env ordering).
|
||
const DEFAULT_PARSE_TIMEOUT_MS = 5_000

/**
 * Resolve the parse timeout, in milliseconds.
 *
 * Reads CLAUDE_CODE_PWSH_PARSE_TIMEOUT_MS from the environment on every call
 * (never cached at module load). The override is honoured only when the whole
 * value is a positive decimal integer; in every other case
 * DEFAULT_PARSE_TIMEOUT_MS is returned.
 *
 * @returns The timeout in milliseconds (always a positive integer).
 */
function getParseTimeoutMs(): number {
  const raw = process.env.CLAUDE_CODE_PWSH_PARSE_TIMEOUT_MS?.trim()
  // Validate the ENTIRE string. parseInt() alone accepts any numeric prefix,
  // so "30s" would silently become a 30 ms timeout and "1e4" a 1 ms timeout,
  // making every parse time out.
  if (raw && /^\+?\d+$/.test(raw)) {
    const parsed = Number(raw)
    // isSafeInteger also rejects digit strings too large to represent exactly.
    if (Number.isSafeInteger(parsed) && parsed > 0) return parsed
  }
  return DEFAULT_PARSE_TIMEOUT_MS
}
|
||
// MAX_COMMAND_LENGTH is derived from PARSE_SCRIPT_BODY.length below (after the
|
||
// script body is defined) so it cannot go stale as the script grows.
|
||
|
||
// NOTE: the PowerShell parse script is inlined further below as the
// PARSE_SCRIPT_BODY string constant. Inlining avoids reading a file from disk
// at runtime (the file may not exist in bundled builds). The script uses the
// native PowerShell AST parser to analyze a command and outputs structured
// JSON; the Raw* types that follow describe that JSON.
|
||
// Raw types: the shape of the JSON emitted by the PS script (exported for testing).

/** One entry of a CommandAst's CommandElements, as emitted by the PS script. */
export type RawCommandElement = {
  /** `.GetType().Name`, e.g. "StringConstantExpressionAst". */
  type: string
  /** `.Extent.Text`. */
  text: string
  /** `.Value` when available (backtick escapes resolved). */
  value?: string
  /** `.Expression.GetType().Name`, for CommandExpressionAst. */
  expressionType?: string
  /** CommandParameterAst `.Argument`, one level deep. */
  children?: Array<{ type: string; text: string }>
}
|
||
|
||
/** A redirection node as emitted by the PS script. */
export type RawRedirection = {
  /** "FileRedirectionAst" or "MergingRedirectionAst". */
  type: string
  /** `.Append` (FileRedirectionAst only). */
  append?: boolean
  /** `.FromStream.ToString()`, e.g. "Output", "Error", "All". */
  fromStream?: string
  /** `.Location.Extent.Text` (FileRedirectionAst only). */
  locationText?: string
}
|
||
|
||
/** A pipeline element (or nested command) as emitted by the PS script. */
export type RawPipelineElement = {
  /** `.GetType().Name`, e.g. "CommandAst", "CommandExpressionAst". */
  type: string
  /** `.Extent.Text`. */
  text: string
  /** Elements of the command, when the script emitted them. */
  commandElements?: RawCommandElement[]
  /** Redirections attached to this element. */
  redirections?: RawRedirection[]
  /** For CommandExpressionAst: `.Expression.GetType().Name`. */
  expressionType?: string
}
|
||
|
||
/** A statement as emitted by the PS script. */
export type RawStatement = {
  /** `.GetType().Name`, e.g. "PipelineAst", "IfStatementAst", "TrapStatementAst". */
  type: string
  /** `.Extent.Text`. */
  text: string
  /** For PipelineAst: the pipeline elements. */
  elements?: RawPipelineElement[]
  /** Commands found via FindAll (all statement types). */
  nestedCommands?: RawPipelineElement[]
  /** FileRedirectionAst nodes found via FindAll (non-PipelineAst only). */
  redirections?: RawRedirection[]
  /** Security-relevant AST node types found via FindAll on the statement. */
  securityPatterns?: {
    hasMemberInvocations?: boolean
    hasSubExpressions?: boolean
    hasExpandableStrings?: boolean
    hasScriptBlocks?: boolean
  }
}
|
||
|
||
/** Top-level JSON object printed by the PS script. */
type RawParsedOutput = {
  valid: boolean
  errors: Array<{ message: string; errorId: string }>
  statements: RawStatement[]
  variables: Array<{ path: string; isSplatted: boolean }>
  hasStopParsing: boolean
  originalCommand: string
  typeLiterals?: string[]
  hasUsingStatements?: boolean
  hasScriptRequirements?: boolean
}
|
||
|
||
// This is the canonical copy of the parse script. There is no separate .ps1 file.
|
||
/**
|
||
* The core parse logic.
|
||
* The command is passed via Base64-encoded $EncodedCommand variable
|
||
* to avoid here-string injection attacks.
|
||
*
|
||
* SECURITY — top-level ParamBlock: ScriptBlockAst.ParamBlock is a SIBLING of
|
||
* the named blocks (Begin/Process/End/Clean/DynamicParam), not nested inside
|
||
* them, so Process-BlockStatements never reaches it. Commands inside param()
|
||
* default-value expressions and attribute arguments (e.g. [ValidateScript({...})])
|
||
* were invisible to every downstream check. PoC:
|
||
* param($x = (Remove-Item /)); Get-Process → only Get-Process surfaced
|
||
* param([ValidateScript({rm /;$true})]$x='t') → rm invisible, runs on bind
|
||
* Function-level param() IS covered: FindAll on the FunctionDefinitionAst
|
||
* statement recurses into its descendants. The gap was only the script-level
|
||
* ParamBlock. ParamBlockAst has .Parameters (not .Statements) so we FindAll
|
||
* on it directly rather than reusing Process-BlockStatements. We only emit a
|
||
* statement if there is something to report, to avoid noise for plain
|
||
* param($x) declarations. (Kept compact in-script to preserve argv budget.)
|
||
*/
|
||
/**
|
||
* PS1 parse script. Comments live here (not inline) — every char inside the
|
||
* backticks eats into WINDOWS_MAX_COMMAND_LENGTH (argv budget).
|
||
*
|
||
* Structure:
|
||
* - Get-RawCommandElements: extract CommandAst element data (type, text, value,
|
||
* expressionType, children for colon-bound param .Argument)
|
||
* - Get-RawRedirections: extract FileRedirectionAst operator+target
|
||
* - Get-SecurityPatterns: FindAll for security flags (hasSubExpressions via
|
||
* Sub/Array/ParenExpressionAst, hasScriptBlocks, etc.)
|
||
* - Type literals: emit TypeExpressionAst names for CLM allowlist check
|
||
* - --% token: PS7 MinusMinus, PS5.1 Generic kind
|
||
* - CommandExpressionAst.Redirections: inherits from CommandBaseAst —
|
||
* `1 > /tmp/x` statement has FileRedirectionAst that element-iteration misses
|
||
* - Nested commands: FindAll for ALL statement types (if/for/foreach/while/
|
||
* switch/try/function/assignment/PipelineChainAst) — skip direct pipeline
|
||
* elements already in the loop
|
||
*/
|
||
// exported for testing
|
||
export const PARSE_SCRIPT_BODY = `
|
||
if (-not $EncodedCommand) {
|
||
Write-Output '{"valid":false,"errors":[{"message":"No command provided","errorId":"NoInput"}],"statements":[],"variables":[],"hasStopParsing":false,"originalCommand":""}'
|
||
exit 0
|
||
}
|
||
|
||
$Command = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($EncodedCommand))
|
||
|
||
$tokens = $null
|
||
$parseErrors = $null
|
||
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
|
||
$Command,
|
||
[ref]$tokens,
|
||
[ref]$parseErrors
|
||
)
|
||
|
||
$allVariables = [System.Collections.ArrayList]::new()
|
||
|
||
function Get-RawCommandElements {
|
||
param([System.Management.Automation.Language.CommandAst]$CmdAst)
|
||
$elems = [System.Collections.ArrayList]::new()
|
||
foreach ($ce in $CmdAst.CommandElements) {
|
||
$ceData = @{ type = $ce.GetType().Name; text = $ce.Extent.Text }
|
||
if ($ce.PSObject.Properties['Value'] -and $null -ne $ce.Value -and $ce.Value -is [string]) {
|
||
$ceData.value = $ce.Value
|
||
}
|
||
if ($ce -is [System.Management.Automation.Language.CommandExpressionAst]) {
|
||
$ceData.expressionType = $ce.Expression.GetType().Name
|
||
}
|
||
$a=$ce.Argument;if($a){$ceData.children=@(@{type=$a.GetType().Name;text=$a.Extent.Text})}
|
||
[void]$elems.Add($ceData)
|
||
}
|
||
return $elems
|
||
}
|
||
|
||
function Get-RawRedirections {
|
||
param($Redirections)
|
||
$result = [System.Collections.ArrayList]::new()
|
||
foreach ($redir in $Redirections) {
|
||
$redirData = @{ type = $redir.GetType().Name }
|
||
if ($redir -is [System.Management.Automation.Language.FileRedirectionAst]) {
|
||
$redirData.append = [bool]$redir.Append
|
||
$redirData.fromStream = $redir.FromStream.ToString()
|
||
$redirData.locationText = $redir.Location.Extent.Text
|
||
}
|
||
[void]$result.Add($redirData)
|
||
}
|
||
return $result
|
||
}
|
||
|
||
function Get-SecurityPatterns($A) {
|
||
$p = @{}
|
||
foreach ($n in $A.FindAll({ param($x)
|
||
$x -is [System.Management.Automation.Language.MemberExpressionAst] -or
|
||
$x -is [System.Management.Automation.Language.SubExpressionAst] -or
|
||
$x -is [System.Management.Automation.Language.ArrayExpressionAst] -or
|
||
$x -is [System.Management.Automation.Language.ExpandableStringExpressionAst] -or
|
||
$x -is [System.Management.Automation.Language.ScriptBlockExpressionAst] -or
|
||
$x -is [System.Management.Automation.Language.ParenExpressionAst]
|
||
}, $true)) { switch ($n.GetType().Name) {
|
||
'InvokeMemberExpressionAst' { $p.hasMemberInvocations = $true }
|
||
'MemberExpressionAst' { $p.hasMemberInvocations = $true }
|
||
'SubExpressionAst' { $p.hasSubExpressions = $true }
|
||
'ArrayExpressionAst' { $p.hasSubExpressions = $true }
|
||
'ParenExpressionAst' { $p.hasSubExpressions = $true }
|
||
'ExpandableStringExpressionAst' { $p.hasExpandableStrings = $true }
|
||
'ScriptBlockExpressionAst' { $p.hasScriptBlocks = $true }
|
||
}}
|
||
if ($p.Count -gt 0) { return $p }
|
||
return $null
|
||
}
|
||
|
||
$varExprs = $ast.FindAll({ param($node) $node -is [System.Management.Automation.Language.VariableExpressionAst] }, $true)
|
||
foreach ($v in $varExprs) {
|
||
[void]$allVariables.Add(@{
|
||
path = $v.VariablePath.ToString()
|
||
isSplatted = [bool]$v.Splatted
|
||
})
|
||
}
|
||
|
||
$typeLiterals = [System.Collections.ArrayList]::new()
|
||
foreach ($t in $ast.FindAll({ param($n)
|
||
$n -is [System.Management.Automation.Language.TypeExpressionAst] -or
|
||
$n -is [System.Management.Automation.Language.TypeConstraintAst]
|
||
}, $true)) { [void]$typeLiterals.Add($t.TypeName.FullName) }
|
||
|
||
$hasStopParsing = $false
|
||
$tk = [System.Management.Automation.Language.TokenKind]
|
||
foreach ($tok in $tokens) {
|
||
if ($tok.Kind -eq $tk::MinusMinus) { $hasStopParsing = $true; break }
|
||
if ($tok.Kind -eq $tk::Generic -and ($tok.Text -replace '[\u2013\u2014\u2015]','-') -eq '--%') {
|
||
$hasStopParsing = $true; break
|
||
}
|
||
}
|
||
|
||
$statements = [System.Collections.ArrayList]::new()
|
||
|
||
function Process-BlockStatements {
|
||
param($Block)
|
||
if (-not $Block) { return }
|
||
|
||
foreach ($stmt in $Block.Statements) {
|
||
$statement = @{
|
||
type = $stmt.GetType().Name
|
||
text = $stmt.Extent.Text
|
||
}
|
||
|
||
if ($stmt -is [System.Management.Automation.Language.PipelineAst]) {
|
||
$elements = [System.Collections.ArrayList]::new()
|
||
foreach ($element in $stmt.PipelineElements) {
|
||
$elemData = @{
|
||
type = $element.GetType().Name
|
||
text = $element.Extent.Text
|
||
}
|
||
|
||
if ($element -is [System.Management.Automation.Language.CommandAst]) {
|
||
$elemData.commandElements = @(Get-RawCommandElements -CmdAst $element)
|
||
$elemData.redirections = @(Get-RawRedirections -Redirections $element.Redirections)
|
||
} elseif ($element -is [System.Management.Automation.Language.CommandExpressionAst]) {
|
||
$elemData.expressionType = $element.Expression.GetType().Name
|
||
$elemData.redirections = @(Get-RawRedirections -Redirections $element.Redirections)
|
||
}
|
||
|
||
[void]$elements.Add($elemData)
|
||
}
|
||
$statement.elements = @($elements)
|
||
|
||
$allNestedCmds = $stmt.FindAll(
|
||
{ param($node) $node -is [System.Management.Automation.Language.CommandAst] },
|
||
$true
|
||
)
|
||
$nestedCmds = [System.Collections.ArrayList]::new()
|
||
foreach ($cmd in $allNestedCmds) {
|
||
if ($cmd.Parent -eq $stmt) { continue }
|
||
$nested = @{
|
||
type = $cmd.GetType().Name
|
||
text = $cmd.Extent.Text
|
||
commandElements = @(Get-RawCommandElements -CmdAst $cmd)
|
||
redirections = @(Get-RawRedirections -Redirections $cmd.Redirections)
|
||
}
|
||
[void]$nestedCmds.Add($nested)
|
||
}
|
||
if ($nestedCmds.Count -gt 0) {
|
||
$statement.nestedCommands = @($nestedCmds)
|
||
}
|
||
$r = $stmt.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
|
||
if ($r.Count -gt 0) {
|
||
$rr = @(Get-RawRedirections -Redirections $r)
|
||
$statement.redirections = if ($statement.redirections) { @($statement.redirections) + $rr } else { $rr }
|
||
}
|
||
} else {
|
||
$nestedCmdAsts = $stmt.FindAll(
|
||
{ param($node) $node -is [System.Management.Automation.Language.CommandAst] },
|
||
$true
|
||
)
|
||
$nested = [System.Collections.ArrayList]::new()
|
||
foreach ($cmd in $nestedCmdAsts) {
|
||
[void]$nested.Add(@{
|
||
type = 'CommandAst'
|
||
text = $cmd.Extent.Text
|
||
commandElements = @(Get-RawCommandElements -CmdAst $cmd)
|
||
redirections = @(Get-RawRedirections -Redirections $cmd.Redirections)
|
||
})
|
||
}
|
||
if ($nested.Count -gt 0) {
|
||
$statement.nestedCommands = @($nested)
|
||
}
|
||
$r = $stmt.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
|
||
if ($r.Count -gt 0) { $statement.redirections = @(Get-RawRedirections -Redirections $r) }
|
||
}
|
||
|
||
$sp = Get-SecurityPatterns $stmt
|
||
if ($sp) { $statement.securityPatterns = $sp }
|
||
|
||
[void]$statements.Add($statement)
|
||
}
|
||
|
||
if ($Block.Traps) {
|
||
foreach ($trap in $Block.Traps) {
|
||
$statement = @{
|
||
type = 'TrapStatementAst'
|
||
text = $trap.Extent.Text
|
||
}
|
||
$nestedCmdAsts = $trap.FindAll(
|
||
{ param($node) $node -is [System.Management.Automation.Language.CommandAst] },
|
||
$true
|
||
)
|
||
$nestedCmds = [System.Collections.ArrayList]::new()
|
||
foreach ($cmd in $nestedCmdAsts) {
|
||
$nested = @{
|
||
type = $cmd.GetType().Name
|
||
text = $cmd.Extent.Text
|
||
commandElements = @(Get-RawCommandElements -CmdAst $cmd)
|
||
redirections = @(Get-RawRedirections -Redirections $cmd.Redirections)
|
||
}
|
||
[void]$nestedCmds.Add($nested)
|
||
}
|
||
if ($nestedCmds.Count -gt 0) {
|
||
$statement.nestedCommands = @($nestedCmds)
|
||
}
|
||
$r = $trap.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
|
||
if ($r.Count -gt 0) { $statement.redirections = @(Get-RawRedirections -Redirections $r) }
|
||
$sp = Get-SecurityPatterns $trap
|
||
if ($sp) { $statement.securityPatterns = $sp }
|
||
[void]$statements.Add($statement)
|
||
}
|
||
}
|
||
}
|
||
|
||
Process-BlockStatements -Block $ast.BeginBlock
|
||
Process-BlockStatements -Block $ast.ProcessBlock
|
||
Process-BlockStatements -Block $ast.EndBlock
|
||
Process-BlockStatements -Block $ast.CleanBlock
|
||
Process-BlockStatements -Block $ast.DynamicParamBlock
|
||
|
||
if ($ast.ParamBlock) {
|
||
$pb = $ast.ParamBlock
|
||
$pn = [System.Collections.ArrayList]::new()
|
||
foreach ($c in $pb.FindAll({param($n) $n -is [System.Management.Automation.Language.CommandAst]}, $true)) {
|
||
[void]$pn.Add(@{type='CommandAst';text=$c.Extent.Text;commandElements=@(Get-RawCommandElements -CmdAst $c);redirections=@(Get-RawRedirections -Redirections $c.Redirections)})
|
||
}
|
||
$pr = $pb.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
|
||
$ps = Get-SecurityPatterns $pb
|
||
if ($pn.Count -gt 0 -or $pr.Count -gt 0 -or $ps) {
|
||
$st = @{type='ParamBlockAst';text=$pb.Extent.Text}
|
||
if ($pn.Count -gt 0) { $st.nestedCommands = @($pn) }
|
||
if ($pr.Count -gt 0) { $st.redirections = @(Get-RawRedirections -Redirections $pr) }
|
||
if ($ps) { $st.securityPatterns = $ps }
|
||
[void]$statements.Add($st)
|
||
}
|
||
}
|
||
|
||
$hasUsingStatements = $ast.UsingStatements -and $ast.UsingStatements.Count -gt 0
|
||
$hasScriptRequirements = $ast.ScriptRequirements -ne $null
|
||
|
||
$output = @{
|
||
valid = ($parseErrors.Count -eq 0)
|
||
errors = @($parseErrors | ForEach-Object {
|
||
@{
|
||
message = $_.Message
|
||
errorId = $_.ErrorId
|
||
}
|
||
})
|
||
statements = @($statements)
|
||
variables = @($allVariables)
|
||
hasStopParsing = $hasStopParsing
|
||
originalCommand = $Command
|
||
typeLiterals = @($typeLiterals)
|
||
hasUsingStatements = [bool]$hasUsingStatements
|
||
hasScriptRequirements = [bool]$hasScriptRequirements
|
||
}
|
||
|
||
$output | ConvertTo-Json -Depth 10 -Compress
|
||
`
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Windows CreateProcess has a 32,767 char command-line limit. The encoding
|
||
// chain is:
|
||
// command (N UTF-8 bytes) → Base64 (~4N/3 chars) → $EncodedCommand = '...'\n
|
||
// → full script (wrapper + PARSE_SCRIPT_BODY) → UTF-16LE (2× bytes)
|
||
// → Base64 (4/3× chars) → -EncodedCommand argv
|
||
// Final cmdline ≈ argv_overhead + (wrapper + 4N/3 + body) × 8/3
|
||
//
|
||
// Solving for N (UTF-8 bytes) with a 32,767 cap:
|
||
// script_budget = (32767 - argv_overhead) × 3/8
|
||
// cmd_b64_budget = script_budget - PARSE_SCRIPT_BODY.length - wrapper
|
||
// N = cmd_b64_budget × 3/4 - safety_margin
|
||
//
|
||
// SECURITY: N is a UTF-8 BYTE budget, not a UTF-16 code-unit budget. The
|
||
// length gate MUST measure Buffer.byteLength(command, 'utf8'), not
|
||
// command.length. A BMP character in U+0800–U+FFFF (CJK ideographs, most
|
||
// non-Latin scripts) is 1 UTF-16 code unit but 3 UTF-8 bytes. With
|
||
// PARSE_SCRIPT_BODY ≈ 10.6K, N ≈ 1,092 bytes. Comparing against .length
|
||
// permits a 1,092-code-unit pure-CJK command (≈3,276 UTF-8 bytes) → inner
|
||
// base64 ≈ 4,368 chars → final argv ≈ 40K chars, overflowing 32,767 by
|
||
// ~7.4K. CreateProcess fails → valid:false → parse-fail degradation (deny
|
||
// rules silently downgrade to ask). Finding #36.
|
||
//
|
||
// COMPUTED from PARSE_SCRIPT_BODY.length so it cannot drift. The prior
|
||
// hardcoded value (4,500) was derived from a ~6K body estimate; the body is
|
||
// actually ~11K chars, so the real ceiling was ~1,850. Commands in the
|
||
// 1,850–4,500 range passed this gate but then failed CreateProcess on
|
||
// Windows, returning valid=false and skipping all AST-based security checks.
|
||
//
|
||
// Unix argv limits are typically 2MB+ (ARG_MAX) with ~128KB per-argument
|
||
// limit (MAX_ARG_STRLEN on Linux; macOS has no per-arg limit below ARG_MAX).
|
||
// At MAX=4,500 the -EncodedCommand argument is ~45KB — well under either.
|
||
// Applying the Windows-derived limit on Unix would REGRESS: commands in the
|
||
// ~1K–4.5K range previously parsed successfully and reached the sub-command
|
||
// deny loop at powershellPermissions.ts; rejecting them pre-spawn degrades
|
||
// user-configured deny rules from deny→ask for compound commands with a
|
||
// denied cmdlet buried mid-script. So the Windows limit is platform-gated.
|
||
//
|
||
// If the Windows limit becomes too restrictive, switch to -File with a temp
|
||
// file for large inputs.
|
||
// ---------------------------------------------------------------------------
|
||
// Windows CreateProcess command-line limit, in characters.
const WINDOWS_ARGV_CAP = 32_767

// Everything on the command line that is not the encoded script: the pwsh
// path, " -NoProfile -NonInteractive -NoLogo -EncodedCommand ", and argv
// quoting. A long Windows pwsh path (C:\Program Files\PowerShell\7\pwsh.exe)
// plus the flags comes to ~95 chars; 200 leaves headroom for unusual installs.
const FIXED_ARGV_OVERHEAD = 200

// Length of the "$EncodedCommand = '" + "'\n" wrapper placed around the user
// command's base64.
const ENCODED_CMD_WRAPPER = `$EncodedCommand = ''\n`.length

// Slack for base64 padding rounding (≤4 chars at each of the 2 encoding
// levels) and minor estimation drift. Multibyte expansion is NOT covered by
// this margin: the gate measures real UTF-8 bytes (Buffer.byteLength), not
// code units.
const SAFETY_MARGIN = 100

// Characters available for the whole script (wrapper + command base64 + body)
// once the UTF-16LE + base64 expansion factor of 8/3 is accounted for.
const SCRIPT_CHARS_BUDGET = ((WINDOWS_ARGV_CAP - FIXED_ARGV_OVERHEAD) * 3) / 8

// Characters left for the base64 of the user command.
const CMD_B64_BUDGET =
  SCRIPT_CHARS_BUDGET - PARSE_SCRIPT_BODY.length - ENCODED_CMD_WRAPPER

// Exported for drift-guard tests (the Windows value is the drift-prone one).
// Unit: UTF-8 BYTES. Compare against Buffer.byteLength, not .length.
export const WINDOWS_MAX_COMMAND_LENGTH = Math.max(
  0,
  Math.floor((CMD_B64_BUDGET * 3) / 4) - SAFETY_MARGIN,
)

// Pre-existing value, known to work on Unix; the comment above explains why
// the Windows derivation must NOT be applied here. Unit: UTF-8 BYTES. For
// ASCII commands (the common case) bytes == chars, so nothing regresses; for
// multibyte commands this is slightly tighter but still far below Unix
// ARG_MAX (~128KB per-arg), so the argv spawn cannot overflow.
const UNIX_MAX_COMMAND_LENGTH = 4_500

// Platform-gated limit. Unit: UTF-8 BYTES (see the SECURITY note above).
export const MAX_COMMAND_LENGTH =
  process.platform === 'win32'
    ? WINDOWS_MAX_COMMAND_LENGTH
    : UNIX_MAX_COMMAND_LENGTH
|
||
|
||
/**
 * Field values shared by every "invalid" parse result. `errors` and
 * `originalCommand` are supplied per result by makeInvalidResult.
 */
const INVALID_RESULT_BASE: Omit<
  ParsedPowerShellCommand,
  'errors' | 'originalCommand'
> = {
  valid: false,
  statements: [],
  variables: [],
  hasStopParsing: false,
}

/**
 * Build a ParsedPowerShellCommand describing a failed parse.
 *
 * @param command - The command text that could not be parsed.
 * @param message - Human-readable error message.
 * @param errorId - Identifier for the error.
 * @returns A result with `valid: false`, a single error entry, and empty
 *   `statements` / `variables`.
 */
function makeInvalidResult(
  command: string,
  message: string,
  errorId: string,
): ParsedPowerShellCommand {
  return {
    ...INVALID_RESULT_BASE,
    // Spreading the base copies array REFERENCES, so every invalid result
    // would share the same `statements` / `variables` instances and a push on
    // one result would show up in all later ones. Give each result its own.
    statements: [],
    variables: [],
    errors: [{ message, errorId }],
    originalCommand: command,
  }
}
|
||
|
||
/**
 * Encode `text` as UTF-16LE and return the Base64 of those bytes — the form
 * PowerShell's -EncodedCommand parameter requires.
 *
 * @param text - The string to encode.
 * @returns Base64 of the UTF-16LE bytes of `text`.
 */
function toUtf16LeBase64(text: string): string {
  if (typeof Buffer === 'undefined') {
    // No Buffer (non-Node environment): emit each UTF-16 code unit as two
    // "binary string" characters, low byte first, then Base64 that string.
    let binary = ''
    for (let idx = 0; idx < text.length; idx += 1) {
      const unit = text.charCodeAt(idx)
      binary += String.fromCharCode(unit & 0xff, unit >>> 8)
    }
    return btoa(binary)
  }
  return Buffer.from(text, 'utf16le').toString('base64')
}
|
||
|
||
/**
 * Assemble the complete PowerShell script used to parse `command`.
 *
 * The user command never appears in the script as source text: it is
 * Base64-encoded (from its UTF-8 bytes) and assigned to `$EncodedCommand`,
 * which prevents injection. PARSE_SCRIPT_BODY follows the assignment.
 *
 * @param command - The user command to be parsed.
 * @returns The script text: the `$EncodedCommand` assignment plus the body.
 */
function buildParseScript(command: string): string {
  let encoded: string
  if (typeof Buffer === 'undefined') {
    // Non-Node fallback: UTF-8 bytes → binary string → btoa.
    let binary = ''
    for (const byte of new TextEncoder().encode(command)) {
      binary += String.fromCharCode(byte)
    }
    encoded = btoa(binary)
  } else {
    encoded = Buffer.from(command, 'utf8').toString('base64')
  }
  return `$EncodedCommand = '${encoded}'\n${PARSE_SCRIPT_BODY}`
}
|
||
|
||
/**
 * Normalise a value that may or may not be an array into an array.
 * Needed because PowerShell 5.1's ConvertTo-Json may unwrap single-element
 * arrays into plain objects.
 *
 * @param value - An array, a single item, or null/undefined.
 * @returns `value` itself when it is already an array, `[]` for
 *   null/undefined, otherwise a one-element array holding `value`.
 */
function ensureArray<T>(value: T | T[] | undefined | null): T[] {
  if (Array.isArray(value)) return value
  // `== null` matches exactly null and undefined (0, '' and false are kept).
  return value == null ? [] : [value]
}
|
||
|
||
/** Raw .NET AST type names that have a same-named StatementType member. */
const RECOGNISED_STATEMENT_TYPES: readonly StatementType[] = [
  'PipelineAst',
  'PipelineChainAst',
  'AssignmentStatementAst',
  'IfStatementAst',
  'ForStatementAst',
  'ForEachStatementAst',
  'WhileStatementAst',
  'DoWhileStatementAst',
  'DoUntilStatementAst',
  'SwitchStatementAst',
  'TryStatementAst',
  'TrapStatementAst',
  'FunctionDefinitionAst',
  'DataStatementAst',
]

/**
 * Convert a raw .NET AST type name into our StatementType union.
 * Exported for testing.
 *
 * @param rawType - Type name emitted by the parse script.
 * @returns The matching StatementType, or 'UnknownStatementAst' when the
 *   name is not recognised.
 */
export function mapStatementType(rawType: string): StatementType {
  const match = RECOGNISED_STATEMENT_TYPES.find(known => known === rawType)
  return match ?? 'UnknownStatementAst'
}
|
||
|
||
/** Direct raw-AST-name → CommandElementType mappings (no delegation). */
const ELEMENT_TYPE_BY_AST_NAME: ReadonlyMap<string, CommandElementType> =
  new Map<string, CommandElementType>([
    ['ScriptBlockExpressionAst', 'ScriptBlock'],
    ['SubExpressionAst', 'SubExpression'],
    // SECURITY: ArrayExpressionAst (@()) is a sibling of SubExpressionAst, not
    // a subclass. Both evaluate arbitrary pipelines with side effects:
    // Get-ChildItem @(Remove-Item ./data) runs Remove-Item inside @(). Mapping
    // both to SubExpression makes hasSubExpressions fire so isReadOnlyCommand
    // rejects (it only checks pipeline.commands[], not nestedCommands).
    ['ArrayExpressionAst', 'SubExpression'],
    ['ParenExpressionAst', 'SubExpression'],
    ['ExpandableStringExpressionAst', 'ExpandableString'],
    ['InvokeMemberExpressionAst', 'MemberInvocation'],
    ['MemberExpressionAst', 'MemberInvocation'],
    ['VariableExpressionAst', 'Variable'],
    ['StringConstantExpressionAst', 'StringConstant'],
    // ConstantExpressionAst covers numeric literals (5, 3.14). For permission
    // purposes a numeric literal is an inert value, just like a string
    // literal. Without this entry `-Seconds:5` yields children[0].type='Other'
    // and consumers checking `children.some(c => c.type !== 'StringConstant')`
    // would false-positive ask on harmless numeric args.
    ['ConstantExpressionAst', 'StringConstant'],
    ['CommandParameterAst', 'Parameter'],
  ])

/**
 * Convert a raw .NET AST type name into our CommandElementType union.
 * Exported for testing.
 *
 * @param rawType - Type name of the element.
 * @param expressionType - For CommandExpressionAst, the type name of the
 *   wrapped expression.
 * @returns The matching CommandElementType, or 'Other' when unrecognised.
 */
export function mapElementType(
  rawType: string,
  expressionType?: string,
): CommandElementType {
  if (rawType === 'CommandExpressionAst') {
    // Classify by the wrapped expression so SubExpressionAst,
    // ExpandableStringExpressionAst, ScriptBlockExpressionAst, etc. are caught
    // without a second hand-maintained list. The recursive call passes no
    // expressionType, so an unrecognised (or itself wrapping) inner type ends
    // up as 'Other'.
    return expressionType ? mapElementType(expressionType) : 'Other'
  }
  return ELEMENT_TYPE_BY_AST_NAME.get(rawType) ?? 'Other'
}
|
||
|
||
/**
 * Classify a command name as cmdlet, application, or unknown.
 *
 * - 'cmdlet': the whole name has the ASCII `Verb-Noun` shape (letters, a
 *   hyphen, then a letter followed by letters/digits/underscores).
 * - 'application': otherwise, the name contains `.`, `\` or `/`.
 * - 'unknown': anything else (bare words such as aliases or native commands).
 */
// exported for testing
export function classifyCommandName(
  name: string,
): 'cmdlet' | 'application' | 'unknown' {
  if (/^[A-Za-z]+-[A-Za-z][A-Za-z0-9_]*$/.test(name)) {
    return 'cmdlet'
  }
  if (/[.\\/]/.test(name)) {
    return 'application'
  }
  return 'unknown'
}
|
||
|
||
/**
 * Strip a module prefix from a command name
 * (e.g. "Microsoft.PowerShell.Utility\\Invoke-Expression" -> "Invoke-Expression").
 *
 * Names without a backslash are returned unchanged, as are names that look
 * like file paths: drive letters (C:\...), UNC paths (\\server\...), and
 * relative paths (.\, ..\).
 */
// exported for testing
export function stripModulePrefix(name: string): string {
  const idx = name.lastIndexOf('\\')
  if (idx < 0) {
    return name
  }
  // Don't strip file paths — everything before the last backslash is a
  // directory there, not a module qualifier.
  if (
    /^[A-Za-z]:/.test(name) ||
    name.startsWith('\\\\') ||
    name.startsWith('.\\') ||
    name.startsWith('..\\')
  ) {
    return name
  }
  return name.substring(idx + 1)
}
|
||
|
||
/**
 * Transform a raw CommandAst pipeline element into a ParsedCommandElement.
 *
 * The first command element supplies the command name; every later element
 * becomes one entry in `args`. `elementTypes` has one entry per command
 * element (index 0 is the name). `children`, when present, has one entry per
 * argument, so `children[i]` belongs to `args[i]`.
 */
// exported for testing
export function transformCommandAst(
  raw: RawPipelineElement,
): ParsedCommandElement {
  const cmdElements = ensureArray(raw.commandElements)
  let name = ''
  const args: string[] = []
  const elementTypes: CommandElementType[] = []
  const children: (CommandElementChild[] | undefined)[] = []
  let hasChildren = false

  // SECURITY: only trust .value for string-literal element types with a
  // string-typed value. Numeric ConstantExpressionAst (e.g. `& 1`) emits an
  // integer .value that crashes stripModulePrefix() → parser falls through
  // to passthrough. For non-string-literal or non-string .value, use .text.
  //
  // The same guard is applied to arguments: .value resolves string constants
  // (strips quotes, resolves backtick escapes like `n -> newline), while raw
  // .text is kept for parameters (where .value loses the dash prefix, e.g.
  // '-Path' -> 'Path'), variables, and other non-string types. Requiring a
  // string-typed .value keeps `args` a true string[] for downstream
  // `.toLowerCase()` callers.
  const literalValueOrText = (el: (typeof cmdElements)[number]): string =>
    (el.type === 'StringConstantExpressionAst' ||
      el.type === 'ExpandableStringExpressionAst') &&
    typeof el.value === 'string'
      ? el.value
      : el.text

  // SECURITY: nameType MUST be computed from the raw name (before
  // stripModulePrefix). classifyCommandName('scripts\\Get-Process') returns
  // 'application' (contains \\) — the correct answer, since PowerShell resolves
  // this as a file path. After stripping it becomes 'Get-Process' which
  // classifies as 'cmdlet' — wrong, and allowlist checks would trust it.
  // Auto-allow paths gate on nameType !== 'application' to catch this.
  // name (stripped) is still used for deny-rule matching symmetry, which is
  // fail-safe: deny rules over-match (Module\\Remove-Item still hits a
  // Remove-Item deny), allow rules are separately gated by nameType.
  let nameType: 'cmdlet' | 'application' | 'unknown' = 'unknown'
  if (cmdElements.length > 0) {
    const first = cmdElements[0]!
    const rawNameUnstripped = literalValueOrText(first)
    // SECURITY: strip surrounding quotes from the command name. When .value is
    // unavailable (no StaticType on the raw node), .text preserves quotes —
    // `& 'Invoke-Expression' 'x'` yields "'Invoke-Expression'". Stripping here
    // at the source means every downstream reader of element.name (deny-rule
    // matching, GIT_SAFETY_WRITE_CMDLETS lookup, resolveToCanonical, etc.)
    // sees the bare cmdlet name. No-op when .value already stripped.
    const rawName = rawNameUnstripped.replace(/^['"]|['"]$/g, '')
    // SECURITY: PowerShell built-in cmdlet names are ASCII-only. Non-ASCII
    // characters in cmdlet position are inherently suspicious — .NET
    // OrdinalIgnoreCase folds U+017F (ſ) → S and U+0131 (ı) → I per
    // UnicodeData.txt SimpleUppercaseMapping, so PowerShell resolves
    // `ſtart-proceſſ` → Start-Process at runtime. JS .toLowerCase() does NOT
    // fold these (ſ is already lowercase), so every downstream name
    // comparison (NEVER_SUGGEST, deny-rule strEquals, resolveToCanonical,
    // security validators) misses. Force 'application' to gate auto-allow
    // (blocks at the nameType !== 'application' checks). Finding #31.
    // Verified on Windows (pwsh 7.x, 2026-03): ſtart-proceſſ does NOT resolve.
    // Retained as defense-in-depth against future .NET/PS behavior changes
    // or module-provided command resolution hooks.
    if (/[\u0080-\uFFFF]/.test(rawName)) {
      nameType = 'application'
    } else {
      nameType = classifyCommandName(rawName)
    }
    name = stripModulePrefix(rawName)
    elementTypes.push(mapElementType(first.type, first.expressionType))

    for (let i = 1; i < cmdElements.length; i++) {
      const ce = cmdElements[i]!
      args.push(literalValueOrText(ce))
      elementTypes.push(mapElementType(ce.type, ce.expressionType))
      // Map raw children (CommandParameterAst.Argument) through
      // mapElementType so consumers see 'Variable', 'StringConstant', etc.
      // An `undefined` placeholder keeps `children` index-aligned with `args`.
      const rawChildren = ensureArray(ce.children)
      if (rawChildren.length > 0) {
        hasChildren = true
        children.push(
          rawChildren.map(c => ({
            type: mapElementType(c.type),
            text: c.text,
          })),
        )
      } else {
        children.push(undefined)
      }
    }
  }

  const result: ParsedCommandElement = {
    name,
    nameType,
    elementType: 'CommandAst',
    args,
    text: raw.text,
    elementTypes,
    // Only attach `children` when at least one argument actually has some.
    ...(hasChildren ? { children } : {}),
  }

  // Preserve redirections from nested commands (e.g., in && / || chains)
  const rawRedirs = ensureArray(raw.redirections)
  if (rawRedirs.length > 0) {
    result.redirections = rawRedirs.map(transformRedirection)
  }

  return result
}
|
||
|
||
/**
 * Transform a non-CommandAst pipeline element into a ParsedCommandElement.
 *
 * The element's full text is used as both `name` and `text`, `args` is empty,
 * and `nameType` is always 'unknown'. `elementType` is 'ParenExpressionAst'
 * when the raw type says so and 'CommandExpressionAst' for everything else.
 */
// exported for testing
export function transformExpressionElement(
  raw: RawPipelineElement,
): ParsedCommandElement {
  const elementType: PipelineElementType =
    raw.type === 'ParenExpressionAst'
      ? 'ParenExpressionAst'
      : 'CommandExpressionAst'
  // Single-entry list: the classification of the expression itself.
  const elementTypes: CommandElementType[] = [
    mapElementType(raw.type, raw.expressionType),
  ]

  return {
    name: raw.text,
    nameType: 'unknown',
    elementType,
    args: [],
    text: raw.text,
    elementTypes,
  }
}
|
||
|
||
/**
 * Map a raw redirection to a ParsedRedirection.
 *
 * - `MergingRedirectionAst` → `{ operator: '2>&1', target: '', isMerging: true }`.
 * - Anything else is treated as a file redirection: the operator is built
 *   from `raw.fromStream` ('Error' → `2`, 'All' → `*`, otherwise the plain
 *   output stream) and `raw.append` (`>>` vs `>`); the target is
 *   `raw.locationText`, or '' when absent.
 */
// exported for testing
export function transformRedirection(raw: RawRedirection): ParsedRedirection {
  if (raw.type === 'MergingRedirectionAst') {
    // NOTE(review): every merging redirection is reported as '2>&1' without
    // consulting raw.fromStream — confirm consumers rely only on `isMerging`.
    return { operator: '2>&1', target: '', isMerging: true }
  }

  const append = raw.append ?? false
  const fromStream = raw.fromStream ?? 'Output'

  let operator: ParsedRedirection['operator']
  if (append) {
    switch (fromStream) {
      case 'Error':
        operator = '2>>'
        break
      case 'All':
        operator = '*>>'
        break
      default:
        operator = '>>'
        break
    }
  } else {
    switch (fromStream) {
      case 'Error':
        operator = '2>'
        break
      case 'All':
        operator = '*>'
        break
      default:
        operator = '>'
        break
    }
  }

  return { operator, target: raw.locationText ?? '', isMerging: false }
}
|
||
|
||
/**
 * Transform a raw statement into a ParsedStatement.
 *
 * Statements with `raw.elements` (pipelines) yield one command per pipeline
 * element. Any other statement yields a single synthetic command carrying the
 * full statement text. In both cases redirections are hoisted to statement
 * level, and `raw.nestedCommands` (if any) are transformed separately.
 */
// exported for testing
export function transformStatement(raw: RawStatement): ParsedStatement {
  const statementType = mapStatementType(raw.type)
  const commands: ParsedCommandElement[] = []
  const redirections: ParsedRedirection[] = []

  if (raw.elements) {
    // PipelineAst: walk pipeline elements
    for (const elem of ensureArray(raw.elements)) {
      commands.push(
        elem.type === 'CommandAst'
          ? transformCommandAst(elem)
          : transformExpressionElement(elem),
      )
      // SECURITY: redirections are collected for EVERY element kind.
      // CommandExpressionAst also carries .Redirections (inherited from
      // CommandBaseAst). `1 > /tmp/evil.txt` is a CommandExpressionAst
      // with a FileRedirectionAst. Must extract here or getFileRedirections()
      // misses it and compound commands like `Get-ChildItem; 1 > /tmp/x`
      // auto-allow at step 5 (only Get-ChildItem is checked).
      for (const redir of ensureArray(elem.redirections)) {
        redirections.push(transformRedirection(redir))
      }
    }
    // SECURITY: The PS1 PipelineAst branch does a deep FindAll for
    // FileRedirectionAst to catch redirections hidden inside:
    //   - colon-bound ParenExpressionAst args: -Name:('payload' > file)
    //   - hashtable value statements: @{k='payload' > ~/.bashrc}
    // Both are invisible at the element level — the redirection's parent
    // is a child of CommandParameterAst / CommandExpressionAst, not a
    // separate pipeline element. Merge into statement-level redirections.
    //
    // The FindAll ALSO re-discovers direct-element redirections already
    // captured in the per-element loop above. Dedupe by (operator, target)
    // so tests and consumers see the real count.
    const seen = new Set(redirections.map(r => `${r.operator}\0${r.target}`))
    for (const redir of ensureArray(raw.redirections)) {
      const r = transformRedirection(redir)
      const key = `${r.operator}\0${r.target}`
      if (!seen.has(key)) {
        seen.add(key)
        redirections.push(r)
      }
    }
  } else {
    // Non-pipeline statement: add synthetic command entry with full text
    commands.push({
      name: raw.text,
      nameType: 'unknown',
      elementType: 'CommandExpressionAst',
      args: [],
      text: raw.text,
    })
    // SECURITY: The PS1 else-branch does a direct recursive FindAll on
    // FileRedirectionAst to catch expression redirections inside control flow
    // (if/for/foreach/while/switch/try/trap/&& and ||). The CommandAst FindAll
    // that feeds nestedCommands CANNOT see these: in
    // if ($x) { 1 > /tmp/evil }, the literal 1 with its attached redirection
    // is a CommandExpressionAst — a SIBLING of CommandAst in the type
    // hierarchy, not a subclass. So nestedCommands never contains it, and
    // without this hoist the redirection is invisible to
    // getFileRedirections → step 4.6 misses it → compound commands like
    // `Get-Process && 1 > /tmp/evil` auto-allow at step 5 (only Get-Process
    // is checked, allowlisted).
    //
    // Finding FileRedirectionAst DIRECTLY (rather than finding
    // CommandExpressionAst and extracting .Redirections) is both simpler and
    // more robust: it catches redirections on any node type, including ones
    // we don't know about yet.
    //
    // Double-counts redirections already on nested CommandAst commands (those
    // are also extracted into nestedCommands[i].redirections AND found again
    // here). Harmless: step 4.6 only checks fileRedirections.length > 0, not
    // the exact count. No code does arithmetic on redirection counts.
    //
    // PS1 SIZE NOTE: The full rationale lives here (TS), not in the PS1 script,
    // because PS1 comments bloat the -EncodedCommand payload and push the
    // Windows CreateProcess 32K limit. Keep PS1 comments terse; point them here.
    for (const redir of ensureArray(raw.redirections)) {
      redirections.push(transformRedirection(redir))
    }
  }

  // Left undefined (not an empty array) when there are no nested commands.
  let nestedCommands: ParsedCommandElement[] | undefined
  const rawNested = ensureArray(raw.nestedCommands)
  if (rawNested.length > 0) {
    nestedCommands = rawNested.map(transformCommandAst)
  }

  const result: ParsedStatement = {
    statementType,
    commands,
    redirections,
    text: raw.text,
    nestedCommands,
  }

  if (raw.securityPatterns) {
    result.securityPatterns = raw.securityPatterns
  }

  return result
}
|
||
|
||
/**
 * Transform the complete raw PS output into a ParsedPowerShellCommand.
 *
 * List-valued fields are passed through ensureArray before use. The optional
 * fields `typeLiterals`, `hasUsingStatements` and `hasScriptRequirements` are
 * only set on the result when non-empty / truthy in the raw output.
 */
function transformRawOutput(raw: RawParsedOutput): ParsedPowerShellCommand {
  const result: ParsedPowerShellCommand = {
    valid: raw.valid,
    errors: ensureArray(raw.errors),
    statements: ensureArray(raw.statements).map(transformStatement),
    variables: ensureArray(raw.variables),
    hasStopParsing: raw.hasStopParsing,
    originalCommand: raw.originalCommand,
  }
  const tl = ensureArray(raw.typeLiterals)
  if (tl.length > 0) {
    result.typeLiterals = tl
  }
  if (raw.hasUsingStatements) {
    result.hasUsingStatements = true
  }
  if (raw.hasScriptRequirements) {
    result.hasScriptRequirements = true
  }
  return result
}
|
||
|
||
/**
 * Parse a PowerShell command using the native AST parser.
 * Spawns pwsh to parse the command and returns structured results.
 * This is the uncached implementation; callers go through the memoized
 * wrapper defined below.
 *
 * Failures are reported as an invalid result (via makeInvalidResult) with one
 * of these error IDs: CommandTooLong, NoPowerShell, PwshSpawnError,
 * PwshTimeout, PwshError, EmptyOutput, InvalidJson.
 *
 * @param command - The PowerShell command to parse
 * @returns Parsed command structure, or a result with valid=false on failure
 */
async function parsePowerShellCommandImpl(
  command: string,
): Promise<ParsedPowerShellCommand> {
  // SECURITY: MAX_COMMAND_LENGTH is a UTF-8 BYTE budget (see derivation at the
  // constant definition). command.length counts UTF-16 code units; a CJK
  // character is 1 code unit but 3 UTF-8 bytes, so .length under-reports by
  // up to 3× and allows argv overflow on Windows → CreateProcess fails →
  // valid:false → deny rules degrade to ask. Finding #36.
  const commandBytes = Buffer.byteLength(command, 'utf8')
  if (commandBytes > MAX_COMMAND_LENGTH) {
    logForDebugging(
      `PowerShell parser: command too long (${commandBytes} bytes, max ${MAX_COMMAND_LENGTH})`,
    )
    return makeInvalidResult(
      command,
      `Command too long for parsing (${commandBytes} bytes). Maximum supported length is ${MAX_COMMAND_LENGTH} bytes.`,
      'CommandTooLong',
    )
  }

  const pwshPath = await getCachedPowerShellPath()
  if (!pwshPath) {
    return makeInvalidResult(
      command,
      'PowerShell is not available',
      'NoPowerShell',
    )
  }

  const script = buildParseScript(command)

  // Pass the script to PowerShell via -EncodedCommand.
  // -EncodedCommand takes a Base64-encoded UTF-16LE string and executes it,
  // which avoids: (1) stdin interactive-mode issues where -File - produces
  // PS prompts and ANSI escapes in stdout, (2) command-line escaping issues,
  // (3) temp files. The script itself is large but well within OS arg limits
  // (Windows: 32K chars, Unix: typically 2MB+).
  const encodedScript = toUtf16LeBase64(script)
  const args = [
    '-NoProfile',
    '-NonInteractive',
    '-NoLogo',
    '-EncodedCommand',
    encodedScript,
  ]

  // Spawn pwsh with one retry on timeout. On loaded CI runners (Windows
  // especially), pwsh spawn + .NET JIT + ParseInput occasionally exceeds 5s
  // even after CAN_SPAWN_PARSE_SCRIPT() warms the JIT. execa kills the process
  // but exitCode is undefined, which the old code reported as the misleading
  // "pwsh exited with code 1:" with empty stderr. A single retry absorbs
  // transient load spikes; a double timeout is reported as PwshTimeout.
  const maxAttempts = 2
  const parseTimeoutMs = getParseTimeoutMs()
  let stdout = ''
  let stderr = ''
  let code: number | null = null
  let timedOut = false
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      // reject: false — a non-zero exit or timeout is reported on the result
      // object instead of being thrown.
      const result = await execa(pwshPath, args, {
        timeout: parseTimeoutMs,
        reject: false,
      })
      stdout = result.stdout
      stderr = result.stderr
      timedOut = result.timedOut
      // A failed run without an exit code is normalised to 1.
      code = result.failed ? (result.exitCode ?? 1) : 0
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e)
      logForDebugging(`PowerShell parser: failed to spawn pwsh: ${reason}`)
      return makeInvalidResult(
        command,
        `Failed to spawn PowerShell: ${reason}`,
        'PwshSpawnError',
      )
    }
    // Only a timeout is retried; any other outcome is final.
    if (!timedOut) break
    logForDebugging(
      `PowerShell parser: pwsh timed out after ${parseTimeoutMs}ms (attempt ${attempt + 1})`,
    )
  }

  if (timedOut) {
    return makeInvalidResult(
      command,
      `pwsh timed out after ${parseTimeoutMs}ms (${maxAttempts} attempts)`,
      'PwshTimeout',
    )
  }

  if (code !== 0) {
    logForDebugging(
      `PowerShell parser: pwsh exited with code ${code}, stderr: ${stderr}`,
    )
    return makeInvalidResult(
      command,
      `pwsh exited with code ${code}: ${stderr}`,
      'PwshError',
    )
  }

  const trimmed = stdout.trim()
  if (!trimmed) {
    logForDebugging('PowerShell parser: empty stdout from pwsh')
    return makeInvalidResult(
      command,
      'No output from PowerShell parser',
      'EmptyOutput',
    )
  }

  try {
    // Both JSON parsing and the raw → public transform are inside the try, so
    // a failure in either is reported as InvalidJson.
    const raw = jsonParse(trimmed) as RawParsedOutput
    return transformRawOutput(raw)
  } catch {
    logForDebugging(
      `PowerShell parser: invalid JSON output: ${trimmed.slice(0, 200)}`,
    )
    return makeInvalidResult(
      command,
      'Invalid JSON from PowerShell parser',
      'InvalidJson',
    )
  }
}
|
||
|
||
// Error IDs from makeInvalidResult that represent transient process failures.
// These should be evicted from the cache so subsequent calls can retry.
// Deterministic failures (CommandTooLong, syntax errors from successful parses)
// should stay cached since retrying would produce the same result.
const TRANSIENT_ERROR_IDS = new Set([
  'PwshSpawnError',
  'PwshError',
  'PwshTimeout',
  'EmptyOutput',
  'InvalidJson',
])
|
||
|
||
/**
 * Memoized entry point for parsing a PowerShell command, keyed by the command
 * string itself. The cached value is the in-flight/settled promise, so
 * concurrent callers for the same command share one parse. The third argument
 * to memoizeWithLRU (256) is presumably the cache capacity — confirm against
 * memoizeWithLRU.
 */
const parsePowerShellCommandCached = memoizeWithLRU(
  (command: string) => {
    const promise = parsePowerShellCommandImpl(command)
    const evict = (): void => {
      parsePowerShellCommandCached.cache.delete(command)
    }
    // Evict transient failures after resolution so they can be retried.
    // The current caller still receives the cached promise for this call,
    // ensuring concurrent callers share the same result.
    void promise.then(
      result => {
        if (
          !result.valid &&
          TRANSIENT_ERROR_IDS.has(result.errors[0]?.errorId ?? '')
        ) {
          evict()
        }
      },
      // If the implementation ever rejects, don't leave the rejected promise
      // in the cache (every later call would replay the same rejection), and
      // don't let this side-chain surface as an unhandled rejection. The
      // caller still observes the rejection through the returned `promise`.
      () => {
        evict()
      },
    )
    return promise
  },
  (command: string) => command,
  256,
)
export { parsePowerShellCommandCached as parsePowerShellCommand }
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Analysis helpers — derived from the parsed AST structure.
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/**
 * Security-relevant flags derived from the parsed AST.
 * Each flag is true when the construct appears in the command.
 */
type SecurityFlags = {
  /** Contains $(...) subexpression */
  hasSubExpressions: boolean
  /** Contains { ... } script block expressions */
  hasScriptBlocks: boolean
  /** Contains @variable splatting */
  hasSplatting: boolean
  /** Contains expandable strings with embedded expressions ("...$()...") */
  hasExpandableStrings: boolean
  /** Contains .NET method invocations ([Type]::Method or $obj.Method()) */
  hasMemberInvocations: boolean
  /** Contains variable assignments ($x = ...) */
  hasAssignments: boolean
  /** Uses stop-parsing token (--%) */
  hasStopParsing: boolean
}
|
||
|
||
/**
 * Common PowerShell aliases mapped to their canonical cmdlet names.
 * Keys are lowercase alias names; values are canonical `Verb-Noun` names.
 *
 * Uses Object.create(null) to prevent prototype-chain pollution — attacker-controlled
 * command names like 'constructor' or '__proto__' must return undefined, not inherited
 * Object.prototype properties.
 */
export const COMMON_ALIASES: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    // Directory listing
    ls: 'Get-ChildItem',
    dir: 'Get-ChildItem',
    gci: 'Get-ChildItem',
    // Content
    cat: 'Get-Content',
    type: 'Get-Content',
    gc: 'Get-Content',
    // Navigation
    cd: 'Set-Location',
    sl: 'Set-Location',
    chdir: 'Set-Location',
    pushd: 'Push-Location',
    popd: 'Pop-Location',
    pwd: 'Get-Location',
    gl: 'Get-Location',
    // Items
    gi: 'Get-Item',
    gp: 'Get-ItemProperty',
    ni: 'New-Item',
    mkdir: 'New-Item',
    // `md` is PowerShell's built-in alias for `mkdir`. resolveToCanonical is
    // single-hop (no md→mkdir→New-Item chaining), so it needs its own entry
    // or `md /etc/x` falls through while `mkdir /etc/x` is caught.
    md: 'New-Item',
    ri: 'Remove-Item',
    del: 'Remove-Item',
    rd: 'Remove-Item',
    rmdir: 'Remove-Item',
    rm: 'Remove-Item',
    erase: 'Remove-Item',
    mi: 'Move-Item',
    mv: 'Move-Item',
    move: 'Move-Item',
    ci: 'Copy-Item',
    cp: 'Copy-Item',
    copy: 'Copy-Item',
    cpi: 'Copy-Item',
    si: 'Set-Item',
    rni: 'Rename-Item',
    ren: 'Rename-Item',
    // Process
    ps: 'Get-Process',
    gps: 'Get-Process',
    kill: 'Stop-Process',
    spps: 'Stop-Process',
    start: 'Start-Process',
    saps: 'Start-Process',
    sajb: 'Start-Job',
    ipmo: 'Import-Module',
    // Output
    echo: 'Write-Output',
    write: 'Write-Output',
    sleep: 'Start-Sleep',
    // Help
    help: 'Get-Help',
    man: 'Get-Help',
    gcm: 'Get-Command',
    // Service
    gsv: 'Get-Service',
    // Variables
    gv: 'Get-Variable',
    sv: 'Set-Variable',
    // History
    h: 'Get-History',
    history: 'Get-History',
    // Invoke
    iex: 'Invoke-Expression',
    iwr: 'Invoke-WebRequest',
    irm: 'Invoke-RestMethod',
    icm: 'Invoke-Command',
    ii: 'Invoke-Item',
    // PSSession — remote code execution surface
    nsn: 'New-PSSession',
    etsn: 'Enter-PSSession',
    exsn: 'Exit-PSSession',
    gsn: 'Get-PSSession',
    rsn: 'Remove-PSSession',
    // Misc
    cls: 'Clear-Host',
    clear: 'Clear-Host',
    select: 'Select-Object',
    where: 'Where-Object',
    foreach: 'ForEach-Object',
    '%': 'ForEach-Object',
    '?': 'Where-Object',
    measure: 'Measure-Object',
    ft: 'Format-Table',
    fl: 'Format-List',
    fw: 'Format-Wide',
    oh: 'Out-Host',
    ogv: 'Out-GridView',
    // SECURITY: The following aliases are deliberately omitted because PS Core 6+
    // removed them (they collide with native executables). Our allowlist logic
    // resolves aliases BEFORE checking safety — if we map 'sort' → 'Sort-Object'
    // but PowerShell 7/Windows actually runs sort.exe, we'd auto-allow the wrong
    // program.
    //   'sc'   → sc.exe (Service Controller) — e.g. `sc config Svc binpath= ...`
    //   'sort' → sort.exe — e.g. `sort /O C:\evil.txt` (arbitrary file write)
    //   'curl' → curl.exe (shipped with Windows 10 1803+)
    //   'wget' → wget.exe (if installed)
    // Prefer to leave ambiguous aliases unmapped — users can write the full name.
    // If adding aliases that resolve to SAFE_OUTPUT_CMDLETS or
    // ACCEPT_EDITS_ALLOWED_CMDLETS, verify no native .exe collision on PS Core.
    ac: 'Add-Content',
    clc: 'Clear-Content',
    // Write/export: tee-object and export-csv are in CMDLET_PATH_CONFIG so
    // path-level Edit denies fire on the full cmdlet name, but PowerShell's
    // built-in aliases fell through to ask-then-approve because
    // resolveToCanonical couldn't resolve them — hence the entries below.
    // Neither tee-object nor export-csv is in SAFE_OUTPUT_CMDLETS or
    // ACCEPT_EDITS_ALLOWED_CMDLETS, so the native-exe collision warning above
    // doesn't apply — on Linux PS Core where `tee` runs /usr/bin/tee, that
    // binary also writes to its positional file arg and we correctly
    // extract+check it.
    tee: 'Tee-Object',
    epcsv: 'Export-Csv',
    sp: 'Set-ItemProperty',
    rp: 'Remove-ItemProperty',
    cli: 'Clear-Item',
    epal: 'Export-Alias',
    // Text search
    sls: 'Select-String',
  },
)
|
||
|
||
// Lowercased canonical cmdlet names that change the current location.
const DIRECTORY_CHANGE_CMDLETS = new Set([
  'set-location',
  'push-location',
  'pop-location',
])

// Lowercased aliases of the location-changing cmdlets above.
const DIRECTORY_CHANGE_ALIASES = new Set(['cd', 'sl', 'chdir', 'pushd', 'popd'])
|
||
|
||
/**
 * Get all command names across all statements, pipeline segments, and nested commands.
 * Returns lowercased names for case-insensitive comparison.
 *
 * Order: statement by statement, each statement's pipeline commands first,
 * then its nested commands.
 */
// exported for testing
export function getAllCommandNames(parsed: ParsedPowerShellCommand): string[] {
  return parsed.statements.flatMap(statement =>
    [...statement.commands, ...(statement.nestedCommands ?? [])].map(cmd =>
      cmd.name.toLowerCase(),
    ),
  )
}
|
||
|
||
/**
 * Get all pipeline segments as flat list of commands.
 * Useful for checking each command independently.
 *
 * Order: statement by statement, each statement's pipeline commands first,
 * then its nested commands. The returned array holds the same command
 * objects as `parsed` (no copies).
 */
export function getAllCommands(
  parsed: ParsedPowerShellCommand,
): ParsedCommandElement[] {
  return parsed.statements.flatMap(statement => [
    ...statement.commands,
    ...(statement.nestedCommands ?? []),
  ])
}
|
||
|
||
/**
 * Get all redirections across all statements.
 *
 * For each statement, its statement-level redirections come first, followed
 * by the redirections attached to its nested commands (e.g., from && / ||
 * chains). Redirections attached to `statement.commands` entries are not read
 * here.
 */
// exported for testing
export function getAllRedirections(
  parsed: ParsedPowerShellCommand,
): ParsedRedirection[] {
  return parsed.statements.flatMap(statement => [
    ...statement.redirections,
    ...(statement.nestedCommands ?? []).flatMap(cmd => cmd.redirections ?? []),
  ])
}
|
||
|
||
/**
 * Get all variables whose path starts with the given scope (e.g., 'env').
 * Variable paths in PowerShell can have scopes like "env:PATH", "global:x".
 * Matching is case-insensitive and requires the `scope:` prefix, colon
 * included.
 */
export function getVariablesByScope(
  parsed: ParsedPowerShellCommand,
  scope: string,
): ParsedVariable[] {
  const prefix = scope.toLowerCase() + ':'
  return parsed.variables.filter(v => v.path.toLowerCase().startsWith(prefix))
}
|
||
|
||
/**
 * Check if any command in the parsed result matches a given name (case-insensitive).
 * Handles common aliases too: a command matches when it equals the requested
 * name, when either one is a COMMON_ALIASES alias of the other, or when both
 * are aliases of the same canonical cmdlet.
 */
export function hasCommandNamed(
  parsed: ParsedPowerShellCommand,
  name: string,
): boolean {
  const lowerName = name.toLowerCase()
  // Canonical cmdlet for the requested name, if the request itself is an alias.
  const canonicalFromAlias = COMMON_ALIASES[lowerName]?.toLowerCase()

  return getAllCommandNames(parsed).some(cmdName => {
    if (cmdName === lowerName) {
      return true
    }
    // Check if the command is an alias that resolves to the requested name
    const canonical = COMMON_ALIASES[cmdName]?.toLowerCase()
    if (canonical === lowerName) {
      return true
    }
    if (!canonicalFromAlias) {
      return false
    }
    // The requested name is an alias: match the command if it is the
    // canonical form, or another alias of the same canonical cmdlet.
    return cmdName === canonicalFromAlias || canonical === canonicalFromAlias
  })
}
|
||
|
||
/**
 * Check if the command contains any directory-changing commands.
 * (Set-Location, cd, sl, chdir, Push-Location, pushd, Pop-Location, popd)
 * Considers every command name returned by getAllCommandNames, nested
 * commands included.
 */
// exported for testing
export function hasDirectoryChange(parsed: ParsedPowerShellCommand): boolean {
  return getAllCommandNames(parsed).some(
    cmdName =>
      DIRECTORY_CHANGE_CMDLETS.has(cmdName) ||
      DIRECTORY_CHANGE_ALIASES.has(cmdName),
  )
}
|
||
|
||
/**
 * Check if the command is a single simple command (no pipes, no semicolons, no operators).
 * Concretely: exactly one statement, with exactly one command and no nested
 * commands.
 */
// exported for testing
export function isSingleCommand(parsed: ParsedPowerShellCommand): boolean {
  const stmt = parsed.statements[0]
  return (
    parsed.statements.length === 1 &&
    stmt !== undefined &&
    stmt.commands.length === 1 &&
    (!stmt.nestedCommands || stmt.nestedCommands.length === 0)
  )
}
|
||
|
||
/**
 * Check if a specific command has a given argument/flag (case-insensitive).
 * Useful for checking "-EncodedCommand", "-Recurse", etc.
 *
 * PowerShell's tokenizer also accepts en-dash (U+2013), em-dash (U+2014) and
 * horizontal bar (U+2015) as the parameter prefix, and the raw character
 * reaches `command.args` unchanged. A leading Unicode dash is therefore
 * treated as `-` on both sides of the comparison, so `–Recurse` matches a
 * `-Recurse` query.
 */
export function commandHasArg(
  command: ParsedCommandElement,
  arg: string,
): boolean {
  const unicodeDashPrefix = /^[\u2013\u2014\u2015]/
  const normalize = (s: string): string =>
    s.replace(unicodeDashPrefix, '-').toLowerCase()
  const wanted = normalize(arg)
  return command.args.some(a => normalize(a) === wanted)
}
|
||
|
||
/**
 * Tokenizer-level dash characters that PowerShell's parser accepts as
 * parameter prefixes. SpecialCharacters.IsDash (CharTraits.cs) accepts exactly
 * these four: ASCII hyphen-minus, en-dash, em-dash, horizontal bar. These are
 * tokenizer-level — they apply to ALL cmdlet parameters, not just argv to
 * powershell.exe (contrast with `/` which is an argv-parser quirk of
 * powershell.exe 5.1 only; see PS_ALT_PARAM_PREFIXES in powershellSecurity.ts).
 *
 * Extent.Text preserves the raw character; transformCommandAst uses ce.text
 * for CommandParameterAst elements, so these reach callers unchanged.
 */
export const PS_TOKENIZER_DASH_CHARS = new Set([
  '-', // U+002D hyphen-minus (ASCII)
  '\u2013', // en-dash
  '\u2014', // em-dash
  '\u2015', // horizontal bar
])
|
||
|
||
/**
 * Determines if an argument is a PowerShell parameter (flag), using the AST
 * element type as ground truth when available.
 *
 * The parser maps CommandParameterAst → 'Parameter' regardless of which dash
 * character the user typed — PowerShell's tokenizer handles that. So when
 * elementType is available, it's authoritative:
 *   - 'Parameter' → true (covers `-Path`, `–Path`, `—Path`, `―Path`)
 *   - anything else → false (a quoted "-Path" is StringConstant, not a param)
 *
 * When elementType is unavailable (backward compat / no AST detail), fall back
 * to a char check against PS_TOKENIZER_DASH_CHARS.
 *
 * @param arg - The argument text.
 * @param elementType - The argument's AST classification, if known.
 */
export function isPowerShellParameter(
  arg: string,
  elementType?: CommandElementType,
): boolean {
  if (elementType !== undefined) {
    return elementType === 'Parameter'
  }
  // No AST detail: an empty string is never a parameter; otherwise look at
  // the first character only.
  return arg.length > 0 && PS_TOKENIZER_DASH_CHARS.has(arg[0]!)
}
|
||
|
||
/**
 * Check if any argument on a command is an unambiguous abbreviation of a PowerShell parameter.
 * PowerShell allows parameter abbreviation as long as the prefix is unambiguous.
 * The minPrefix is the shortest unambiguous prefix for the parameter.
 * For example, minPrefix '-en' for fullParam '-encodedcommand' matches '-en', '-enc', '-enco', etc.
 *
 * Matching is done on the raw argument text, case-insensitively, after
 * dropping any colon-bound value and any backtick characters. The leading
 * dash is compared literally against the one in `minPrefix`/`fullParam`.
 *
 * @param command - Parsed command whose `args` are searched.
 * @param fullParam - Full parameter name including its dash, e.g. `-EncodedCommand`.
 * @param minPrefix - Shortest accepted prefix including its dash, e.g. `-en`.
 * @returns True if some argument lies between `minPrefix` and `fullParam`.
 */
export function commandHasArgAbbreviation(
  command: ParsedCommandElement,
  fullParam: string,
  minPrefix: string,
): boolean {
  const lowerFull = fullParam.toLowerCase()
  const lowerMin = minPrefix.toLowerCase()
  return command.args.some(a => {
    // Strip colon-bound value (e.g., -en:base64value -> -en). The search
    // starts at index 1 so a leading ':' is never treated as the separator.
    const colonIndex = a.indexOf(':', 1)
    const paramPart = colonIndex > 0 ? a.slice(0, colonIndex) : a
    // Strip backtick escapes — PowerShell resolves `-Member`Name` to
    // `-MemberName` but Extent.Text preserves the backtick, causing
    // prefix-comparison misses on the raw text.
    const lower = paramPart.replace(/`/g, '').toLowerCase()
    // lowerFull.startsWith(lower) already guarantees lower is no longer than
    // lowerFull, so no separate length comparison is required.
    return lower.startsWith(lowerMin) && lowerFull.startsWith(lower)
  })
}
|
||
|
||
/**
 * Split a parsed command into its pipeline segments for per-segment permission checking.
 *
 * Each entry of `parsed.statements` is treated as one segment, so this simply
 * returns that array (the same reference, not a copy).
 *
 * @param parsed - Parsed PowerShell command.
 * @returns The statements of `parsed`, one per segment.
 */
export function getPipelineSegments(
  parsed: ParsedPowerShellCommand,
): ParsedStatement[] {
  return parsed.statements
}
|
||
|
||
/**
 * True if a redirection target is PowerShell's `$null` automatic variable.
 * `> $null` discards output (like /dev/null) — not a filesystem write.
 * `$null` cannot be reassigned, so this is safe to treat as a no-op sink.
 * `${null}` is the same automatic variable via curly-brace syntax. Spaces
 * inside the braces (`${ null }`) name a different variable, so no regex.
 *
 * @param target - Redirection target text; surrounding whitespace and case
 *   are ignored.
 * @returns True only for `$null` or `${null}`.
 */
export function isNullRedirectionTarget(target: string): boolean {
  const t = target.trim().toLowerCase()
  return t === '$null' || t === '${null}'
}
|
||
|
||
/**
 * Get output redirections (file redirections, not merging redirections).
 *
 * Starts from getAllRedirections(parsed) and drops entries that are flagged
 * `isMerging` or whose target is the `$null` sink
 * (see isNullRedirectionTarget).
 *
 * @param parsed - Parsed PowerShell command.
 * @returns The remaining redirections, in their original order.
 */
// exported for testing
export function getFileRedirections(
  parsed: ParsedPowerShellCommand,
): ParsedRedirection[] {
  return getAllRedirections(parsed).filter(
    r => !r.isMerging && !isNullRedirectionTarget(r.target),
  )
}
|
||
|
||
/**
 * Derive security-relevant flags from the parsed command structure.
 * This replaces the previous approach of computing flags in PowerShell via
 * separate Find-AstNodes calls. Instead, the PS1 script tags each element
 * with its AST node type, and this function walks those types.
 *
 * Flags only ever go from false to true while walking. `hasStopParsing` is
 * copied straight from `parsed.hasStopParsing`.
 *
 * @param parsed - Parsed PowerShell command.
 * @returns A fresh SecurityFlags object summarising `parsed`.
 */
// exported for testing
export function deriveSecurityFlags(
  parsed: ParsedPowerShellCommand,
): SecurityFlags {
  const flags: SecurityFlags = {
    hasSubExpressions: false,
    hasScriptBlocks: false,
    hasSplatting: false,
    hasExpandableStrings: false,
    hasMemberInvocations: false,
    hasAssignments: false,
    hasStopParsing: parsed.hasStopParsing,
  }

  // Raise the flag matching each tagged element type on one command.
  // Commands without elementTypes contribute nothing; element types that
  // have no corresponding flag are ignored.
  function checkElements(cmd: ParsedCommandElement): void {
    if (!cmd.elementTypes) {
      return
    }
    for (const et of cmd.elementTypes) {
      switch (et) {
        case 'ScriptBlock':
          flags.hasScriptBlocks = true
          break
        case 'SubExpression':
          flags.hasSubExpressions = true
          break
        case 'ExpandableString':
          flags.hasExpandableStrings = true
          break
        case 'MemberInvocation':
          flags.hasMemberInvocations = true
          break
      }
    }
  }

  for (const stmt of parsed.statements) {
    if (stmt.statementType === 'AssignmentStatementAst') {
      flags.hasAssignments = true
    }
    for (const cmd of stmt.commands) {
      checkElements(cmd)
    }
    if (stmt.nestedCommands) {
      for (const cmd of stmt.nestedCommands) {
        checkElements(cmd)
      }
    }
    // securityPatterns provides a belt-and-suspenders check that catches
    // patterns elementTypes may miss (e.g. member invocations inside
    // assignments, subexpressions in non-pipeline statements).
    if (stmt.securityPatterns) {
      if (stmt.securityPatterns.hasMemberInvocations) {
        flags.hasMemberInvocations = true
      }
      if (stmt.securityPatterns.hasSubExpressions) {
        flags.hasSubExpressions = true
      }
      if (stmt.securityPatterns.hasExpandableStrings) {
        flags.hasExpandableStrings = true
      }
      if (stmt.securityPatterns.hasScriptBlocks) {
        flags.hasScriptBlocks = true
      }
    }
  }

  // A single splatted variable anywhere in the command is enough.
  if (parsed.variables.some(v => v.isSplatted)) {
    flags.hasSplatting = true
  }

  return flags
}
|
||
|
||
// Raw types exported for testing (function exports are inline above)
|