533 lines
16 KiB
TypeScript
533 lines
16 KiB
TypeScript
import type { StructuredPatchHunk } from 'diff'
|
|
import { access, readFile } from 'fs/promises'
|
|
import { dirname, join, relative, sep } from 'path'
|
|
import { getCwd } from './cwd.js'
|
|
import { getCachedRepository } from './detectRepository.js'
|
|
import { execFileNoThrow, execFileNoThrowWithCwd } from './execFileNoThrow.js'
|
|
import { isFileWithinReadSizeLimit } from './file.js'
|
|
import {
|
|
findGitRoot,
|
|
getDefaultBranch,
|
|
getGitDir,
|
|
getIsGit,
|
|
gitExe,
|
|
} from './git.js'
|
|
|
|
export type GitDiffStats = {
|
|
filesCount: number
|
|
linesAdded: number
|
|
linesRemoved: number
|
|
}
|
|
|
|
export type PerFileStats = {
|
|
added: number
|
|
removed: number
|
|
isBinary: boolean
|
|
isUntracked?: boolean
|
|
}
|
|
|
|
export type GitDiffResult = {
|
|
stats: GitDiffStats
|
|
perFileStats: Map<string, PerFileStats>
|
|
hunks: Map<string, StructuredPatchHunk[]>
|
|
}
|
|
|
|
const GIT_TIMEOUT_MS = 5000
|
|
const MAX_FILES = 50
|
|
const MAX_DIFF_SIZE_BYTES = 1_000_000 // 1 MB - skip files larger than this
|
|
const MAX_LINES_PER_FILE = 400 // GitHub's auto-load limit
|
|
const MAX_FILES_FOR_DETAILS = 500 // Skip per-file details if more files than this
|
|
|
|
/**
|
|
* Fetch git diff stats and hunks comparing working tree to HEAD.
|
|
* Returns null if not in a git repo or if git commands fail.
|
|
*
|
|
* Returns null during merge/rebase/cherry-pick/revert operations since the
|
|
* working tree contains incoming changes that weren't intentionally
|
|
* made by the user.
|
|
*/
|
|
export async function fetchGitDiff(): Promise<GitDiffResult | null> {
|
|
const isGit = await getIsGit()
|
|
if (!isGit) return null
|
|
|
|
// Skip diff calculation during transient git states since the
|
|
// working tree contains incoming changes, not user-intentional edits
|
|
if (await isInTransientGitState()) {
|
|
return null
|
|
}
|
|
|
|
// Quick probe: use --shortstat to get totals without loading all content.
|
|
// This is O(1) memory and lets us detect massive diffs (e.g., jj workspaces)
|
|
// before committing to expensive operations.
|
|
const { stdout: shortstatOut, code: shortstatCode } = await execFileNoThrow(
|
|
gitExe(),
|
|
['--no-optional-locks', 'diff', 'HEAD', '--shortstat'],
|
|
{ timeout: GIT_TIMEOUT_MS, preserveOutputOnError: false },
|
|
)
|
|
|
|
if (shortstatCode === 0) {
|
|
const quickStats = parseShortstat(shortstatOut)
|
|
if (quickStats && quickStats.filesCount > MAX_FILES_FOR_DETAILS) {
|
|
// Too many files - return accurate totals but skip per-file details
|
|
// to avoid loading hundreds of MB into memory
|
|
return {
|
|
stats: quickStats,
|
|
perFileStats: new Map(),
|
|
hunks: new Map(),
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get stats via --numstat (all uncommitted changes vs HEAD)
|
|
const { stdout: numstatOut, code: numstatCode } = await execFileNoThrow(
|
|
gitExe(),
|
|
['--no-optional-locks', 'diff', 'HEAD', '--numstat'],
|
|
{ timeout: GIT_TIMEOUT_MS, preserveOutputOnError: false },
|
|
)
|
|
|
|
if (numstatCode !== 0) return null
|
|
|
|
const { stats, perFileStats } = parseGitNumstat(numstatOut)
|
|
|
|
// Include untracked files (new files not yet staged)
|
|
// Just filenames - no content reading for performance
|
|
const remainingSlots = MAX_FILES - perFileStats.size
|
|
if (remainingSlots > 0) {
|
|
const untrackedStats = await fetchUntrackedFiles(remainingSlots)
|
|
if (untrackedStats) {
|
|
stats.filesCount += untrackedStats.size
|
|
for (const [path, fileStats] of untrackedStats) {
|
|
perFileStats.set(path, fileStats)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Return stats only - hunks are fetched on-demand via fetchGitDiffHunks()
|
|
// to avoid expensive git diff HEAD call on every poll
|
|
return { stats, perFileStats, hunks: new Map() }
|
|
}
|
|
|
|
/**
|
|
* Fetch git diff hunks on-demand (for DiffDialog).
|
|
* Separated from fetchGitDiff() to avoid expensive calls during polling.
|
|
*/
|
|
export async function fetchGitDiffHunks(): Promise<
|
|
Map<string, StructuredPatchHunk[]>
|
|
> {
|
|
const isGit = await getIsGit()
|
|
if (!isGit) return new Map()
|
|
|
|
if (await isInTransientGitState()) {
|
|
return new Map()
|
|
}
|
|
|
|
const { stdout: diffOut, code: diffCode } = await execFileNoThrow(
|
|
gitExe(),
|
|
['--no-optional-locks', 'diff', 'HEAD'],
|
|
{ timeout: GIT_TIMEOUT_MS, preserveOutputOnError: false },
|
|
)
|
|
|
|
if (diffCode !== 0) {
|
|
return new Map()
|
|
}
|
|
|
|
return parseGitDiff(diffOut)
|
|
}
|
|
|
|
export type NumstatResult = {
|
|
stats: GitDiffStats
|
|
perFileStats: Map<string, PerFileStats>
|
|
}
|
|
|
|
/**
|
|
* Parse git diff --numstat output into stats.
|
|
* Format: <added>\t<removed>\t<filename>
|
|
* Binary files show '-' for counts.
|
|
* Only stores first MAX_FILES entries in perFileStats.
|
|
*/
|
|
export function parseGitNumstat(stdout: string): NumstatResult {
|
|
const lines = stdout.trim().split('\n').filter(Boolean)
|
|
let added = 0
|
|
let removed = 0
|
|
let validFileCount = 0
|
|
const perFileStats = new Map<string, PerFileStats>()
|
|
|
|
for (const line of lines) {
|
|
const parts = line.split('\t')
|
|
// Valid numstat lines have exactly 3 tab-separated parts: added, removed, filename
|
|
if (parts.length < 3) continue
|
|
|
|
validFileCount++
|
|
const addStr = parts[0]
|
|
const remStr = parts[1]
|
|
const filePath = parts.slice(2).join('\t') // filename may contain tabs
|
|
const isBinary = addStr === '-' || remStr === '-'
|
|
const fileAdded = isBinary ? 0 : parseInt(addStr ?? '0', 10) || 0
|
|
const fileRemoved = isBinary ? 0 : parseInt(remStr ?? '0', 10) || 0
|
|
|
|
added += fileAdded
|
|
removed += fileRemoved
|
|
|
|
// Only store first MAX_FILES entries
|
|
if (perFileStats.size < MAX_FILES) {
|
|
perFileStats.set(filePath, {
|
|
added: fileAdded,
|
|
removed: fileRemoved,
|
|
isBinary,
|
|
})
|
|
}
|
|
}
|
|
|
|
return {
|
|
stats: {
|
|
filesCount: validFileCount,
|
|
linesAdded: added,
|
|
linesRemoved: removed,
|
|
},
|
|
perFileStats,
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse unified diff output into per-file hunks.
|
|
* Splits by "diff --git" and parses each file's hunks.
|
|
*
|
|
* Applies limits:
|
|
* - MAX_FILES: stop after this many files
|
|
* - Files >1MB: skipped entirely (not in result map)
|
|
* - Files ≤1MB: parsed but limited to MAX_LINES_PER_FILE lines
|
|
*/
|
|
export function parseGitDiff(
|
|
stdout: string,
|
|
): Map<string, StructuredPatchHunk[]> {
|
|
const result = new Map<string, StructuredPatchHunk[]>()
|
|
if (!stdout.trim()) return result
|
|
|
|
// Split by file diffs
|
|
const fileDiffs = stdout.split(/^diff --git /m).filter(Boolean)
|
|
|
|
for (const fileDiff of fileDiffs) {
|
|
// Stop after MAX_FILES
|
|
if (result.size >= MAX_FILES) break
|
|
|
|
// Skip files larger than 1MB
|
|
if (fileDiff.length > MAX_DIFF_SIZE_BYTES) {
|
|
continue
|
|
}
|
|
|
|
const lines = fileDiff.split('\n')
|
|
|
|
// Extract filename from first line: "a/path/to/file b/path/to/file"
|
|
const headerMatch = lines[0]?.match(/^a\/(.+?) b\/(.+)$/)
|
|
if (!headerMatch) continue
|
|
const filePath = headerMatch[2] ?? headerMatch[1] ?? ''
|
|
|
|
// Find and parse hunks
|
|
const fileHunks: StructuredPatchHunk[] = []
|
|
let currentHunk: StructuredPatchHunk | null = null
|
|
let lineCount = 0
|
|
|
|
for (let i = 1; i < lines.length; i++) {
|
|
const line = lines[i] ?? ''
|
|
|
|
// StructuredPatchHunk header: @@ -oldStart,oldLines +newStart,newLines @@
|
|
const hunkMatch = line.match(
|
|
/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/,
|
|
)
|
|
if (hunkMatch) {
|
|
if (currentHunk) {
|
|
fileHunks.push(currentHunk)
|
|
}
|
|
currentHunk = {
|
|
oldStart: parseInt(hunkMatch[1] ?? '0', 10),
|
|
oldLines: parseInt(hunkMatch[2] ?? '1', 10),
|
|
newStart: parseInt(hunkMatch[3] ?? '0', 10),
|
|
newLines: parseInt(hunkMatch[4] ?? '1', 10),
|
|
lines: [],
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Skip binary file markers and other metadata
|
|
if (
|
|
line.startsWith('index ') ||
|
|
line.startsWith('---') ||
|
|
line.startsWith('+++') ||
|
|
line.startsWith('new file') ||
|
|
line.startsWith('deleted file') ||
|
|
line.startsWith('old mode') ||
|
|
line.startsWith('new mode') ||
|
|
line.startsWith('Binary files')
|
|
) {
|
|
continue
|
|
}
|
|
|
|
// Add diff lines to current hunk (with line limit)
|
|
if (
|
|
currentHunk &&
|
|
(line.startsWith('+') ||
|
|
line.startsWith('-') ||
|
|
line.startsWith(' ') ||
|
|
line === '')
|
|
) {
|
|
// Stop adding lines once we hit the limit
|
|
if (lineCount >= MAX_LINES_PER_FILE) {
|
|
continue
|
|
}
|
|
// Force a flat string copy to break V8 sliced string references.
|
|
// When split() creates lines, V8 creates "sliced strings" that reference
|
|
// the parent. This keeps the entire parent string (~MBs) alive as long as
|
|
// any line is retained. Using '' + line forces a new flat string allocation,
|
|
// unlike slice(0) which V8 may optimize to return the same reference.
|
|
currentHunk.lines.push('' + line)
|
|
lineCount++
|
|
}
|
|
}
|
|
|
|
// Don't forget the last hunk
|
|
if (currentHunk) {
|
|
fileHunks.push(currentHunk)
|
|
}
|
|
|
|
if (fileHunks.length > 0) {
|
|
result.set(filePath, fileHunks)
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
/**
|
|
* Check if we're in a transient git state (merge, rebase, cherry-pick, or revert).
|
|
* During these operations, we skip diff calculation since the working
|
|
* tree contains incoming changes that weren't intentionally made.
|
|
*
|
|
* Uses fs.access to check for transient ref files, avoiding process spawns.
|
|
*/
|
|
async function isInTransientGitState(): Promise<boolean> {
|
|
const gitDir = await getGitDir(getCwd())
|
|
if (!gitDir) return false
|
|
|
|
const transientFiles = [
|
|
'MERGE_HEAD',
|
|
'REBASE_HEAD',
|
|
'CHERRY_PICK_HEAD',
|
|
'REVERT_HEAD',
|
|
]
|
|
|
|
const results = await Promise.all(
|
|
transientFiles.map(file =>
|
|
access(join(gitDir, file))
|
|
.then(() => true)
|
|
.catch(() => false),
|
|
),
|
|
)
|
|
return results.some(Boolean)
|
|
}
|
|
|
|
/**
|
|
* Fetch untracked file names (no content reading).
|
|
* Returns file paths only - they'll be displayed with a note to stage them.
|
|
*
|
|
* @param maxFiles Maximum number of untracked files to include
|
|
*/
|
|
async function fetchUntrackedFiles(
|
|
maxFiles: number,
|
|
): Promise<Map<string, PerFileStats> | null> {
|
|
// Get list of untracked files (excludes gitignored)
|
|
const { stdout, code } = await execFileNoThrow(
|
|
gitExe(),
|
|
['--no-optional-locks', 'ls-files', '--others', '--exclude-standard'],
|
|
{ timeout: GIT_TIMEOUT_MS, preserveOutputOnError: false },
|
|
)
|
|
|
|
if (code !== 0 || !stdout.trim()) return null
|
|
|
|
const untrackedPaths = stdout.trim().split('\n').filter(Boolean)
|
|
if (untrackedPaths.length === 0) return null
|
|
|
|
const perFileStats = new Map<string, PerFileStats>()
|
|
|
|
// Just record filenames, no content reading
|
|
for (const filePath of untrackedPaths.slice(0, maxFiles)) {
|
|
perFileStats.set(filePath, {
|
|
added: 0,
|
|
removed: 0,
|
|
isBinary: false,
|
|
isUntracked: true,
|
|
})
|
|
}
|
|
|
|
return perFileStats
|
|
}
|
|
|
|
/**
|
|
* Parse git diff --shortstat output into stats.
|
|
* Format: " 1648 files changed, 52341 insertions(+), 8123 deletions(-)"
|
|
*
|
|
* This is O(1) memory regardless of diff size - git computes totals without
|
|
* loading all content. Used as a quick probe before expensive operations.
|
|
*/
|
|
export function parseShortstat(stdout: string): GitDiffStats | null {
|
|
// Match: "N files changed" with optional ", N insertions(+)" and ", N deletions(-)"
|
|
const match = stdout.match(
|
|
/(\d+)\s+files?\s+changed(?:,\s+(\d+)\s+insertions?\(\+\))?(?:,\s+(\d+)\s+deletions?\(-\))?/,
|
|
)
|
|
if (!match) return null
|
|
return {
|
|
filesCount: parseInt(match[1] ?? '0', 10),
|
|
linesAdded: parseInt(match[2] ?? '0', 10),
|
|
linesRemoved: parseInt(match[3] ?? '0', 10),
|
|
}
|
|
}
|
|
|
|
const SINGLE_FILE_DIFF_TIMEOUT_MS = 3000
|
|
|
|
export type ToolUseDiff = {
|
|
filename: string
|
|
status: 'modified' | 'added'
|
|
additions: number
|
|
deletions: number
|
|
changes: number
|
|
patch: string
|
|
/** GitHub "owner/repo" when available (null for non-github.com or unknown repos) */
|
|
repository: string | null
|
|
}
|
|
|
|
/**
|
|
* Fetch a structured diff for a single file against the merge base with the
|
|
* default branch. This produces a PR-like diff showing all changes since
|
|
* the branch diverged. Falls back to diffing against HEAD if the merge base
|
|
* cannot be determined (e.g., on the default branch itself).
|
|
* For untracked files, generates a synthetic diff showing all additions.
|
|
* Returns null if not in a git repo or if git commands fail.
|
|
*/
|
|
export async function fetchSingleFileGitDiff(
|
|
absoluteFilePath: string,
|
|
): Promise<ToolUseDiff | null> {
|
|
const gitRoot = findGitRoot(dirname(absoluteFilePath))
|
|
if (!gitRoot) return null
|
|
|
|
const gitPath = relative(gitRoot, absoluteFilePath).split(sep).join('/')
|
|
const repository = getCachedRepository()
|
|
|
|
// Check if the file is tracked by git
|
|
const { code: lsFilesCode } = await execFileNoThrowWithCwd(
|
|
gitExe(),
|
|
['--no-optional-locks', 'ls-files', '--error-unmatch', gitPath],
|
|
{ cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
|
|
)
|
|
|
|
if (lsFilesCode === 0) {
|
|
// File is tracked - diff against merge base for PR-like view
|
|
const diffRef = await getDiffRef(gitRoot)
|
|
const { stdout, code } = await execFileNoThrowWithCwd(
|
|
gitExe(),
|
|
['--no-optional-locks', 'diff', diffRef, '--', gitPath],
|
|
{ cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
|
|
)
|
|
if (code !== 0) return null
|
|
if (!stdout) return null
|
|
return {
|
|
...parseRawDiffToToolUseDiff(gitPath, stdout, 'modified'),
|
|
repository,
|
|
}
|
|
}
|
|
|
|
// File is untracked - generate synthetic diff
|
|
const syntheticDiff = await generateSyntheticDiff(gitPath, absoluteFilePath)
|
|
if (!syntheticDiff) return null
|
|
return { ...syntheticDiff, repository }
|
|
}
|
|
|
|
/**
|
|
* Parse raw unified diff output into the structured ToolUseDiff format.
|
|
* Extracts only the hunk content (starting from @@) as the patch,
|
|
* and counts additions/deletions.
|
|
*/
|
|
function parseRawDiffToToolUseDiff(
|
|
filename: string,
|
|
rawDiff: string,
|
|
status: 'modified' | 'added',
|
|
): Omit<ToolUseDiff, 'repository'> {
|
|
const lines = rawDiff.split('\n')
|
|
const patchLines: string[] = []
|
|
let inHunks = false
|
|
let additions = 0
|
|
let deletions = 0
|
|
|
|
for (const line of lines) {
|
|
if (line.startsWith('@@')) {
|
|
inHunks = true
|
|
}
|
|
if (inHunks) {
|
|
patchLines.push(line)
|
|
if (line.startsWith('+') && !line.startsWith('+++')) {
|
|
additions++
|
|
} else if (line.startsWith('-') && !line.startsWith('---')) {
|
|
deletions++
|
|
}
|
|
}
|
|
}
|
|
|
|
return {
|
|
filename,
|
|
status,
|
|
additions,
|
|
deletions,
|
|
changes: additions + deletions,
|
|
patch: patchLines.join('\n'),
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Determine the best ref to diff against for a PR-like diff.
|
|
* Priority:
|
|
* 1. CLAUDE_CODE_BASE_REF env var (set externally, e.g. by CCR managed containers)
|
|
* 2. Merge base with the default branch (best guess)
|
|
* 3. HEAD (fallback if merge-base fails)
|
|
*/
|
|
async function getDiffRef(gitRoot: string): Promise<string> {
|
|
const baseBranch =
|
|
process.env.CLAUDE_CODE_BASE_REF || (await getDefaultBranch())
|
|
const { stdout, code } = await execFileNoThrowWithCwd(
|
|
gitExe(),
|
|
['--no-optional-locks', 'merge-base', 'HEAD', baseBranch],
|
|
{ cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
|
|
)
|
|
if (code === 0 && stdout.trim()) {
|
|
return stdout.trim()
|
|
}
|
|
return 'HEAD'
|
|
}
|
|
|
|
async function generateSyntheticDiff(
|
|
gitPath: string,
|
|
absoluteFilePath: string,
|
|
): Promise<Omit<ToolUseDiff, 'repository'> | null> {
|
|
try {
|
|
if (!isFileWithinReadSizeLimit(absoluteFilePath, MAX_DIFF_SIZE_BYTES)) {
|
|
return null
|
|
}
|
|
const content = await readFile(absoluteFilePath, 'utf-8')
|
|
const lines = content.split('\n')
|
|
// Remove trailing empty line from split if file ends with newline
|
|
if (lines.length > 0 && lines.at(-1) === '') {
|
|
lines.pop()
|
|
}
|
|
const lineCount = lines.length
|
|
const addedLines = lines.map(line => `+${line}`).join('\n')
|
|
const patch = `@@ -0,0 +1,${lineCount} @@\n${addedLines}`
|
|
return {
|
|
filename: gitPath,
|
|
status: 'added',
|
|
additions: lineCount,
|
|
deletions: 0,
|
|
changes: lineCount,
|
|
patch,
|
|
}
|
|
} catch {
|
|
return null
|
|
}
|
|
}
|