import { feature } from 'bun:bundle' import type { UUID } from 'crypto' import type { Dirent } from 'fs' // Sync fs primitives for readFileTailSync — separate from fs/promises // imports above. Named (not wildcard) per CLAUDE.md style; no collisions // with the async-suffixed names. import { closeSync, fstatSync, openSync, readSync } from 'fs' import { appendFile as fsAppendFile, open as fsOpen, mkdir, readdir, readFile, stat, unlink, writeFile, } from 'fs/promises' import memoize from 'lodash-es/memoize.js' import { basename, dirname, join } from 'path' import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, logEvent, } from 'src/services/analytics/index.js' import { getOriginalCwd, getPlanSlugCache, getPromptId, getSessionId, getSessionProjectDir, isSessionPersistenceDisabled, switchSession, } from '../bootstrap/state.js' import { builtInCommandNames } from '../commands.js' import { COMMAND_NAME_TAG, TICK_TAG } from '../constants/xml.js' import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' import * as sessionIngress from '../services/api/sessionIngress.js' import { REPL_TOOL_NAME } from '../tools/REPLTool/constants.js' import { type AgentId, asAgentId, asSessionId, type SessionId, } from '../types/ids.js' import type { AttributionSnapshotMessage } from '../types/logs.js' import { type ContentReplacementEntry, type ContextCollapseCommitEntry, type ContextCollapseSnapshotEntry, type Entry, type FileHistorySnapshotMessage, type LogOption, type PersistedWorktreeSession, type SerializedMessage, sortLogs, type TranscriptMessage, } from '../types/logs.js' import type { AssistantMessage, AttachmentMessage, Message, SystemCompactBoundaryMessage, SystemMessage, UserMessage, } from '../types/message.js' import type { QueueOperationMessage } from '../types/messageQueueTypes.js' import { uniq } from './array.js' import { registerCleanup } from './cleanupRegistry.js' import { updateSessionName } from 
'./concurrentSessions.js' import { getCwd } from './cwd.js' import { logForDebugging } from './debug.js' import { logForDiagnosticsNoPII } from './diagLogs.js' import { getClaudeConfigHomeDir, isEnvTruthy } from './envUtils.js' import { isFsInaccessible } from './errors.js' import type { FileHistorySnapshot } from './fileHistory.js' import { formatFileSize } from './format.js' import { getFsImplementation } from './fsOperations.js' import { getWorktreePaths } from './getWorktreePaths.js' import { getBranch } from './git.js' import { gracefulShutdownSync, isShuttingDown } from './gracefulShutdown.js' import { parseJSONL } from './json.js' import { logError } from './log.js' import { extractTag, isCompactBoundaryMessage } from './messages.js' import { sanitizePath } from './path.js' import { extractJsonStringField, extractLastJsonStringField, LITE_READ_BUF_SIZE, readHeadAndTail, readTranscriptForLoad, SKIP_PRECOMPACT_THRESHOLD, } from './sessionStoragePortable.js' import { getSettings_DEPRECATED } from './settings/settings.js' import { jsonParse, jsonStringify } from './slowOperations.js' import type { ContentReplacementRecord } from './toolResultStorage.js' import { validateUuid } from './uuid.js' // Cache MACRO.VERSION at module level to work around bun --define bug in async contexts // See: https://github.com/oven-sh/bun/issues/26168 const VERSION = typeof MACRO !== 'undefined' ? MACRO.VERSION : 'unknown' type Transcript = ( | UserMessage | AssistantMessage | AttachmentMessage | SystemMessage )[] // Use getOriginalCwd() at each call site instead of capturing at module load // time. getCwd() at import time may run before bootstrap resolves symlinks via // realpathSync, causing a different sanitized project directory than what // getOriginalCwd() returns after bootstrap. This split-brain made sessions // saved under one path invisible when loaded via the other. /** * Pre-compiled regex to skip non-meaningful messages when extracting first prompt. 
* Matches anything starting with a lowercase XML-like tag (IDE context, hook * output, task notifications, channel messages, etc.) or a synthetic interrupt * marker. Kept in sync with sessionStoragePortable.ts — generic pattern avoids * an ever-growing allowlist that falls behind as new notification types ship. */ // 50MB — prevents OOM in the tombstone slow path which reads + rewrites the // entire session file. Session files can grow to multiple GB (inc-3930). const MAX_TOMBSTONE_REWRITE_BYTES = 50 * 1024 * 1024 const SKIP_FIRST_PROMPT_PATTERN = /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/ /** * Type guard to check if an entry is a transcript message. * Transcript messages include user, assistant, attachment, and system messages. * IMPORTANT: This is the single source of truth for what constitutes a transcript message. * loadTranscriptFile() uses this to determine which messages to load into the chain. * * Progress messages are NOT transcript messages. They are ephemeral UI state * and must not be persisted to the JSONL or participate in the parentUuid * chain. Including them caused chain forks that orphaned real conversation * messages on resume (see #14373, #23537). */ export function isTranscriptMessage(entry: Entry): entry is TranscriptMessage { return ( entry.type === 'user' || entry.type === 'assistant' || entry.type === 'attachment' || entry.type === 'system' ) } /** * Entries that participate in the parentUuid chain. Used on the write path * (insertMessageChain, useLogMessages) to skip progress when assigning * parentUuid. Old transcripts with progress already in the chain are handled * by the progressBridge rewrite in loadTranscriptFile. */ export function isChainParticipant(m: Pick): boolean { return m.type !== 'progress' } type LegacyProgressEntry = { type: 'progress' uuid: UUID parentUuid: UUID | null } /** * Progress entries in transcripts written before PR #24099. 
They are not * in the Entry type union anymore but still exist on disk with uuid and * parentUuid fields. loadTranscriptFile bridges the chain across them. */ function isLegacyProgressEntry(entry: unknown): entry is LegacyProgressEntry { return ( typeof entry === 'object' && entry !== null && 'type' in entry && entry.type === 'progress' && 'uuid' in entry && typeof entry.uuid === 'string' ) } /** * High-frequency tool progress ticks (1/sec for Sleep, per-chunk for Bash). * These are UI-only: not sent to the API, not rendered after the tool * completes. Used by REPL.tsx to replace-in-place instead of appending, and * by loadTranscriptFile to skip legacy entries from old transcripts. */ const EPHEMERAL_PROGRESS_TYPES = new Set([ 'bash_progress', 'powershell_progress', 'mcp_progress', ...(feature('PROACTIVE') || feature('KAIROS') ? (['sleep_progress'] as const) : []), ]) export function isEphemeralToolProgress(dataType: unknown): boolean { return typeof dataType === 'string' && EPHEMERAL_PROGRESS_TYPES.has(dataType) } export function getProjectsDir(): string { return join(getClaudeConfigHomeDir(), 'projects') } export function getTranscriptPath(): string { const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) return join(projectDir, `${getSessionId()}.jsonl`) } export function getTranscriptPathForSession(sessionId: string): string { // When asking for the CURRENT session's transcript, honor sessionProjectDir // the same way getTranscriptPath() does. Without this, hooks get a // transcript_path computed from originalCwd while the actual file was // written to sessionProjectDir (set by switchActiveSession on resume/branch) // — different directories, so the hook sees MISSING (gh-30217). CC-34 // made sessionId + sessionProjectDir atomic precisely to prevent this // kind of drift; this function just wasn't updated to read both. // // For OTHER session IDs we can only guess via originalCwd — we don't // track a sessionId→projectDir map. 
Callers wanting a specific other // session's path should pass fullPath explicitly (most save* functions // already accept this). if (sessionId === getSessionId()) { return getTranscriptPath() } const projectDir = getProjectDir(getOriginalCwd()) return join(projectDir, `${sessionId}.jsonl`) } // 50 MB — session JSONL can grow to multiple GB (inc-3930). Callers that // read the raw transcript must bail out above this threshold to avoid OOM. export const MAX_TRANSCRIPT_READ_BYTES = 50 * 1024 * 1024 // In-memory map of agentId → subdirectory for grouping related subagent // transcripts (e.g. workflow runs write to subagents/workflows//). // Populated before the agent runs; consulted by getAgentTranscriptPath. const agentTranscriptSubdirs = new Map() export function setAgentTranscriptSubdir( agentId: string, subdir: string, ): void { agentTranscriptSubdirs.set(agentId, subdir) } export function clearAgentTranscriptSubdir(agentId: string): void { agentTranscriptSubdirs.delete(agentId) } export function getAgentTranscriptPath(agentId: AgentId): string { // Same sessionProjectDir consistency as getTranscriptPathForSession — // subagent transcripts live under the session dir, so if the session // transcript is at sessionProjectDir, subagent transcripts are too. const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) const sessionId = getSessionId() const subdir = agentTranscriptSubdirs.get(agentId) const base = subdir ? join(projectDir, sessionId, 'subagents', subdir) : join(projectDir, sessionId, 'subagents') return join(base, `agent-${agentId}.jsonl`) } function getAgentMetadataPath(agentId: AgentId): string { return getAgentTranscriptPath(agentId).replace(/\.jsonl$/, '.meta.json') } export type AgentMetadata = { agentType: string /** Worktree path if the agent was spawned with isolation: "worktree" */ worktreePath?: string /** Original task description from the AgentTool input. 
Persisted so a * resumed agent's notification can show the original description instead * of a placeholder. Optional — older metadata files lack this field. */ description?: string } /** * Persist the agentType used to launch a subagent. Read by resume to * route correctly when subagent_type is omitted — without this, resuming * a fork silently degrades to general-purpose (4KB system prompt, no * inherited history). Sidecar file avoids JSONL schema changes. * * Also stores the worktreePath when the agent was spawned with worktree * isolation, enabling resume to restore the correct cwd. */ export async function writeAgentMetadata( agentId: AgentId, metadata: AgentMetadata, ): Promise { const path = getAgentMetadataPath(agentId) await mkdir(dirname(path), { recursive: true }) await writeFile(path, JSON.stringify(metadata)) } export async function readAgentMetadata( agentId: AgentId, ): Promise { const path = getAgentMetadataPath(agentId) try { const raw = await readFile(path, 'utf-8') return JSON.parse(raw) as AgentMetadata } catch (e) { if (isFsInaccessible(e)) return null throw e } } export type RemoteAgentMetadata = { taskId: string remoteTaskType: string /** CCR session ID — used to fetch live status from the Sessions API on resume. */ sessionId: string title: string command: string spawnedAt: number toolUseId?: string isLongRunning?: boolean isUltraplan?: boolean isRemoteReview?: boolean remoteTaskMetadata?: Record } function getRemoteAgentsDir(): string { // Same sessionProjectDir fallback as getAgentTranscriptPath — the project // dir (containing the .jsonl), not the session dir, so sessionId is joined. const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) return join(projectDir, getSessionId(), 'remote-agents') } function getRemoteAgentMetadataPath(taskId: string): string { return join(getRemoteAgentsDir(), `remote-agent-${taskId}.meta.json`) } /** * Persist metadata for a remote-agent task so it can be restored on session * resume. 
Per-task sidecar file (sibling dir to subagents/) survives * hydrateSessionFromRemote's .jsonl wipe; status is always fetched fresh * from CCR on restore — only identity is persisted locally. */ export async function writeRemoteAgentMetadata( taskId: string, metadata: RemoteAgentMetadata, ): Promise { const path = getRemoteAgentMetadataPath(taskId) await mkdir(dirname(path), { recursive: true }) await writeFile(path, JSON.stringify(metadata)) } export async function readRemoteAgentMetadata( taskId: string, ): Promise { const path = getRemoteAgentMetadataPath(taskId) try { const raw = await readFile(path, 'utf-8') return JSON.parse(raw) as RemoteAgentMetadata } catch (e) { if (isFsInaccessible(e)) return null throw e } } export async function deleteRemoteAgentMetadata(taskId: string): Promise { const path = getRemoteAgentMetadataPath(taskId) try { await unlink(path) } catch (e) { if (isFsInaccessible(e)) return throw e } } /** * Scan the remote-agents/ directory for all persisted metadata files. * Used by restoreRemoteAgentTasks to reconnect to still-running CCR sessions. */ export async function listRemoteAgentMetadata(): Promise< RemoteAgentMetadata[] > { const dir = getRemoteAgentsDir() let entries: Dirent[] try { entries = await readdir(dir, { withFileTypes: true }) } catch (e) { if (isFsInaccessible(e)) return [] throw e } const results: RemoteAgentMetadata[] = [] for (const entry of entries) { if (!entry.isFile() || !entry.name.endsWith('.meta.json')) continue try { const raw = await readFile(join(dir, entry.name), 'utf-8') results.push(JSON.parse(raw) as RemoteAgentMetadata) } catch (e) { // Skip unreadable or corrupt files — a partial write from a crashed // fire-and-forget persist shouldn't take down the whole restore. 
logForDebugging( `listRemoteAgentMetadata: skipping ${entry.name}: ${String(e)}`, ) } } return results } export function sessionIdExists(sessionId: string): boolean { const projectDir = getProjectDir(getOriginalCwd()) const sessionFile = join(projectDir, `${sessionId}.jsonl`) const fs = getFsImplementation() try { fs.statSync(sessionFile) return true } catch { return false } } // exported for testing export function getNodeEnv(): string { return process.env.NODE_ENV || 'development' } // exported for testing export function getUserType(): string { return process.env.USER_TYPE || 'external' } function getEntrypoint(): string | undefined { return process.env.CLAUDE_CODE_ENTRYPOINT } export function isCustomTitleEnabled(): boolean { return true } // Memoized: called 12+ times per turn via hooks.ts createBaseHookInput // (PostToolUse path, 5×/turn) + various save* functions. Input is a cwd // string; homedir/env/regex are all session-invariant so the result is // stable for a given input. Worktree switches just change the key — no // cache clear needed. export const getProjectDir = memoize((projectDir: string): string => { return join(getProjectsDir(), sanitizePath(projectDir)) }) let project: Project | null = null let cleanupRegistered = false function getProject(): Project { if (!project) { project = new Project() // Register flush as a cleanup handler (only once) if (!cleanupRegistered) { registerCleanup(async () => { // Flush queued writes first, then re-append session metadata // (customTitle, tag) so they always appear in the last 64KB tail // window. readLiteMetadata only reads the tail to extract these // fields — if enough messages are appended after a /rename, the // custom-title entry gets pushed outside the window and --resume // shows the auto-generated firstPrompt instead. 
await project?.flush() try { project?.reAppendSessionMetadata() } catch { // Best-effort — don't let metadata re-append crash the cleanup } }) cleanupRegistered = true } } return project } /** * Reset the Project singleton's flush state for testing. * This ensures tests don't interfere with each other via shared counter state. */ export function resetProjectFlushStateForTesting(): void { project?._resetFlushState() } /** * Reset the entire Project singleton for testing. * This ensures tests with different CLAUDE_CONFIG_DIR values * don't share stale sessionFile paths. */ export function resetProjectForTesting(): void { project = null } export function setSessionFileForTesting(path: string): void { getProject().sessionFile = path } type InternalEventWriter = ( eventType: string, payload: Record, options?: { isCompaction?: boolean; agentId?: string }, ) => Promise /** * Register a CCR v2 internal event writer for transcript persistence. * When set, transcript messages are written as internal worker events * instead of going through v1 Session Ingress. */ export function setInternalEventWriter(writer: InternalEventWriter): void { getProject().setInternalEventWriter(writer) } type InternalEventReader = () => Promise< { payload: Record; agent_id?: string }[] | null > /** * Register a CCR v2 internal event reader for session resume. * When set, hydrateFromCCRv2InternalEvents() can fetch foreground and * subagent internal events to reconstruct conversation state on reconnection. */ export function setInternalEventReader( reader: InternalEventReader, subagentReader: InternalEventReader, ): void { getProject().setInternalEventReader(reader) getProject().setInternalSubagentEventReader(subagentReader) } /** * Set the remote ingress URL on the current Project for testing. * This simulates what hydrateRemoteSession does in production. 
*/ export function setRemoteIngressUrlForTesting(url: string): void { getProject().setRemoteIngressUrl(url) } const REMOTE_FLUSH_INTERVAL_MS = 10 class Project { // Minimal cache for current session only (not all sessions) currentSessionTag: string | undefined currentSessionTitle: string | undefined currentSessionAgentName: string | undefined currentSessionAgentColor: string | undefined currentSessionLastPrompt: string | undefined currentSessionAgentSetting: string | undefined currentSessionMode: 'coordinator' | 'normal' | undefined // Tri-state: undefined = never touched (don't write), null = exited worktree, // object = currently in worktree. reAppendSessionMetadata writes null so // --resume knows the session exited (vs. crashed while inside). currentSessionWorktree: PersistedWorktreeSession | null | undefined currentSessionPrNumber: number | undefined currentSessionPrUrl: string | undefined currentSessionPrRepository: string | undefined sessionFile: string | null = null // Entries buffered while sessionFile is null. Flushed by materializeSessionFile // on the first user/assistant message — prevents metadata-only session files. private pendingEntries: Entry[] = [] private remoteIngressUrl: string | null = null private internalEventWriter: InternalEventWriter | null = null private internalEventReader: InternalEventReader | null = null private internalSubagentEventReader: InternalEventReader | null = null private pendingWriteCount: number = 0 private flushResolvers: Array<() => void> = [] // Per-file write queues. Each entry carries a resolve callback so // callers of enqueueWrite can optionally await their specific write. private writeQueues = new Map< string, Array<{ entry: Entry; resolve: () => void }> >() private flushTimer: ReturnType | null = null private activeDrain: Promise | null = null private FLUSH_INTERVAL_MS = 100 private readonly MAX_CHUNK_BYTES = 100 * 1024 * 1024 constructor() {} /** @internal Reset flush/queue state for testing. 
*/ _resetFlushState(): void { this.pendingWriteCount = 0 this.flushResolvers = [] if (this.flushTimer) clearTimeout(this.flushTimer) this.flushTimer = null this.activeDrain = null this.writeQueues = new Map() } private incrementPendingWrites(): void { this.pendingWriteCount++ } private decrementPendingWrites(): void { this.pendingWriteCount-- if (this.pendingWriteCount === 0) { // Resolve all waiting flush promises for (const resolve of this.flushResolvers) { resolve() } this.flushResolvers = [] } } private async trackWrite(fn: () => Promise): Promise { this.incrementPendingWrites() try { return await fn() } finally { this.decrementPendingWrites() } } private enqueueWrite(filePath: string, entry: Entry): Promise { return new Promise(resolve => { let queue = this.writeQueues.get(filePath) if (!queue) { queue = [] this.writeQueues.set(filePath, queue) } queue.push({ entry, resolve }) this.scheduleDrain() }) } private scheduleDrain(): void { if (this.flushTimer) { return } this.flushTimer = setTimeout(async () => { this.flushTimer = null this.activeDrain = this.drainWriteQueue() await this.activeDrain this.activeDrain = null // If more items arrived during drain, schedule again if (this.writeQueues.size > 0) { this.scheduleDrain() } }, this.FLUSH_INTERVAL_MS) } private async appendToFile(filePath: string, data: string): Promise { try { await fsAppendFile(filePath, data, { mode: 0o600 }) } catch { // Directory may not exist — some NFS-like filesystems return // unexpected error codes, so don't discriminate on code. 
await mkdir(dirname(filePath), { recursive: true, mode: 0o700 }) await fsAppendFile(filePath, data, { mode: 0o600 }) } } private async drainWriteQueue(): Promise { for (const [filePath, queue] of this.writeQueues) { if (queue.length === 0) { continue } const batch = queue.splice(0) let content = '' const resolvers: Array<() => void> = [] for (const { entry, resolve } of batch) { const line = jsonStringify(entry) + '\n' if (content.length + line.length >= this.MAX_CHUNK_BYTES) { // Flush chunk and resolve its entries before starting a new one await this.appendToFile(filePath, content) for (const r of resolvers) { r() } resolvers.length = 0 content = '' } content += line resolvers.push(resolve) } if (content.length > 0) { await this.appendToFile(filePath, content) for (const r of resolvers) { r() } } } // Clean up empty queues for (const [filePath, queue] of this.writeQueues) { if (queue.length === 0) { this.writeQueues.delete(filePath) } } } resetSessionFile(): void { this.sessionFile = null this.pendingEntries = [] } /** * Re-append cached session metadata to the end of the transcript file. * This ensures metadata stays within the tail window that readLiteMetadata * reads during progressive loading. * * Called from two contexts with different file-ordering implications: * - During compaction (compact.ts, reactiveCompact.ts): writes metadata * just before the boundary marker is emitted - these entries end up * before the boundary and are recovered by scanPreBoundaryMetadata. * - On session exit (cleanup handler): writes metadata at EOF after all * boundaries - this is what enables loadTranscriptFile's pre-compact * skip to find metadata without a forward scan. * * External-writer safety for SDK-mutable fields (custom-title, tag): * before re-appending, refresh the cache from the tail scan window. If an * external process (SDK renameSession/tagSession) wrote a fresher value, * our stale cache absorbs it and the re-append below persists it — not * the stale CLI value. 
If no entry is in the tail (evicted, or never * written by the SDK), the cache is the only source of truth and is * re-appended as-is. * * Re-append is unconditional (even when the value is already in the * tail): during compaction, a title 40KB from EOF is inside the current * tail window but will fall out once the post-compaction session grows. * Skipping the re-append would defeat the purpose of this call. Fields * the SDK cannot touch (last-prompt, agent-*, mode, pr-link) have no * external-writer concern — their caches are authoritative. */ reAppendSessionMetadata(skipTitleRefresh = false): void { if (!this.sessionFile) return const sessionId = getSessionId() as UUID if (!sessionId) return // One sync tail read to refresh SDK-mutable fields. Same // LITE_READ_BUF_SIZE window readLiteMetadata uses. Empty string on // failure → extract returns null → cache is the only source of truth. const tail = readFileTailSync(this.sessionFile) // Absorb any fresher SDK-written title/tag into our cache. If the SDK // wrote while we had the session open, our cache is stale — the tail // value is authoritative. If the tail has nothing (evicted or never // written externally), the cache stands. // // Filter with startsWith to match only top-level JSONL entries (col 0) // and not "type":"tag" appearing inside a nested tool_use input that // happens to be JSON-serialized into a message. const tailLines = tail.split('\n') if (!skipTitleRefresh) { const titleLine = tailLines.findLast(l => l.startsWith('{"type":"custom-title"'), ) if (titleLine) { const tailTitle = extractLastJsonStringField(titleLine, 'customTitle') // `!== undefined` distinguishes no-match from empty-string match. // renameSession rejects empty titles, but the CLI is defensive: an // external writer with customTitle:"" should clear the cache so the // re-append below skips it (instead of resurrecting a stale title). 
if (tailTitle !== undefined) { this.currentSessionTitle = tailTitle || undefined } } } const tagLine = tailLines.findLast(l => l.startsWith('{"type":"tag"')) if (tagLine) { const tailTag = extractLastJsonStringField(tagLine, 'tag') // Same: tagSession(id, null) writes `tag:""` to clear. if (tailTag !== undefined) { this.currentSessionTag = tailTag || undefined } } // lastPrompt is re-appended so readLiteMetadata can show what the // user was most recently doing. Written first so customTitle/tag/etc // land closer to EOF (they're the more critical fields for tail reads). if (this.currentSessionLastPrompt) { appendEntryToFile(this.sessionFile, { type: 'last-prompt', lastPrompt: this.currentSessionLastPrompt, sessionId, }) } // Unconditional: cache was refreshed from tail above; re-append keeps // the entry at EOF so compaction-pushed content doesn't evict it. if (this.currentSessionTitle) { appendEntryToFile(this.sessionFile, { type: 'custom-title', customTitle: this.currentSessionTitle, sessionId, }) } if (this.currentSessionTag) { appendEntryToFile(this.sessionFile, { type: 'tag', tag: this.currentSessionTag, sessionId, }) } if (this.currentSessionAgentName) { appendEntryToFile(this.sessionFile, { type: 'agent-name', agentName: this.currentSessionAgentName, sessionId, }) } if (this.currentSessionAgentColor) { appendEntryToFile(this.sessionFile, { type: 'agent-color', agentColor: this.currentSessionAgentColor, sessionId, }) } if (this.currentSessionAgentSetting) { appendEntryToFile(this.sessionFile, { type: 'agent-setting', agentSetting: this.currentSessionAgentSetting, sessionId, }) } if (this.currentSessionMode) { appendEntryToFile(this.sessionFile, { type: 'mode', mode: this.currentSessionMode, sessionId, }) } if (this.currentSessionWorktree !== undefined) { appendEntryToFile(this.sessionFile, { type: 'worktree-state', worktreeSession: this.currentSessionWorktree, sessionId, }) } if ( this.currentSessionPrNumber !== undefined && this.currentSessionPrUrl && 
this.currentSessionPrRepository ) { appendEntryToFile(this.sessionFile, { type: 'pr-link', sessionId, prNumber: this.currentSessionPrNumber, prUrl: this.currentSessionPrUrl, prRepository: this.currentSessionPrRepository, timestamp: new Date().toISOString(), }) } } async flush(): Promise { // Cancel pending timer if (this.flushTimer) { clearTimeout(this.flushTimer) this.flushTimer = null } // Wait for any in-flight drain to finish if (this.activeDrain) { await this.activeDrain } // Drain anything remaining in the queues await this.drainWriteQueue() // Wait for non-queue tracked operations (e.g. removeMessageByUuid) if (this.pendingWriteCount === 0) { return } return new Promise(resolve => { this.flushResolvers.push(resolve) }) } /** * Remove a message from the transcript by UUID. * Used for tombstoning orphaned messages from failed streaming attempts. * * The target is almost always the most recently appended entry, so we * read only the tail, locate the line, and splice it out with a * positional write + truncate instead of rewriting the whole file. */ async removeMessageByUuid(targetUuid: UUID): Promise { return this.trackWrite(async () => { if (this.sessionFile === null) return try { let fileSize = 0 const fh = await fsOpen(this.sessionFile, 'r+') try { const { size } = await fh.stat() fileSize = size if (size === 0) return const chunkLen = Math.min(size, LITE_READ_BUF_SIZE) const tailStart = size - chunkLen const buf = Buffer.allocUnsafe(chunkLen) const { bytesRead } = await fh.read(buf, 0, chunkLen, tailStart) const tail = buf.subarray(0, bytesRead) // Entries are serialized via JSON.stringify (no key-value // whitespace). Search for the full `"uuid":"..."` pattern, not // just the bare UUID, so we do not match the same value sitting // in `parentUuid` of a child entry. UUIDs are pure ASCII so a // byte-level search is correct. 
const needle = `"uuid":"${targetUuid}"` const matchIdx = tail.lastIndexOf(needle) if (matchIdx >= 0) { // 0x0a never appears inside a UTF-8 multi-byte sequence, so // byte-scanning for line boundaries is safe even if the chunk // starts mid-character. const prevNl = tail.lastIndexOf(0x0a, matchIdx) // If the preceding newline is outside our chunk and we did not // read from the start of the file, the line is longer than the // window - fall through to the slow path. if (prevNl >= 0 || tailStart === 0) { const lineStart = prevNl + 1 // 0 when prevNl === -1 const nextNl = tail.indexOf(0x0a, matchIdx + needle.length) const lineEnd = nextNl >= 0 ? nextNl + 1 : bytesRead const absLineStart = tailStart + lineStart const afterLen = bytesRead - lineEnd // Truncate first, then re-append the trailing lines. In the // common case (target is the last entry) afterLen is 0 and // this is a single ftruncate. await fh.truncate(absLineStart) if (afterLen > 0) { await fh.write(tail, lineEnd, afterLen, absLineStart) } return } } } finally { await fh.close() } // Slow path: target was not in the last 64KB. Rare - requires many // large entries to have landed between the write and the tombstone. if (fileSize > MAX_TOMBSTONE_REWRITE_BYTES) { logForDebugging( `Skipping tombstone removal: session file too large (${formatFileSize(fileSize)})`, { level: 'warn' }, ) return } const content = await readFile(this.sessionFile, { encoding: 'utf-8' }) const lines = content.split('\n').filter((line: string) => { if (!line.trim()) return true try { const entry = jsonParse(line) return entry.uuid !== targetUuid } catch { return true // Keep malformed lines } }) await writeFile(this.sessionFile, lines.join('\n'), { encoding: 'utf8', }) } catch { // Silently ignore errors - the file might not exist yet } }) } /** * True when test env / cleanupPeriodDays=0 / --no-session-persistence / * CLAUDE_CODE_SKIP_PROMPT_HISTORY should suppress all transcript writes. 
* Shared guard for appendEntry and materializeSessionFile so both skip * consistently. The env var is set by tmuxSocket.ts so Tungsten-spawned * test sessions don't pollute the user's --resume list. */ private shouldSkipPersistence(): boolean { const allowTestPersistence = isEnvTruthy( process.env.TEST_ENABLE_SESSION_PERSISTENCE, ) return ( (getNodeEnv() === 'test' && !allowTestPersistence) || getSettings_DEPRECATED()?.cleanupPeriodDays === 0 || isSessionPersistenceDisabled() || isEnvTruthy(process.env.CLAUDE_CODE_SKIP_PROMPT_HISTORY) ) } /** * Create the session file, write cached startup metadata, and flush * buffered entries. Called on the first user/assistant message. */ private async materializeSessionFile(): Promise { // Guard here too — reAppendSessionMetadata writes via appendEntryToFile // (not appendEntry) so it would bypass the per-entry persistence check // and create a metadata-only file despite --no-session-persistence. if (this.shouldSkipPersistence()) return this.ensureCurrentSessionFile() // mode/agentSetting are cache-only pre-materialization; write them now. this.reAppendSessionMetadata() if (this.pendingEntries.length > 0) { const buffered = this.pendingEntries this.pendingEntries = [] for (const entry of buffered) { await this.appendEntry(entry) } } } async insertMessageChain( messages: Transcript, isSidechain: boolean = false, agentId?: string, startingParentUuid?: UUID | null, teamInfo?: { teamName?: string; agentName?: string }, ) { return this.trackWrite(async () => { let parentUuid: UUID | null = startingParentUuid ?? null // First user/assistant message materializes the session file. // Hook progress/attachment messages alone stay buffered. 
if ( this.sessionFile === null && messages.some(m => m.type === 'user' || m.type === 'assistant') ) { await this.materializeSessionFile() } // Get current git branch once for this message chain let gitBranch: string | undefined try { gitBranch = await getBranch() } catch { // Not in a git repo or git command failed gitBranch = undefined } // Get slug if one exists for this session (used for plan files, etc.) const sessionId = getSessionId() const slug = getPlanSlugCache().get(sessionId) for (const message of messages) { const isCompactBoundary = isCompactBoundaryMessage(message) // For tool_result messages, use the assistant message UUID from the message // if available (set at creation time), otherwise fall back to sequential parent let effectiveParentUuid = parentUuid if ( message.type === 'user' && 'sourceToolAssistantUUID' in message && message.sourceToolAssistantUUID ) { effectiveParentUuid = message.sourceToolAssistantUUID } const transcriptMessage: TranscriptMessage = { parentUuid: isCompactBoundary ? null : effectiveParentUuid, logicalParentUuid: isCompactBoundary ? parentUuid : undefined, isSidechain, teamName: teamInfo?.teamName, agentName: teamInfo?.agentName, promptId: message.type === 'user' ? (getPromptId() ?? undefined) : undefined, agentId, ...message, // Session-stamp fields MUST come after the spread. On --fork-session // and --resume, messages arrive as SerializedMessage (carries source // sessionId/cwd/etc. because removeExtraFields only strips parentUuid // and isSidechain). If sessionId isn't re-stamped, FRESH.jsonl ends up // with messages stamped sessionId=A but content-replacement entries // stamped sessionId=FRESH (from insertContentReplacement), and // loadFullLog's sessionId-keyed contentReplacements lookup misses → // replacement records lost → FROZEN misclassification. 
        // Environment stamps for this entry.
        userType: getUserType(),
        entrypoint: getEntrypoint(),
        cwd: getCwd(),
        sessionId,
        version: VERSION,
        gitBranch,
        slug,
      }
      await this.appendEntry(transcriptMessage)
      // Advance the parent cursor only past chain participants so the
      // next message links to the last real chain node.
      if (isChainParticipant(message)) {
        parentUuid = message.uuid
      }
    }
    // Cache this turn's user prompt for reAppendSessionMetadata —
    // the --resume picker shows what the user was last doing.
    // Overwritten every turn by design.
    if (!isSidechain) {
      const text = getFirstMeaningfulUserMessageTextContent(messages)
      if (text) {
        // Flatten newlines and cap at 200 chars for single-line display.
        const flat = text.replace(/\n/g, ' ').trim()
        this.currentSessionLastPrompt =
          flat.length > 200 ? flat.slice(0, 200).trim() + '…' : flat
      }
    }
  })
}

// Persist a file-history snapshot entry keyed by the triggering message.
async insertFileHistorySnapshot(
  messageId: UUID,
  snapshot: FileHistorySnapshot,
  isSnapshotUpdate: boolean,
) {
  return this.trackWrite(async () => {
    const fileHistoryMessage: FileHistorySnapshotMessage = {
      type: 'file-history-snapshot',
      messageId,
      snapshot,
      isSnapshotUpdate,
    }
    await this.appendEntry(fileHistoryMessage)
  })
}

// Persist a queue-operation entry verbatim.
async insertQueueOperation(queueOp: QueueOperationMessage) {
  return this.trackWrite(async () => {
    await this.appendEntry(queueOp)
  })
}

// Persist an attribution snapshot entry verbatim.
async insertAttributionSnapshot(snapshot: AttributionSnapshotMessage) {
  return this.trackWrite(async () => {
    await this.appendEntry(snapshot)
  })
}

// Persist content-replacement records, stamped with the current session.
async insertContentReplacement(
  replacements: ContentReplacementRecord[],
  agentId?: AgentId,
) {
  return this.trackWrite(async () => {
    const entry: ContentReplacementEntry = {
      type: 'content-replacement',
      sessionId: getSessionId() as UUID,
      agentId,
      replacements,
    }
    await this.appendEntry(entry)
  })
}

// Route an entry to the right transcript file (current session, another
// session's file, or an agent sidechain file) and enqueue the write.
async appendEntry(entry: Entry, sessionId: UUID = getSessionId() as UUID) {
  if (this.shouldSkipPersistence()) {
    return
  }
  const currentSessionId = getSessionId() as UUID
  const isCurrentSession = sessionId === currentSessionId
  let sessionFile: string
  if (isCurrentSession) {
    // Buffer until materializeSessionFile runs (first user/assistant message).
if (this.sessionFile === null) { this.pendingEntries.push(entry) return } sessionFile = this.sessionFile } else { const existing = await this.getExistingSessionFile(sessionId) if (!existing) { logError( new Error( `appendEntry: session file not found for other session ${sessionId}`, ), ) return } sessionFile = existing } // Only load current session messages if needed if (entry.type === 'summary') { // Summaries can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'custom-title') { // Custom titles can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'ai-title') { // AI titles can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'last-prompt') { void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'task-summary') { void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'tag') { // Tags can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'agent-name') { // Agent names can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'agent-color') { // Agent colors can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'agent-setting') { // Agent settings can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'pr-link') { // PR links can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'file-history-snapshot') { // File history snapshots can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'attribution-snapshot') { // Attribution snapshots can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'speculation-accept') { // Speculation accept entries can always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'mode') { // Mode entries can 
always be appended void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'worktree-state') { void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'content-replacement') { // Content replacement records can always be appended. Subagent records // go to the sidechain file (for AgentTool resume); main-thread // records go to the session file (for /resume). const targetFile = entry.agentId ? getAgentTranscriptPath(entry.agentId) : sessionFile void this.enqueueWrite(targetFile, entry) } else if (entry.type === 'marble-origami-commit') { // Always append. Commit order matters for restore (later commits may // reference earlier commits' summary messages), so these must be // written in the order received and read back sequentially. void this.enqueueWrite(sessionFile, entry) } else if (entry.type === 'marble-origami-snapshot') { // Always append. Last-wins on restore — later entries supersede. void this.enqueueWrite(sessionFile, entry) } else { const messageSet = await getSessionMessages(sessionId) if (entry.type === 'queue-operation') { // Queue operations are always appended to the session file void this.enqueueWrite(sessionFile, entry) } else { // At this point, entry must be a TranscriptMessage (user/assistant/attachment/system) // All other entry types have been handled above const isAgentSidechain = entry.isSidechain && entry.agentId !== undefined const targetFile = isAgentSidechain ? getAgentTranscriptPath(asAgentId(entry.agentId!)) : sessionFile // For message entries, check if UUID already exists in current session. // Skip dedup for agent sidechain LOCAL writes — they go to a separate // file, and fork-inherited parent messages share UUIDs with the main // session transcript. Deduping against the main session's set would // drop them, leaving the persisted sidechain transcript incomplete // (resume-of-fork loads a 10KB file instead of the full 85KB inherited // context). 
// // The sidechain bypass applies ONLY to the local file write — remote // persistence (session-ingress) uses a single Last-Uuid chain per // sessionId, so re-POSTing a UUID it already has 409s and eventually // exhausts retries → gracefulShutdownSync(1). See inc-4718. const isNewUuid = !messageSet.has(entry.uuid) if (isAgentSidechain || isNewUuid) { // Enqueue write — appendToFile handles ENOENT by creating directories void this.enqueueWrite(targetFile, entry) if (!isAgentSidechain) { // messageSet is main-file-authoritative. Sidechain entries go to a // separate agent file — adding their UUIDs here causes recordTranscript // to skip them on the main thread (line ~1270), so the message is never // written to the main session file. The next main-thread message then // chains its parentUuid to a UUID that only exists in the agent file, // and --resume's buildConversationChain terminates at the dangling ref. // Same constraint for remote (inc-4718 above): sidechain persisting a // UUID the main thread hasn't written yet → 409 when main writes it. messageSet.add(entry.uuid) if (isTranscriptMessage(entry)) { await this.persistToRemote(sessionId, entry) } } } } } } /** * Loads the sessionFile variable. * Do not need to create session files until they are written to. */ private ensureCurrentSessionFile(): string { if (this.sessionFile === null) { this.sessionFile = getTranscriptPath() } return this.sessionFile } /** * Returns the session file path if it exists, null otherwise. * Used for writing to sessions other than the current one. * Caches positive results so we only stat once per session. 
*/ private existingSessionFiles = new Map() private async getExistingSessionFile( sessionId: UUID, ): Promise { const cached = this.existingSessionFiles.get(sessionId) if (cached) return cached const targetFile = getTranscriptPathForSession(sessionId) try { await stat(targetFile) this.existingSessionFiles.set(sessionId, targetFile) return targetFile } catch (e) { if (isFsInaccessible(e)) return null throw e } } private async persistToRemote(sessionId: UUID, entry: TranscriptMessage) { if (isShuttingDown()) { return } // CCR v2 path: write as internal worker event if (this.internalEventWriter) { try { await this.internalEventWriter( 'transcript', entry as unknown as Record, { ...(isCompactBoundaryMessage(entry) && { isCompaction: true }), ...(entry.agentId && { agentId: entry.agentId }), }, ) } catch { logEvent('tengu_session_persistence_failed', {}) logForDebugging('Failed to write transcript as internal event') } return } // v1 Session Ingress path if ( !isEnvTruthy(process.env.ENABLE_SESSION_PERSISTENCE) || !this.remoteIngressUrl ) { return } const success = await sessionIngress.appendSessionLog( sessionId, entry, this.remoteIngressUrl, ) if (!success) { logEvent('tengu_session_persistence_failed', {}) gracefulShutdownSync(1, 'other') } } setRemoteIngressUrl(url: string): void { this.remoteIngressUrl = url logForDebugging(`Remote persistence enabled with URL: ${url}`) if (url) { // If using CCR, don't delay messages by any more than 10ms. 
this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS } } setInternalEventWriter(writer: InternalEventWriter): void { this.internalEventWriter = writer logForDebugging( 'CCR v2 internal event writer registered for transcript persistence', ) // Use fast flush interval for CCR v2 this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS } setInternalEventReader(reader: InternalEventReader): void { this.internalEventReader = reader logForDebugging( 'CCR v2 internal event reader registered for session resume', ) } setInternalSubagentEventReader(reader: InternalEventReader): void { this.internalSubagentEventReader = reader logForDebugging( 'CCR v2 subagent event reader registered for session resume', ) } getInternalEventReader(): InternalEventReader | null { return this.internalEventReader } getInternalSubagentEventReader(): InternalEventReader | null { return this.internalSubagentEventReader } } export type TeamInfo = { teamName?: string agentName?: string } // Filter out already-recorded messages before passing to insertMessageChain. // Without this, after compaction messagesToKeep (same UUIDs as pre-compact // messages) are dedup-skipped by appendEntry but still advance the parentUuid // cursor in insertMessageChain, causing new messages to chain from pre-compact // UUIDs instead of the post-compact summary — orphaning the compact boundary. // // `startingParentUuidHint`: used by useLogMessages to pass the parent from // the previous incremental slice, avoiding an O(n) scan to rediscover it. // // Skip-tracking: already-recorded messages are tracked as the parent ONLY if // they form a PREFIX (appear before any new message). This handles both cases: // - Growing-array callers (QueryEngine, queryHelpers, LocalMainSessionTask, // trajectory): recorded messages are always a prefix → tracked → correct // parent chain for new messages. 
// - Compaction (useLogMessages): new CB/summary appear FIRST, then recorded // messagesToKeep → not a prefix → not tracked → CB gets parentUuid=null // (correct: truncates --continue chain at compact boundary). export async function recordTranscript( messages: Message[], teamInfo?: TeamInfo, startingParentUuidHint?: UUID, allMessages?: readonly Message[], ): Promise { const cleanedMessages = cleanMessagesForLogging(messages, allMessages) const sessionId = getSessionId() as UUID const messageSet = await getSessionMessages(sessionId) const newMessages: typeof cleanedMessages = [] let startingParentUuid: UUID | undefined = startingParentUuidHint let seenNewMessage = false for (const m of cleanedMessages) { if (messageSet.has(m.uuid as UUID)) { // Only track skipped messages that form a prefix. After compaction, // messagesToKeep appear AFTER new CB/summary, so this skips them. if (!seenNewMessage && isChainParticipant(m)) { startingParentUuid = m.uuid as UUID } } else { newMessages.push(m) seenNewMessage = true } } if (newMessages.length > 0) { await getProject().insertMessageChain( newMessages, false, undefined, startingParentUuid, teamInfo, ) } // Return the last ACTUALLY recorded chain-participant's UUID, OR the // prefix-tracked UUID if no new chain participants were recorded. This lets // callers (useLogMessages) maintain the correct parent chain even when the // slice is all-recorded (rewind, /resume scenarios where every message is // already in messageSet). Progress is skipped — it's written to the JSONL // but nothing chains TO it (see isChainParticipant). const lastRecorded = newMessages.findLast(isChainParticipant) return (lastRecorded?.uuid as UUID | undefined) ?? startingParentUuid ?? 
null } export async function recordSidechainTranscript( messages: Message[], agentId?: string, startingParentUuid?: UUID | null, ) { await getProject().insertMessageChain( cleanMessagesForLogging(messages), true, agentId, startingParentUuid, ) } export async function recordQueueOperation(queueOp: QueueOperationMessage) { await getProject().insertQueueOperation(queueOp) } /** * Remove a message from the transcript by UUID. * Used when a tombstone is received for an orphaned message. */ export async function removeTranscriptMessage(targetUuid: UUID): Promise { await getProject().removeMessageByUuid(targetUuid) } export async function recordFileHistorySnapshot( messageId: UUID, snapshot: FileHistorySnapshot, isSnapshotUpdate: boolean, ) { await getProject().insertFileHistorySnapshot( messageId, snapshot, isSnapshotUpdate, ) } export async function recordAttributionSnapshot( snapshot: AttributionSnapshotMessage, ) { await getProject().insertAttributionSnapshot(snapshot) } export async function recordContentReplacement( replacements: ContentReplacementRecord[], agentId?: AgentId, ) { await getProject().insertContentReplacement(replacements, agentId) } /** * Reset the session file pointer after switchSession/regenerateSessionId. * The new file is created lazily on the first user/assistant message. */ export async function resetSessionFilePointer() { getProject().resetSessionFile() } /** * Adopt the existing session file after --continue/--resume (non-fork). * Call after switchSession + resetSessionFilePointer + restoreSessionMetadata: * getTranscriptPath() now derives the resumed file's path from the switched * sessionId, and the cache holds the final metadata (--name title, resumed * mode/tag/agent). * * Setting sessionFile here — instead of waiting for materializeSessionFile * on the first user message — lets the exit cleanup handler's * reAppendSessionMetadata run (it bails when sessionFile is null). 
Without * this, `-c -n foo` + quit-before-message drops the title on the floor: * the in-memory cache is correct but never written. The resumed file * already exists on disk (we loaded from it), so this can't create an * orphan the way a fresh --name session would. * * skipTitleRefresh: restoreSessionMetadata populated the cache from the * same disk read microseconds ago, so refreshing from the tail here is a * no-op — unless --name was used, in which case it would clobber the fresh * CLI title with the stale disk value. After this write, disk == cache and * later calls (compaction, exit cleanup) absorb SDK writes normally. */ export function adoptResumedSessionFile(): void { const project = getProject() project.sessionFile = getTranscriptPath() project.reAppendSessionMetadata(true) } /** * Append a context-collapse commit entry to the transcript. One entry per * commit, in commit order. On resume these are collected into an ordered * array and handed to restoreFromEntries() which rebuilds the commit log. */ export async function recordContextCollapseCommit(commit: { collapseId: string summaryUuid: string summaryContent: string summary: string firstArchivedUuid: string lastArchivedUuid: string }): Promise { const sessionId = getSessionId() as UUID if (!sessionId) return await getProject().appendEntry({ type: 'marble-origami-commit', sessionId, ...commit, }) } /** * Snapshot the staged queue + spawn state. Written after each ctx-agent * spawn resolves (when staged contents may have changed). Last-wins on * restore — the loader keeps only the most recent snapshot entry. 
*/ export async function recordContextCollapseSnapshot(snapshot: { staged: Array<{ startUuid: string endUuid: string summary: string risk: number stagedAt: number }> armed: boolean lastSpawnTokens: number }): Promise { const sessionId = getSessionId() as UUID if (!sessionId) return await getProject().appendEntry({ type: 'marble-origami-snapshot', sessionId, ...snapshot, }) } export async function flushSessionStorage(): Promise { await getProject().flush() } export async function hydrateRemoteSession( sessionId: string, ingressUrl: string, ): Promise { switchSession(asSessionId(sessionId)) const project = getProject() try { const remoteLogs = (await sessionIngress.getSessionLogs(sessionId, ingressUrl)) || [] // Ensure the project directory and session file exist const projectDir = getProjectDir(getOriginalCwd()) await mkdir(projectDir, { recursive: true, mode: 0o700 }) const sessionFile = getTranscriptPathForSession(sessionId) // Replace local logs with remote logs. writeFile truncates, so no // unlink is needed; an empty remoteLogs array produces an empty file. const content = remoteLogs.map(e => jsonStringify(e) + '\n').join('') await writeFile(sessionFile, content, { encoding: 'utf8', mode: 0o600 }) logForDebugging(`Hydrated ${remoteLogs.length} entries from remote`) return remoteLogs.length > 0 } catch (error) { logForDebugging(`Error hydrating session from remote: ${error}`) logForDiagnosticsNoPII('error', 'hydrate_remote_session_fail') return false } finally { // Set remote ingress URL after hydrating the remote session // to ensure we've always synced with the remote session // prior to enabling persistence project.setRemoteIngressUrl(ingressUrl) } } /** * Hydrate session state from CCR v2 internal events. * Fetches foreground and subagent events via the registered readers, * extracts transcript entries from payloads, and writes them to the * local transcript files (main + per-agent). 
* The server handles compaction filtering — it returns events starting * from the latest compaction boundary. */ export async function hydrateFromCCRv2InternalEvents( sessionId: string, ): Promise { const startMs = Date.now() switchSession(asSessionId(sessionId)) const project = getProject() const reader = project.getInternalEventReader() if (!reader) { logForDebugging('No internal event reader registered for CCR v2 resume') return false } try { // Fetch foreground events const events = await reader() if (!events) { logForDebugging('Failed to read internal events for resume') logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_read_fail') return false } const projectDir = getProjectDir(getOriginalCwd()) await mkdir(projectDir, { recursive: true, mode: 0o700 }) // Write foreground transcript const sessionFile = getTranscriptPathForSession(sessionId) const fgContent = events.map(e => jsonStringify(e.payload) + '\n').join('') await writeFile(sessionFile, fgContent, { encoding: 'utf8', mode: 0o600 }) logForDebugging( `Hydrated ${events.length} foreground entries from CCR v2 internal events`, ) // Fetch and write subagent events let subagentEventCount = 0 const subagentReader = project.getInternalSubagentEventReader() if (subagentReader) { const subagentEvents = await subagentReader() if (subagentEvents && subagentEvents.length > 0) { subagentEventCount = subagentEvents.length // Group by agent_id const byAgent = new Map[]>() for (const e of subagentEvents) { const agentId = e.agent_id || '' if (!agentId) continue let list = byAgent.get(agentId) if (!list) { list = [] byAgent.set(agentId, list) } list.push(e.payload) } // Write each agent's transcript to its own file for (const [agentId, entries] of byAgent) { const agentFile = getAgentTranscriptPath(asAgentId(agentId)) await mkdir(dirname(agentFile), { recursive: true, mode: 0o700 }) const agentContent = entries .map(p => jsonStringify(p) + '\n') .join('') await writeFile(agentFile, agentContent, { encoding: 'utf8', mode: 
0o600, }) } logForDebugging( `Hydrated ${subagentEvents.length} subagent entries across ${byAgent.size} agents`, ) } } logForDiagnosticsNoPII('info', 'hydrate_ccr_v2_completed', { duration_ms: Date.now() - startMs, event_count: events.length, subagent_event_count: subagentEventCount, }) return events.length > 0 } catch (error) { // Re-throw epoch mismatch so the worker doesn't race against gracefulShutdown if ( error instanceof Error && error.message === 'CCRClient: Epoch mismatch (409)' ) { throw error } logForDebugging(`Error hydrating session from CCR v2: ${error}`) logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_fail') return false } } function extractFirstPrompt(transcript: TranscriptMessage[]): string { const textContent = getFirstMeaningfulUserMessageTextContent(transcript) if (textContent) { let result = textContent.replace(/\n/g, ' ').trim() // Store a reasonably long version for display-time truncation // The actual truncation will be applied at display time based on terminal width if (result.length > 200) { result = result.slice(0, 200).trim() + '…' } return result } return 'No prompt' } /** * Gets the last user message that was processed (i.e., before any non-user message appears). * Used to determine if a session has valid user interaction. */ export function getFirstMeaningfulUserMessageTextContent( transcript: T[], ): string | undefined { for (const msg of transcript) { if (msg.type !== 'user' || msg.isMeta) continue // Skip compact summary messages - they should not be treated as the first prompt if ('isCompactSummary' in msg && msg.isCompactSummary) continue const content = msg.message?.content if (!content) continue // Collect all text values. For array content (common in VS Code where // IDE metadata tags come before the user's actual prompt), iterate all // text blocks so we don't miss the real prompt hidden behind // / blocks. 
const texts: string[] = [] if (typeof content === 'string') { texts.push(content) } else if (Array.isArray(content)) { for (const block of content) { if (block.type === 'text' && block.text) { texts.push(block.text) } } } for (const textContent of texts) { if (!textContent) continue const commandNameTag = extractTag(textContent, COMMAND_NAME_TAG) if (commandNameTag) { const commandName = commandNameTag.replace(/^\//, '') // If it's a built-in command, then it's unlikely to provide // meaningful context (e.g. `/model sonnet`) if (builtInCommandNames().has(commandName)) { continue } else { // Otherwise, for custom commands, then keep it only if it has // arguments (e.g. `/review reticulate splines`) const commandArgs = extractTag(textContent, 'command-args')?.trim() if (!commandArgs) { continue } // Return clean formatted command instead of raw XML return `${commandNameTag} ${commandArgs}` } } // Format bash input with ! prefix (as user typed it). Checked before // the generic XML skip so bash-mode sessions get a meaningful title. const bashInput = extractTag(textContent, 'bash-input') if (bashInput) { return `! ${bashInput}` } // Skip non-meaningful messages (local command output, hook output, // autonomous tick prompts, task notifications, pure IDE metadata tags) if (SKIP_FIRST_PROMPT_PATTERN.test(textContent)) { continue } return textContent } } return undefined } export function removeExtraFields( transcript: TranscriptMessage[], ): SerializedMessage[] { return transcript.map(m => { const { isSidechain, parentUuid, ...serializedMessage } = m return serializedMessage }) } /** * Splice the preserved segment back into the chain after compaction. * * Preserved messages exist in the JSONL with their ORIGINAL pre-compact * parentUuids (recordTranscript dedup-skipped them — can't rewrite). * The internal chain (keep[i+1]→keep[i]) is intact; only endpoints need * patching: head→anchor, and anchor's other children→tail. 
Anchor is the * last summary for suffix-preserving, boundary itself for prefix-preserving. * * Only the LAST seg-boundary is relinked — earlier segs were summarized * into it. Everything physically before the absolute-last boundary (except * preservedUuids) is deleted, which handles all multi-boundary shapes * without special-casing. * * Mutates the Map in place. */ function applyPreservedSegmentRelinks( messages: Map, ): void { type Seg = NonNullable< SystemCompactBoundaryMessage['compactMetadata']['preservedSegment'] > // Find the absolute-last boundary and the last seg-boundary (can differ: // manual /compact after reactive compact → seg is stale). let lastSeg: Seg | undefined let lastSegBoundaryIdx = -1 let absoluteLastBoundaryIdx = -1 const entryIndex = new Map() let i = 0 for (const entry of messages.values()) { entryIndex.set(entry.uuid, i) if (isCompactBoundaryMessage(entry)) { absoluteLastBoundaryIdx = i const seg = entry.compactMetadata?.preservedSegment if (seg) { lastSeg = seg lastSegBoundaryIdx = i } } i++ } // No seg anywhere → no-op. findUnresolvedToolUse etc. read the full map. if (!lastSeg) return // Seg stale (no-seg boundary came after): skip relink, still prune at // absolute — otherwise the stale preserved chain becomes a phantom leaf. const segIsLive = lastSegBoundaryIdx === absoluteLastBoundaryIdx // Validate tail→head BEFORE mutating so malformed metadata is a true // no-op (walk stops at headUuid, doesn't need the relink to run first). const preservedUuids = new Set() if (segIsLive) { const walkSeen = new Set() let cur = messages.get(lastSeg.tailUuid) let reachedHead = false while (cur && !walkSeen.has(cur.uuid)) { walkSeen.add(cur.uuid) preservedUuids.add(cur.uuid) if (cur.uuid === lastSeg.headUuid) { reachedHead = true break } cur = cur.parentUuid ? messages.get(cur.parentUuid) : undefined } if (!reachedHead) { // tail→head walk broke — a UUID in the preserved segment isn't in the // transcript. 
Returning here skips the prune below, so resume loads // the full pre-compact history. Known cause: mid-turn-yielded // attachment pushed to mutableMessages but never recordTranscript'd // (SDK subprocess restarted before next turn's qe:420 flush). logEvent('tengu_relink_walk_broken', { tailInTranscript: messages.has(lastSeg.tailUuid), headInTranscript: messages.has(lastSeg.headUuid), anchorInTranscript: messages.has(lastSeg.anchorUuid), walkSteps: walkSeen.size, transcriptSize: messages.size, }) return } } if (segIsLive) { const head = messages.get(lastSeg.headUuid) if (head) { messages.set(lastSeg.headUuid, { ...head, parentUuid: lastSeg.anchorUuid, }) } // Tail-splice: anchor's other children → tail. No-op if already pointing // at tail (the useLogMessages race case). for (const [uuid, msg] of messages) { if (msg.parentUuid === lastSeg.anchorUuid && uuid !== lastSeg.headUuid) { messages.set(uuid, { ...msg, parentUuid: lastSeg.tailUuid }) } } // Zero stale usage: on-disk input_tokens reflect pre-compact context // (~190K) — stripStaleUsage only patched in-memory copies that were // dedup-skipped. Without this, resume → immediate autocompact spiral. for (const uuid of preservedUuids) { const msg = messages.get(uuid) if (msg?.type !== 'assistant') continue messages.set(uuid, { ...msg, message: { ...msg.message, usage: { ...msg.message.usage, input_tokens: 0, output_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0, }, }, }) } } // Prune everything physically before the absolute-last boundary that // isn't preserved. preservedUuids empty when !segIsLive → full prune. const toDelete: UUID[] = [] for (const [uuid] of messages) { const idx = entryIndex.get(uuid) if ( idx !== undefined && idx < absoluteLastBoundaryIdx && !preservedUuids.has(uuid) ) { toDelete.push(uuid) } } for (const uuid of toDelete) messages.delete(uuid) } /** * Delete messages that Snip executions removed from the in-memory array, * and relink parentUuid across the gaps. 
* * Unlike compact_boundary which truncates a prefix, snip removes * middle ranges. The JSONL is append-only, so removed messages stay on disk * and the surviving messages' parentUuid chains walk through them. Without * this filter, buildConversationChain reconstructs the full unsnipped history * and resume immediately PTLs (adamr-20260320-165831: 397K displayed → 1.65M * actual). * * Deleting alone is not enough: the surviving message AFTER a removed range * has parentUuid pointing INTO the gap. buildConversationChain would hit * messages.get(undefined) and stop, orphaning everything before the gap. So * after delete we relink: for each survivor with a dangling parentUuid, walk * backward through the removed region's own parent links to the first * non-removed ancestor. * * The boundary records removedUuids at execution time so we can replay the * exact removal on load. Older boundaries without removedUuids are skipped — * resume loads their pre-snip history (the pre-fix behavior). * * Mutates the Map in place. */ function applySnipRemovals(messages: Map): void { // Structural check — snipMetadata only exists on the boundary subtype. // Avoids the subtype literal which is in excluded-strings.txt // (HISTORY_SNIP is ant-only; the literal must not leak into external builds). type WithSnipMeta = { snipMetadata?: { removedUuids?: UUID[] } } const toDelete = new Set() for (const entry of messages.values()) { const removedUuids = (entry as WithSnipMeta).snipMetadata?.removedUuids if (!removedUuids) continue for (const uuid of removedUuids) toDelete.add(uuid) } if (toDelete.size === 0) return // Capture each to-delete entry's own parentUuid BEFORE deleting so we can // walk backward through contiguous removed ranges. Entries not in the Map // (already absent, e.g. from a prior compact_boundary prune) contribute no // link; the relink walk will stop at the gap and pick up null (chain-root // behavior — same as if compact truncated there, which it did). 
const deletedParent = new Map() let removedCount = 0 for (const uuid of toDelete) { const entry = messages.get(uuid) if (!entry) continue deletedParent.set(uuid, entry.parentUuid) messages.delete(uuid) removedCount++ } // Relink survivors with dangling parentUuid. Walk backward through // deletedParent until we hit a UUID not in toDelete (or null). Path // compression: after resolving, seed the map with the resolved link so // subsequent survivors sharing the same chain segment don't re-walk. const resolve = (start: UUID): UUID | null => { const path: UUID[] = [] let cur: UUID | null | undefined = start while (cur && toDelete.has(cur)) { path.push(cur) cur = deletedParent.get(cur) if (cur === undefined) { cur = null break } } for (const p of path) deletedParent.set(p, cur) return cur } let relinkedCount = 0 for (const [uuid, msg] of messages) { if (!msg.parentUuid || !toDelete.has(msg.parentUuid)) continue messages.set(uuid, { ...msg, parentUuid: resolve(msg.parentUuid) }) relinkedCount++ } logEvent('tengu_snip_resume_filtered', { removed_count: removedCount, relinked_count: relinkedCount, }) } /** * O(n) single-pass: find the message with the latest timestamp matching a predicate. * Replaces the `[...values].filter(pred).sort((a,b) => Date(b)-Date(a))[0]` pattern * which is O(n log n) + 2n Date allocations. 
*/ function findLatestMessage( messages: Iterable, predicate: (m: T) => boolean, ): T | undefined { let latest: T | undefined let maxTime = -Infinity for (const m of messages) { if (!predicate(m)) continue const t = Date.parse(m.timestamp) if (t > maxTime) { maxTime = t latest = m } } return latest } /** * Builds a conversation chain from a leaf message to root * @param messages Map of all messages * @param leafMessage The leaf message to start from * @returns Array of messages from root to leaf */ export function buildConversationChain( messages: Map, leafMessage: TranscriptMessage, ): TranscriptMessage[] { const transcript: TranscriptMessage[] = [] const seen = new Set() let currentMsg: TranscriptMessage | undefined = leafMessage while (currentMsg) { if (seen.has(currentMsg.uuid)) { logError( new Error( `Cycle detected in parentUuid chain at message ${currentMsg.uuid}. Returning partial transcript.`, ), ) logEvent('tengu_chain_parent_cycle', {}) break } seen.add(currentMsg.uuid) transcript.push(currentMsg) currentMsg = currentMsg.parentUuid ? messages.get(currentMsg.parentUuid) : undefined } transcript.reverse() return recoverOrphanedParallelToolResults(messages, transcript, seen) } /** * Post-pass for buildConversationChain: recover sibling assistant blocks and * tool_results that the single-parent walk orphaned. * * Streaming (claude.ts:~2024) emits one AssistantMessage per content_block_stop * — N parallel tool_uses → N messages, distinct uuid, same message.id. Each * tool_result's sourceToolAssistantUUID points to its own one-block assistant, * so insertMessageChain's override (line ~894) writes each TR's parentUuid to a * DIFFERENT assistant. The topology is a DAG; the walk above is a linked-list * traversal and keeps only one branch. * * Two loss modes observed in production (both fixed here): * 1. Sibling assistant orphaned: walk goes prev→asstA→TR_A→next, drops asstB * (same message.id, chained off asstA) and TR_B. * 2. 
Progress-fork (legacy, pre-#23537): each tool_use asst had a progress * child (continued the write chain) AND a TR child. Walk followed * progress; TRs were dropped. No longer written (progress removed from * transcript persistence), but old transcripts still have this shape. * * Read-side fix: the write topology is already on disk for old transcripts; * this recovery pass handles them. */ function recoverOrphanedParallelToolResults( messages: Map, chain: TranscriptMessage[], seen: Set, ): TranscriptMessage[] { type ChainAssistant = Extract const chainAssistants = chain.filter( (m): m is ChainAssistant => m.type === 'assistant', ) if (chainAssistants.length === 0) return chain // Anchor = last on-chain member of each sibling group. chainAssistants is // already in chain order, so later iterations overwrite → last wins. const anchorByMsgId = new Map() for (const a of chainAssistants) { if (a.message.id) anchorByMsgId.set(a.message.id, a) } // O(n) precompute: sibling groups and TR index. // TRs indexed by parentUuid — insertMessageChain:~894 already wrote that // as the srcUUID, and --fork-session strips srcUUID but keeps parentUuid. const siblingsByMsgId = new Map() const toolResultsByAsst = new Map() for (const m of messages.values()) { if (m.type === 'assistant' && m.message.id) { const group = siblingsByMsgId.get(m.message.id) if (group) group.push(m) else siblingsByMsgId.set(m.message.id, [m]) } else if ( m.type === 'user' && m.parentUuid && Array.isArray(m.message.content) && m.message.content.some(b => b.type === 'tool_result') ) { const group = toolResultsByAsst.get(m.parentUuid) if (group) group.push(m) else toolResultsByAsst.set(m.parentUuid, [m]) } } // For each message.id group touching the chain: collect off-chain siblings, // then off-chain TRs for ALL members. Splice right after the last on-chain // member so the group stays contiguous for normalizeMessagesForAPI's merge // and every TR lands after its tool_use. 
const processedGroups = new Set() const inserts = new Map() let recoveredCount = 0 for (const asst of chainAssistants) { const msgId = asst.message.id if (!msgId || processedGroups.has(msgId)) continue processedGroups.add(msgId) const group = siblingsByMsgId.get(msgId) ?? [asst] const orphanedSiblings = group.filter(s => !seen.has(s.uuid)) const orphanedTRs: TranscriptMessage[] = [] for (const member of group) { const trs = toolResultsByAsst.get(member.uuid) if (!trs) continue for (const tr of trs) { if (!seen.has(tr.uuid)) orphanedTRs.push(tr) } } if (orphanedSiblings.length === 0 && orphanedTRs.length === 0) continue // Timestamp sort keeps content-block / completion order; stable-sort // preserves JSONL write order on ties. orphanedSiblings.sort((a, b) => a.timestamp.localeCompare(b.timestamp)) orphanedTRs.sort((a, b) => a.timestamp.localeCompare(b.timestamp)) const anchor = anchorByMsgId.get(msgId)! const recovered = [...orphanedSiblings, ...orphanedTRs] for (const r of recovered) seen.add(r.uuid) recoveredCount += recovered.length inserts.set(anchor.uuid, recovered) } if (recoveredCount === 0) return chain logEvent('tengu_chain_parallel_tr_recovered', { recovered_count: recoveredCount, }) const result: TranscriptMessage[] = [] for (const m of chain) { result.push(m) const toInsert = inserts.get(m.uuid) if (toInsert) result.push(...toInsert) } return result } /** * Find the latest turn_duration checkpoint in the reconstructed chain and * compare its recorded messageCount against the chain's position at that * point. Emits tengu_resume_consistency_delta for BigQuery monitoring of * write→load round-trip drift — the class of bugs where snip/compact/ * parallel-TR operations mutate in-memory but the parentUuid walk on disk * reconstructs a different set (adamr-20260320-165831: 397K displayed → * 1.65M actual on resume). 
* * delta > 0: resume loaded MORE than in-session (the usual failure mode) * delta < 0: resume loaded FEWER (chain truncation — #22453 class) * delta = 0: round-trip consistent * * Called from loadConversationForResume — fires once per resume, not on * /share or log-listing chain rebuilds. */ export function checkResumeConsistency(chain: Message[]): void { for (let i = chain.length - 1; i >= 0; i--) { const m = chain[i]! if (m.type !== 'system' || m.subtype !== 'turn_duration') continue const expected = m.messageCount if (expected === undefined) return // `i` is the 0-based index of the checkpoint in the reconstructed chain. // The checkpoint was appended AFTER messageCount messages, so its own // position should be messageCount (i.e., i === expected). const actual = i logEvent('tengu_resume_consistency_delta', { expected, actual, delta: actual - expected, chain_length: chain.length, checkpoint_age_entries: chain.length - 1 - i, }) return } } /** * Builds a filie history snapshot chain from the conversation */ function buildFileHistorySnapshotChain( fileHistorySnapshots: Map, conversation: TranscriptMessage[], ): FileHistorySnapshot[] { const snapshots: FileHistorySnapshot[] = [] // messageId → last index in snapshots[] for O(1) update lookup const indexByMessageId = new Map() for (const message of conversation) { const snapshotMessage = fileHistorySnapshots.get(message.uuid) if (!snapshotMessage) { continue } const { snapshot, isSnapshotUpdate } = snapshotMessage const existingIndex = isSnapshotUpdate ? indexByMessageId.get(snapshot.messageId) : undefined if (existingIndex === undefined) { indexByMessageId.set(snapshot.messageId, snapshots.length) snapshots.push(snapshot) } else { snapshots[existingIndex] = snapshot } } return snapshots } /** * Builds an attribution snapshot chain from the conversation. 
* Unlike file history snapshots, attribution snapshots are returned in full * because they use generated UUIDs (not message UUIDs) and represent * cumulative state that should be restored on session resume. */ function buildAttributionSnapshotChain( attributionSnapshots: Map, _conversation: TranscriptMessage[], ): AttributionSnapshotMessage[] { // Return all attribution snapshots - they will be merged during restore return Array.from(attributionSnapshots.values()) } /** * Loads a transcript from a JSON or JSONL file and converts it to LogOption format * @param filePath Path to the transcript file (.json or .jsonl) * @returns LogOption containing the transcript messages * @throws Error if file doesn't exist or contains invalid data */ export async function loadTranscriptFromFile( filePath: string, ): Promise { if (filePath.endsWith('.jsonl')) { const { messages, summaries, customTitles, tags, fileHistorySnapshots, attributionSnapshots, contextCollapseCommits, contextCollapseSnapshot, leafUuids, contentReplacements, worktreeStates, } = await loadTranscriptFile(filePath) if (messages.size === 0) { throw new Error('No messages found in JSONL file') } // Find the most recent leaf message using pre-computed leaf UUIDs const leafMessage = findLatestMessage(messages.values(), msg => leafUuids.has(msg.uuid), ) if (!leafMessage) { throw new Error('No valid conversation chain found in JSONL file') } // Build the conversation chain backwards from leaf to root const transcript = buildConversationChain(messages, leafMessage) const summary = summaries.get(leafMessage.uuid) const customTitle = customTitles.get(leafMessage.sessionId as UUID) const tag = tags.get(leafMessage.sessionId as UUID) const sessionId = leafMessage.sessionId as UUID return { ...convertToLogOption( transcript, 0, summary, customTitle, buildFileHistorySnapshotChain(fileHistorySnapshots, transcript), tag, filePath, buildAttributionSnapshotChain(attributionSnapshots, transcript), undefined, 
contentReplacements.get(sessionId) ?? [], ), contextCollapseCommits: contextCollapseCommits.filter( e => e.sessionId === sessionId, ), contextCollapseSnapshot: contextCollapseSnapshot?.sessionId === sessionId ? contextCollapseSnapshot : undefined, worktreeSession: worktreeStates.has(sessionId) ? worktreeStates.get(sessionId) : undefined, } } // json log files const content = await readFile(filePath, { encoding: 'utf-8' }) let parsed: unknown try { parsed = jsonParse(content) } catch (error) { throw new Error(`Invalid JSON in transcript file: ${error}`) } let messages: TranscriptMessage[] if (Array.isArray(parsed)) { messages = parsed } else if (parsed && typeof parsed === 'object' && 'messages' in parsed) { if (!Array.isArray(parsed.messages)) { throw new Error('Transcript messages must be an array') } messages = parsed.messages } else { throw new Error( 'Transcript must be an array of messages or an object with a messages array', ) } return convertToLogOption( messages, 0, undefined, undefined, undefined, undefined, filePath, ) } /** * Checks if a user message has visible content (text or image, not just tool_result). * Tool results are displayed as part of collapsed groups, not as standalone messages. * Also excludes meta messages which are not shown to the user. */ function hasVisibleUserContent(message: TranscriptMessage): boolean { if (message.type !== 'user') return false // Meta messages are not shown to the user if (message.isMeta) return false const content = message.message?.content if (!content) return false // String content is always visible if (typeof content === 'string') { return content.trim().length > 0 } // Array content: check for text or image blocks (not tool_result) if (Array.isArray(content)) { return content.some( block => block.type === 'text' || block.type === 'image' || block.type === 'document', ) } return false } /** * Checks if an assistant message has visible text content (not just tool_use blocks). 
* Tool uses are displayed as grouped/collapsed UI elements, not as standalone messages. */ function hasVisibleAssistantContent(message: TranscriptMessage): boolean { if (message.type !== 'assistant') return false const content = message.message?.content if (!content || !Array.isArray(content)) return false // Check for text block (not just tool_use/thinking blocks) return content.some( block => block.type === 'text' && typeof block.text === 'string' && block.text.trim().length > 0, ) } /** * Counts visible messages that would appear as conversation turns in the UI. * Excludes: * - System, attachment, and progress messages * - User messages with isMeta flag (hidden from user) * - User messages that only contain tool_result blocks (displayed as collapsed groups) * - Assistant messages that only contain tool_use blocks (displayed as collapsed groups) */ function countVisibleMessages(transcript: TranscriptMessage[]): number { let count = 0 for (const message of transcript) { switch (message.type) { case 'user': // Count user messages with visible content (text, image, not just tool_result or meta) if (hasVisibleUserContent(message)) { count++ } break case 'assistant': // Count assistant messages with text content (not just tool_use) if (hasVisibleAssistantContent(message)) { count++ } break case 'attachment': case 'system': case 'progress': // These message types are not counted as visible conversation turns break } } return count } function convertToLogOption( transcript: TranscriptMessage[], value: number = 0, summary?: string, customTitle?: string, fileHistorySnapshots?: FileHistorySnapshot[], tag?: string, fullPath?: string, attributionSnapshots?: AttributionSnapshotMessage[], agentSetting?: string, contentReplacements?: ContentReplacementRecord[], ): LogOption { const lastMessage = transcript.at(-1)! const firstMessage = transcript[0]! 
// Get the first user message for the prompt const firstPrompt = extractFirstPrompt(transcript) // Create timestamps from message timestamps const created = new Date(firstMessage.timestamp) const modified = new Date(lastMessage.timestamp) return { date: lastMessage.timestamp, messages: removeExtraFields(transcript), fullPath, value, created, modified, firstPrompt, messageCount: countVisibleMessages(transcript), isSidechain: firstMessage.isSidechain, teamName: firstMessage.teamName, agentName: firstMessage.agentName, agentSetting, leafUuid: lastMessage.uuid, summary, customTitle, tag, fileHistorySnapshots: fileHistorySnapshots, attributionSnapshots: attributionSnapshots, contentReplacements, gitBranch: lastMessage.gitBranch, projectPath: firstMessage.cwd, } } async function trackSessionBranchingAnalytics( logs: LogOption[], ): Promise { const sessionIdCounts = new Map() let maxCount = 0 for (const log of logs) { const sessionId = getSessionIdFromLog(log) if (sessionId) { const newCount = (sessionIdCounts.get(sessionId) || 0) + 1 sessionIdCounts.set(sessionId, newCount) maxCount = Math.max(newCount, maxCount) } } // Early exit if no duplicates detected if (maxCount <= 1) { return } // Count sessions with branches and calculate stats using functional approach const branchCounts = Array.from(sessionIdCounts.values()).filter(c => c > 1) const sessionsWithBranches = branchCounts.length const totalBranches = branchCounts.reduce((sum, count) => sum + count, 0) logEvent('tengu_session_forked_branches_fetched', { total_sessions: sessionIdCounts.size, sessions_with_branches: sessionsWithBranches, max_branches_per_session: Math.max(...branchCounts), avg_branches_per_session: Math.round(totalBranches / sessionsWithBranches), total_transcript_count: logs.length, }) } export async function fetchLogs(limit?: number): Promise { const projectDir = getProjectDir(getOriginalCwd()) const logs = await getSessionFilesLite(projectDir, limit, getOriginalCwd()) await 
trackSessionBranchingAnalytics(logs) return logs } /** * Append an entry to a session file. Creates the parent dir if missing. */ /* eslint-disable custom-rules/no-sync-fs -- sync callers (exit cleanup, materialize) */ function appendEntryToFile( fullPath: string, entry: Record, ): void { const fs = getFsImplementation() const line = jsonStringify(entry) + '\n' try { fs.appendFileSync(fullPath, line, { mode: 0o600 }) } catch { fs.mkdirSync(dirname(fullPath), { mode: 0o700 }) fs.appendFileSync(fullPath, line, { mode: 0o600 }) } } /** * Sync tail read for reAppendSessionMetadata's external-writer check. * fstat on the already-open fd (no extra path lookup); reads the same * LITE_READ_BUF_SIZE window that readLiteMetadata scans. Returns empty * string on any error so callers fall through to unconditional behavior. */ function readFileTailSync(fullPath: string): string { let fd: number | undefined try { fd = openSync(fullPath, 'r') const st = fstatSync(fd) const tailOffset = Math.max(0, st.size - LITE_READ_BUF_SIZE) const buf = Buffer.allocUnsafe( Math.min(LITE_READ_BUF_SIZE, st.size - tailOffset), ) const bytesRead = readSync(fd, buf, 0, buf.length, tailOffset) return buf.toString('utf8', 0, bytesRead) } catch { return '' } finally { if (fd !== undefined) { try { closeSync(fd) } catch { // closeSync can throw; swallow to preserve return '' contract } } } } /* eslint-enable custom-rules/no-sync-fs */ export async function saveCustomTitle( sessionId: UUID, customTitle: string, fullPath?: string, source: 'user' | 'auto' = 'user', ) { // Fall back to computed path if fullPath is not provided const resolvedPath = fullPath ?? 
getTranscriptPathForSession(sessionId) appendEntryToFile(resolvedPath, { type: 'custom-title', customTitle, sessionId, }) // Cache for current session only (for immediate visibility) if (sessionId === getSessionId()) { getProject().currentSessionTitle = customTitle } logEvent('tengu_session_renamed', { source: source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, }) } /** * Persist an AI-generated title to the JSONL as a distinct `ai-title` entry. * * Writing a separate entry type (vs. reusing `custom-title`) is load-bearing: * - Read preference: readers prefer `customTitle` field over `aiTitle`, so * a user rename always wins regardless of append order. * - Resume safety: `loadTranscriptFile` only populates the `customTitles` * Map from `custom-title` entries, so `restoreSessionMetadata` never * caches an AI title and `reAppendSessionMetadata` never re-appends one * at EOF — avoiding the clobber-on-resume bug where a stale AI title * overwrites a mid-session user rename. * - CAS semantics: VS Code's `onlyIfNoCustomTitle` check scans for the * `customTitle` field only, so AI can overwrite its own previous AI * title but never a user title. * - Metrics: `tengu_session_renamed` is not fired for AI titles. * * Because the entry is never re-appended, it scrolls out of the 64KB tail * window once enough messages accumulate. Readers (`readLiteMetadata`, * `listSessionsImpl`, VS Code `fetchSessions`) fall back to scanning the * head buffer for `aiTitle` in that case. Both head and tail reads are * bounded (64KB each via `extractLastJsonStringField`), never a full scan. * * Callers with a stale-write guard (e.g., VS Code client) should prefer * passing `persist: false` to the SDK control request and persisting * through their own rename path after the guard passes, to avoid a race * where the AI title lands after a mid-flight user rename. 
*/ export function saveAiGeneratedTitle(sessionId: UUID, aiTitle: string): void { appendEntryToFile(getTranscriptPathForSession(sessionId), { type: 'ai-title', aiTitle, sessionId, }) } /** * Append a periodic task summary for `claude ps`. Unlike ai-title this is * not re-appended by reAppendSessionMetadata — it's a rolling snapshot of * what the agent is doing *now*, so staleness is fine; ps reads the most * recent one from the tail. */ export function saveTaskSummary(sessionId: UUID, summary: string): void { appendEntryToFile(getTranscriptPathForSession(sessionId), { type: 'task-summary', summary, sessionId, timestamp: new Date().toISOString(), }) } export async function saveTag(sessionId: UUID, tag: string, fullPath?: string) { // Fall back to computed path if fullPath is not provided const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) appendEntryToFile(resolvedPath, { type: 'tag', tag, sessionId }) // Cache for current session only (for immediate visibility) if (sessionId === getSessionId()) { getProject().currentSessionTag = tag } logEvent('tengu_session_tagged', {}) } /** * Link a session to a GitHub pull request. * This stores the PR number, URL, and repository for tracking and navigation. */ export async function linkSessionToPR( sessionId: UUID, prNumber: number, prUrl: string, prRepository: string, fullPath?: string, ): Promise { const resolvedPath = fullPath ?? 
getTranscriptPathForSession(sessionId)
  appendEntryToFile(resolvedPath, {
    type: 'pr-link',
    sessionId,
    prNumber,
    prUrl,
    prRepository,
    timestamp: new Date().toISOString(),
  })
  // Cache for current session so reAppendSessionMetadata can re-write after compaction
  if (sessionId === getSessionId()) {
    const project = getProject()
    project.currentSessionPrNumber = prNumber
    project.currentSessionPrUrl = prUrl
    project.currentSessionPrRepository = prRepository
  }
  logEvent('tengu_session_linked_to_pr', { prNumber })
}

export function getCurrentSessionTag(sessionId: UUID): string | undefined {
  // The tag is cached only for the live session; any other id yields undefined.
  return sessionId === getSessionId()
    ? getProject().currentSessionTag
    : undefined
}

export function getCurrentSessionTitle(
  sessionId: SessionId,
): string | undefined {
  // The title is cached only for the live session; any other id yields undefined.
  return sessionId === getSessionId()
    ? getProject().currentSessionTitle
    : undefined
}

export function getCurrentSessionAgentColor(): string | undefined {
  return getProject().currentSessionAgentColor
}

/**
 * Restore session metadata into in-memory cache on resume.
 * Populates the cache so metadata is available for display (e.g. the
 * agent banner) and re-appended on session exit via reAppendSessionMetadata.
 */
export function restoreSessionMetadata(meta: {
  customTitle?: string
  tag?: string
  agentName?: string
  agentColor?: string
  agentSetting?: string
  mode?: 'coordinator' | 'normal'
  worktreeSession?: PersistedWorktreeSession | null
  prNumber?: number
  prUrl?: string
  prRepository?: string
}): void {
  const project = getProject()
  // Title uses ??= so a --name (cacheSessionTitle) set earlier wins over the
  // resumed session's title. REPL.tsx clears before calling, so /resume is
  // unaffected.
  if (meta.customTitle) project.currentSessionTitle ??= meta.customTitle
  // Empty-string tag is normalized to undefined.
  if (meta.tag !== undefined) project.currentSessionTag = meta.tag || undefined
  if (meta.agentName) project.currentSessionAgentName = meta.agentName
  if (meta.agentColor) project.currentSessionAgentColor = meta.agentColor
  if (meta.agentSetting) project.currentSessionAgentSetting = meta.agentSetting
  if (meta.mode) project.currentSessionMode = meta.mode
  // null is meaningful for worktree (explicitly "not in a worktree"), so
  // only skip when the field is absent entirely.
  if (meta.worktreeSession !== undefined)
    project.currentSessionWorktree = meta.worktreeSession
  if (meta.prNumber !== undefined)
    project.currentSessionPrNumber = meta.prNumber
  if (meta.prUrl) project.currentSessionPrUrl = meta.prUrl
  if (meta.prRepository) project.currentSessionPrRepository = meta.prRepository
}

/**
 * Clear all cached session metadata (title, tag, agent name/color).
 * Called when /clear creates a new session so stale metadata
 * from the previous session does not leak into the new one.
 */
export function clearSessionMetadata(): void {
  const proj = getProject()
  proj.currentSessionTitle = undefined
  proj.currentSessionTag = undefined
  proj.currentSessionAgentName = undefined
  proj.currentSessionAgentColor = undefined
  proj.currentSessionLastPrompt = undefined
  proj.currentSessionAgentSetting = undefined
  proj.currentSessionMode = undefined
  proj.currentSessionWorktree = undefined
  proj.currentSessionPrNumber = undefined
  proj.currentSessionPrUrl = undefined
  proj.currentSessionPrRepository = undefined
}

/**
 * Re-append cached session metadata (custom title, tag) to the end of the
 * transcript file. Call this after compaction so the metadata stays within
 * the 16KB tail window that readLiteMetadata reads during progressive loading.
 * Without this, enough post-compaction messages can push the metadata entry
 * out of the window, causing `--resume` to show the auto-generated firstPrompt
 * instead of the user-set session name.
*/ export function reAppendSessionMetadata(): void { getProject().reAppendSessionMetadata() } export async function saveAgentName( sessionId: UUID, agentName: string, fullPath?: string, source: 'user' | 'auto' = 'user', ) { const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) appendEntryToFile(resolvedPath, { type: 'agent-name', agentName, sessionId }) // Cache for current session only (for immediate visibility) if (sessionId === getSessionId()) { getProject().currentSessionAgentName = agentName void updateSessionName(agentName) } logEvent('tengu_agent_name_set', { source: source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, }) } export async function saveAgentColor( sessionId: UUID, agentColor: string, fullPath?: string, ) { const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) appendEntryToFile(resolvedPath, { type: 'agent-color', agentColor, sessionId, }) // Cache for current session only (for immediate visibility) if (sessionId === getSessionId()) { getProject().currentSessionAgentColor = agentColor } logEvent('tengu_agent_color_set', {}) } /** * Cache the session agent setting. Written to disk by materializeSessionFile * on the first user message, and re-stamped by reAppendSessionMetadata on exit. * Cache-only here to avoid creating metadata-only session files at startup. */ export function saveAgentSetting(agentSetting: string): void { getProject().currentSessionAgentSetting = agentSetting } /** * Cache a session title set at startup (--name). Written to disk by * materializeSessionFile on the first user message. Cache-only here so no * orphan metadata-only file is created before the session ID is finalized. */ export function cacheSessionTitle(customTitle: string): void { getProject().currentSessionTitle = customTitle } /** * Cache the session mode. Written to disk by materializeSessionFile on the * first user message, and re-stamped by reAppendSessionMetadata on exit. 
* Cache-only here to avoid creating metadata-only session files at startup. */ export function saveMode(mode: 'coordinator' | 'normal'): void { getProject().currentSessionMode = mode } /** * Record the session's worktree state for --resume. Written to disk by * materializeSessionFile on the first user message and re-stamped by * reAppendSessionMetadata on exit. Pass null when exiting a worktree * so --resume knows not to cd back into it. */ export function saveWorktreeState( worktreeSession: PersistedWorktreeSession | null, ): void { // Strip ephemeral fields (creationDurationMs, usedSparsePaths) that callers // may pass via full WorktreeSession objects — TypeScript structural typing // allows this, but we don't want them serialized to the transcript. const stripped: PersistedWorktreeSession | null = worktreeSession ? { originalCwd: worktreeSession.originalCwd, worktreePath: worktreeSession.worktreePath, worktreeName: worktreeSession.worktreeName, worktreeBranch: worktreeSession.worktreeBranch, originalBranch: worktreeSession.originalBranch, originalHeadCommit: worktreeSession.originalHeadCommit, sessionId: worktreeSession.sessionId, tmuxSessionName: worktreeSession.tmuxSessionName, hookBased: worktreeSession.hookBased, } : null const project = getProject() project.currentSessionWorktree = stripped // Write eagerly when the file already exists (mid-session enter/exit). // For --worktree startup, sessionFile is null — materializeSessionFile // will write it on the first message via reAppendSessionMetadata. if (project.sessionFile) { appendEntryToFile(project.sessionFile, { type: 'worktree-state', worktreeSession: stripped, sessionId: getSessionId(), }) } } /** * Extracts the session ID from a log. * For lite logs, uses the sessionId field directly. * For full logs, extracts from the first message. 
*/ export function getSessionIdFromLog(log: LogOption): UUID | undefined { // For lite logs, use the direct sessionId field if (log.sessionId) { return log.sessionId as UUID } // Fall back to extracting from first message (full logs) return log.messages[0]?.sessionId as UUID | undefined } /** * Checks if a log is a lite log that needs full loading. * Lite logs have messages: [] and sessionId set. */ export function isLiteLog(log: LogOption): boolean { return log.messages.length === 0 && log.sessionId !== undefined } /** * Loads full messages for a lite log by reading its JSONL file. * Returns a new LogOption with populated messages array. * If the log is already full or loading fails, returns the original log. */ export async function loadFullLog(log: LogOption): Promise { // If already full, return as-is if (!isLiteLog(log)) { return log } // Use the fullPath from the index entry directly const sessionFile = log.fullPath if (!sessionFile) { return log } try { const { messages, summaries, customTitles, tags, agentNames, agentColors, agentSettings, prNumbers, prUrls, prRepositories, modes, worktreeStates, fileHistorySnapshots, attributionSnapshots, contentReplacements, contextCollapseCommits, contextCollapseSnapshot, leafUuids, } = await loadTranscriptFile(sessionFile) if (messages.size === 0) { return log } // Find the most recent user/assistant leaf message from the transcript const mostRecentLeaf = findLatestMessage( messages.values(), msg => leafUuids.has(msg.uuid) && (msg.type === 'user' || msg.type === 'assistant'), ) if (!mostRecentLeaf) { return log } // Build the conversation chain from this leaf const transcript = buildConversationChain(messages, mostRecentLeaf) // Leaf's sessionId — forked sessions copy chain[0] from the source, but // metadata entries (custom-title etc.) are keyed by the current session. 
const sessionId = mostRecentLeaf.sessionId as UUID | undefined return { ...log, messages: removeExtraFields(transcript), firstPrompt: extractFirstPrompt(transcript), messageCount: countVisibleMessages(transcript), summary: mostRecentLeaf ? summaries.get(mostRecentLeaf.uuid) : log.summary, customTitle: sessionId ? customTitles.get(sessionId) : log.customTitle, tag: sessionId ? tags.get(sessionId) : log.tag, agentName: sessionId ? agentNames.get(sessionId) : log.agentName, agentColor: sessionId ? agentColors.get(sessionId) : log.agentColor, agentSetting: sessionId ? agentSettings.get(sessionId) : log.agentSetting, mode: sessionId ? (modes.get(sessionId) as LogOption['mode']) : log.mode, worktreeSession: sessionId && worktreeStates.has(sessionId) ? worktreeStates.get(sessionId) : log.worktreeSession, prNumber: sessionId ? prNumbers.get(sessionId) : log.prNumber, prUrl: sessionId ? prUrls.get(sessionId) : log.prUrl, prRepository: sessionId ? prRepositories.get(sessionId) : log.prRepository, gitBranch: mostRecentLeaf?.gitBranch ?? log.gitBranch, isSidechain: transcript[0]?.isSidechain ?? log.isSidechain, teamName: transcript[0]?.teamName ?? log.teamName, leafUuid: mostRecentLeaf?.uuid ?? log.leafUuid, fileHistorySnapshots: buildFileHistorySnapshotChain( fileHistorySnapshots, transcript, ), attributionSnapshots: buildAttributionSnapshotChain( attributionSnapshots, transcript, ), contentReplacements: sessionId ? (contentReplacements.get(sessionId) ?? []) : log.contentReplacements, // Filter to the resumed session's entries. loadTranscriptFile reads // the file sequentially so the array is already in commit order; // filter preserves that. contextCollapseCommits: sessionId ? contextCollapseCommits.filter(e => e.sessionId === sessionId) : undefined, contextCollapseSnapshot: sessionId && contextCollapseSnapshot?.sessionId === sessionId ? 
contextCollapseSnapshot : undefined, } } catch { // If loading fails, return the original log return log } } /** * Searches for sessions by custom title match. * Returns matches sorted by recency (newest first). * Uses case-insensitive matching for better UX. * Deduplicates by sessionId (keeps most recent per session). * Searches across same-repo worktrees by default. */ export async function searchSessionsByCustomTitle( query: string, options?: { limit?: number; exact?: boolean }, ): Promise { const { limit, exact } = options || {} // Use worktree-aware loading to search across same-repo sessions const worktreePaths = await getWorktreePaths(getOriginalCwd()) const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths) // Enrich all logs to access customTitle metadata const { logs } = await enrichLogs(allStatLogs, 0, allStatLogs.length) const normalizedQuery = query.toLowerCase().trim() const matchingLogs = logs.filter(log => { const title = log.customTitle?.toLowerCase().trim() if (!title) return false return exact ? title === normalizedQuery : title.includes(normalizedQuery) }) // Deduplicate by sessionId - multiple logs can have the same sessionId // if they're different branches of the same conversation. Keep most recent. const sessionIdToLog = new Map() for (const log of matchingLogs) { const sessionId = getSessionIdFromLog(log) if (sessionId) { const existing = sessionIdToLog.get(sessionId) if (!existing || log.modified > existing.modified) { sessionIdToLog.set(sessionId, log) } } } const deduplicated = Array.from(sessionIdToLog.values()) // Sort by recency deduplicated.sort((a, b) => b.modified.getTime() - a.modified.getTime()) // Apply limit if specified if (limit) { return deduplicated.slice(0, limit) } return deduplicated } /** * Metadata entry types that can appear before a compact boundary but must * still be loaded (they're session-scoped, not message-scoped). * Kept as raw JSON string markers for cheap line filtering during streaming. 
 */
const METADATA_TYPE_MARKERS = [
  '"type":"summary"',
  '"type":"custom-title"',
  '"type":"tag"',
  '"type":"agent-name"',
  '"type":"agent-color"',
  '"type":"agent-setting"',
  '"type":"mode"',
  '"type":"worktree-state"',
  '"type":"pr-link"',
]
// Pre-encoded marker buffers so the scan compares bytes, never strings.
const METADATA_MARKER_BUFS = METADATA_TYPE_MARKERS.map(m => Buffer.from(m))
// Longest marker is 22 bytes; +1 for leading `{` = 23.
const METADATA_PREFIX_BOUND = 25

// null = carry spans whole chunk. Skips concat when carry provably isn't
// a metadata line (markers sit at byte 1 after `{`).
// Returns the buffer the caller should scan for this chunk:
//   - chunkBuf alone when there is no carry,
//   - carry+chunk concatenated when the carried prefix could still be a
//     metadata line,
//   - the chunk past its first newline when the carried line provably isn't
//     metadata (so the partial line can be dropped entirely),
//   - null when the whole chunk belongs to that non-metadata carried line.
function resolveMetadataBuf(
  carry: Buffer | null,
  chunkBuf: Buffer,
): Buffer | null {
  if (carry === null || carry.length === 0) return chunkBuf
  // Carry shorter than the longest possible prefix: can't rule it out yet.
  if (carry.length < METADATA_PREFIX_BOUND) {
    return Buffer.concat([carry, chunkBuf])
  }
  if (carry[0] === 0x7b /* { */) {
    for (const m of METADATA_MARKER_BUFS) {
      // Compare carry[1 .. 1+m.length) against the marker (byte 1 skips `{`).
      if (carry.compare(m, 0, m.length, 1, 1 + m.length) === 0) {
        return Buffer.concat([carry, chunkBuf])
      }
    }
  }
  // Carried line is not metadata: resume scanning after the chunk's first
  // newline (or skip the chunk entirely if the line hasn't ended yet).
  const firstNl = chunkBuf.indexOf(0x0a)
  return firstNl === -1 ? null : chunkBuf.subarray(firstNl + 1)
}

/**
 * Lightweight forward scan of [0, endOffset) collecting only metadata-entry lines.
 * Uses raw Buffer chunks and byte-level marker matching — no readline, no per-line
 * string conversion for the ~99% of lines that are message content.
 *
 * Fast path: if a chunk contains zero markers (the common case — metadata entries
 * are <50 per session), the entire chunk is skipped without line splitting.
 */
async function scanPreBoundaryMetadata(
  filePath: string,
  endOffset: number,
): Promise {
  const { createReadStream } = await import('fs')
  const NEWLINE = 0x0a
  // createReadStream's `end` option is inclusive, hence endOffset - 1.
  const stream = createReadStream(filePath, { end: endOffset - 1 })
  const metadataLines: string[] = []
  // Partial trailing line carried between chunks (null = nothing carried,
  // or the carried line was provably non-metadata and got dropped).
  let carry: Buffer | null = null
  for await (const chunk of stream) {
    const chunkBuf = chunk as Buffer
    const buf = resolveMetadataBuf(carry, chunkBuf)
    if (buf === null) {
      carry = null
      continue
    }
    // Fast path: most chunks contain zero metadata markers. Skip line splitting.
    let hasAnyMarker = false
    for (const m of METADATA_MARKER_BUFS) {
      if (buf.includes(m)) {
        hasAnyMarker = true
        break
      }
    }
    if (hasAnyMarker) {
      let lineStart = 0
      let nl = buf.indexOf(NEWLINE)
      while (nl !== -1) {
        // Bounded marker check: only look within this line's byte range
        for (const m of METADATA_MARKER_BUFS) {
          const mIdx = buf.indexOf(m, lineStart)
          if (mIdx !== -1 && mIdx < nl) {
            metadataLines.push(buf.toString('utf-8', lineStart, nl))
            break
          }
        }
        lineStart = nl + 1
        nl = buf.indexOf(NEWLINE, lineStart)
      }
      carry = buf.subarray(lineStart)
    } else {
      // No markers in this chunk — just preserve the incomplete trailing line
      const lastNl = buf.lastIndexOf(NEWLINE)
      carry = lastNl >= 0 ? buf.subarray(lastNl + 1) : buf
    }
    // Guard against quadratic carry growth for pathological huge lines
    // (e.g., a 10 MB tool-output line with no newline). Real metadata entries
    // are <1 KB, so if carry exceeds this we're mid-message-content — drop it.
    if (carry.length > 64 * 1024) carry = null
  }
  // Final incomplete line (no trailing newline at endOffset)
  if (carry !== null && carry.length > 0) {
    for (const m of METADATA_MARKER_BUFS) {
      if (carry.includes(m)) {
        metadataLines.push(carry.toString('utf-8'))
        break
      }
    }
  }
  return metadataLines
}

/**
 * Byte-level pre-filter that excises dead fork branches before parseJSONL.
 *
 * Every rewind/ctrl-z leaves an orphaned chain branch in the append-only
 * JSONL forever. buildConversationChain walks parentUuid from the latest leaf
 * and discards everything else, but by then parseJSONL has already paid to
 * JSON.parse all of it. Measured on fork-heavy sessions:
 *
 *   41 MB, 99% dead: parseJSONL 56.0 ms -> 3.9 ms (-93%)
 *   151 MB, 92% dead: 47.3 ms -> 9.4 ms (-80%)
 *
 * Sessions with few dead branches (5-7%) see a small win from the overhead of
 * the index pass roughly canceling the parse savings, so this is gated on
 * buffer size (same threshold as SKIP_PRECOMPACT_THRESHOLD).
 *
 * Relies on two invariants verified across 25k+ message lines in local
 * sessions (0 violations):
 *
 * 1. Transcript messages always serialize with parentUuid as the first key.
 *    JSON.stringify emits keys in insertion order and recordTranscript's
 *    object literal puts parentUuid first. So `{"parentUuid":` is a stable
 *    line prefix that distinguishes transcript messages from metadata.
 *
 * 2. Top-level uuid detection is handled by a suffix check + depth check
 *    (see inline comment in the scan loop). toolUseResult/mcpMeta serialize
 *    AFTER uuid with arbitrary server-controlled objects, and agent_progress
 *    entries serialize a nested Message in data BEFORE uuid — both can
 *    produce nested `"uuid":"<36>","timestamp":"` bytes, so suffix alone
 *    is insufficient. When multiple suffix matches exist, a brace-depth
 *    scan disambiguates.
 *
 * The append-only write discipline guarantees parents appear at earlier file
 * offsets than children, so walking backward from EOF always finds them.
 */
/**
 * Disambiguate multiple `"uuid":"<36>","timestamp":"` matches in one line by
 * finding the one at JSON nesting depth 1. String-aware brace counter:
 * `{`/`}` inside string values don't count; `\"` and `\\` inside strings are
 * handled. Candidates is sorted ascending (the scan loop produces them in
 * byte order). Returns the first depth-1 candidate, or the last candidate if
 * none are at depth 1 (shouldn't happen for well-formed JSONL — depth-1 is
 * where the top-level object's fields live).
 *
 * Only called when ≥2 suffix matches exist (agent_progress with a nested
 * Message, or mcpMeta with a coincidentally-suffixed object). Cost is
 * O(max(candidates) - lineStart) — one forward byte pass, stopping at the
 * first depth-1 hit.
 */
function pickDepthOneUuidCandidate(
  buf: Buffer,
  lineStart: number,
  candidates: number[],
): number {
  const QUOTE = 0x22
  const BACKSLASH = 0x5c
  const OPEN_BRACE = 0x7b
  const CLOSE_BRACE = 0x7d
  let depth = 0
  let inString = false
  let escapeNext = false
  let ci = 0
  // Single forward pass from lineStart; the candidate check runs BEFORE the
  // byte at i is folded into the state machine, so the depth/inString state
  // reflects everything strictly before the candidate's opening quote.
  for (let i = lineStart; ci < candidates.length; i++) {
    if (i === candidates[ci]) {
      if (depth === 1 && !inString) return candidates[ci]!
      ci++
    }
    const b = buf[i]!
    if (escapeNext) {
      escapeNext = false
    } else if (inString) {
      if (b === BACKSLASH) escapeNext = true
      else if (b === QUOTE) inString = false
    } else if (b === QUOTE) inString = true
    else if (b === OPEN_BRACE) depth++
    else if (b === CLOSE_BRACE) depth--
  }
  // No depth-1 candidate found — fall back to the last one (see doc above).
  return candidates.at(-1)!
}

// Returns either the original buffer untouched, or a new buffer with dead
// fork branches removed (live chain + all metadata lines, in file order).
function walkChainBeforeParse(buf: Buffer): Buffer {
  const NEWLINE = 0x0a
  const OPEN_BRACE = 0x7b
  const QUOTE = 0x22
  const PARENT_PREFIX = Buffer.from('{"parentUuid":')
  const UUID_KEY = Buffer.from('"uuid":"')
  const SIDECHAIN_TRUE = Buffer.from('"isSidechain":true')
  const UUID_LEN = 36
  const TS_SUFFIX = Buffer.from('","timestamp":"')
  const TS_SUFFIX_LEN = TS_SUFFIX.length
  const PREFIX_LEN = PARENT_PREFIX.length
  const KEY_LEN = UUID_KEY.length
  // Stride-3 flat index of transcript messages: [lineStart, lineEnd, parentStart].
  // parentStart is the byte offset of the parent uuid's first char, or -1 for null.
  // Metadata lines (summary, mode, file-history-snapshot, etc.) go in metaRanges
  // unfiltered - they lack the parentUuid prefix and downstream needs all of them.
  const msgIdx: number[] = []
  const metaRanges: number[] = []
  const uuidToSlot = new Map()
  let pos = 0
  const len = buf.length
  while (pos < len) {
    const nl = buf.indexOf(NEWLINE, pos)
    const lineEnd = nl === -1 ? len : nl + 1
    if (
      lineEnd - pos > PREFIX_LEN &&
      buf[pos] === OPEN_BRACE &&
      buf.compare(PARENT_PREFIX, 0, PREFIX_LEN, pos, pos + PREFIX_LEN) === 0
    ) {
      // `{"parentUuid":null,` or `{"parentUuid":"<36 chars>",`
      const parentStart =
        buf[pos + PREFIX_LEN] === QUOTE ? pos + PREFIX_LEN + 1 : -1
      // The top-level uuid is immediately followed by `","timestamp":"` in
      // user/assistant/attachment entries (the create* helpers put them
      // adjacent; both always defined). But the suffix is NOT unique:
      //   - agent_progress entries carry a nested Message in data.message,
      //     serialized BEFORE top-level uuid — that inner Message has its
      //     own uuid,timestamp adjacent, so its bytes also satisfy the
      //     suffix check.
      //   - mcpMeta/toolUseResult come AFTER top-level uuid and hold
      //     server-controlled Record — a server returning
      //     {uuid:"<36>",timestamp:"..."} would also match.
      // Collect all suffix matches; a single one is unambiguous (common
      // case), multiple need a brace-depth check to pick the one at
      // JSON nesting depth 1. Entries with NO suffix match (some progress
      // variants put timestamp BEFORE uuid → `"uuid":"<36>"}` at EOL)
      // have only one `"uuid":"` and the first-match fallback is sound.
      let firstAny = -1
      let suffix0 = -1
      let suffixN: number[] | undefined
      let from = pos
      for (;;) {
        const next = buf.indexOf(UUID_KEY, from)
        if (next < 0 || next >= lineEnd) break
        if (firstAny < 0) firstAny = next
        const after = next + KEY_LEN + UUID_LEN
        if (
          after + TS_SUFFIX_LEN <= lineEnd &&
          buf.compare(
            TS_SUFFIX,
            0,
            TS_SUFFIX_LEN,
            after,
            after + TS_SUFFIX_LEN,
          ) === 0
        ) {
          if (suffix0 < 0) suffix0 = next
          else (suffixN ??= [suffix0]).push(next)
        }
        from = next + KEY_LEN
      }
      const uk = suffixN
        ? pickDepthOneUuidCandidate(buf, pos, suffixN)
        : suffix0 >= 0
          ? suffix0
          : firstAny
      if (uk >= 0) {
        const uuidStart = uk + KEY_LEN
        // UUIDs are pure ASCII so latin1 avoids UTF-8 decode overhead.
        const uuid = buf.toString('latin1', uuidStart, uuidStart + UUID_LEN)
        uuidToSlot.set(uuid, msgIdx.length)
        msgIdx.push(pos, lineEnd, parentStart)
      } else {
        // Starts with the parentUuid prefix but has no uuid at all — treat
        // as metadata so it is never dropped.
        metaRanges.push(pos, lineEnd)
      }
    } else {
      metaRanges.push(pos, lineEnd)
    }
    pos = lineEnd
  }
  // Leaf = last non-sidechain entry. isSidechain is the 2nd or 3rd key
  // (after parentUuid, maybe logicalParentUuid) so indexOf from lineStart
  // finds it within a few dozen bytes when present; when absent it spills
  // into the next line, caught by the bounds check.
  let leafSlot = -1
  for (let i = msgIdx.length - 3; i >= 0; i -= 3) {
    const sc = buf.indexOf(SIDECHAIN_TRUE, msgIdx[i]!)
    if (sc === -1 || sc >= msgIdx[i + 1]!) {
      leafSlot = i
      break
    }
  }
  if (leafSlot < 0) return buf
  // Walk parentUuid to root. Collect kept-message line starts and sum their
  // byte lengths so we can decide whether the concat is worth it. A dangling
  // parent (uuid not in file) is the normal termination for forked sessions
  // and post-boundary chains -- same semantics as buildConversationChain.
  // Correctness against index poisoning rests on the timestamp suffix check
  // above: a nested `"uuid":"` match without the suffix never becomes uk.
  const seen = new Set()
  const chain = new Set()
  let chainBytes = 0
  let slot: number | undefined = leafSlot
  while (slot !== undefined) {
    if (seen.has(slot)) break
    seen.add(slot)
    chain.add(msgIdx[slot]!)
    chainBytes += msgIdx[slot + 1]! - msgIdx[slot]!
    const parentStart = msgIdx[slot + 2]!
    if (parentStart < 0) break
    const parent = buf.toString('latin1', parentStart, parentStart + UUID_LEN)
    slot = uuidToSlot.get(parent)
  }
  // parseJSONL cost scales with bytes, not entry count. A session can have
  // thousands of dead entries by count but only single-digit-% of bytes if
  // the dead branches are short turns and the live chain holds the fat
  // assistant responses (measured: 107 MB session, 69% dead entries, 30%
  // dead bytes - index+concat overhead exceeded parse savings). Gate on
  // bytes: only stitch if we would drop at least half the buffer. Metadata
  // is tiny so len - chainBytes approximates dead bytes closely enough.
  // Near break-even the concat memcpy (copying chainBytes into a fresh
  // allocation) dominates, so a conservative 50% gate stays safely on the
  // winning side.
  // (Precedence note: `>>` binds tighter than `<`, so this reads
  // `(len - chainBytes) < (len >> 1)`.)
  if (len - chainBytes < len >> 1) return buf
  // Merge chain entries with metadata in original file order. Both msgIdx and
  // metaRanges are already sorted by offset; interleave them into subarray
  // views and concat once.
  const parts: Buffer[] = []
  let m = 0
  for (let i = 0; i < msgIdx.length; i += 3) {
    const start = msgIdx[i]!
    while (m < metaRanges.length && metaRanges[m]! < start) {
      parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
      m += 2
    }
    if (chain.has(start)) {
      parts.push(buf.subarray(start, msgIdx[i + 1]!))
    }
  }
  while (m < metaRanges.length) {
    parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
    m += 2
  }
  return Buffer.concat(parts)
}

/**
 * Loads all messages, summaries, and file history snapshots from a transcript file.
 * Returns the messages, summaries, custom titles, tags, file history snapshots, and attribution snapshots.
 *
 * @param filePath Absolute path to the session .jsonl file.
 * @param opts keepAllLeaves disables the dead-branch pre-filter so every fork
 *   branch survives parsing (used by callers that inspect all branches).
 * @returns Session-scoped metadata maps plus the message graph and its leaf
 *   UUID set. Never throws for missing/unreadable files — returns empty maps.
 */
export async function loadTranscriptFile(
  filePath: string,
  opts?: { keepAllLeaves?: boolean },
): Promise<{
  messages: Map
  summaries: Map
  customTitles: Map
  tags: Map
  agentNames: Map
  agentColors: Map
  agentSettings: Map
  prNumbers: Map
  prUrls: Map
  prRepositories: Map
  modes: Map
  worktreeStates: Map
  fileHistorySnapshots: Map
  attributionSnapshots: Map
  contentReplacements: Map
  agentContentReplacements: Map
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  leafUuids: Set
}> {
  const messages = new Map()
  const summaries = new Map()
  const customTitles = new Map()
  const tags = new Map()
  const agentNames = new Map()
  const agentColors = new Map()
  const agentSettings = new Map()
  const prNumbers = new Map()
  const prUrls = new Map()
  const prRepositories = new Map()
  const modes = new Map()
  const worktreeStates = new Map()
  const fileHistorySnapshots = new Map()
  const attributionSnapshots = new Map()
  const contentReplacements = new Map()
  const agentContentReplacements = new Map<
    AgentId,
    ContentReplacementRecord[]
  >()
  // Array, not Map — commit order matters (nested collapses).
  const contextCollapseCommits: ContextCollapseCommitEntry[] = []
  // Last-wins — later entries supersede.
  let contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  try {
    // For large transcripts, avoid materializing megabytes of stale content.
    // Single forward chunked read: attribution-snapshot lines are skipped at
    // the fd level (never buffered), compact boundaries truncate the
    // accumulator in-stream. Peak allocation is the OUTPUT size, not the
    // file size — a 151 MB session that is 84% stale attr-snaps allocates
    // ~32 MB instead of 159+64 MB. This matters because mimalloc does not
    // return those pages to the OS even after JS-level GC frees the backing
    // buffers (measured: arrayBuffers=0 after Bun.gc(true) but RSS stuck at
    // ~316 MB on the old scan+strip path vs ~155 MB here).
    //
    // Pre-boundary metadata (agent-setting, mode, pr-link, etc.) is recovered
    // via a cheap byte-level forward scan of [0, boundary).
    let buf: Buffer | null = null
    let metadataLines: string[] | null = null
    let hasPreservedSegment = false
    if (!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP)) {
      const { size } = await stat(filePath)
      if (size > SKIP_PRECOMPACT_THRESHOLD) {
        const scan = await readTranscriptForLoad(filePath, size)
        buf = scan.postBoundaryBuf
        hasPreservedSegment = scan.hasPreservedSegment
        // >0 means we truncated pre-boundary bytes and must recover
        // session-scoped metadata from that range. A preservedSegment
        // boundary does not truncate (preserved messages are physically
        // pre-boundary), so offset stays 0 unless an EARLIER non-preserved
        // boundary already truncated — in which case the preserved messages
        // for the later boundary are post-that-earlier-boundary and were
        // kept, and we still want the metadata scan.
        if (scan.boundaryStartOffset > 0) {
          metadataLines = await scanPreBoundaryMetadata(
            filePath,
            scan.boundaryStartOffset,
          )
        }
      }
    }
    buf ??= await readFile(filePath)
    // For large buffers (which here means readTranscriptForLoad output with
    // attr-snaps already stripped at the fd level — the <5MB readFile path
    // falls through the size gate below), the dominant cost is parsing dead
    // fork branches that buildConversationChain would discard anyway. Skip
    // when the caller needs all
    // leaves (loadAllLogsFromSessionFile for /insights picks the branch with
    // most user messages, not the latest), when the boundary has a
    // preservedSegment (those messages keep their pre-compact parentUuid on
    // disk -- applyPreservedSegmentRelinks splices them in-memory AFTER
    // parse, so a pre-parse chain walk would drop them as orphans), and when
    // CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP is set (that kill switch means
    // "load everything, skip nothing"; this is another skip-before-parse
    // optimization and the scan it depends on for hasPreservedSegment did
    // not run).
    if (
      !opts?.keepAllLeaves &&
      !hasPreservedSegment &&
      !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP) &&
      buf.length > SKIP_PRECOMPACT_THRESHOLD
    ) {
      buf = walkChainBeforeParse(buf)
    }
    // First pass: process metadata-only lines collected during the boundary scan.
    // These populate the session-scoped maps (agentSettings, modes, prNumbers,
    // etc.) for entries written before the compact boundary. Any overlap with
    // the post-boundary buffer is harmless — later values overwrite earlier ones.
    if (metadataLines && metadataLines.length > 0) {
      const metaEntries = parseJSONL(
        Buffer.from(metadataLines.join('\n')),
      )
      for (const entry of metaEntries) {
        if (entry.type === 'summary' && entry.leafUuid) {
          summaries.set(entry.leafUuid, entry.summary)
        } else if (entry.type === 'custom-title' && entry.sessionId) {
          customTitles.set(entry.sessionId, entry.customTitle)
        } else if (entry.type === 'tag' && entry.sessionId) {
          tags.set(entry.sessionId, entry.tag)
        } else if (entry.type === 'agent-name' && entry.sessionId) {
          agentNames.set(entry.sessionId, entry.agentName)
        } else if (entry.type === 'agent-color' && entry.sessionId) {
          agentColors.set(entry.sessionId, entry.agentColor)
        } else if (entry.type === 'agent-setting' && entry.sessionId) {
          agentSettings.set(entry.sessionId, entry.agentSetting)
        } else if (entry.type === 'mode' && entry.sessionId) {
          modes.set(entry.sessionId, entry.mode)
        } else if (entry.type === 'worktree-state' && entry.sessionId) {
          worktreeStates.set(entry.sessionId, entry.worktreeSession)
        } else if (entry.type === 'pr-link' && entry.sessionId) {
          prNumbers.set(entry.sessionId, entry.prNumber)
          prUrls.set(entry.sessionId, entry.prUrl)
          prRepositories.set(entry.sessionId, entry.prRepository)
        }
      }
    }
    const entries = parseJSONL(buf)
    // Bridge map for legacy progress entries: progress_uuid → progress_parent_uuid.
    // PR #24099 removed progress from isTranscriptMessage, so old transcripts with
    // progress in the parentUuid chain would truncate at buildConversationChain
    // when messages.get(progressUuid) returns undefined. Since transcripts are
    // append-only (parents before children), we record each progress→parent link
    // as we see it, chain-resolving through consecutive progress entries, then
    // rewrite any subsequent message whose parentUuid lands in the bridge.
    const progressBridge = new Map()
    for (const entry of entries) {
      // Legacy progress check runs before the Entry-typed else-if chain —
      // progress is not in the Entry union, so checking it after TypeScript
      // has narrowed `entry` intersects to `never`.
      if (isLegacyProgressEntry(entry)) {
        // Chain-resolve through consecutive progress entries so a later
        // message pointing at the tail of a progress run bridges to the
        // nearest non-progress ancestor in one lookup.
        const parent = entry.parentUuid
        progressBridge.set(
          entry.uuid,
          parent && progressBridge.has(parent)
            ? (progressBridge.get(parent) ?? null)
            : parent,
        )
        continue
      }
      if (isTranscriptMessage(entry)) {
        if (entry.parentUuid && progressBridge.has(entry.parentUuid)) {
          entry.parentUuid = progressBridge.get(entry.parentUuid) ?? null
        }
        messages.set(entry.uuid, entry)
        // Compact boundary: prior marble-origami-commit entries reference
        // messages that won't be in the post-boundary chain. The >5MB
        // backward-scan path discards them naturally by never reading the
        // pre-boundary bytes; the <5MB path reads everything, so discard
        // here. Without this, getStats().collapsedSpans in /context
        // overcounts (projectView silently skips the stale commits but
        // they're still in the log).
        if (isCompactBoundaryMessage(entry)) {
          contextCollapseCommits.length = 0
          contextCollapseSnapshot = undefined
        }
      } else if (entry.type === 'summary' && entry.leafUuid) {
        summaries.set(entry.leafUuid, entry.summary)
      } else if (entry.type === 'custom-title' && entry.sessionId) {
        customTitles.set(entry.sessionId, entry.customTitle)
      } else if (entry.type === 'tag' && entry.sessionId) {
        tags.set(entry.sessionId, entry.tag)
      } else if (entry.type === 'agent-name' && entry.sessionId) {
        agentNames.set(entry.sessionId, entry.agentName)
      } else if (entry.type === 'agent-color' && entry.sessionId) {
        agentColors.set(entry.sessionId, entry.agentColor)
      } else if (entry.type === 'agent-setting' && entry.sessionId) {
        agentSettings.set(entry.sessionId, entry.agentSetting)
      } else if (entry.type === 'mode' && entry.sessionId) {
        modes.set(entry.sessionId, entry.mode)
      } else if (entry.type === 'worktree-state' && entry.sessionId) {
        worktreeStates.set(entry.sessionId, entry.worktreeSession)
      } else if (entry.type === 'pr-link' && entry.sessionId) {
        prNumbers.set(entry.sessionId, entry.prNumber)
        prUrls.set(entry.sessionId, entry.prUrl)
        prRepositories.set(entry.sessionId, entry.prRepository)
      } else if (entry.type === 'file-history-snapshot') {
        fileHistorySnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'attribution-snapshot') {
        attributionSnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'content-replacement') {
        // Subagent decisions key by agentId (sidechain resume); main-thread
        // decisions key by sessionId (/resume).
        if (entry.agentId) {
          const existing = agentContentReplacements.get(entry.agentId) ?? []
          agentContentReplacements.set(entry.agentId, existing)
          existing.push(...entry.replacements)
        } else {
          const existing = contentReplacements.get(entry.sessionId) ?? []
          contentReplacements.set(entry.sessionId, existing)
          existing.push(...entry.replacements)
        }
      } else if (entry.type === 'marble-origami-commit') {
        contextCollapseCommits.push(entry)
      } else if (entry.type === 'marble-origami-snapshot') {
        contextCollapseSnapshot = entry
      }
    }
  } catch {
    // File doesn't exist or can't be read
  }
  applyPreservedSegmentRelinks(messages)
  applySnipRemovals(messages)
  // Compute leaf UUIDs once at load time
  // Only user/assistant messages should be considered as leaves for anchoring resume.
  // Other message types (system, attachment) are metadata or auxiliary and shouldn't
  // anchor a conversation chain.
  //
  // We use standard parent relationship for main chain detection, but also need to
  // handle cases where the last message is a system/metadata message.
  // For each conversation chain (identified by following parent links), the leaf
  // is the most recent user/assistant message.
  const allMessages = [...messages.values()]
  // Standard leaf computation using parent relationships
  const parentUuids = new Set(
    allMessages
      .map(msg => msg.parentUuid)
      .filter((uuid): uuid is UUID => uuid !== null),
  )
  // Find all terminal messages (messages with no children)
  const terminalMessages = allMessages.filter(msg => !parentUuids.has(msg.uuid))
  const leafUuids = new Set()
  let hasCycle = false
  if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_pebble_leaf_prune', false)) {
    // Build a set of UUIDs that have user/assistant children
    // (these are mid-conversation nodes, not dead ends)
    const hasUserAssistantChild = new Set()
    for (const msg of allMessages) {
      if (msg.parentUuid && (msg.type === 'user' || msg.type === 'assistant')) {
        hasUserAssistantChild.add(msg.parentUuid)
      }
    }
    // For each terminal message, walk back to find the nearest user/assistant ancestor.
    // Skip ancestors that already have user/assistant children - those are mid-conversation
    // nodes where the conversation continued (e.g., an assistant tool_use message whose
    // progress child is terminal, but whose tool_result child continues the conversation).
    for (const terminal of terminalMessages) {
      const seen = new Set()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          if (!hasUserAssistantChild.has(current.uuid)) {
            leafUuids.add(current.uuid)
          }
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  } else {
    // Original leaf computation: walk back from terminal messages to find
    // the nearest user/assistant ancestor unconditionally
    for (const terminal of terminalMessages) {
      const seen = new Set()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          leafUuids.add(current.uuid)
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  }
  if (hasCycle) {
    logEvent('tengu_transcript_parent_cycle', {})
  }
  return {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    agentContentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
    leafUuids,
  }
}

/**
 * Loads all messages, summaries, file history snapshots, and attribution snapshots from a specific session file.
 */
async function loadSessionFile(sessionId: UUID): Promise<{
  messages: Map
  summaries: Map
  customTitles: Map
  tags: Map
  agentSettings: Map
  worktreeStates: Map
  fileHistorySnapshots: Map
  attributionSnapshots: Map
  contentReplacements: Map
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
}> {
  // Session files live at <project dir>/<sessionId>.jsonl; prefer the
  // session-scoped project dir when one has been established.
  const sessionFile = join(
    getSessionProjectDir() ?? getProjectDir(getOriginalCwd()),
    `${sessionId}.jsonl`,
  )
  return loadTranscriptFile(sessionFile)
}

/**
 * Gets message UUIDs for a specific session without loading all sessions.
 * Memoized to avoid re-reading the same session file multiple times.
 */
const getSessionMessages = memoize(
  async (sessionId: UUID): Promise> => {
    const { messages } = await loadSessionFile(sessionId)
    return new Set(messages.keys())
  },
  // Explicit resolver: cache key is the sessionId itself.
  (sessionId: UUID) => sessionId,
)

/**
 * Clear the memoized session messages cache.
 * Call after compaction when old message UUIDs are no longer valid.
 */
export function clearSessionMessagesCache(): void {
  getSessionMessages.cache.clear?.()
}

/**
 * Check if a message UUID exists in the session storage
 */
export async function doesMessageExistInSession(
  sessionId: UUID,
  messageUuid: UUID,
): Promise {
  const messageSet = await getSessionMessages(sessionId)
  return messageSet.has(messageUuid)
}

// Returns the latest resumable log for a session, or null when the session
// file has no messages (or no non-sidechain leaf).
export async function getLastSessionLog(
  sessionId: UUID,
): Promise {
  // Single read: load all session data at once instead of reading the file twice
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentSettings,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
  } = await loadSessionFile(sessionId)
  if (messages.size === 0) return null
  // Prime getSessionMessages cache so recordTranscript (called after REPL
  // mount on --resume) skips a second full file load. -170~227ms on large sessions.
  // Guard: only prime if cache is empty. Mid-session callers (e.g. IssueFeedback)
  // may call getLastSessionLog on the current session — overwriting a live cache
  // with a stale disk snapshot would lose unflushed UUIDs and break dedup.
  if (!getSessionMessages.cache.has(sessionId)) {
    getSessionMessages.cache.set(
      sessionId,
      Promise.resolve(new Set(messages.keys())),
    )
  }
  // Find the most recent non-sidechain message
  const lastMessage = findLatestMessage(messages.values(), m => !m.isSidechain)
  if (!lastMessage) return null
  // Build the transcript chain from the last message
  const transcript = buildConversationChain(messages, lastMessage)
  const summary = summaries.get(lastMessage.uuid)
  // Title/tag are keyed by the leaf's own sessionId (may differ from the
  // argument for forked sessions); agent settings key by the requested id.
  const customTitle = customTitles.get(lastMessage.sessionId as UUID)
  const tag = tags.get(lastMessage.sessionId as UUID)
  const agentSetting = agentSettings.get(sessionId)
  return {
    ...convertToLogOption(
      transcript,
      0,
      summary,
      customTitle,
      buildFileHistorySnapshotChain(fileHistorySnapshots, transcript),
      tag,
      getTranscriptPathForSession(sessionId),
      buildAttributionSnapshotChain(attributionSnapshots, transcript),
      agentSetting,
      contentReplacements.get(sessionId) ?? [],
    ),
    worktreeSession: worktreeStates.get(sessionId),
    contextCollapseCommits: contextCollapseCommits.filter(
      e => e.sessionId === sessionId,
    ),
    contextCollapseSnapshot:
      contextCollapseSnapshot?.sessionId === sessionId
        ? contextCollapseSnapshot
        : undefined,
  }
}

/**
 * Loads the list of message logs
 * @param limit Optional limit on number of session files to load
 * @returns List of message logs sorted by date
 */
export async function loadMessageLogs(limit?: number): Promise {
  const sessionLogs = await fetchLogs(limit)
  // fetchLogs returns lite (stat-only) logs — enrich them to get metadata.
  // enrichLogs already filters out sidechains, empty sessions, etc.
  const { logs: enriched } = await enrichLogs(
    sessionLogs,
    0,
    sessionLogs.length,
  )
  // enrichLogs returns fresh unshared objects — mutate in place to avoid
  // re-spreading every 30-field LogOption just to renumber the index.
  const sorted = sortLogs(enriched)
  sorted.forEach((log, i) => {
    log.value = i
  })
  return sorted
}

/**
 * Loads message logs from all project directories.
 * @param limit Optional limit on number of session files to load per project (used when no index exists)
 * @returns List of message logs sorted by date
 */
export async function loadAllProjectsMessageLogs(
  limit?: number,
  options?: { skipIndex?: boolean; initialEnrichCount?: number },
): Promise {
  if (options?.skipIndex) {
    // Load all sessions with full message data (e.g. for /insights analysis)
    return loadAllProjectsMessageLogsFull(limit)
  }
  const result = await loadAllProjectsMessageLogsProgressive(
    limit,
    options?.initialEnrichCount ?? INITIAL_ENRICH_COUNT,
  )
  return result.logs
}

// Full (non-progressive) variant: reads every project dir without the index,
// one LogOption per leaf. Returns [] when the projects dir is unreadable.
async function loadAllProjectsMessageLogsFull(
  limit?: number,
): Promise {
  const projectsDir = getProjectsDir()
  let dirents: Dirent[]
  try {
    dirents = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    return []
  }
  const projectDirs = dirents
    .filter(dirent => dirent.isDirectory())
    .map(dirent => join(projectsDir, dirent.name))
  const logsPerProject = await Promise.all(
    projectDirs.map(projectDir => getLogsWithoutIndex(projectDir, limit)),
  )
  const allLogs = logsPerProject.flat()
  // Deduplicate — same session+leaf can appear in multiple project dirs.
  // This path creates one LogOption per leaf, so use sessionId+leafUuid key.
  const deduped = new Map()
  for (const log of allLogs) {
    const key = `${log.sessionId ?? ''}:${log.leafUuid ?? ''}`
    const existing = deduped.get(key)
    if (!existing || log.modified.getTime() > existing.modified.getTime()) {
      deduped.set(key, log)
    }
  }
  // deduped values are fresh from getLogsWithoutIndex — safe to mutate
  const sorted = sortLogs([...deduped.values()])
  sorted.forEach((log, i) => {
    log.value = i
  })
  return sorted
}

// Progressive variant: lite (stat-only) logs for everything, with only the
// first initialEnrichCount entries enriched; callers page in the rest via
// the returned allStatLogs/nextIndex.
export async function loadAllProjectsMessageLogsProgressive(
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise {
  const projectsDir = getProjectsDir()
  let dirents: Dirent[]
  try {
    dirents = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    return { logs: [], allStatLogs: [], nextIndex: 0 }
  }
  const projectDirs = dirents
    .filter(dirent => dirent.isDirectory())
    .map(dirent => join(projectsDir, dirent.name))
  const rawLogs: LogOption[] = []
  for (const projectDir of projectDirs) {
    rawLogs.push(...(await getSessionFilesLite(projectDir, limit)))
  }
  // Deduplicate — same session can appear in multiple project dirs
  const sorted = deduplicateLogsBySessionId(rawLogs)
  const { logs, nextIndex } = await enrichLogs(sorted, 0, initialEnrichCount)
  // enrichLogs returns fresh unshared objects — safe to mutate in place
  logs.forEach((log, i) => {
    log.value = i
  })
  return { logs, allStatLogs: sorted, nextIndex }
}

/**
 * Loads message logs from all worktrees of the same git repository.
 * Falls back to loadMessageLogs if no worktrees provided.
 *
 * Uses pure filesystem metadata for fast loading.
 *
 * @param worktreePaths Array of worktree paths (from getWorktreePaths)
 * @param limit Optional limit on number of session files to load per project
 * @returns List of message logs sorted by date
 */
/**
 * Result of loading session logs with progressive enrichment support.
*/ export type SessionLogResult = { /** Enriched logs ready for display */ logs: LogOption[] /** Full stat-only list for progressive loading (call enrichLogs to get more) */ allStatLogs: LogOption[] /** Index into allStatLogs where progressive loading should continue from */ nextIndex: number } export async function loadSameRepoMessageLogs( worktreePaths: string[], limit?: number, initialEnrichCount: number = INITIAL_ENRICH_COUNT, ): Promise { const result = await loadSameRepoMessageLogsProgressive( worktreePaths, limit, initialEnrichCount, ) return result.logs } export async function loadSameRepoMessageLogsProgressive( worktreePaths: string[], limit?: number, initialEnrichCount: number = INITIAL_ENRICH_COUNT, ): Promise { logForDebugging( `/resume: loading sessions for cwd=${getOriginalCwd()}, worktrees=[${worktreePaths.join(', ')}]`, ) const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths, limit) logForDebugging(`/resume: found ${allStatLogs.length} session files on disk`) const { logs, nextIndex } = await enrichLogs( allStatLogs, 0, initialEnrichCount, ) // enrichLogs returns fresh unshared objects — safe to mutate in place logs.forEach((log, i) => { log.value = i }) return { logs, allStatLogs, nextIndex } } /** * Gets stat-only logs for worktree paths (no file reads). */ async function getStatOnlyLogsForWorktrees( worktreePaths: string[], limit?: number, ): Promise { const projectsDir = getProjectsDir() if (worktreePaths.length <= 1) { const cwd = getOriginalCwd() const projectDir = getProjectDir(cwd) return getSessionFilesLite(projectDir, undefined, cwd) } // On Windows, drive letter case can differ between git worktree list // output (e.g. C:/Users/...) and how paths were stored in project // directories (e.g. c:/Users/...). Use case-insensitive comparison. const caseInsensitive = process.platform === 'win32' // Sort worktree paths by sanitized prefix length (longest first) so // more specific matches take priority over shorter ones. 
Without this, // a short prefix like -code-myrepo could match -code-myrepo-worktree1 // before the longer, more specific prefix gets a chance. const indexed = worktreePaths.map(wt => { const sanitized = sanitizePath(wt) return { path: wt, prefix: caseInsensitive ? sanitized.toLowerCase() : sanitized, } }) indexed.sort((a, b) => b.prefix.length - a.prefix.length) const allLogs: LogOption[] = [] const seenDirs = new Set() let allDirents: Dirent[] try { allDirents = await readdir(projectsDir, { withFileTypes: true }) } catch (e) { // Fall back to current project logForDebugging( `Failed to read projects dir ${projectsDir}, falling back to current project: ${e}`, ) const projectDir = getProjectDir(getOriginalCwd()) return getSessionFilesLite(projectDir, limit, getOriginalCwd()) } for (const dirent of allDirents) { if (!dirent.isDirectory()) continue const dirName = caseInsensitive ? dirent.name.toLowerCase() : dirent.name if (seenDirs.has(dirName)) continue for (const { path: wtPath, prefix } of indexed) { if (dirName === prefix || dirName.startsWith(prefix + '-')) { seenDirs.add(dirName) allLogs.push( ...(await getSessionFilesLite( join(projectsDir, dirent.name), undefined, wtPath, )), ) break } } } // Deduplicate by sessionId — the same session can appear in multiple // worktree project dirs. Keep the entry with the newest modified time. return deduplicateLogsBySessionId(allLogs) } /** * Retrieves the transcript for a specific agent by agentId. * Directly loads the agent-specific transcript file. 
* @param agentId The agent ID to search for * @returns The conversation chain and budget replacement records for the agent, * or null if not found */ export async function getAgentTranscript(agentId: AgentId): Promise<{ messages: Message[] contentReplacements: ContentReplacementRecord[] } | null> { const agentFile = getAgentTranscriptPath(agentId) try { const { messages, agentContentReplacements } = await loadTranscriptFile(agentFile) // Find messages with matching agentId const agentMessages = Array.from(messages.values()).filter( msg => msg.agentId === agentId && msg.isSidechain, ) if (agentMessages.length === 0) { return null } // Find the most recent leaf message with this agentId const parentUuids = new Set(agentMessages.map(msg => msg.parentUuid)) const leafMessage = findLatestMessage( agentMessages, msg => !parentUuids.has(msg.uuid), ) if (!leafMessage) { return null } // Build the conversation chain const transcript = buildConversationChain(messages, leafMessage) // Filter to only include messages with this agentId const agentTranscript = transcript.filter(msg => msg.agentId === agentId) return { // Convert TranscriptMessage[] to Message[] messages: agentTranscript.map( ({ isSidechain, parentUuid, ...msg }) => msg, ), contentReplacements: agentContentReplacements.get(agentId) ?? [], } } catch { return null } } /** * Extract agent IDs from progress messages in the conversation. * Agent/skill progress messages have type 'progress' with data.type * 'agent_progress' or 'skill_progress' and data.agentId. * This captures sync agents that emit progress messages during execution. 
*/ export function extractAgentIdsFromMessages(messages: Message[]): string[] { const agentIds: string[] = [] for (const message of messages) { if ( message.type === 'progress' && message.data && typeof message.data === 'object' && 'type' in message.data && (message.data.type === 'agent_progress' || message.data.type === 'skill_progress') && 'agentId' in message.data && typeof message.data.agentId === 'string' ) { agentIds.push(message.data.agentId) } } return uniq(agentIds) } /** * Extract teammate transcripts directly from AppState tasks. * In-process teammates store their messages in task.messages, * which is more reliable than loading from disk since each teammate turn * uses a random agentId for transcript storage. */ export function extractTeammateTranscriptsFromTasks(tasks: { [taskId: string]: { type: string identity?: { agentId: string } messages?: Message[] } }): { [agentId: string]: Message[] } { const transcripts: { [agentId: string]: Message[] } = {} for (const task of Object.values(tasks)) { if ( task.type === 'in_process_teammate' && task.identity?.agentId && task.messages && task.messages.length > 0 ) { transcripts[task.identity.agentId] = task.messages } } return transcripts } /** * Load subagent transcripts for the given agent IDs */ export async function loadSubagentTranscripts( agentIds: string[], ): Promise<{ [agentId: string]: Message[] }> { const results = await Promise.all( agentIds.map(async agentId => { try { const result = await getAgentTranscript(asAgentId(agentId)) if (result && result.messages.length > 0) { return { agentId, transcript: result.messages } } return null } catch { // Skip if transcript can't be loaded return null } }), ) const transcripts: { [agentId: string]: Message[] } = {} for (const result of results) { if (result) { transcripts[result.agentId] = result.transcript } } return transcripts } // Globs the session's subagents dir directly — unlike AppState.tasks, this survives task eviction. 
export async function loadAllSubagentTranscriptsFromDisk(): Promise<{ [agentId: string]: Message[] }> { const subagentsDir = join( getSessionProjectDir() ?? getProjectDir(getOriginalCwd()), getSessionId(), 'subagents', ) let entries: Dirent[] try { entries = await readdir(subagentsDir, { withFileTypes: true }) } catch { return {} } // Filename format is the inverse of getAgentTranscriptPath() — keep in sync. const agentIds = entries .filter( d => d.isFile() && d.name.startsWith('agent-') && d.name.endsWith('.jsonl'), ) .map(d => d.name.slice('agent-'.length, -'.jsonl'.length)) return loadSubagentTranscripts(agentIds) } // Exported so useLogMessages can sync-compute the last loggable uuid // without awaiting recordTranscript's return value (race-free hint tracking). export function isLoggableMessage(m: Message): boolean { if (m.type === 'progress') return false // IMPORTANT: We deliberately filter out most attachments for non-ants because // they have sensitive info for training that we don't want exposed to the public. // When enabled, we allow hook_additional_context through since it contains // user-configured hook output that is useful for session context on resume. if (m.type === 'attachment' && getUserType() !== 'ant') { if ( m.attachment.type === 'hook_additional_context' && isEnvTruthy(process.env.CLAUDE_CODE_SAVE_HOOK_ADDITIONAL_CONTEXT) ) { return true } return false } return true } function collectReplIds(messages: readonly Message[]): Set { const ids = new Set() for (const m of messages) { if (m.type === 'assistant' && Array.isArray(m.message.content)) { for (const b of m.message.content) { if (b.type === 'tool_use' && b.name === REPL_TOOL_NAME) { ids.add(b.id) } } } } return ids } /** * For external users, make REPL invisible in the persisted transcript: strip * REPL tool_use/tool_result pairs and promote isVirtual messages to real. 
On * --resume the model then sees a coherent native-tool-call history (assistant * called Bash, got result, called Read, got result) without the REPL wrapper. * Ant transcripts keep the wrapper so /share training data sees REPL usage. * * replIds is pre-collected from the FULL session array, not the slice being * transformed — recordTranscript receives incremental slices where the REPL * tool_use (earlier render) and its tool_result (later render, after async * execution) land in separate calls. A fresh per-call Set would miss the id * and leave an orphaned tool_result on disk. */ function transformMessagesForExternalTranscript( messages: Transcript, replIds: Set, ): Transcript { return messages.flatMap(m => { if (m.type === 'assistant' && Array.isArray(m.message.content)) { const content = m.message.content const hasRepl = content.some( b => b.type === 'tool_use' && b.name === REPL_TOOL_NAME, ) const filtered = hasRepl ? content.filter( b => !(b.type === 'tool_use' && b.name === REPL_TOOL_NAME), ) : content if (filtered.length === 0) return [] if (m.isVirtual) { const { isVirtual: _omit, ...rest } = m return [{ ...rest, message: { ...m.message, content: filtered } }] } if (filtered !== content) { return [{ ...m, message: { ...m.message, content: filtered } }] } return [m] } if (m.type === 'user' && Array.isArray(m.message.content)) { const content = m.message.content const hasRepl = content.some( b => b.type === 'tool_result' && replIds.has(b.tool_use_id), ) const filtered = hasRepl ? 
content.filter( b => !(b.type === 'tool_result' && replIds.has(b.tool_use_id)), ) : content if (filtered.length === 0) return [] if (m.isVirtual) { const { isVirtual: _omit, ...rest } = m return [{ ...rest, message: { ...m.message, content: filtered } }] } if (filtered !== content) { return [{ ...m, message: { ...m.message, content: filtered } }] } return [m] } // string-content user, system, attachment if ('isVirtual' in m && m.isVirtual) { const { isVirtual: _omit, ...rest } = m return [rest] } return [m] }) as Transcript } export function cleanMessagesForLogging( messages: Message[], allMessages: readonly Message[] = messages, ): Transcript { const filtered = messages.filter(isLoggableMessage) as Transcript return getUserType() !== 'ant' ? transformMessagesForExternalTranscript( filtered, collectReplIds(allMessages), ) : filtered } /** * Gets a log by its index * @param index Index in the sorted list of logs (0-based) * @returns Log data or null if not found */ export async function getLogByIndex(index: number): Promise { const logs = await loadMessageLogs() return logs[index] || null } /** * Looks up unresolved tool uses in the transcript by tool_use_id. * Returns the assistant message containing the tool_use, or null if not found * or the tool call already has a tool_result. 
*/ export async function findUnresolvedToolUse( toolUseId: string, ): Promise { try { const transcriptPath = getTranscriptPath() const { messages } = await loadTranscriptFile(transcriptPath) let toolUseMessage = null // Find the tool use but make sure there's not also a result for (const message of messages.values()) { if (message.type === 'assistant') { const content = message.message.content if (Array.isArray(content)) { for (const block of content) { if (block.type === 'tool_use' && block.id === toolUseId) { toolUseMessage = message break } } } } else if (message.type === 'user') { const content = message.message.content if (Array.isArray(content)) { for (const block of content) { if ( block.type === 'tool_result' && block.tool_use_id === toolUseId ) { // Found tool result, bail out return null } } } } } return toolUseMessage } catch { return null } } /** * Gets all session JSONL files in a project directory with their stats. * Returns a map of sessionId → {path, mtime, ctime, size}. * Stats are batched via Promise.all to avoid serial syscalls in the hot loop. 
*/ export async function getSessionFilesWithMtime( projectDir: string, ): Promise< Map > { const sessionFilesMap = new Map< string, { path: string; mtime: number; ctime: number; size: number } >() let dirents: Dirent[] try { dirents = await readdir(projectDir, { withFileTypes: true }) } catch { // Directory doesn't exist - return empty map return sessionFilesMap } const candidates: Array<{ sessionId: string; filePath: string }> = [] for (const dirent of dirents) { if (!dirent.isFile() || !dirent.name.endsWith('.jsonl')) continue const sessionId = validateUuid(basename(dirent.name, '.jsonl')) if (!sessionId) continue candidates.push({ sessionId, filePath: join(projectDir, dirent.name) }) } await Promise.all( candidates.map(async ({ sessionId, filePath }) => { try { const st = await stat(filePath) sessionFilesMap.set(sessionId, { path: filePath, mtime: st.mtime.getTime(), ctime: st.birthtime.getTime(), size: st.size, }) } catch { logForDebugging(`Failed to stat session file: ${filePath}`) } }), ) return sessionFilesMap } /** * Number of sessions to enrich on the initial load of the resume picker. * Each enrichment reads up to 128 KB per file (head + tail), so 50 sessions * means ~6.4 MB of I/O — fast on any modern filesystem while giving users * a much better initial view than the previous default of 10. */ const INITIAL_ENRICH_COUNT = 50 type LiteMetadata = { firstPrompt: string gitBranch?: string isSidechain: boolean projectPath?: string teamName?: string customTitle?: string summary?: string tag?: string agentSetting?: string prNumber?: number prUrl?: string prRepository?: string } /** * Loads all logs from a single session file with full message data. * Builds a LogOption for each leaf message in the file. 
*/ export async function loadAllLogsFromSessionFile( sessionFile: string, projectPathOverride?: string, ): Promise { const { messages, summaries, customTitles, tags, agentNames, agentColors, agentSettings, prNumbers, prUrls, prRepositories, modes, fileHistorySnapshots, attributionSnapshots, contentReplacements, leafUuids, } = await loadTranscriptFile(sessionFile, { keepAllLeaves: true }) if (messages.size === 0) return [] const leafMessages: TranscriptMessage[] = [] // Build parentUuid → children index once (O(n)), so trailing-message lookup is O(1) per leaf const childrenByParent = new Map() for (const msg of messages.values()) { if (leafUuids.has(msg.uuid)) { leafMessages.push(msg) } else if (msg.parentUuid) { const siblings = childrenByParent.get(msg.parentUuid) if (siblings) { siblings.push(msg) } else { childrenByParent.set(msg.parentUuid, [msg]) } } } const logs: LogOption[] = [] for (const leafMessage of leafMessages) { const chain = buildConversationChain(messages, leafMessage) if (chain.length === 0) continue // Append trailing messages that are children of the leaf const trailingMessages = childrenByParent.get(leafMessage.uuid) if (trailingMessages) { // ISO-8601 UTC timestamps are lexically sortable trailingMessages.sort((a, b) => a.timestamp < b.timestamp ? -1 : a.timestamp > b.timestamp ? 1 : 0, ) chain.push(...trailingMessages) } const firstMessage = chain[0]! const sessionId = leafMessage.sessionId as UUID logs.push({ date: leafMessage.timestamp, messages: removeExtraFields(chain), fullPath: sessionFile, value: 0, created: new Date(firstMessage.timestamp), modified: new Date(leafMessage.timestamp), firstPrompt: extractFirstPrompt(chain), messageCount: countVisibleMessages(chain), isSidechain: firstMessage.isSidechain ?? 
false, sessionId, leafUuid: leafMessage.uuid, summary: summaries.get(leafMessage.uuid), customTitle: customTitles.get(sessionId), tag: tags.get(sessionId), agentName: agentNames.get(sessionId), agentColor: agentColors.get(sessionId), agentSetting: agentSettings.get(sessionId), mode: modes.get(sessionId) as LogOption['mode'], prNumber: prNumbers.get(sessionId), prUrl: prUrls.get(sessionId), prRepository: prRepositories.get(sessionId), gitBranch: leafMessage.gitBranch, projectPath: projectPathOverride ?? firstMessage.cwd, fileHistorySnapshots: buildFileHistorySnapshotChain( fileHistorySnapshots, chain, ), attributionSnapshots: buildAttributionSnapshotChain( attributionSnapshots, chain, ), contentReplacements: contentReplacements.get(sessionId) ?? [], }) } return logs } /** * Gets logs by loading all session files fully, bypassing the session index. * Use this when you need full message data (e.g., for /insights analysis). */ async function getLogsWithoutIndex( projectDir: string, limit?: number, ): Promise { const sessionFilesMap = await getSessionFilesWithMtime(projectDir) if (sessionFilesMap.size === 0) return [] // If limit specified, only load N most recent files by mtime let filesToProcess: Array<{ path: string; mtime: number }> if (limit && sessionFilesMap.size > limit) { filesToProcess = [...sessionFilesMap.values()] .sort((a, b) => b.mtime - a.mtime) .slice(0, limit) } else { filesToProcess = [...sessionFilesMap.values()] } const logs: LogOption[] = [] for (const fileInfo of filesToProcess) { try { const fileLogOptions = await loadAllLogsFromSessionFile(fileInfo.path) logs.push(...fileLogOptions) } catch { logForDebugging(`Failed to load session file: ${fileInfo.path}`) } } return logs } /** * Reads the first and last ~64KB of a JSONL file and extracts lite metadata. * * Head (first 64KB): isSidechain, projectPath, teamName, firstPrompt. * Tail (last 64KB): customTitle, tag, PR link, latest gitBranch. 
* * Accepts a shared buffer to avoid per-file allocation overhead. */ async function readLiteMetadata( filePath: string, fileSize: number, buf: Buffer, ): Promise { const { head, tail } = await readHeadAndTail(filePath, fileSize, buf) if (!head) return { firstPrompt: '', isSidechain: false } // Extract stable metadata from the first line via string search. // Works even when the first line is truncated (>64KB message). const isSidechain = head.includes('"isSidechain":true') || head.includes('"isSidechain": true') const projectPath = extractJsonStringField(head, 'cwd') const teamName = extractJsonStringField(head, 'teamName') const agentSetting = extractJsonStringField(head, 'agentSetting') // Prefer the last-prompt tail entry — captured by extractFirstPrompt at // write time (filtered, authoritative) and shows what the user was most // recently doing. Head scan is the fallback for sessions written before // last-prompt entries existed. Raw string scrapes of head are last resort // and catch array-format content blocks (VS Code metadata). const firstPrompt = extractLastJsonStringField(tail, 'lastPrompt') || extractFirstPromptFromChunk(head) || extractJsonStringFieldPrefix(head, 'content', 200) || extractJsonStringFieldPrefix(head, 'text', 200) || '' // Extract tail metadata via string search (last occurrence wins). // User titles (customTitle field, from custom-title entries) win over // AI titles (aiTitle field, from ai-title entries). The distinct field // names mean extractLastJsonStringField naturally disambiguates. const customTitle = extractLastJsonStringField(tail, 'customTitle') ?? extractLastJsonStringField(head, 'customTitle') ?? extractLastJsonStringField(tail, 'aiTitle') ?? extractLastJsonStringField(head, 'aiTitle') const summary = extractLastJsonStringField(tail, 'summary') const tag = extractLastJsonStringField(tail, 'tag') const gitBranch = extractLastJsonStringField(tail, 'gitBranch') ?? 
extractJsonStringField(head, 'gitBranch') // PR link fields — prNumber is a number not a string, so try both const prUrl = extractLastJsonStringField(tail, 'prUrl') const prRepository = extractLastJsonStringField(tail, 'prRepository') let prNumber: number | undefined const prNumStr = extractLastJsonStringField(tail, 'prNumber') if (prNumStr) { prNumber = parseInt(prNumStr, 10) || undefined } if (!prNumber) { const prNumMatch = tail.lastIndexOf('"prNumber":') if (prNumMatch >= 0) { const afterColon = tail.slice(prNumMatch + 11, prNumMatch + 25) const num = parseInt(afterColon.trim(), 10) if (num > 0) prNumber = num } } return { firstPrompt, gitBranch, isSidechain, projectPath, teamName, customTitle, summary, tag, agentSetting, prNumber, prUrl, prRepository, } } /** * Scans a chunk of text for the first meaningful user prompt. */ function extractFirstPromptFromChunk(chunk: string): string { let start = 0 let hasTickMessages = false let firstCommandFallback = '' while (start < chunk.length) { const newlineIdx = chunk.indexOf('\n', start) const line = newlineIdx >= 0 ? chunk.slice(start, newlineIdx) : chunk.slice(start) start = newlineIdx >= 0 ? newlineIdx + 1 : chunk.length if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) { continue } if (line.includes('"tool_result"')) continue if (line.includes('"isMeta":true') || line.includes('"isMeta": true')) continue try { const entry = jsonParse(line) as Record if (entry.type !== 'user') continue const message = entry.message as Record | undefined if (!message) continue const content = message.content // Collect all text values from the message content. For array content // (common in VS Code where IDE metadata tags come before the user's // actual prompt), iterate all text blocks so we don't miss the real // prompt hidden behind / blocks. 
const texts: string[] = [] if (typeof content === 'string') { texts.push(content) } else if (Array.isArray(content)) { for (const block of content) { const b = block as Record if (b.type === 'text' && typeof b.text === 'string') { texts.push(b.text as string) } } } for (const text of texts) { if (!text) continue let result = text.replace(/\n/g, ' ').trim() // Skip command messages (slash commands) but remember the first one // as a fallback title. Matches skip logic in // getFirstMeaningfulUserMessageTextContent, but instead of discarding // command messages entirely, we format them cleanly (e.g. "/clear") // so the session still appears in the resume picker. const commandNameTag = extractTag(result, COMMAND_NAME_TAG) if (commandNameTag) { const name = commandNameTag.replace(/^\//, '') const commandArgs = extractTag(result, 'command-args')?.trim() || '' if (builtInCommandNames().has(name) || !commandArgs) { if (!firstCommandFallback) { firstCommandFallback = commandNameTag } continue } // Custom command with meaningful args — use clean display return commandArgs ? `${commandNameTag} ${commandArgs}` : commandNameTag } // Format bash input with ! prefix before the generic XML skip const bashInput = extractTag(result, 'bash-input') if (bashInput) return `! 
${bashInput}` if (SKIP_FIRST_PROMPT_PATTERN.test(result)) { if ( (feature('PROACTIVE') || feature('KAIROS')) && result.startsWith(`<${TICK_TAG}>`) ) hasTickMessages = true continue } if (result.length > 200) { result = result.slice(0, 200).trim() + '…' } return result } } catch { continue } } // Session started with a slash command but had no subsequent real message — // use the clean command name so the session still appears in the resume picker if (firstCommandFallback) return firstCommandFallback // Proactive sessions have only tick messages — give them a synthetic prompt // so they're not filtered out by enrichLogs if ((feature('PROACTIVE') || feature('KAIROS')) && hasTickMessages) return 'Proactive session' return '' } /** * Like extractJsonStringField but returns the first `maxLen` characters of the * value even when the closing quote is missing (truncated buffer). Newline * escapes are replaced with spaces and the result is trimmed. */ function extractJsonStringFieldPrefix( text: string, key: string, maxLen: number, ): string { const patterns = [`"${key}":"`, `"${key}": "`] for (const pattern of patterns) { const idx = text.indexOf(pattern) if (idx < 0) continue const valueStart = idx + pattern.length // Grab up to maxLen characters from the value, stopping at closing quote let i = valueStart let collected = 0 while (i < text.length && collected < maxLen) { if (text[i] === '\\') { i += 2 // skip escaped char collected++ continue } if (text[i] === '"') break i++ collected++ } const raw = text.slice(valueStart, i) return raw.replace(/\\n/g, ' ').replace(/\\t/g, ' ').trim() } return '' } /** * Deduplicates logs by sessionId, keeping the entry with the newest * modified time. Returns sorted logs with sequential value indices. 
*/ function deduplicateLogsBySessionId(logs: LogOption[]): LogOption[] { const deduped = new Map() for (const log of logs) { if (!log.sessionId) continue const existing = deduped.get(log.sessionId) if (!existing || log.modified.getTime() > existing.modified.getTime()) { deduped.set(log.sessionId, log) } } return sortLogs([...deduped.values()]).map((log, i) => ({ ...log, value: i, })) } /** * Returns lite LogOption[] from pure filesystem metadata (stat only). * No file reads — instant. Call `enrichLogs` to enrich * visible sessions with firstPrompt, gitBranch, customTitle, etc. */ export async function getSessionFilesLite( projectDir: string, limit?: number, projectPath?: string, ): Promise { const sessionFilesMap = await getSessionFilesWithMtime(projectDir) // Sort by mtime descending and apply limit let entries = [...sessionFilesMap.entries()].sort( (a, b) => b[1].mtime - a[1].mtime, ) if (limit && entries.length > limit) { entries = entries.slice(0, limit) } const logs: LogOption[] = [] for (const [sessionId, fileInfo] of entries) { logs.push({ date: new Date(fileInfo.mtime).toISOString(), messages: [], isLite: true, fullPath: fileInfo.path, value: 0, created: new Date(fileInfo.ctime), modified: new Date(fileInfo.mtime), firstPrompt: '', messageCount: 0, fileSize: fileInfo.size, isSidechain: false, sessionId, projectPath, }) } // logs are freshly pushed above — safe to mutate in place const sorted = sortLogs(logs) sorted.forEach((log, i) => { log.value = i }) return sorted } /** * Enriches a lite log with metadata from its JSONL file. * Returns the enriched log, or null if the log has no meaningful content * (no firstPrompt, no customTitle — e.g., metadata-only session files). */ async function enrichLog( log: LogOption, readBuf: Buffer, ): Promise { if (!log.isLite || !log.fullPath) return log const meta = await readLiteMetadata(log.fullPath, log.fileSize ?? 
0, readBuf) const enriched: LogOption = { ...log, isLite: false, firstPrompt: meta.firstPrompt, gitBranch: meta.gitBranch, isSidechain: meta.isSidechain, teamName: meta.teamName, customTitle: meta.customTitle, summary: meta.summary, tag: meta.tag, agentSetting: meta.agentSetting, prNumber: meta.prNumber, prUrl: meta.prUrl, prRepository: meta.prRepository, projectPath: meta.projectPath ?? log.projectPath, } // Provide a fallback title for sessions where we couldn't extract the first // prompt (e.g., large first messages that exceed the 16KB read buffer). // Previously these sessions were silently dropped, making them inaccessible // via /resume after crashes or large-context sessions. if (!enriched.firstPrompt && !enriched.customTitle) { enriched.firstPrompt = '(session)' } // Filter: skip sidechains and agent sessions if (enriched.isSidechain) { logForDebugging( `Session ${log.sessionId} filtered from /resume: isSidechain=true`, ) return null } if (enriched.teamName) { logForDebugging( `Session ${log.sessionId} filtered from /resume: teamName=${enriched.teamName}`, ) return null } return enriched } /** * Enriches enough lite logs from `allLogs` (starting at `startIndex`) to * produce `count` valid results. Returns the valid enriched logs and the * index where scanning stopped (for progressive loading to continue from). */ export async function enrichLogs( allLogs: LogOption[], startIndex: number, count: number, ): Promise<{ logs: LogOption[]; nextIndex: number }> { const result: LogOption[] = [] const readBuf = Buffer.alloc(LITE_READ_BUF_SIZE) let i = startIndex while (i < allLogs.length && result.length < count) { const log = allLogs[i]! 
i++ const enriched = await enrichLog(log, readBuf) if (enriched) { result.push(enriched) } } const scanned = i - startIndex const filtered = scanned - result.length if (filtered > 0) { logForDebugging( `/resume: enriched ${scanned} sessions, ${filtered} filtered out, ${result.length} visible (${allLogs.length - i} remaining on disk)`, ) } return { logs: result, nextIndex: i } }