5106 lines
176 KiB
TypeScript
5106 lines
176 KiB
TypeScript
import { feature } from 'bun:bundle'
|
||
import type { UUID } from 'crypto'
|
||
import type { Dirent } from 'fs'
|
||
// Sync fs primitives for readFileTailSync — separate from fs/promises
|
||
// imports above. Named (not wildcard) per CLAUDE.md style; no collisions
|
||
// with the async-suffixed names.
|
||
import { closeSync, fstatSync, openSync, readSync } from 'fs'
|
||
import {
|
||
appendFile as fsAppendFile,
|
||
open as fsOpen,
|
||
mkdir,
|
||
readdir,
|
||
readFile,
|
||
stat,
|
||
unlink,
|
||
writeFile,
|
||
} from 'fs/promises'
|
||
import memoize from 'lodash-es/memoize.js'
|
||
import { basename, dirname, join } from 'path'
|
||
import {
|
||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||
logEvent,
|
||
} from 'src/services/analytics/index.js'
|
||
import {
|
||
getOriginalCwd,
|
||
getPlanSlugCache,
|
||
getPromptId,
|
||
getSessionId,
|
||
getSessionProjectDir,
|
||
isSessionPersistenceDisabled,
|
||
switchSession,
|
||
} from '../bootstrap/state.js'
|
||
import { builtInCommandNames } from '../commands.js'
|
||
import { COMMAND_NAME_TAG, TICK_TAG } from '../constants/xml.js'
|
||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
|
||
import * as sessionIngress from '../services/api/sessionIngress.js'
|
||
import { REPL_TOOL_NAME } from '../tools/REPLTool/constants.js'
|
||
import {
|
||
type AgentId,
|
||
asAgentId,
|
||
asSessionId,
|
||
type SessionId,
|
||
} from '../types/ids.js'
|
||
import type { AttributionSnapshotMessage } from '../types/logs.js'
|
||
import {
|
||
type ContentReplacementEntry,
|
||
type ContextCollapseCommitEntry,
|
||
type ContextCollapseSnapshotEntry,
|
||
type Entry,
|
||
type FileHistorySnapshotMessage,
|
||
type LogOption,
|
||
type PersistedWorktreeSession,
|
||
type SerializedMessage,
|
||
sortLogs,
|
||
type TranscriptMessage,
|
||
} from '../types/logs.js'
|
||
import type {
|
||
AssistantMessage,
|
||
AttachmentMessage,
|
||
Message,
|
||
SystemCompactBoundaryMessage,
|
||
SystemMessage,
|
||
UserMessage,
|
||
} from '../types/message.js'
|
||
import type { QueueOperationMessage } from '../types/messageQueueTypes.js'
|
||
import { uniq } from './array.js'
|
||
import { registerCleanup } from './cleanupRegistry.js'
|
||
import { updateSessionName } from './concurrentSessions.js'
|
||
import { getCwd } from './cwd.js'
|
||
import { logForDebugging } from './debug.js'
|
||
import { logForDiagnosticsNoPII } from './diagLogs.js'
|
||
import { getClaudeConfigHomeDir, isEnvTruthy } from './envUtils.js'
|
||
import { isFsInaccessible } from './errors.js'
|
||
import type { FileHistorySnapshot } from './fileHistory.js'
|
||
import { formatFileSize } from './format.js'
|
||
import { getFsImplementation } from './fsOperations.js'
|
||
import { getWorktreePaths } from './getWorktreePaths.js'
|
||
import { getBranch } from './git.js'
|
||
import { gracefulShutdownSync, isShuttingDown } from './gracefulShutdown.js'
|
||
import { parseJSONL } from './json.js'
|
||
import { logError } from './log.js'
|
||
import { extractTag, isCompactBoundaryMessage } from './messages.js'
|
||
import { sanitizePath } from './path.js'
|
||
import {
|
||
extractJsonStringField,
|
||
extractLastJsonStringField,
|
||
LITE_READ_BUF_SIZE,
|
||
readHeadAndTail,
|
||
readTranscriptForLoad,
|
||
SKIP_PRECOMPACT_THRESHOLD,
|
||
} from './sessionStoragePortable.js'
|
||
import { getSettings_DEPRECATED } from './settings/settings.js'
|
||
import { jsonParse, jsonStringify } from './slowOperations.js'
|
||
import type { ContentReplacementRecord } from './toolResultStorage.js'
|
||
import { validateUuid } from './uuid.js'
|
||
|
||
// Cache MACRO.VERSION at module level to work around bun --define bug in async contexts
// See: https://github.com/oven-sh/bun/issues/26168
const VERSION = typeof MACRO !== 'undefined' ? MACRO.VERSION : 'unknown'

// The persisted conversation transcript: every message kind written to the
// session JSONL. Progress messages are deliberately absent from this union —
// they are ephemeral UI state (see isTranscriptMessage).
type Transcript = (
  | UserMessage
  | AssistantMessage
  | AttachmentMessage
  | SystemMessage
)[]
|
||
|
||
// Use getOriginalCwd() at each call site instead of capturing at module load
|
||
// time. getCwd() at import time may run before bootstrap resolves symlinks via
|
||
// realpathSync, causing a different sanitized project directory than what
|
||
// getOriginalCwd() returns after bootstrap. This split-brain made sessions
|
||
// saved under one path invisible when loaded via the other.
|
||
|
||
/**
|
||
* Pre-compiled regex to skip non-meaningful messages when extracting first prompt.
|
||
* Matches anything starting with a lowercase XML-like tag (IDE context, hook
|
||
* output, task notifications, channel messages, etc.) or a synthetic interrupt
|
||
* marker. Kept in sync with sessionStoragePortable.ts — generic pattern avoids
|
||
* an ever-growing allowlist that falls behind as new notification types ship.
|
||
*/
|
||
// 50MB — prevents OOM in the tombstone slow path which reads + rewrites the
|
||
// entire session file. Session files can grow to multiple GB (inc-3930).
|
||
const MAX_TOMBSTONE_REWRITE_BYTES = 50 * 1024 * 1024
|
||
|
||
const SKIP_FIRST_PROMPT_PATTERN =
|
||
/^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/
|
||
|
||
/**
|
||
* Type guard to check if an entry is a transcript message.
|
||
* Transcript messages include user, assistant, attachment, and system messages.
|
||
* IMPORTANT: This is the single source of truth for what constitutes a transcript message.
|
||
* loadTranscriptFile() uses this to determine which messages to load into the chain.
|
||
*
|
||
* Progress messages are NOT transcript messages. They are ephemeral UI state
|
||
* and must not be persisted to the JSONL or participate in the parentUuid
|
||
* chain. Including them caused chain forks that orphaned real conversation
|
||
* messages on resume (see #14373, #23537).
|
||
*/
|
||
export function isTranscriptMessage(entry: Entry): entry is TranscriptMessage {
|
||
return (
|
||
entry.type === 'user' ||
|
||
entry.type === 'assistant' ||
|
||
entry.type === 'attachment' ||
|
||
entry.type === 'system'
|
||
)
|
||
}
|
||
|
||
/**
|
||
* Entries that participate in the parentUuid chain. Used on the write path
|
||
* (insertMessageChain, useLogMessages) to skip progress when assigning
|
||
* parentUuid. Old transcripts with progress already in the chain are handled
|
||
* by the progressBridge rewrite in loadTranscriptFile.
|
||
*/
|
||
export function isChainParticipant(m: Pick<Message, 'type'>): boolean {
|
||
return m.type !== 'progress'
|
||
}
|
||
|
||
// On-disk shape of a progress entry from transcripts written before PR
// #24099. Not part of the current Entry union; recognized at load time by
// isLegacyProgressEntry so the parentUuid chain can be bridged across it.
type LegacyProgressEntry = {
  type: 'progress'
  uuid: UUID
  parentUuid: UUID | null
}
|
||
|
||
/**
|
||
* Progress entries in transcripts written before PR #24099. They are not
|
||
* in the Entry type union anymore but still exist on disk with uuid and
|
||
* parentUuid fields. loadTranscriptFile bridges the chain across them.
|
||
*/
|
||
function isLegacyProgressEntry(entry: unknown): entry is LegacyProgressEntry {
|
||
return (
|
||
typeof entry === 'object' &&
|
||
entry !== null &&
|
||
'type' in entry &&
|
||
entry.type === 'progress' &&
|
||
'uuid' in entry &&
|
||
typeof entry.uuid === 'string'
|
||
)
|
||
}
|
||
|
||
/**
|
||
* High-frequency tool progress ticks (1/sec for Sleep, per-chunk for Bash).
|
||
* These are UI-only: not sent to the API, not rendered after the tool
|
||
* completes. Used by REPL.tsx to replace-in-place instead of appending, and
|
||
* by loadTranscriptFile to skip legacy entries from old transcripts.
|
||
*/
|
||
const EPHEMERAL_PROGRESS_TYPES = new Set([
|
||
'bash_progress',
|
||
'powershell_progress',
|
||
'mcp_progress',
|
||
...(feature('PROACTIVE') || feature('KAIROS')
|
||
? (['sleep_progress'] as const)
|
||
: []),
|
||
])
|
||
export function isEphemeralToolProgress(dataType: unknown): boolean {
|
||
return typeof dataType === 'string' && EPHEMERAL_PROGRESS_TYPES.has(dataType)
|
||
}
|
||
|
||
export function getProjectsDir(): string {
|
||
return join(getClaudeConfigHomeDir(), 'projects')
|
||
}
|
||
|
||
export function getTranscriptPath(): string {
|
||
const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd())
|
||
return join(projectDir, `${getSessionId()}.jsonl`)
|
||
}
|
||
|
||
export function getTranscriptPathForSession(sessionId: string): string {
|
||
// When asking for the CURRENT session's transcript, honor sessionProjectDir
|
||
// the same way getTranscriptPath() does. Without this, hooks get a
|
||
// transcript_path computed from originalCwd while the actual file was
|
||
// written to sessionProjectDir (set by switchActiveSession on resume/branch)
|
||
// — different directories, so the hook sees MISSING (gh-30217). CC-34
|
||
// made sessionId + sessionProjectDir atomic precisely to prevent this
|
||
// kind of drift; this function just wasn't updated to read both.
|
||
//
|
||
// For OTHER session IDs we can only guess via originalCwd — we don't
|
||
// track a sessionId→projectDir map. Callers wanting a specific other
|
||
// session's path should pass fullPath explicitly (most save* functions
|
||
// already accept this).
|
||
if (sessionId === getSessionId()) {
|
||
return getTranscriptPath()
|
||
}
|
||
const projectDir = getProjectDir(getOriginalCwd())
|
||
return join(projectDir, `${sessionId}.jsonl`)
|
||
}
|
||
|
||
// 50 MB — session JSONL can grow to multiple GB (inc-3930). Callers that
// read the raw transcript must bail out above this threshold to avoid OOM.
export const MAX_TRANSCRIPT_READ_BYTES = 50 * 1024 * 1024

// In-memory map of agentId → subdirectory for grouping related subagent
// transcripts (e.g. workflow runs write to subagents/workflows/<runId>/).
// Populated before the agent runs; consulted by getAgentTranscriptPath.
// Process-local only — never persisted.
const agentTranscriptSubdirs = new Map<string, string>()
|
||
|
||
export function setAgentTranscriptSubdir(
|
||
agentId: string,
|
||
subdir: string,
|
||
): void {
|
||
agentTranscriptSubdirs.set(agentId, subdir)
|
||
}
|
||
|
||
export function clearAgentTranscriptSubdir(agentId: string): void {
|
||
agentTranscriptSubdirs.delete(agentId)
|
||
}
|
||
|
||
export function getAgentTranscriptPath(agentId: AgentId): string {
|
||
// Same sessionProjectDir consistency as getTranscriptPathForSession —
|
||
// subagent transcripts live under the session dir, so if the session
|
||
// transcript is at sessionProjectDir, subagent transcripts are too.
|
||
const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd())
|
||
const sessionId = getSessionId()
|
||
const subdir = agentTranscriptSubdirs.get(agentId)
|
||
const base = subdir
|
||
? join(projectDir, sessionId, 'subagents', subdir)
|
||
: join(projectDir, sessionId, 'subagents')
|
||
return join(base, `agent-${agentId}.jsonl`)
|
||
}
|
||
|
||
function getAgentMetadataPath(agentId: AgentId): string {
|
||
return getAgentTranscriptPath(agentId).replace(/\.jsonl$/, '.meta.json')
|
||
}
|
||
|
||
/** Sidecar metadata persisted next to a subagent transcript (writeAgentMetadata). */
export type AgentMetadata = {
  // Agent type used to launch the subagent; read back on resume for routing.
  agentType: string
  /** Worktree path if the agent was spawned with isolation: "worktree" */
  worktreePath?: string
  /** Original task description from the AgentTool input. Persisted so a
   * resumed agent's notification can show the original description instead
   * of a placeholder. Optional — older metadata files lack this field. */
  description?: string
}
|
||
|
||
/**
|
||
* Persist the agentType used to launch a subagent. Read by resume to
|
||
* route correctly when subagent_type is omitted — without this, resuming
|
||
* a fork silently degrades to general-purpose (4KB system prompt, no
|
||
* inherited history). Sidecar file avoids JSONL schema changes.
|
||
*
|
||
* Also stores the worktreePath when the agent was spawned with worktree
|
||
* isolation, enabling resume to restore the correct cwd.
|
||
*/
|
||
export async function writeAgentMetadata(
|
||
agentId: AgentId,
|
||
metadata: AgentMetadata,
|
||
): Promise<void> {
|
||
const path = getAgentMetadataPath(agentId)
|
||
await mkdir(dirname(path), { recursive: true })
|
||
await writeFile(path, JSON.stringify(metadata))
|
||
}
|
||
|
||
export async function readAgentMetadata(
|
||
agentId: AgentId,
|
||
): Promise<AgentMetadata | null> {
|
||
const path = getAgentMetadataPath(agentId)
|
||
try {
|
||
const raw = await readFile(path, 'utf-8')
|
||
return JSON.parse(raw) as AgentMetadata
|
||
} catch (e) {
|
||
if (isFsInaccessible(e)) return null
|
||
throw e
|
||
}
|
||
}
|
||
|
||
/** Identity of a remote-agent task, persisted per task so it can be
 * reconnected on session resume (see writeRemoteAgentMetadata). */
export type RemoteAgentMetadata = {
  taskId: string
  remoteTaskType: string
  /** CCR session ID — used to fetch live status from the Sessions API on resume. */
  sessionId: string
  title: string
  command: string
  // Numeric spawn timestamp — presumably epoch ms; confirm at the writer.
  spawnedAt: number
  toolUseId?: string
  isLongRunning?: boolean
  isUltraplan?: boolean
  isRemoteReview?: boolean
  // Opaque task-type-specific payload; schema not visible in this file.
  remoteTaskMetadata?: Record<string, unknown>
}
|
||
|
||
function getRemoteAgentsDir(): string {
|
||
// Same sessionProjectDir fallback as getAgentTranscriptPath — the project
|
||
// dir (containing the .jsonl), not the session dir, so sessionId is joined.
|
||
const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd())
|
||
return join(projectDir, getSessionId(), 'remote-agents')
|
||
}
|
||
|
||
function getRemoteAgentMetadataPath(taskId: string): string {
|
||
return join(getRemoteAgentsDir(), `remote-agent-${taskId}.meta.json`)
|
||
}
|
||
|
||
/**
|
||
* Persist metadata for a remote-agent task so it can be restored on session
|
||
* resume. Per-task sidecar file (sibling dir to subagents/) survives
|
||
* hydrateSessionFromRemote's .jsonl wipe; status is always fetched fresh
|
||
* from CCR on restore — only identity is persisted locally.
|
||
*/
|
||
export async function writeRemoteAgentMetadata(
|
||
taskId: string,
|
||
metadata: RemoteAgentMetadata,
|
||
): Promise<void> {
|
||
const path = getRemoteAgentMetadataPath(taskId)
|
||
await mkdir(dirname(path), { recursive: true })
|
||
await writeFile(path, JSON.stringify(metadata))
|
||
}
|
||
|
||
export async function readRemoteAgentMetadata(
|
||
taskId: string,
|
||
): Promise<RemoteAgentMetadata | null> {
|
||
const path = getRemoteAgentMetadataPath(taskId)
|
||
try {
|
||
const raw = await readFile(path, 'utf-8')
|
||
return JSON.parse(raw) as RemoteAgentMetadata
|
||
} catch (e) {
|
||
if (isFsInaccessible(e)) return null
|
||
throw e
|
||
}
|
||
}
|
||
|
||
export async function deleteRemoteAgentMetadata(taskId: string): Promise<void> {
|
||
const path = getRemoteAgentMetadataPath(taskId)
|
||
try {
|
||
await unlink(path)
|
||
} catch (e) {
|
||
if (isFsInaccessible(e)) return
|
||
throw e
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Scan the remote-agents/ directory for all persisted metadata files.
|
||
* Used by restoreRemoteAgentTasks to reconnect to still-running CCR sessions.
|
||
*/
|
||
export async function listRemoteAgentMetadata(): Promise<
|
||
RemoteAgentMetadata[]
|
||
> {
|
||
const dir = getRemoteAgentsDir()
|
||
let entries: Dirent[]
|
||
try {
|
||
entries = await readdir(dir, { withFileTypes: true })
|
||
} catch (e) {
|
||
if (isFsInaccessible(e)) return []
|
||
throw e
|
||
}
|
||
const results: RemoteAgentMetadata[] = []
|
||
for (const entry of entries) {
|
||
if (!entry.isFile() || !entry.name.endsWith('.meta.json')) continue
|
||
try {
|
||
const raw = await readFile(join(dir, entry.name), 'utf-8')
|
||
results.push(JSON.parse(raw) as RemoteAgentMetadata)
|
||
} catch (e) {
|
||
// Skip unreadable or corrupt files — a partial write from a crashed
|
||
// fire-and-forget persist shouldn't take down the whole restore.
|
||
logForDebugging(
|
||
`listRemoteAgentMetadata: skipping ${entry.name}: ${String(e)}`,
|
||
)
|
||
}
|
||
}
|
||
return results
|
||
}
|
||
|
||
export function sessionIdExists(sessionId: string): boolean {
|
||
const projectDir = getProjectDir(getOriginalCwd())
|
||
const sessionFile = join(projectDir, `${sessionId}.jsonl`)
|
||
const fs = getFsImplementation()
|
||
try {
|
||
fs.statSync(sessionFile)
|
||
return true
|
||
} catch {
|
||
return false
|
||
}
|
||
}
|
||
|
||
// exported for testing
|
||
export function getNodeEnv(): string {
|
||
return process.env.NODE_ENV || 'development'
|
||
}
|
||
|
||
// exported for testing
|
||
export function getUserType(): string {
|
||
return process.env.USER_TYPE || 'external'
|
||
}
|
||
|
||
function getEntrypoint(): string | undefined {
|
||
return process.env.CLAUDE_CODE_ENTRYPOINT
|
||
}
|
||
|
||
export function isCustomTitleEnabled(): boolean {
|
||
return true
|
||
}
|
||
|
||
// Memoized: called 12+ times per turn via hooks.ts createBaseHookInput
// (PostToolUse path, 5×/turn) + various save* functions. Input is a cwd
// string; homedir/env/regex are all session-invariant so the result is
// stable for a given input. Worktree switches just change the key — no
// cache clear needed.
// Maps a cwd to its sanitized per-project directory under getProjectsDir().
export const getProjectDir = memoize((projectDir: string): string => {
  return join(getProjectsDir(), sanitizePath(projectDir))
})
|
||
|
||
// Lazily-constructed singleton; created on first getProject() call and
// nulled out by resetProjectForTesting().
let project: Project | null = null
// Ensures the flush cleanup handler is registered at most once per process.
let cleanupRegistered = false
|
||
|
||
// Lazy accessor for the Project singleton. Also registers the shutdown
// cleanup handler exactly once (guarded by cleanupRegistered, which is NOT
// reset by resetProjectForTesting — registration survives singleton resets).
function getProject(): Project {
  if (!project) {
    project = new Project()

    // Register flush as a cleanup handler (only once)
    if (!cleanupRegistered) {
      registerCleanup(async () => {
        // Flush queued writes first, then re-append session metadata
        // (customTitle, tag) so they always appear in the last 64KB tail
        // window. readLiteMetadata only reads the tail to extract these
        // fields — if enough messages are appended after a /rename, the
        // custom-title entry gets pushed outside the window and --resume
        // shows the auto-generated firstPrompt instead.
        await project?.flush()
        try {
          project?.reAppendSessionMetadata()
        } catch {
          // Best-effort — don't let metadata re-append crash the cleanup
        }
      })
      cleanupRegistered = true
    }
  }
  return project
}
|
||
|
||
/**
|
||
* Reset the Project singleton's flush state for testing.
|
||
* This ensures tests don't interfere with each other via shared counter state.
|
||
*/
|
||
export function resetProjectFlushStateForTesting(): void {
|
||
project?._resetFlushState()
|
||
}
|
||
|
||
/**
|
||
* Reset the entire Project singleton for testing.
|
||
* This ensures tests with different CLAUDE_CONFIG_DIR values
|
||
* don't share stale sessionFile paths.
|
||
*/
|
||
export function resetProjectForTesting(): void {
|
||
project = null
|
||
}
|
||
|
||
export function setSessionFileForTesting(path: string): void {
|
||
getProject().sessionFile = path
|
||
}
|
||
|
||
// Callback that persists one transcript event as a CCR v2 internal worker
// event (see setInternalEventWriter).
type InternalEventWriter = (
  eventType: string,
  payload: Record<string, unknown>,
  options?: { isCompaction?: boolean; agentId?: string },
) => Promise<void>
|
||
|
||
/**
|
||
* Register a CCR v2 internal event writer for transcript persistence.
|
||
* When set, transcript messages are written as internal worker events
|
||
* instead of going through v1 Session Ingress.
|
||
*/
|
||
export function setInternalEventWriter(writer: InternalEventWriter): void {
|
||
getProject().setInternalEventWriter(writer)
|
||
}
|
||
|
||
// Fetches persisted internal events (payload plus optional agent id) for
// session resume. The null case presumably means "no events available" —
// confirm against hydrateFromCCRv2InternalEvents.
type InternalEventReader = () => Promise<
  { payload: Record<string, unknown>; agent_id?: string }[] | null
>
|
||
|
||
/**
|
||
* Register a CCR v2 internal event reader for session resume.
|
||
* When set, hydrateFromCCRv2InternalEvents() can fetch foreground and
|
||
* subagent internal events to reconstruct conversation state on reconnection.
|
||
*/
|
||
export function setInternalEventReader(
|
||
reader: InternalEventReader,
|
||
subagentReader: InternalEventReader,
|
||
): void {
|
||
getProject().setInternalEventReader(reader)
|
||
getProject().setInternalSubagentEventReader(subagentReader)
|
||
}
|
||
|
||
/**
|
||
* Set the remote ingress URL on the current Project for testing.
|
||
* This simulates what hydrateRemoteSession does in production.
|
||
*/
|
||
export function setRemoteIngressUrlForTesting(url: string): void {
|
||
getProject().setRemoteIngressUrl(url)
|
||
}
|
||
|
||
// 10ms — interval constant for remote (ingress) flushing. NOTE(review):
// the use site is not visible in this chunk; confirm against the remote
// write path.
const REMOTE_FLUSH_INTERVAL_MS = 10
|
||
|
||
// Owner of all transcript persistence state for the current process:
// session metadata cache, the session file handle/path, buffered entries,
// per-file write queues, and the debounced flush machinery.
class Project {
  // Minimal cache for current session only (not all sessions)
  currentSessionTag: string | undefined
  currentSessionTitle: string | undefined
  currentSessionAgentName: string | undefined
  currentSessionAgentColor: string | undefined
  currentSessionLastPrompt: string | undefined
  currentSessionAgentSetting: string | undefined
  currentSessionMode: 'coordinator' | 'normal' | undefined
  // Tri-state: undefined = never touched (don't write), null = exited worktree,
  // object = currently in worktree. reAppendSessionMetadata writes null so
  // --resume knows the session exited (vs. crashed while inside).
  currentSessionWorktree: PersistedWorktreeSession | null | undefined
  currentSessionPrNumber: number | undefined
  currentSessionPrUrl: string | undefined
  currentSessionPrRepository: string | undefined

  sessionFile: string | null = null
  // Entries buffered while sessionFile is null. Flushed by materializeSessionFile
  // on the first user/assistant message — prevents metadata-only session files.
  private pendingEntries: Entry[] = []
  private remoteIngressUrl: string | null = null
  private internalEventWriter: InternalEventWriter | null = null
  private internalEventReader: InternalEventReader | null = null
  private internalSubagentEventReader: InternalEventReader | null = null
  // Count of in-flight tracked operations (trackWrite); flush() waits on it.
  private pendingWriteCount: number = 0
  // Resolvers for flush() callers waiting for pendingWriteCount to hit 0.
  private flushResolvers: Array<() => void> = []
  // Per-file write queues. Each entry carries a resolve callback so
  // callers of enqueueWrite can optionally await their specific write.
  private writeQueues = new Map<
    string,
    Array<{ entry: Entry; resolve: () => void }>
  >()
  // Debounce timer for the next drain; null when none is scheduled.
  private flushTimer: ReturnType<typeof setTimeout> | null = null
  // Promise of the currently running drain, so flush() can await it.
  private activeDrain: Promise<void> | null = null
  private FLUSH_INTERVAL_MS = 100
  // Upper bound on a single append's payload built in drainWriteQueue.
  private readonly MAX_CHUNK_BYTES = 100 * 1024 * 1024

  constructor() {}
|
||
|
||
/** @internal Reset flush/queue state for testing. */
|
||
_resetFlushState(): void {
|
||
this.pendingWriteCount = 0
|
||
this.flushResolvers = []
|
||
if (this.flushTimer) clearTimeout(this.flushTimer)
|
||
this.flushTimer = null
|
||
this.activeDrain = null
|
||
this.writeQueues = new Map()
|
||
}
|
||
|
||
private incrementPendingWrites(): void {
|
||
this.pendingWriteCount++
|
||
}
|
||
|
||
private decrementPendingWrites(): void {
|
||
this.pendingWriteCount--
|
||
if (this.pendingWriteCount === 0) {
|
||
// Resolve all waiting flush promises
|
||
for (const resolve of this.flushResolvers) {
|
||
resolve()
|
||
}
|
||
this.flushResolvers = []
|
||
}
|
||
}
|
||
|
||
private async trackWrite<T>(fn: () => Promise<T>): Promise<T> {
|
||
this.incrementPendingWrites()
|
||
try {
|
||
return await fn()
|
||
} finally {
|
||
this.decrementPendingWrites()
|
||
}
|
||
}
|
||
|
||
private enqueueWrite(filePath: string, entry: Entry): Promise<void> {
|
||
return new Promise<void>(resolve => {
|
||
let queue = this.writeQueues.get(filePath)
|
||
if (!queue) {
|
||
queue = []
|
||
this.writeQueues.set(filePath, queue)
|
||
}
|
||
queue.push({ entry, resolve })
|
||
this.scheduleDrain()
|
||
})
|
||
}
|
||
|
||
// Debounce a drain: at most one timer pending at a time; entries that
// arrive while a drain runs trigger a re-schedule afterwards.
// NOTE(review): if drainWriteQueue rejects, the error surfaces as an
// unhandled rejection from this async setTimeout callback and the queued
// resolvers are never called — confirm appendToFile's mkdir-and-retry is
// the only expected failure mode.
private scheduleDrain(): void {
  if (this.flushTimer) {
    return
  }
  this.flushTimer = setTimeout(async () => {
    this.flushTimer = null
    // Publish the drain promise so flush() can await an in-flight drain.
    this.activeDrain = this.drainWriteQueue()
    await this.activeDrain
    this.activeDrain = null
    // If more items arrived during drain, schedule again
    if (this.writeQueues.size > 0) {
      this.scheduleDrain()
    }
  }, this.FLUSH_INTERVAL_MS)
}
|
||
|
||
private async appendToFile(filePath: string, data: string): Promise<void> {
|
||
try {
|
||
await fsAppendFile(filePath, data, { mode: 0o600 })
|
||
} catch {
|
||
// Directory may not exist — some NFS-like filesystems return
|
||
// unexpected error codes, so don't discriminate on code.
|
||
await mkdir(dirname(filePath), { recursive: true, mode: 0o700 })
|
||
await fsAppendFile(filePath, data, { mode: 0o600 })
|
||
}
|
||
}
|
||
|
||
// Flush every per-file queue: serialize queued entries as JSONL, append in
// chunks, and resolve each entry's promise only after its chunk hit disk.
// NOTE(review): the MAX_CHUNK_BYTES comparison uses string .length (UTF-16
// code units), so a chunk of multi-byte text can exceed the byte limit —
// confirm whether that margin is acceptable.
private async drainWriteQueue(): Promise<void> {
  for (const [filePath, queue] of this.writeQueues) {
    if (queue.length === 0) {
      continue
    }
    // splice(0) takes ownership of the batch; entries enqueued while we
    // await the appends land in the (now empty) live queue for the next drain.
    const batch = queue.splice(0)

    let content = ''
    const resolvers: Array<() => void> = []

    for (const { entry, resolve } of batch) {
      const line = jsonStringify(entry) + '\n'

      if (content.length + line.length >= this.MAX_CHUNK_BYTES) {
        // Flush chunk and resolve its entries before starting a new one
        await this.appendToFile(filePath, content)
        for (const r of resolvers) {
          r()
        }
        resolvers.length = 0
        content = ''
      }

      content += line
      resolvers.push(resolve)
    }

    // Final (or only) chunk for this file.
    if (content.length > 0) {
      await this.appendToFile(filePath, content)
      for (const r of resolvers) {
        r()
      }
    }
  }

  // Clean up empty queues
  // (Deleting visited keys during Map iteration is safe per the spec.)
  for (const [filePath, queue] of this.writeQueues) {
    if (queue.length === 0) {
      this.writeQueues.delete(filePath)
    }
  }
}
|
||
|
||
resetSessionFile(): void {
|
||
this.sessionFile = null
|
||
this.pendingEntries = []
|
||
}
|
||
|
||
/**
|
||
* Re-append cached session metadata to the end of the transcript file.
|
||
* This ensures metadata stays within the tail window that readLiteMetadata
|
||
* reads during progressive loading.
|
||
*
|
||
* Called from two contexts with different file-ordering implications:
|
||
* - During compaction (compact.ts, reactiveCompact.ts): writes metadata
|
||
* just before the boundary marker is emitted - these entries end up
|
||
* before the boundary and are recovered by scanPreBoundaryMetadata.
|
||
* - On session exit (cleanup handler): writes metadata at EOF after all
|
||
* boundaries - this is what enables loadTranscriptFile's pre-compact
|
||
* skip to find metadata without a forward scan.
|
||
*
|
||
* External-writer safety for SDK-mutable fields (custom-title, tag):
|
||
* before re-appending, refresh the cache from the tail scan window. If an
|
||
* external process (SDK renameSession/tagSession) wrote a fresher value,
|
||
* our stale cache absorbs it and the re-append below persists it — not
|
||
* the stale CLI value. If no entry is in the tail (evicted, or never
|
||
* written by the SDK), the cache is the only source of truth and is
|
||
* re-appended as-is.
|
||
*
|
||
* Re-append is unconditional (even when the value is already in the
|
||
* tail): during compaction, a title 40KB from EOF is inside the current
|
||
* tail window but will fall out once the post-compaction session grows.
|
||
* Skipping the re-append would defeat the purpose of this call. Fields
|
||
* the SDK cannot touch (last-prompt, agent-*, mode, pr-link) have no
|
||
* external-writer concern — their caches are authoritative.
|
||
*/
|
||
reAppendSessionMetadata(skipTitleRefresh = false): void {
  if (!this.sessionFile) return
  const sessionId = getSessionId() as UUID
  if (!sessionId) return

  // One sync tail read to refresh SDK-mutable fields. Same
  // LITE_READ_BUF_SIZE window readLiteMetadata uses. Empty string on
  // failure → extract returns null → cache is the only source of truth.
  const tail = readFileTailSync(this.sessionFile)

  // Absorb any fresher SDK-written title/tag into our cache. If the SDK
  // wrote while we had the session open, our cache is stale — the tail
  // value is authoritative. If the tail has nothing (evicted or never
  // written externally), the cache stands.
  //
  // Filter with startsWith to match only top-level JSONL entries (col 0)
  // and not "type":"tag" appearing inside a nested tool_use input that
  // happens to be JSON-serialized into a message.
  const tailLines = tail.split('\n')
  if (!skipTitleRefresh) {
    const titleLine = tailLines.findLast(l =>
      l.startsWith('{"type":"custom-title"'),
    )
    if (titleLine) {
      const tailTitle = extractLastJsonStringField(titleLine, 'customTitle')
      // `!== undefined` distinguishes no-match from empty-string match.
      // renameSession rejects empty titles, but the CLI is defensive: an
      // external writer with customTitle:"" should clear the cache so the
      // re-append below skips it (instead of resurrecting a stale title).
      if (tailTitle !== undefined) {
        this.currentSessionTitle = tailTitle || undefined
      }
    }
  }
  const tagLine = tailLines.findLast(l => l.startsWith('{"type":"tag"'))
  if (tagLine) {
    const tailTag = extractLastJsonStringField(tagLine, 'tag')
    // Same: tagSession(id, null) writes `tag:""` to clear.
    if (tailTag !== undefined) {
      this.currentSessionTag = tailTag || undefined
    }
  }

  // lastPrompt is re-appended so readLiteMetadata can show what the
  // user was most recently doing. Written first so customTitle/tag/etc
  // land closer to EOF (they're the more critical fields for tail reads).
  if (this.currentSessionLastPrompt) {
    appendEntryToFile(this.sessionFile, {
      type: 'last-prompt',
      lastPrompt: this.currentSessionLastPrompt,
      sessionId,
    })
  }
  // Unconditional: cache was refreshed from tail above; re-append keeps
  // the entry at EOF so compaction-pushed content doesn't evict it.
  if (this.currentSessionTitle) {
    appendEntryToFile(this.sessionFile, {
      type: 'custom-title',
      customTitle: this.currentSessionTitle,
      sessionId,
    })
  }
  if (this.currentSessionTag) {
    appendEntryToFile(this.sessionFile, {
      type: 'tag',
      tag: this.currentSessionTag,
      sessionId,
    })
  }
  if (this.currentSessionAgentName) {
    appendEntryToFile(this.sessionFile, {
      type: 'agent-name',
      agentName: this.currentSessionAgentName,
      sessionId,
    })
  }
  if (this.currentSessionAgentColor) {
    appendEntryToFile(this.sessionFile, {
      type: 'agent-color',
      agentColor: this.currentSessionAgentColor,
      sessionId,
    })
  }
  if (this.currentSessionAgentSetting) {
    appendEntryToFile(this.sessionFile, {
      type: 'agent-setting',
      agentSetting: this.currentSessionAgentSetting,
      sessionId,
    })
  }
  if (this.currentSessionMode) {
    appendEntryToFile(this.sessionFile, {
      type: 'mode',
      mode: this.currentSessionMode,
      sessionId,
    })
  }
  // Tri-state field: `!== undefined` so an explicit null ("exited the
  // worktree") is still persisted — only the never-touched state is skipped.
  if (this.currentSessionWorktree !== undefined) {
    appendEntryToFile(this.sessionFile, {
      type: 'worktree-state',
      worktreeSession: this.currentSessionWorktree,
      sessionId,
    })
  }
  // pr-link is only meaningful when all three identifying fields are set.
  if (
    this.currentSessionPrNumber !== undefined &&
    this.currentSessionPrUrl &&
    this.currentSessionPrRepository
  ) {
    appendEntryToFile(this.sessionFile, {
      type: 'pr-link',
      sessionId,
      prNumber: this.currentSessionPrNumber,
      prUrl: this.currentSessionPrUrl,
      prRepository: this.currentSessionPrRepository,
      timestamp: new Date().toISOString(),
    })
  }
}
|
||
|
||
/**
 * Wait until every queued and tracked write has reached disk: cancel the
 * debounce timer, await any in-flight drain, drain whatever remains, then
 * wait for non-queue tracked operations to finish.
 */
async flush(): Promise<void> {
  // Cancel pending timer
  if (this.flushTimer) {
    clearTimeout(this.flushTimer)
    this.flushTimer = null
  }
  // Wait for any in-flight drain to finish
  if (this.activeDrain) {
    await this.activeDrain
  }
  // Drain anything remaining in the queues
  await this.drainWriteQueue()

  // Wait for non-queue tracked operations (e.g. removeMessageByUuid)
  if (this.pendingWriteCount === 0) {
    return
  }
  // Resolved by decrementPendingWrites when the count reaches zero.
  return new Promise<void>(resolve => {
    this.flushResolvers.push(resolve)
  })
}
|
||
|
||
  /**
   * Remove a message from the transcript by UUID.
   * Used for tombstoning orphaned messages from failed streaming attempts.
   *
   * The target is almost always the most recently appended entry, so we
   * read only the tail, locate the line, and splice it out with a
   * positional write + truncate instead of rewriting the whole file.
   *
   * Best-effort: all I/O errors are swallowed (the file may not exist yet),
   * and a target outside the rewrite budget is simply left in place.
   */
  async removeMessageByUuid(targetUuid: UUID): Promise<void> {
    return this.trackWrite(async () => {
      if (this.sessionFile === null) return
      try {
        let fileSize = 0
        // 'r+' — read/write without truncating; needed for the positional
        // splice on the fast path.
        const fh = await fsOpen(this.sessionFile, 'r+')
        try {
          const { size } = await fh.stat()
          fileSize = size
          if (size === 0) return

          // Read only the trailing window of the file.
          const chunkLen = Math.min(size, LITE_READ_BUF_SIZE)
          const tailStart = size - chunkLen
          const buf = Buffer.allocUnsafe(chunkLen)
          const { bytesRead } = await fh.read(buf, 0, chunkLen, tailStart)
          const tail = buf.subarray(0, bytesRead)

          // Entries are serialized via JSON.stringify (no key-value
          // whitespace). Search for the full `"uuid":"..."` pattern, not
          // just the bare UUID, so we do not match the same value sitting
          // in `parentUuid` of a child entry. UUIDs are pure ASCII so a
          // byte-level search is correct.
          const needle = `"uuid":"${targetUuid}"`
          const matchIdx = tail.lastIndexOf(needle)

          if (matchIdx >= 0) {
            // 0x0a never appears inside a UTF-8 multi-byte sequence, so
            // byte-scanning for line boundaries is safe even if the chunk
            // starts mid-character.
            const prevNl = tail.lastIndexOf(0x0a, matchIdx)
            // If the preceding newline is outside our chunk and we did not
            // read from the start of the file, the line is longer than the
            // window - fall through to the slow path.
            if (prevNl >= 0 || tailStart === 0) {
              const lineStart = prevNl + 1 // 0 when prevNl === -1
              const nextNl = tail.indexOf(0x0a, matchIdx + needle.length)
              const lineEnd = nextNl >= 0 ? nextNl + 1 : bytesRead

              const absLineStart = tailStart + lineStart
              const afterLen = bytesRead - lineEnd
              // Truncate first, then re-append the trailing lines. In the
              // common case (target is the last entry) afterLen is 0 and
              // this is a single ftruncate.
              await fh.truncate(absLineStart)
              if (afterLen > 0) {
                await fh.write(tail, lineEnd, afterLen, absLineStart)
              }
              return
            }
          }
        } finally {
          await fh.close()
        }

        // Slow path: target was not in the last 64KB. Rare - requires many
        // large entries to have landed between the write and the tombstone.
        if (fileSize > MAX_TOMBSTONE_REWRITE_BYTES) {
          logForDebugging(
            `Skipping tombstone removal: session file too large (${formatFileSize(fileSize)})`,
            { level: 'warn' },
          )
          return
        }
        // Full read/filter/rewrite of the session file.
        // NOTE(review): this rewrite is not atomic — a crash mid-write can
        // leave a truncated transcript; acceptable for best-effort
        // tombstoning, but worth confirming against recovery expectations.
        const content = await readFile(this.sessionFile, { encoding: 'utf-8' })
        const lines = content.split('\n').filter((line: string) => {
          if (!line.trim()) return true
          try {
            const entry = jsonParse(line)
            return entry.uuid !== targetUuid
          } catch {
            return true // Keep malformed lines
          }
        })
        await writeFile(this.sessionFile, lines.join('\n'), {
          encoding: 'utf8',
        })
      } catch {
        // Silently ignore errors - the file might not exist yet
      }
    })
  }
|
||
|
||
/**
|
||
* True when test env / cleanupPeriodDays=0 / --no-session-persistence /
|
||
* CLAUDE_CODE_SKIP_PROMPT_HISTORY should suppress all transcript writes.
|
||
* Shared guard for appendEntry and materializeSessionFile so both skip
|
||
* consistently. The env var is set by tmuxSocket.ts so Tungsten-spawned
|
||
* test sessions don't pollute the user's --resume list.
|
||
*/
|
||
private shouldSkipPersistence(): boolean {
|
||
const allowTestPersistence = isEnvTruthy(
|
||
process.env.TEST_ENABLE_SESSION_PERSISTENCE,
|
||
)
|
||
return (
|
||
(getNodeEnv() === 'test' && !allowTestPersistence) ||
|
||
getSettings_DEPRECATED()?.cleanupPeriodDays === 0 ||
|
||
isSessionPersistenceDisabled() ||
|
||
isEnvTruthy(process.env.CLAUDE_CODE_SKIP_PROMPT_HISTORY)
|
||
)
|
||
}
|
||
|
||
  /**
   * Create the session file, write cached startup metadata, and flush
   * buffered entries. Called on the first user/assistant message.
   */
  private async materializeSessionFile(): Promise<void> {
    // Guard here too — reAppendSessionMetadata writes via appendEntryToFile
    // (not appendEntry) so it would bypass the per-entry persistence check
    // and create a metadata-only file despite --no-session-persistence.
    if (this.shouldSkipPersistence()) return
    this.ensureCurrentSessionFile()
    // mode/agentSetting are cache-only pre-materialization; write them now.
    this.reAppendSessionMetadata()
    if (this.pendingEntries.length > 0) {
      // Swap the buffer out first so entries pushed during replay are not
      // lost or replayed twice.
      const buffered = this.pendingEntries
      this.pendingEntries = []
      // Sequential on purpose — buffered entries must land in arrival order.
      for (const entry of buffered) {
        await this.appendEntry(entry)
      }
    }
  }
|
||
|
||
  /**
   * Append a chain of messages to the transcript, threading parentUuid
   * through chain participants and stamping each entry with current
   * session metadata (cwd, git branch, version, slug, …).
   *
   * @param messages cleaned messages to record, in order
   * @param isSidechain true when recording a subagent transcript
   * @param agentId owning agent (used by appendEntry for sidechain routing)
   * @param startingParentUuid parent for the first chain participant;
   *   null/undefined starts a new chain
   * @param teamInfo optional team/agent attribution stamped on each message
   */
  async insertMessageChain(
    messages: Transcript,
    isSidechain: boolean = false,
    agentId?: string,
    startingParentUuid?: UUID | null,
    teamInfo?: { teamName?: string; agentName?: string },
  ) {
    return this.trackWrite(async () => {
      let parentUuid: UUID | null = startingParentUuid ?? null

      // First user/assistant message materializes the session file.
      // Hook progress/attachment messages alone stay buffered.
      if (
        this.sessionFile === null &&
        messages.some(m => m.type === 'user' || m.type === 'assistant')
      ) {
        await this.materializeSessionFile()
      }

      // Get current git branch once for this message chain
      let gitBranch: string | undefined
      try {
        gitBranch = await getBranch()
      } catch {
        // Not in a git repo or git command failed
        gitBranch = undefined
      }

      // Get slug if one exists for this session (used for plan files, etc.)
      const sessionId = getSessionId()
      const slug = getPlanSlugCache().get(sessionId)

      for (const message of messages) {
        const isCompactBoundary = isCompactBoundaryMessage(message)

        // For tool_result messages, use the assistant message UUID from the message
        // if available (set at creation time), otherwise fall back to sequential parent
        let effectiveParentUuid = parentUuid
        if (
          message.type === 'user' &&
          'sourceToolAssistantUUID' in message &&
          message.sourceToolAssistantUUID
        ) {
          effectiveParentUuid = message.sourceToolAssistantUUID
        }

        const transcriptMessage: TranscriptMessage = {
          // Compact boundaries start a fresh physical chain; the logical
          // parent is preserved separately.
          parentUuid: isCompactBoundary ? null : effectiveParentUuid,
          logicalParentUuid: isCompactBoundary ? parentUuid : undefined,
          isSidechain,
          teamName: teamInfo?.teamName,
          agentName: teamInfo?.agentName,
          promptId:
            message.type === 'user' ? (getPromptId() ?? undefined) : undefined,
          agentId,
          ...message,
          // Session-stamp fields MUST come after the spread. On --fork-session
          // and --resume, messages arrive as SerializedMessage (carries source
          // sessionId/cwd/etc. because removeExtraFields only strips parentUuid
          // and isSidechain). If sessionId isn't re-stamped, FRESH.jsonl ends up
          // with messages stamped sessionId=A but content-replacement entries
          // stamped sessionId=FRESH (from insertContentReplacement), and
          // loadFullLog's sessionId-keyed contentReplacements lookup misses →
          // replacement records lost → FROZEN misclassification.
          userType: getUserType(),
          entrypoint: getEntrypoint(),
          cwd: getCwd(),
          sessionId,
          version: VERSION,
          gitBranch,
          slug,
        }
        await this.appendEntry(transcriptMessage)
        // Only chain participants advance the parent cursor.
        if (isChainParticipant(message)) {
          parentUuid = message.uuid
        }
      }

      // Cache this turn's user prompt for reAppendSessionMetadata —
      // the --resume picker shows what the user was last doing.
      // Overwritten every turn by design.
      if (!isSidechain) {
        const text = getFirstMeaningfulUserMessageTextContent(messages)
        if (text) {
          const flat = text.replace(/\n/g, ' ').trim()
          this.currentSessionLastPrompt =
            flat.length > 200 ? flat.slice(0, 200).trim() + '…' : flat
        }
      }
    })
  }
|
||
|
||
async insertFileHistorySnapshot(
|
||
messageId: UUID,
|
||
snapshot: FileHistorySnapshot,
|
||
isSnapshotUpdate: boolean,
|
||
) {
|
||
return this.trackWrite(async () => {
|
||
const fileHistoryMessage: FileHistorySnapshotMessage = {
|
||
type: 'file-history-snapshot',
|
||
messageId,
|
||
snapshot,
|
||
isSnapshotUpdate,
|
||
}
|
||
await this.appendEntry(fileHistoryMessage)
|
||
})
|
||
}
|
||
|
||
async insertQueueOperation(queueOp: QueueOperationMessage) {
|
||
return this.trackWrite(async () => {
|
||
await this.appendEntry(queueOp)
|
||
})
|
||
}
|
||
|
||
async insertAttributionSnapshot(snapshot: AttributionSnapshotMessage) {
|
||
return this.trackWrite(async () => {
|
||
await this.appendEntry(snapshot)
|
||
})
|
||
}
|
||
|
||
async insertContentReplacement(
|
||
replacements: ContentReplacementRecord[],
|
||
agentId?: AgentId,
|
||
) {
|
||
return this.trackWrite(async () => {
|
||
const entry: ContentReplacementEntry = {
|
||
type: 'content-replacement',
|
||
sessionId: getSessionId() as UUID,
|
||
agentId,
|
||
replacements,
|
||
}
|
||
await this.appendEntry(entry)
|
||
})
|
||
}
|
||
|
||
async appendEntry(entry: Entry, sessionId: UUID = getSessionId() as UUID) {
|
||
if (this.shouldSkipPersistence()) {
|
||
return
|
||
}
|
||
|
||
const currentSessionId = getSessionId() as UUID
|
||
const isCurrentSession = sessionId === currentSessionId
|
||
|
||
let sessionFile: string
|
||
if (isCurrentSession) {
|
||
// Buffer until materializeSessionFile runs (first user/assistant message).
|
||
if (this.sessionFile === null) {
|
||
this.pendingEntries.push(entry)
|
||
return
|
||
}
|
||
sessionFile = this.sessionFile
|
||
} else {
|
||
const existing = await this.getExistingSessionFile(sessionId)
|
||
if (!existing) {
|
||
logError(
|
||
new Error(
|
||
`appendEntry: session file not found for other session ${sessionId}`,
|
||
),
|
||
)
|
||
return
|
||
}
|
||
sessionFile = existing
|
||
}
|
||
|
||
// Only load current session messages if needed
|
||
if (entry.type === 'summary') {
|
||
// Summaries can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'custom-title') {
|
||
// Custom titles can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'ai-title') {
|
||
// AI titles can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'last-prompt') {
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'task-summary') {
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'tag') {
|
||
// Tags can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'agent-name') {
|
||
// Agent names can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'agent-color') {
|
||
// Agent colors can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'agent-setting') {
|
||
// Agent settings can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'pr-link') {
|
||
// PR links can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'file-history-snapshot') {
|
||
// File history snapshots can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'attribution-snapshot') {
|
||
// Attribution snapshots can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'speculation-accept') {
|
||
// Speculation accept entries can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'mode') {
|
||
// Mode entries can always be appended
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'worktree-state') {
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'content-replacement') {
|
||
// Content replacement records can always be appended. Subagent records
|
||
// go to the sidechain file (for AgentTool resume); main-thread
|
||
// records go to the session file (for /resume).
|
||
const targetFile = entry.agentId
|
||
? getAgentTranscriptPath(entry.agentId)
|
||
: sessionFile
|
||
void this.enqueueWrite(targetFile, entry)
|
||
} else if (entry.type === 'marble-origami-commit') {
|
||
// Always append. Commit order matters for restore (later commits may
|
||
// reference earlier commits' summary messages), so these must be
|
||
// written in the order received and read back sequentially.
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else if (entry.type === 'marble-origami-snapshot') {
|
||
// Always append. Last-wins on restore — later entries supersede.
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else {
|
||
const messageSet = await getSessionMessages(sessionId)
|
||
if (entry.type === 'queue-operation') {
|
||
// Queue operations are always appended to the session file
|
||
void this.enqueueWrite(sessionFile, entry)
|
||
} else {
|
||
// At this point, entry must be a TranscriptMessage (user/assistant/attachment/system)
|
||
// All other entry types have been handled above
|
||
const isAgentSidechain =
|
||
entry.isSidechain && entry.agentId !== undefined
|
||
const targetFile = isAgentSidechain
|
||
? getAgentTranscriptPath(asAgentId(entry.agentId!))
|
||
: sessionFile
|
||
|
||
// For message entries, check if UUID already exists in current session.
|
||
// Skip dedup for agent sidechain LOCAL writes — they go to a separate
|
||
// file, and fork-inherited parent messages share UUIDs with the main
|
||
// session transcript. Deduping against the main session's set would
|
||
// drop them, leaving the persisted sidechain transcript incomplete
|
||
// (resume-of-fork loads a 10KB file instead of the full 85KB inherited
|
||
// context).
|
||
//
|
||
// The sidechain bypass applies ONLY to the local file write — remote
|
||
// persistence (session-ingress) uses a single Last-Uuid chain per
|
||
// sessionId, so re-POSTing a UUID it already has 409s and eventually
|
||
// exhausts retries → gracefulShutdownSync(1). See inc-4718.
|
||
const isNewUuid = !messageSet.has(entry.uuid)
|
||
if (isAgentSidechain || isNewUuid) {
|
||
// Enqueue write — appendToFile handles ENOENT by creating directories
|
||
void this.enqueueWrite(targetFile, entry)
|
||
|
||
if (!isAgentSidechain) {
|
||
// messageSet is main-file-authoritative. Sidechain entries go to a
|
||
// separate agent file — adding their UUIDs here causes recordTranscript
|
||
// to skip them on the main thread (line ~1270), so the message is never
|
||
// written to the main session file. The next main-thread message then
|
||
// chains its parentUuid to a UUID that only exists in the agent file,
|
||
// and --resume's buildConversationChain terminates at the dangling ref.
|
||
// Same constraint for remote (inc-4718 above): sidechain persisting a
|
||
// UUID the main thread hasn't written yet → 409 when main writes it.
|
||
messageSet.add(entry.uuid)
|
||
|
||
if (isTranscriptMessage(entry)) {
|
||
await this.persistToRemote(sessionId, entry)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Loads the sessionFile variable.
|
||
* Do not need to create session files until they are written to.
|
||
*/
|
||
private ensureCurrentSessionFile(): string {
|
||
if (this.sessionFile === null) {
|
||
this.sessionFile = getTranscriptPath()
|
||
}
|
||
|
||
return this.sessionFile
|
||
}
|
||
|
||
  /**
   * Returns the session file path if it exists, null otherwise.
   * Used for writing to sessions other than the current one.
   * Caches positive results so we only stat once per session.
   */
  // sessionId → known-existing transcript path. Positive hits only, so a
  // file created later is still discovered on a subsequent call.
  private existingSessionFiles = new Map<string, string>()
  private async getExistingSessionFile(
    sessionId: UUID,
  ): Promise<string | null> {
    const cached = this.existingSessionFiles.get(sessionId)
    if (cached) return cached

    const targetFile = getTranscriptPathForSession(sessionId)
    try {
      await stat(targetFile)
      this.existingSessionFiles.set(sessionId, targetFile)
      return targetFile
    } catch (e) {
      // Missing/inaccessible → null; anything else is unexpected and rethrown.
      if (isFsInaccessible(e)) return null
      throw e
    }
  }
|
||
|
||
  /**
   * Mirror a transcript message to remote persistence: the CCR v2 internal
   * event writer when one is registered, otherwise the v1 Session Ingress
   * endpoint. No-op during shutdown or when neither path is enabled.
   *
   * NOTE(review): on the v1 path a failed append calls
   * gracefulShutdownSync(1) — persistence failure is treated as fatal there,
   * while the v2 path only logs. Confirm this asymmetry is intended.
   */
  private async persistToRemote(sessionId: UUID, entry: TranscriptMessage) {
    if (isShuttingDown()) {
      return
    }

    // CCR v2 path: write as internal worker event
    if (this.internalEventWriter) {
      try {
        await this.internalEventWriter(
          'transcript',
          entry as unknown as Record<string, unknown>,
          {
            // Only set metadata keys when they apply.
            ...(isCompactBoundaryMessage(entry) && { isCompaction: true }),
            ...(entry.agentId && { agentId: entry.agentId }),
          },
        )
      } catch {
        logEvent('tengu_session_persistence_failed', {})
        logForDebugging('Failed to write transcript as internal event')
      }
      return
    }

    // v1 Session Ingress path
    if (
      !isEnvTruthy(process.env.ENABLE_SESSION_PERSISTENCE) ||
      !this.remoteIngressUrl
    ) {
      return
    }

    const success = await sessionIngress.appendSessionLog(
      sessionId,
      entry,
      this.remoteIngressUrl,
    )

    if (!success) {
      logEvent('tengu_session_persistence_failed', {})
      gracefulShutdownSync(1, 'other')
    }
  }
|
||
|
||
setRemoteIngressUrl(url: string): void {
|
||
this.remoteIngressUrl = url
|
||
logForDebugging(`Remote persistence enabled with URL: ${url}`)
|
||
if (url) {
|
||
// If using CCR, don't delay messages by any more than 10ms.
|
||
this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
|
||
}
|
||
}
|
||
|
||
setInternalEventWriter(writer: InternalEventWriter): void {
|
||
this.internalEventWriter = writer
|
||
logForDebugging(
|
||
'CCR v2 internal event writer registered for transcript persistence',
|
||
)
|
||
// Use fast flush interval for CCR v2
|
||
this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
|
||
}
|
||
|
||
setInternalEventReader(reader: InternalEventReader): void {
|
||
this.internalEventReader = reader
|
||
logForDebugging(
|
||
'CCR v2 internal event reader registered for session resume',
|
||
)
|
||
}
|
||
|
||
setInternalSubagentEventReader(reader: InternalEventReader): void {
|
||
this.internalSubagentEventReader = reader
|
||
logForDebugging(
|
||
'CCR v2 subagent event reader registered for session resume',
|
||
)
|
||
}
|
||
|
||
  /** The registered CCR v2 foreground event reader, or null if none. */
  getInternalEventReader(): InternalEventReader | null {
    return this.internalEventReader
  }
|
||
|
||
  /** The registered CCR v2 subagent event reader, or null if none. */
  getInternalSubagentEventReader(): InternalEventReader | null {
    return this.internalSubagentEventReader
  }
|
||
}
|
||
|
||
/** Optional team/agent attribution stamped onto recorded messages. */
export type TeamInfo = {
  teamName?: string
  agentName?: string
}
|
||
|
||
// Filter out already-recorded messages before passing to insertMessageChain.
// Without this, after compaction messagesToKeep (same UUIDs as pre-compact
// messages) are dedup-skipped by appendEntry but still advance the parentUuid
// cursor in insertMessageChain, causing new messages to chain from pre-compact
// UUIDs instead of the post-compact summary — orphaning the compact boundary.
//
// `startingParentUuidHint`: used by useLogMessages to pass the parent from
// the previous incremental slice, avoiding an O(n) scan to rediscover it.
//
// Skip-tracking: already-recorded messages are tracked as the parent ONLY if
// they form a PREFIX (appear before any new message). This handles both cases:
// - Growing-array callers (QueryEngine, queryHelpers, LocalMainSessionTask,
//   trajectory): recorded messages are always a prefix → tracked → correct
//   parent chain for new messages.
// - Compaction (useLogMessages): new CB/summary appear FIRST, then recorded
//   messagesToKeep → not a prefix → not tracked → CB gets parentUuid=null
//   (correct: truncates --continue chain at compact boundary).
//
// Returns the UUID the next slice should chain from, or null when no chain
// participant was recorded or prefix-tracked (see bottom comment).
export async function recordTranscript(
  messages: Message[],
  teamInfo?: TeamInfo,
  startingParentUuidHint?: UUID,
  allMessages?: readonly Message[],
): Promise<UUID | null> {
  const cleanedMessages = cleanMessagesForLogging(messages, allMessages)
  const sessionId = getSessionId() as UUID
  const messageSet = await getSessionMessages(sessionId)
  const newMessages: typeof cleanedMessages = []
  let startingParentUuid: UUID | undefined = startingParentUuidHint
  let seenNewMessage = false
  for (const m of cleanedMessages) {
    if (messageSet.has(m.uuid as UUID)) {
      // Only track skipped messages that form a prefix. After compaction,
      // messagesToKeep appear AFTER new CB/summary, so this skips them.
      if (!seenNewMessage && isChainParticipant(m)) {
        startingParentUuid = m.uuid as UUID
      }
    } else {
      newMessages.push(m)
      seenNewMessage = true
    }
  }
  if (newMessages.length > 0) {
    await getProject().insertMessageChain(
      newMessages,
      false,
      undefined,
      startingParentUuid,
      teamInfo,
    )
  }
  // Return the last ACTUALLY recorded chain-participant's UUID, OR the
  // prefix-tracked UUID if no new chain participants were recorded. This lets
  // callers (useLogMessages) maintain the correct parent chain even when the
  // slice is all-recorded (rewind, /resume scenarios where every message is
  // already in messageSet). Progress is skipped — it's written to the JSONL
  // but nothing chains TO it (see isChainParticipant).
  const lastRecorded = newMessages.findLast(isChainParticipant)
  return (lastRecorded?.uuid as UUID | undefined) ?? startingParentUuid ?? null
}
|
||
|
||
export async function recordSidechainTranscript(
|
||
messages: Message[],
|
||
agentId?: string,
|
||
startingParentUuid?: UUID | null,
|
||
) {
|
||
await getProject().insertMessageChain(
|
||
cleanMessagesForLogging(messages),
|
||
true,
|
||
agentId,
|
||
startingParentUuid,
|
||
)
|
||
}
|
||
|
||
export async function recordQueueOperation(queueOp: QueueOperationMessage) {
|
||
await getProject().insertQueueOperation(queueOp)
|
||
}
|
||
|
||
/**
|
||
* Remove a message from the transcript by UUID.
|
||
* Used when a tombstone is received for an orphaned message.
|
||
*/
|
||
export async function removeTranscriptMessage(targetUuid: UUID): Promise<void> {
|
||
await getProject().removeMessageByUuid(targetUuid)
|
||
}
|
||
|
||
export async function recordFileHistorySnapshot(
|
||
messageId: UUID,
|
||
snapshot: FileHistorySnapshot,
|
||
isSnapshotUpdate: boolean,
|
||
) {
|
||
await getProject().insertFileHistorySnapshot(
|
||
messageId,
|
||
snapshot,
|
||
isSnapshotUpdate,
|
||
)
|
||
}
|
||
|
||
export async function recordAttributionSnapshot(
|
||
snapshot: AttributionSnapshotMessage,
|
||
) {
|
||
await getProject().insertAttributionSnapshot(snapshot)
|
||
}
|
||
|
||
export async function recordContentReplacement(
|
||
replacements: ContentReplacementRecord[],
|
||
agentId?: AgentId,
|
||
) {
|
||
await getProject().insertContentReplacement(replacements, agentId)
|
||
}
|
||
|
||
/**
|
||
* Reset the session file pointer after switchSession/regenerateSessionId.
|
||
* The new file is created lazily on the first user/assistant message.
|
||
*/
|
||
export async function resetSessionFilePointer() {
|
||
getProject().resetSessionFile()
|
||
}
|
||
|
||
/**
 * Adopt the existing session file after --continue/--resume (non-fork).
 * Call after switchSession + resetSessionFilePointer + restoreSessionMetadata:
 * getTranscriptPath() now derives the resumed file's path from the switched
 * sessionId, and the cache holds the final metadata (--name title, resumed
 * mode/tag/agent).
 *
 * Setting sessionFile here — instead of waiting for materializeSessionFile
 * on the first user message — lets the exit cleanup handler's
 * reAppendSessionMetadata run (it bails when sessionFile is null). Without
 * this, `-c -n foo` + quit-before-message drops the title on the floor:
 * the in-memory cache is correct but never written. The resumed file
 * already exists on disk (we loaded from it), so this can't create an
 * orphan the way a fresh --name session would.
 *
 * skipTitleRefresh: restoreSessionMetadata populated the cache from the
 * same disk read microseconds ago, so refreshing from the tail here is a
 * no-op — unless --name was used, in which case it would clobber the fresh
 * CLI title with the stale disk value. After this write, disk == cache and
 * later calls (compaction, exit cleanup) absorb SDK writes normally.
 */
export function adoptResumedSessionFile(): void {
  const project = getProject()
  project.sessionFile = getTranscriptPath()
  project.reAppendSessionMetadata(true) // true = skipTitleRefresh (see above)
}
|
||
|
||
/**
|
||
* Append a context-collapse commit entry to the transcript. One entry per
|
||
* commit, in commit order. On resume these are collected into an ordered
|
||
* array and handed to restoreFromEntries() which rebuilds the commit log.
|
||
*/
|
||
export async function recordContextCollapseCommit(commit: {
|
||
collapseId: string
|
||
summaryUuid: string
|
||
summaryContent: string
|
||
summary: string
|
||
firstArchivedUuid: string
|
||
lastArchivedUuid: string
|
||
}): Promise<void> {
|
||
const sessionId = getSessionId() as UUID
|
||
if (!sessionId) return
|
||
await getProject().appendEntry({
|
||
type: 'marble-origami-commit',
|
||
sessionId,
|
||
...commit,
|
||
})
|
||
}
|
||
|
||
/**
|
||
* Snapshot the staged queue + spawn state. Written after each ctx-agent
|
||
* spawn resolves (when staged contents may have changed). Last-wins on
|
||
* restore — the loader keeps only the most recent snapshot entry.
|
||
*/
|
||
export async function recordContextCollapseSnapshot(snapshot: {
|
||
staged: Array<{
|
||
startUuid: string
|
||
endUuid: string
|
||
summary: string
|
||
risk: number
|
||
stagedAt: number
|
||
}>
|
||
armed: boolean
|
||
lastSpawnTokens: number
|
||
}): Promise<void> {
|
||
const sessionId = getSessionId() as UUID
|
||
if (!sessionId) return
|
||
await getProject().appendEntry({
|
||
type: 'marble-origami-snapshot',
|
||
sessionId,
|
||
...snapshot,
|
||
})
|
||
}
|
||
|
||
export async function flushSessionStorage(): Promise<void> {
|
||
await getProject().flush()
|
||
}
|
||
|
||
/**
 * Replace the local transcript for `sessionId` with the logs held by the
 * remote session-ingress service, then (always, even on failure) enable
 * remote persistence against `ingressUrl`.
 *
 * @returns true when at least one remote entry was hydrated.
 */
export async function hydrateRemoteSession(
  sessionId: string,
  ingressUrl: string,
): Promise<boolean> {
  switchSession(asSessionId(sessionId))

  const project = getProject()

  try {
    const remoteLogs =
      (await sessionIngress.getSessionLogs(sessionId, ingressUrl)) || []

    // Ensure the project directory and session file exist
    const projectDir = getProjectDir(getOriginalCwd())
    await mkdir(projectDir, { recursive: true, mode: 0o700 })

    const sessionFile = getTranscriptPathForSession(sessionId)

    // Replace local logs with remote logs. writeFile truncates, so no
    // unlink is needed; an empty remoteLogs array produces an empty file.
    const content = remoteLogs.map(e => jsonStringify(e) + '\n').join('')
    await writeFile(sessionFile, content, { encoding: 'utf8', mode: 0o600 })

    logForDebugging(`Hydrated ${remoteLogs.length} entries from remote`)
    return remoteLogs.length > 0
  } catch (error) {
    logForDebugging(`Error hydrating session from remote: ${error}`)
    logForDiagnosticsNoPII('error', 'hydrate_remote_session_fail')
    return false
  } finally {
    // Set remote ingress URL after hydrating the remote session
    // to ensure we've always synced with the remote session
    // prior to enabling persistence
    project.setRemoteIngressUrl(ingressUrl)
  }
}
|
||
|
||
/**
 * Hydrate session state from CCR v2 internal events.
 * Fetches foreground and subagent events via the registered readers,
 * extracts transcript entries from payloads, and writes them to the
 * local transcript files (main + per-agent).
 * The server handles compaction filtering — it returns events starting
 * from the latest compaction boundary.
 *
 * @returns true when at least one foreground event was hydrated; false on
 *   read failure or when no reader is registered.
 * @throws the CCRClient epoch-mismatch error, re-thrown so the caller can
 *   handle it instead of racing gracefulShutdown.
 */
export async function hydrateFromCCRv2InternalEvents(
  sessionId: string,
): Promise<boolean> {
  const startMs = Date.now()
  switchSession(asSessionId(sessionId))

  const project = getProject()
  const reader = project.getInternalEventReader()
  if (!reader) {
    logForDebugging('No internal event reader registered for CCR v2 resume')
    return false
  }

  try {
    // Fetch foreground events
    const events = await reader()
    if (!events) {
      logForDebugging('Failed to read internal events for resume')
      logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_read_fail')
      return false
    }

    const projectDir = getProjectDir(getOriginalCwd())
    await mkdir(projectDir, { recursive: true, mode: 0o700 })

    // Write foreground transcript
    const sessionFile = getTranscriptPathForSession(sessionId)
    const fgContent = events.map(e => jsonStringify(e.payload) + '\n').join('')
    await writeFile(sessionFile, fgContent, { encoding: 'utf8', mode: 0o600 })

    logForDebugging(
      `Hydrated ${events.length} foreground entries from CCR v2 internal events`,
    )

    // Fetch and write subagent events
    let subagentEventCount = 0
    const subagentReader = project.getInternalSubagentEventReader()
    if (subagentReader) {
      const subagentEvents = await subagentReader()
      if (subagentEvents && subagentEvents.length > 0) {
        subagentEventCount = subagentEvents.length
        // Group by agent_id (snake_case — the server's wire format)
        const byAgent = new Map<string, Record<string, unknown>[]>()
        for (const e of subagentEvents) {
          const agentId = e.agent_id || ''
          // Events without an owning agent cannot be routed; drop them.
          if (!agentId) continue
          let list = byAgent.get(agentId)
          if (!list) {
            list = []
            byAgent.set(agentId, list)
          }
          list.push(e.payload)
        }

        // Write each agent's transcript to its own file
        for (const [agentId, entries] of byAgent) {
          const agentFile = getAgentTranscriptPath(asAgentId(agentId))
          await mkdir(dirname(agentFile), { recursive: true, mode: 0o700 })
          const agentContent = entries
            .map(p => jsonStringify(p) + '\n')
            .join('')
          await writeFile(agentFile, agentContent, {
            encoding: 'utf8',
            mode: 0o600,
          })
        }

        logForDebugging(
          `Hydrated ${subagentEvents.length} subagent entries across ${byAgent.size} agents`,
        )
      }
    }

    logForDiagnosticsNoPII('info', 'hydrate_ccr_v2_completed', {
      duration_ms: Date.now() - startMs,
      event_count: events.length,
      subagent_event_count: subagentEventCount,
    })
    return events.length > 0
  } catch (error) {
    // Re-throw epoch mismatch so the worker doesn't race against gracefulShutdown
    if (
      error instanceof Error &&
      error.message === 'CCRClient: Epoch mismatch (409)'
    ) {
      throw error
    }
    logForDebugging(`Error hydrating session from CCR v2: ${error}`)
    logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_fail')
    return false
  }
}
|
||
|
||
function extractFirstPrompt(transcript: TranscriptMessage[]): string {
|
||
const textContent = getFirstMeaningfulUserMessageTextContent(transcript)
|
||
if (textContent) {
|
||
let result = textContent.replace(/\n/g, ' ').trim()
|
||
|
||
// Store a reasonably long version for display-time truncation
|
||
// The actual truncation will be applied at display time based on terminal width
|
||
if (result.length > 200) {
|
||
result = result.slice(0, 200).trim() + '…'
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
return 'No prompt'
|
||
}
|
||
|
||
/**
 * Returns the text content of the FIRST meaningful user message in the
 * transcript (the loop scans forward and returns on the first match).
 * Used to determine if a session has valid user interaction and to derive
 * a session's first-prompt preview.
 *
 * Skipped as non-meaningful: meta messages, compact summaries, built-in
 * slash commands, custom commands without arguments, and payloads matching
 * SKIP_FIRST_PROMPT_PATTERN. Returns undefined when nothing qualifies.
 */
export function getFirstMeaningfulUserMessageTextContent<T extends Message>(
  transcript: T[],
): string | undefined {
  for (const msg of transcript) {
    if (msg.type !== 'user' || msg.isMeta) continue
    // Skip compact summary messages - they should not be treated as the first prompt
    if ('isCompactSummary' in msg && msg.isCompactSummary) continue

    const content = msg.message?.content
    if (!content) continue

    // Collect all text values. For array content (common in VS Code where
    // IDE metadata tags come before the user's actual prompt), iterate all
    // text blocks so we don't miss the real prompt hidden behind
    // <ide_selection>/<ide_opened_file> blocks.
    const texts: string[] = []
    if (typeof content === 'string') {
      texts.push(content)
    } else if (Array.isArray(content)) {
      for (const block of content) {
        if (block.type === 'text' && block.text) {
          texts.push(block.text)
        }
      }
    }

    for (const textContent of texts) {
      if (!textContent) continue

      const commandNameTag = extractTag(textContent, COMMAND_NAME_TAG)
      if (commandNameTag) {
        // Strip the leading slash before checking against the built-in set.
        const commandName = commandNameTag.replace(/^\//, '')

        // If it's a built-in command, then it's unlikely to provide
        // meaningful context (e.g. `/model sonnet`)
        if (builtInCommandNames().has(commandName)) {
          continue
        } else {
          // Otherwise, for custom commands, then keep it only if it has
          // arguments (e.g. `/review reticulate splines`)
          const commandArgs = extractTag(textContent, 'command-args')?.trim()
          if (!commandArgs) {
            continue
          }
          // Return clean formatted command instead of raw XML
          return `${commandNameTag} ${commandArgs}`
        }
      }

      // Format bash input with ! prefix (as user typed it). Checked before
      // the generic XML skip so bash-mode sessions get a meaningful title.
      const bashInput = extractTag(textContent, 'bash-input')
      if (bashInput) {
        return `! ${bashInput}`
      }

      // Skip non-meaningful messages (local command output, hook output,
      // autonomous tick prompts, task notifications, pure IDE metadata tags)
      if (SKIP_FIRST_PROMPT_PATTERN.test(textContent)) {
        continue
      }

      return textContent
    }
  }
  return undefined
}
|
||
|
||
export function removeExtraFields(
|
||
transcript: TranscriptMessage[],
|
||
): SerializedMessage[] {
|
||
return transcript.map(m => {
|
||
const { isSidechain, parentUuid, ...serializedMessage } = m
|
||
return serializedMessage
|
||
})
|
||
}
|
||
|
||
/**
 * Splice the preserved segment back into the chain after compaction.
 *
 * Preserved messages exist in the JSONL with their ORIGINAL pre-compact
 * parentUuids (recordTranscript dedup-skipped them — can't rewrite).
 * The internal chain (keep[i+1]→keep[i]) is intact; only endpoints need
 * patching: head→anchor, and anchor's other children→tail. Anchor is the
 * last summary for suffix-preserving, boundary itself for prefix-preserving.
 *
 * Only the LAST seg-boundary is relinked — earlier segs were summarized
 * into it. Everything physically before the absolute-last boundary (except
 * preservedUuids) is deleted, which handles all multi-boundary shapes
 * without special-casing.
 *
 * Mutates the Map in place.
 *
 * @param messages uuid → message map for the full on-disk transcript;
 *   entries are replaced (spread-copied) rather than mutated in place.
 */
function applyPreservedSegmentRelinks(
  messages: Map<UUID, TranscriptMessage>,
): void {
  // Shorthand for the preservedSegment metadata shape on a boundary entry.
  type Seg = NonNullable<
    SystemCompactBoundaryMessage['compactMetadata']['preservedSegment']
  >

  // Find the absolute-last boundary and the last seg-boundary (can differ:
  // manual /compact after reactive compact → seg is stale).
  let lastSeg: Seg | undefined
  let lastSegBoundaryIdx = -1
  let absoluteLastBoundaryIdx = -1
  // uuid → insertion-order index, used by the prune pass at the bottom.
  const entryIndex = new Map<UUID, number>()
  let i = 0
  for (const entry of messages.values()) {
    entryIndex.set(entry.uuid, i)
    if (isCompactBoundaryMessage(entry)) {
      absoluteLastBoundaryIdx = i
      const seg = entry.compactMetadata?.preservedSegment
      if (seg) {
        lastSeg = seg
        lastSegBoundaryIdx = i
      }
    }
    i++
  }
  // No seg anywhere → no-op. findUnresolvedToolUse etc. read the full map.
  if (!lastSeg) return

  // Seg stale (no-seg boundary came after): skip relink, still prune at
  // absolute — otherwise the stale preserved chain becomes a phantom leaf.
  const segIsLive = lastSegBoundaryIdx === absoluteLastBoundaryIdx

  // Validate tail→head BEFORE mutating so malformed metadata is a true
  // no-op (walk stops at headUuid, doesn't need the relink to run first).
  const preservedUuids = new Set<UUID>()
  if (segIsLive) {
    const walkSeen = new Set<UUID>()
    let cur = messages.get(lastSeg.tailUuid)
    let reachedHead = false
    // walkSeen guards against parentUuid cycles terminating the loop.
    while (cur && !walkSeen.has(cur.uuid)) {
      walkSeen.add(cur.uuid)
      preservedUuids.add(cur.uuid)
      if (cur.uuid === lastSeg.headUuid) {
        reachedHead = true
        break
      }
      cur = cur.parentUuid ? messages.get(cur.parentUuid) : undefined
    }
    if (!reachedHead) {
      // tail→head walk broke — a UUID in the preserved segment isn't in the
      // transcript. Returning here skips the prune below, so resume loads
      // the full pre-compact history. Known cause: mid-turn-yielded
      // attachment pushed to mutableMessages but never recordTranscript'd
      // (SDK subprocess restarted before next turn's qe:420 flush).
      logEvent('tengu_relink_walk_broken', {
        tailInTranscript: messages.has(lastSeg.tailUuid),
        headInTranscript: messages.has(lastSeg.headUuid),
        anchorInTranscript: messages.has(lastSeg.anchorUuid),
        walkSteps: walkSeen.size,
        transcriptSize: messages.size,
      })
      return
    }
  }

  if (segIsLive) {
    // Head-splice: point the segment's head at the anchor.
    const head = messages.get(lastSeg.headUuid)
    if (head) {
      messages.set(lastSeg.headUuid, {
        ...head,
        parentUuid: lastSeg.anchorUuid,
      })
    }
    // Tail-splice: anchor's other children → tail. No-op if already pointing
    // at tail (the useLogMessages race case).
    for (const [uuid, msg] of messages) {
      if (msg.parentUuid === lastSeg.anchorUuid && uuid !== lastSeg.headUuid) {
        messages.set(uuid, { ...msg, parentUuid: lastSeg.tailUuid })
      }
    }
    // Zero stale usage: on-disk input_tokens reflect pre-compact context
    // (~190K) — stripStaleUsage only patched in-memory copies that were
    // dedup-skipped. Without this, resume → immediate autocompact spiral.
    for (const uuid of preservedUuids) {
      const msg = messages.get(uuid)
      if (msg?.type !== 'assistant') continue
      messages.set(uuid, {
        ...msg,
        message: {
          ...msg.message,
          usage: {
            ...msg.message.usage,
            input_tokens: 0,
            output_tokens: 0,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
          },
        },
      })
    }
  }

  // Prune everything physically before the absolute-last boundary that
  // isn't preserved. preservedUuids empty when !segIsLive → full prune.
  const toDelete: UUID[] = []
  for (const [uuid] of messages) {
    const idx = entryIndex.get(uuid)
    if (
      idx !== undefined &&
      idx < absoluteLastBoundaryIdx &&
      !preservedUuids.has(uuid)
    ) {
      toDelete.push(uuid)
    }
  }
  for (const uuid of toDelete) messages.delete(uuid)
}
|
||
|
||
/**
 * Delete messages that Snip executions removed from the in-memory array,
 * and relink parentUuid across the gaps.
 *
 * Unlike compact_boundary which truncates a prefix, snip removes
 * middle ranges. The JSONL is append-only, so removed messages stay on disk
 * and the surviving messages' parentUuid chains walk through them. Without
 * this filter, buildConversationChain reconstructs the full unsnipped history
 * and resume immediately PTLs (adamr-20260320-165831: 397K displayed → 1.65M
 * actual).
 *
 * Deleting alone is not enough: the surviving message AFTER a removed range
 * has parentUuid pointing INTO the gap. buildConversationChain would hit
 * messages.get(undefined) and stop, orphaning everything before the gap. So
 * after delete we relink: for each survivor with a dangling parentUuid, walk
 * backward through the removed region's own parent links to the first
 * non-removed ancestor.
 *
 * The boundary records removedUuids at execution time so we can replay the
 * exact removal on load. Older boundaries without removedUuids are skipped —
 * resume loads their pre-snip history (the pre-fix behavior).
 *
 * Mutates the Map in place.
 *
 * @param messages uuid → message map for the full on-disk transcript.
 */
function applySnipRemovals(messages: Map<UUID, TranscriptMessage>): void {
  // Structural check — snipMetadata only exists on the boundary subtype.
  // Avoids the subtype literal which is in excluded-strings.txt
  // (HISTORY_SNIP is ant-only; the literal must not leak into external builds).
  type WithSnipMeta = { snipMetadata?: { removedUuids?: UUID[] } }
  const toDelete = new Set<UUID>()
  for (const entry of messages.values()) {
    const removedUuids = (entry as WithSnipMeta).snipMetadata?.removedUuids
    if (!removedUuids) continue
    for (const uuid of removedUuids) toDelete.add(uuid)
  }
  if (toDelete.size === 0) return

  // Capture each to-delete entry's own parentUuid BEFORE deleting so we can
  // walk backward through contiguous removed ranges. Entries not in the Map
  // (already absent, e.g. from a prior compact_boundary prune) contribute no
  // link; the relink walk will stop at the gap and pick up null (chain-root
  // behavior — same as if compact truncated there, which it did).
  const deletedParent = new Map<UUID, UUID | null>()
  let removedCount = 0
  for (const uuid of toDelete) {
    const entry = messages.get(uuid)
    if (!entry) continue
    deletedParent.set(uuid, entry.parentUuid)
    messages.delete(uuid)
    removedCount++
  }

  // Relink survivors with dangling parentUuid. Walk backward through
  // deletedParent until we hit a UUID not in toDelete (or null). Path
  // compression: after resolving, seed the map with the resolved link so
  // subsequent survivors sharing the same chain segment don't re-walk.
  const resolve = (start: UUID): UUID | null => {
    const path: UUID[] = []
    let cur: UUID | null | undefined = start
    while (cur && toDelete.has(cur)) {
      path.push(cur)
      cur = deletedParent.get(cur)
      if (cur === undefined) {
        // Gap in deletedParent (entry was never on the Map) → treat as root.
        cur = null
        break
      }
    }
    // Path compression: every node on the walk now points at the result.
    for (const p of path) deletedParent.set(p, cur)
    return cur
  }
  let relinkedCount = 0
  for (const [uuid, msg] of messages) {
    if (!msg.parentUuid || !toDelete.has(msg.parentUuid)) continue
    messages.set(uuid, { ...msg, parentUuid: resolve(msg.parentUuid) })
    relinkedCount++
  }

  logEvent('tengu_snip_resume_filtered', {
    removed_count: removedCount,
    relinked_count: relinkedCount,
  })
}
|
||
|
||
/**
|
||
* O(n) single-pass: find the message with the latest timestamp matching a predicate.
|
||
* Replaces the `[...values].filter(pred).sort((a,b) => Date(b)-Date(a))[0]` pattern
|
||
* which is O(n log n) + 2n Date allocations.
|
||
*/
|
||
function findLatestMessage<T extends { timestamp: string }>(
|
||
messages: Iterable<T>,
|
||
predicate: (m: T) => boolean,
|
||
): T | undefined {
|
||
let latest: T | undefined
|
||
let maxTime = -Infinity
|
||
for (const m of messages) {
|
||
if (!predicate(m)) continue
|
||
const t = Date.parse(m.timestamp)
|
||
if (t > maxTime) {
|
||
maxTime = t
|
||
latest = m
|
||
}
|
||
}
|
||
return latest
|
||
}
|
||
|
||
/**
|
||
* Builds a conversation chain from a leaf message to root
|
||
* @param messages Map of all messages
|
||
* @param leafMessage The leaf message to start from
|
||
* @returns Array of messages from root to leaf
|
||
*/
|
||
export function buildConversationChain(
|
||
messages: Map<UUID, TranscriptMessage>,
|
||
leafMessage: TranscriptMessage,
|
||
): TranscriptMessage[] {
|
||
const transcript: TranscriptMessage[] = []
|
||
const seen = new Set<UUID>()
|
||
let currentMsg: TranscriptMessage | undefined = leafMessage
|
||
while (currentMsg) {
|
||
if (seen.has(currentMsg.uuid)) {
|
||
logError(
|
||
new Error(
|
||
`Cycle detected in parentUuid chain at message ${currentMsg.uuid}. Returning partial transcript.`,
|
||
),
|
||
)
|
||
logEvent('tengu_chain_parent_cycle', {})
|
||
break
|
||
}
|
||
seen.add(currentMsg.uuid)
|
||
transcript.push(currentMsg)
|
||
currentMsg = currentMsg.parentUuid
|
||
? messages.get(currentMsg.parentUuid)
|
||
: undefined
|
||
}
|
||
transcript.reverse()
|
||
return recoverOrphanedParallelToolResults(messages, transcript, seen)
|
||
}
|
||
|
||
/**
 * Post-pass for buildConversationChain: recover sibling assistant blocks and
 * tool_results that the single-parent walk orphaned.
 *
 * Streaming (claude.ts:~2024) emits one AssistantMessage per content_block_stop
 * — N parallel tool_uses → N messages, distinct uuid, same message.id. Each
 * tool_result's sourceToolAssistantUUID points to its own one-block assistant,
 * so insertMessageChain's override (line ~894) writes each TR's parentUuid to a
 * DIFFERENT assistant. The topology is a DAG; the walk above is a linked-list
 * traversal and keeps only one branch.
 *
 * Two loss modes observed in production (both fixed here):
 * 1. Sibling assistant orphaned: walk goes prev→asstA→TR_A→next, drops asstB
 *    (same message.id, chained off asstA) and TR_B.
 * 2. Progress-fork (legacy, pre-#23537): each tool_use asst had a progress
 *    child (continued the write chain) AND a TR child. Walk followed
 *    progress; TRs were dropped. No longer written (progress removed from
 *    transcript persistence), but old transcripts still have this shape.
 *
 * Read-side fix: the write topology is already on disk for old transcripts;
 * this recovery pass handles them.
 *
 * @param messages Full uuid → message map (searched for off-chain siblings/TRs)
 * @param chain The root→leaf chain produced by the parent walk
 * @param seen Uuids already on the chain; recovered uuids are added to it
 * @returns The chain with recovered messages spliced in after their anchors
 */
function recoverOrphanedParallelToolResults(
  messages: Map<UUID, TranscriptMessage>,
  chain: TranscriptMessage[],
  seen: Set<UUID>,
): TranscriptMessage[] {
  type ChainAssistant = Extract<TranscriptMessage, { type: 'assistant' }>
  const chainAssistants = chain.filter(
    (m): m is ChainAssistant => m.type === 'assistant',
  )
  if (chainAssistants.length === 0) return chain

  // Anchor = last on-chain member of each sibling group. chainAssistants is
  // already in chain order, so later iterations overwrite → last wins.
  const anchorByMsgId = new Map<string, ChainAssistant>()
  for (const a of chainAssistants) {
    if (a.message.id) anchorByMsgId.set(a.message.id, a)
  }

  // O(n) precompute: sibling groups and TR index.
  // TRs indexed by parentUuid — insertMessageChain:~894 already wrote that
  // as the srcUUID, and --fork-session strips srcUUID but keeps parentUuid.
  const siblingsByMsgId = new Map<string, TranscriptMessage[]>()
  const toolResultsByAsst = new Map<UUID, TranscriptMessage[]>()
  for (const m of messages.values()) {
    if (m.type === 'assistant' && m.message.id) {
      const group = siblingsByMsgId.get(m.message.id)
      if (group) group.push(m)
      else siblingsByMsgId.set(m.message.id, [m])
    } else if (
      m.type === 'user' &&
      m.parentUuid &&
      Array.isArray(m.message.content) &&
      m.message.content.some(b => b.type === 'tool_result')
    ) {
      const group = toolResultsByAsst.get(m.parentUuid)
      if (group) group.push(m)
      else toolResultsByAsst.set(m.parentUuid, [m])
    }
  }

  // For each message.id group touching the chain: collect off-chain siblings,
  // then off-chain TRs for ALL members. Splice right after the last on-chain
  // member so the group stays contiguous for normalizeMessagesForAPI's merge
  // and every TR lands after its tool_use.
  const processedGroups = new Set<string>()
  const inserts = new Map<UUID, TranscriptMessage[]>()
  let recoveredCount = 0
  for (const asst of chainAssistants) {
    const msgId = asst.message.id
    if (!msgId || processedGroups.has(msgId)) continue
    processedGroups.add(msgId)

    const group = siblingsByMsgId.get(msgId) ?? [asst]
    const orphanedSiblings = group.filter(s => !seen.has(s.uuid))
    const orphanedTRs: TranscriptMessage[] = []
    for (const member of group) {
      const trs = toolResultsByAsst.get(member.uuid)
      if (!trs) continue
      for (const tr of trs) {
        if (!seen.has(tr.uuid)) orphanedTRs.push(tr)
      }
    }
    if (orphanedSiblings.length === 0 && orphanedTRs.length === 0) continue

    // Timestamp sort keeps content-block / completion order; stable-sort
    // preserves JSONL write order on ties.
    orphanedSiblings.sort((a, b) => a.timestamp.localeCompare(b.timestamp))
    orphanedTRs.sort((a, b) => a.timestamp.localeCompare(b.timestamp))

    // Non-null is safe: msgId came from a chain assistant, so anchorByMsgId
    // was seeded with it above.
    const anchor = anchorByMsgId.get(msgId)!
    const recovered = [...orphanedSiblings, ...orphanedTRs]
    for (const r of recovered) seen.add(r.uuid)
    recoveredCount += recovered.length
    inserts.set(anchor.uuid, recovered)
  }

  if (recoveredCount === 0) return chain
  logEvent('tengu_chain_parallel_tr_recovered', {
    recovered_count: recoveredCount,
  })

  // Rebuild the chain with each recovered run spliced after its anchor.
  const result: TranscriptMessage[] = []
  for (const m of chain) {
    result.push(m)
    const toInsert = inserts.get(m.uuid)
    if (toInsert) result.push(...toInsert)
  }
  return result
}
|
||
|
||
/**
|
||
* Find the latest turn_duration checkpoint in the reconstructed chain and
|
||
* compare its recorded messageCount against the chain's position at that
|
||
* point. Emits tengu_resume_consistency_delta for BigQuery monitoring of
|
||
* write→load round-trip drift — the class of bugs where snip/compact/
|
||
* parallel-TR operations mutate in-memory but the parentUuid walk on disk
|
||
* reconstructs a different set (adamr-20260320-165831: 397K displayed →
|
||
* 1.65M actual on resume).
|
||
*
|
||
* delta > 0: resume loaded MORE than in-session (the usual failure mode)
|
||
* delta < 0: resume loaded FEWER (chain truncation — #22453 class)
|
||
* delta = 0: round-trip consistent
|
||
*
|
||
* Called from loadConversationForResume — fires once per resume, not on
|
||
* /share or log-listing chain rebuilds.
|
||
*/
|
||
export function checkResumeConsistency(chain: Message[]): void {
|
||
for (let i = chain.length - 1; i >= 0; i--) {
|
||
const m = chain[i]!
|
||
if (m.type !== 'system' || m.subtype !== 'turn_duration') continue
|
||
const expected = m.messageCount
|
||
if (expected === undefined) return
|
||
// `i` is the 0-based index of the checkpoint in the reconstructed chain.
|
||
// The checkpoint was appended AFTER messageCount messages, so its own
|
||
// position should be messageCount (i.e., i === expected).
|
||
const actual = i
|
||
logEvent('tengu_resume_consistency_delta', {
|
||
expected,
|
||
actual,
|
||
delta: actual - expected,
|
||
chain_length: chain.length,
|
||
checkpoint_age_entries: chain.length - 1 - i,
|
||
})
|
||
return
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Builds a filie history snapshot chain from the conversation
|
||
*/
|
||
function buildFileHistorySnapshotChain(
|
||
fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>,
|
||
conversation: TranscriptMessage[],
|
||
): FileHistorySnapshot[] {
|
||
const snapshots: FileHistorySnapshot[] = []
|
||
// messageId → last index in snapshots[] for O(1) update lookup
|
||
const indexByMessageId = new Map<string, number>()
|
||
for (const message of conversation) {
|
||
const snapshotMessage = fileHistorySnapshots.get(message.uuid)
|
||
if (!snapshotMessage) {
|
||
continue
|
||
}
|
||
const { snapshot, isSnapshotUpdate } = snapshotMessage
|
||
const existingIndex = isSnapshotUpdate
|
||
? indexByMessageId.get(snapshot.messageId)
|
||
: undefined
|
||
if (existingIndex === undefined) {
|
||
indexByMessageId.set(snapshot.messageId, snapshots.length)
|
||
snapshots.push(snapshot)
|
||
} else {
|
||
snapshots[existingIndex] = snapshot
|
||
}
|
||
}
|
||
return snapshots
|
||
}
|
||
|
||
/**
|
||
* Builds an attribution snapshot chain from the conversation.
|
||
* Unlike file history snapshots, attribution snapshots are returned in full
|
||
* because they use generated UUIDs (not message UUIDs) and represent
|
||
* cumulative state that should be restored on session resume.
|
||
*/
|
||
function buildAttributionSnapshotChain(
|
||
attributionSnapshots: Map<UUID, AttributionSnapshotMessage>,
|
||
_conversation: TranscriptMessage[],
|
||
): AttributionSnapshotMessage[] {
|
||
// Return all attribution snapshots - they will be merged during restore
|
||
return Array.from(attributionSnapshots.values())
|
||
}
|
||
|
||
/**
 * Loads a transcript from a JSON or JSONL file and converts it to LogOption format
 * @param filePath Path to the transcript file (.json or .jsonl)
 * @returns LogOption containing the transcript messages
 * @throws Error if file doesn't exist or contains invalid data
 */
export async function loadTranscriptFromFile(
  filePath: string,
): Promise<LogOption> {
  if (filePath.endsWith('.jsonl')) {
    const {
      messages,
      summaries,
      customTitles,
      tags,
      fileHistorySnapshots,
      attributionSnapshots,
      contextCollapseCommits,
      contextCollapseSnapshot,
      leafUuids,
      contentReplacements,
      worktreeStates,
    } = await loadTranscriptFile(filePath)

    if (messages.size === 0) {
      throw new Error('No messages found in JSONL file')
    }

    // Find the most recent leaf message using pre-computed leaf UUIDs
    const leafMessage = findLatestMessage(messages.values(), msg =>
      leafUuids.has(msg.uuid),
    )

    if (!leafMessage) {
      throw new Error('No valid conversation chain found in JSONL file')
    }

    // Build the conversation chain backwards from leaf to root
    const transcript = buildConversationChain(messages, leafMessage)

    // Session-scoped metadata is keyed by the leaf's sessionId; summaries
    // are keyed by the leaf message's own uuid.
    const summary = summaries.get(leafMessage.uuid)
    const customTitle = customTitles.get(leafMessage.sessionId as UUID)
    const tag = tags.get(leafMessage.sessionId as UUID)
    const sessionId = leafMessage.sessionId as UUID
    return {
      ...convertToLogOption(
        transcript,
        0,
        summary,
        customTitle,
        buildFileHistorySnapshotChain(fileHistorySnapshots, transcript),
        tag,
        filePath,
        buildAttributionSnapshotChain(attributionSnapshots, transcript),
        undefined,
        contentReplacements.get(sessionId) ?? [],
      ),
      // Collapse metadata is filtered to the leaf's session so forked
      // branches in the same file don't leak each other's state.
      contextCollapseCommits: contextCollapseCommits.filter(
        e => e.sessionId === sessionId,
      ),
      contextCollapseSnapshot:
        contextCollapseSnapshot?.sessionId === sessionId
          ? contextCollapseSnapshot
          : undefined,
      worktreeSession: worktreeStates.has(sessionId)
        ? worktreeStates.get(sessionId)
        : undefined,
    }
  }

  // json log files
  const content = await readFile(filePath, { encoding: 'utf-8' })
  let parsed: unknown

  try {
    parsed = jsonParse(content)
  } catch (error) {
    throw new Error(`Invalid JSON in transcript file: ${error}`)
  }

  let messages: TranscriptMessage[]

  // Accept either a bare array of messages or a { messages: [...] } wrapper.
  if (Array.isArray(parsed)) {
    messages = parsed
  } else if (parsed && typeof parsed === 'object' && 'messages' in parsed) {
    if (!Array.isArray(parsed.messages)) {
      throw new Error('Transcript messages must be an array')
    }
    messages = parsed.messages
  } else {
    throw new Error(
      'Transcript must be an array of messages or an object with a messages array',
    )
  }

  // JSON files carry no sidecar metadata (summary/title/snapshots/etc.).
  return convertToLogOption(
    messages,
    0,
    undefined,
    undefined,
    undefined,
    undefined,
    filePath,
  )
}
|
||
|
||
/**
|
||
* Checks if a user message has visible content (text or image, not just tool_result).
|
||
* Tool results are displayed as part of collapsed groups, not as standalone messages.
|
||
* Also excludes meta messages which are not shown to the user.
|
||
*/
|
||
function hasVisibleUserContent(message: TranscriptMessage): boolean {
|
||
if (message.type !== 'user') return false
|
||
|
||
// Meta messages are not shown to the user
|
||
if (message.isMeta) return false
|
||
|
||
const content = message.message?.content
|
||
if (!content) return false
|
||
|
||
// String content is always visible
|
||
if (typeof content === 'string') {
|
||
return content.trim().length > 0
|
||
}
|
||
|
||
// Array content: check for text or image blocks (not tool_result)
|
||
if (Array.isArray(content)) {
|
||
return content.some(
|
||
block =>
|
||
block.type === 'text' ||
|
||
block.type === 'image' ||
|
||
block.type === 'document',
|
||
)
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
/**
|
||
* Checks if an assistant message has visible text content (not just tool_use blocks).
|
||
* Tool uses are displayed as grouped/collapsed UI elements, not as standalone messages.
|
||
*/
|
||
function hasVisibleAssistantContent(message: TranscriptMessage): boolean {
|
||
if (message.type !== 'assistant') return false
|
||
|
||
const content = message.message?.content
|
||
if (!content || !Array.isArray(content)) return false
|
||
|
||
// Check for text block (not just tool_use/thinking blocks)
|
||
return content.some(
|
||
block =>
|
||
block.type === 'text' &&
|
||
typeof block.text === 'string' &&
|
||
block.text.trim().length > 0,
|
||
)
|
||
}
|
||
|
||
/**
|
||
* Counts visible messages that would appear as conversation turns in the UI.
|
||
* Excludes:
|
||
* - System, attachment, and progress messages
|
||
* - User messages with isMeta flag (hidden from user)
|
||
* - User messages that only contain tool_result blocks (displayed as collapsed groups)
|
||
* - Assistant messages that only contain tool_use blocks (displayed as collapsed groups)
|
||
*/
|
||
function countVisibleMessages(transcript: TranscriptMessage[]): number {
|
||
let count = 0
|
||
for (const message of transcript) {
|
||
switch (message.type) {
|
||
case 'user':
|
||
// Count user messages with visible content (text, image, not just tool_result or meta)
|
||
if (hasVisibleUserContent(message)) {
|
||
count++
|
||
}
|
||
break
|
||
case 'assistant':
|
||
// Count assistant messages with text content (not just tool_use)
|
||
if (hasVisibleAssistantContent(message)) {
|
||
count++
|
||
}
|
||
break
|
||
case 'attachment':
|
||
case 'system':
|
||
case 'progress':
|
||
// These message types are not counted as visible conversation turns
|
||
break
|
||
}
|
||
}
|
||
return count
|
||
}
|
||
|
||
function convertToLogOption(
|
||
transcript: TranscriptMessage[],
|
||
value: number = 0,
|
||
summary?: string,
|
||
customTitle?: string,
|
||
fileHistorySnapshots?: FileHistorySnapshot[],
|
||
tag?: string,
|
||
fullPath?: string,
|
||
attributionSnapshots?: AttributionSnapshotMessage[],
|
||
agentSetting?: string,
|
||
contentReplacements?: ContentReplacementRecord[],
|
||
): LogOption {
|
||
const lastMessage = transcript.at(-1)!
|
||
const firstMessage = transcript[0]!
|
||
|
||
// Get the first user message for the prompt
|
||
const firstPrompt = extractFirstPrompt(transcript)
|
||
|
||
// Create timestamps from message timestamps
|
||
const created = new Date(firstMessage.timestamp)
|
||
const modified = new Date(lastMessage.timestamp)
|
||
|
||
return {
|
||
date: lastMessage.timestamp,
|
||
messages: removeExtraFields(transcript),
|
||
fullPath,
|
||
value,
|
||
created,
|
||
modified,
|
||
firstPrompt,
|
||
messageCount: countVisibleMessages(transcript),
|
||
isSidechain: firstMessage.isSidechain,
|
||
teamName: firstMessage.teamName,
|
||
agentName: firstMessage.agentName,
|
||
agentSetting,
|
||
leafUuid: lastMessage.uuid,
|
||
summary,
|
||
customTitle,
|
||
tag,
|
||
fileHistorySnapshots: fileHistorySnapshots,
|
||
attributionSnapshots: attributionSnapshots,
|
||
contentReplacements,
|
||
gitBranch: lastMessage.gitBranch,
|
||
projectPath: firstMessage.cwd,
|
||
}
|
||
}
|
||
|
||
async function trackSessionBranchingAnalytics(
|
||
logs: LogOption[],
|
||
): Promise<void> {
|
||
const sessionIdCounts = new Map<string, number>()
|
||
let maxCount = 0
|
||
for (const log of logs) {
|
||
const sessionId = getSessionIdFromLog(log)
|
||
if (sessionId) {
|
||
const newCount = (sessionIdCounts.get(sessionId) || 0) + 1
|
||
sessionIdCounts.set(sessionId, newCount)
|
||
maxCount = Math.max(newCount, maxCount)
|
||
}
|
||
}
|
||
|
||
// Early exit if no duplicates detected
|
||
if (maxCount <= 1) {
|
||
return
|
||
}
|
||
|
||
// Count sessions with branches and calculate stats using functional approach
|
||
const branchCounts = Array.from(sessionIdCounts.values()).filter(c => c > 1)
|
||
const sessionsWithBranches = branchCounts.length
|
||
const totalBranches = branchCounts.reduce((sum, count) => sum + count, 0)
|
||
|
||
logEvent('tengu_session_forked_branches_fetched', {
|
||
total_sessions: sessionIdCounts.size,
|
||
sessions_with_branches: sessionsWithBranches,
|
||
max_branches_per_session: Math.max(...branchCounts),
|
||
avg_branches_per_session: Math.round(totalBranches / sessionsWithBranches),
|
||
total_transcript_count: logs.length,
|
||
})
|
||
}
|
||
|
||
export async function fetchLogs(limit?: number): Promise<LogOption[]> {
|
||
const projectDir = getProjectDir(getOriginalCwd())
|
||
const logs = await getSessionFilesLite(projectDir, limit, getOriginalCwd())
|
||
|
||
await trackSessionBranchingAnalytics(logs)
|
||
|
||
return logs
|
||
}
|
||
|
||
/**
|
||
* Append an entry to a session file. Creates the parent dir if missing.
|
||
*/
|
||
/* eslint-disable custom-rules/no-sync-fs -- sync callers (exit cleanup, materialize) */
|
||
function appendEntryToFile(
|
||
fullPath: string,
|
||
entry: Record<string, unknown>,
|
||
): void {
|
||
const fs = getFsImplementation()
|
||
const line = jsonStringify(entry) + '\n'
|
||
try {
|
||
fs.appendFileSync(fullPath, line, { mode: 0o600 })
|
||
} catch {
|
||
fs.mkdirSync(dirname(fullPath), { mode: 0o700 })
|
||
fs.appendFileSync(fullPath, line, { mode: 0o600 })
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Sync tail read for reAppendSessionMetadata's external-writer check.
|
||
* fstat on the already-open fd (no extra path lookup); reads the same
|
||
* LITE_READ_BUF_SIZE window that readLiteMetadata scans. Returns empty
|
||
* string on any error so callers fall through to unconditional behavior.
|
||
*/
|
||
function readFileTailSync(fullPath: string): string {
|
||
let fd: number | undefined
|
||
try {
|
||
fd = openSync(fullPath, 'r')
|
||
const st = fstatSync(fd)
|
||
const tailOffset = Math.max(0, st.size - LITE_READ_BUF_SIZE)
|
||
const buf = Buffer.allocUnsafe(
|
||
Math.min(LITE_READ_BUF_SIZE, st.size - tailOffset),
|
||
)
|
||
const bytesRead = readSync(fd, buf, 0, buf.length, tailOffset)
|
||
return buf.toString('utf8', 0, bytesRead)
|
||
} catch {
|
||
return ''
|
||
} finally {
|
||
if (fd !== undefined) {
|
||
try {
|
||
closeSync(fd)
|
||
} catch {
|
||
// closeSync can throw; swallow to preserve return '' contract
|
||
}
|
||
}
|
||
}
|
||
}
|
||
/* eslint-enable custom-rules/no-sync-fs */
|
||
|
||
export async function saveCustomTitle(
|
||
sessionId: UUID,
|
||
customTitle: string,
|
||
fullPath?: string,
|
||
source: 'user' | 'auto' = 'user',
|
||
) {
|
||
// Fall back to computed path if fullPath is not provided
|
||
const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
|
||
appendEntryToFile(resolvedPath, {
|
||
type: 'custom-title',
|
||
customTitle,
|
||
sessionId,
|
||
})
|
||
// Cache for current session only (for immediate visibility)
|
||
if (sessionId === getSessionId()) {
|
||
getProject().currentSessionTitle = customTitle
|
||
}
|
||
logEvent('tengu_session_renamed', {
|
||
source:
|
||
source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||
})
|
||
}
|
||
|
||
/**
|
||
* Persist an AI-generated title to the JSONL as a distinct `ai-title` entry.
|
||
*
|
||
* Writing a separate entry type (vs. reusing `custom-title`) is load-bearing:
|
||
* - Read preference: readers prefer `customTitle` field over `aiTitle`, so
|
||
* a user rename always wins regardless of append order.
|
||
* - Resume safety: `loadTranscriptFile` only populates the `customTitles`
|
||
* Map from `custom-title` entries, so `restoreSessionMetadata` never
|
||
* caches an AI title and `reAppendSessionMetadata` never re-appends one
|
||
* at EOF — avoiding the clobber-on-resume bug where a stale AI title
|
||
* overwrites a mid-session user rename.
|
||
* - CAS semantics: VS Code's `onlyIfNoCustomTitle` check scans for the
|
||
* `customTitle` field only, so AI can overwrite its own previous AI
|
||
* title but never a user title.
|
||
* - Metrics: `tengu_session_renamed` is not fired for AI titles.
|
||
*
|
||
* Because the entry is never re-appended, it scrolls out of the 64KB tail
|
||
* window once enough messages accumulate. Readers (`readLiteMetadata`,
|
||
* `listSessionsImpl`, VS Code `fetchSessions`) fall back to scanning the
|
||
* head buffer for `aiTitle` in that case. Both head and tail reads are
|
||
* bounded (64KB each via `extractLastJsonStringField`), never a full scan.
|
||
*
|
||
* Callers with a stale-write guard (e.g., VS Code client) should prefer
|
||
* passing `persist: false` to the SDK control request and persisting
|
||
* through their own rename path after the guard passes, to avoid a race
|
||
* where the AI title lands after a mid-flight user rename.
|
||
*/
|
||
export function saveAiGeneratedTitle(sessionId: UUID, aiTitle: string): void {
|
||
appendEntryToFile(getTranscriptPathForSession(sessionId), {
|
||
type: 'ai-title',
|
||
aiTitle,
|
||
sessionId,
|
||
})
|
||
}
|
||
|
||
/**
|
||
* Append a periodic task summary for `claude ps`. Unlike ai-title this is
|
||
* not re-appended by reAppendSessionMetadata — it's a rolling snapshot of
|
||
* what the agent is doing *now*, so staleness is fine; ps reads the most
|
||
* recent one from the tail.
|
||
*/
|
||
export function saveTaskSummary(sessionId: UUID, summary: string): void {
|
||
appendEntryToFile(getTranscriptPathForSession(sessionId), {
|
||
type: 'task-summary',
|
||
summary,
|
||
sessionId,
|
||
timestamp: new Date().toISOString(),
|
||
})
|
||
}
|
||
|
||
export async function saveTag(sessionId: UUID, tag: string, fullPath?: string) {
|
||
// Fall back to computed path if fullPath is not provided
|
||
const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
|
||
appendEntryToFile(resolvedPath, { type: 'tag', tag, sessionId })
|
||
// Cache for current session only (for immediate visibility)
|
||
if (sessionId === getSessionId()) {
|
||
getProject().currentSessionTag = tag
|
||
}
|
||
logEvent('tengu_session_tagged', {})
|
||
}
|
||
|
||
/**
|
||
* Link a session to a GitHub pull request.
|
||
* This stores the PR number, URL, and repository for tracking and navigation.
|
||
*/
|
||
export async function linkSessionToPR(
|
||
sessionId: UUID,
|
||
prNumber: number,
|
||
prUrl: string,
|
||
prRepository: string,
|
||
fullPath?: string,
|
||
): Promise<void> {
|
||
const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
|
||
appendEntryToFile(resolvedPath, {
|
||
type: 'pr-link',
|
||
sessionId,
|
||
prNumber,
|
||
prUrl,
|
||
prRepository,
|
||
timestamp: new Date().toISOString(),
|
||
})
|
||
// Cache for current session so reAppendSessionMetadata can re-write after compaction
|
||
if (sessionId === getSessionId()) {
|
||
const project = getProject()
|
||
project.currentSessionPrNumber = prNumber
|
||
project.currentSessionPrUrl = prUrl
|
||
project.currentSessionPrRepository = prRepository
|
||
}
|
||
logEvent('tengu_session_linked_to_pr', { prNumber })
|
||
}
|
||
|
||
export function getCurrentSessionTag(sessionId: UUID): string | undefined {
|
||
// Only returns tag for current session (the only one we cache)
|
||
if (sessionId === getSessionId()) {
|
||
return getProject().currentSessionTag
|
||
}
|
||
return undefined
|
||
}
|
||
|
||
export function getCurrentSessionTitle(
|
||
sessionId: SessionId,
|
||
): string | undefined {
|
||
// Only returns title for current session (the only one we cache)
|
||
if (sessionId === getSessionId()) {
|
||
return getProject().currentSessionTitle
|
||
}
|
||
return undefined
|
||
}
|
||
|
||
export function getCurrentSessionAgentColor(): string | undefined {
|
||
return getProject().currentSessionAgentColor
|
||
}
|
||
|
||
/**
|
||
* Restore session metadata into in-memory cache on resume.
|
||
* Populates the cache so metadata is available for display (e.g. the
|
||
* agent banner) and re-appended on session exit via reAppendSessionMetadata.
|
||
*/
|
||
export function restoreSessionMetadata(meta: {
|
||
customTitle?: string
|
||
tag?: string
|
||
agentName?: string
|
||
agentColor?: string
|
||
agentSetting?: string
|
||
mode?: 'coordinator' | 'normal'
|
||
worktreeSession?: PersistedWorktreeSession | null
|
||
prNumber?: number
|
||
prUrl?: string
|
||
prRepository?: string
|
||
}): void {
|
||
const project = getProject()
|
||
// ??= so --name (cacheSessionTitle) wins over the resumed
|
||
// session's title. REPL.tsx clears before calling, so /resume is unaffected.
|
||
if (meta.customTitle) project.currentSessionTitle ??= meta.customTitle
|
||
if (meta.tag !== undefined) project.currentSessionTag = meta.tag || undefined
|
||
if (meta.agentName) project.currentSessionAgentName = meta.agentName
|
||
if (meta.agentColor) project.currentSessionAgentColor = meta.agentColor
|
||
if (meta.agentSetting) project.currentSessionAgentSetting = meta.agentSetting
|
||
if (meta.mode) project.currentSessionMode = meta.mode
|
||
if (meta.worktreeSession !== undefined)
|
||
project.currentSessionWorktree = meta.worktreeSession
|
||
if (meta.prNumber !== undefined)
|
||
project.currentSessionPrNumber = meta.prNumber
|
||
if (meta.prUrl) project.currentSessionPrUrl = meta.prUrl
|
||
if (meta.prRepository) project.currentSessionPrRepository = meta.prRepository
|
||
}
|
||
|
||
/**
|
||
* Clear all cached session metadata (title, tag, agent name/color).
|
||
* Called when /clear creates a new session so stale metadata
|
||
* from the previous session does not leak into the new one.
|
||
*/
|
||
export function clearSessionMetadata(): void {
|
||
const project = getProject()
|
||
project.currentSessionTitle = undefined
|
||
project.currentSessionTag = undefined
|
||
project.currentSessionAgentName = undefined
|
||
project.currentSessionAgentColor = undefined
|
||
project.currentSessionLastPrompt = undefined
|
||
project.currentSessionAgentSetting = undefined
|
||
project.currentSessionMode = undefined
|
||
project.currentSessionWorktree = undefined
|
||
project.currentSessionPrNumber = undefined
|
||
project.currentSessionPrUrl = undefined
|
||
project.currentSessionPrRepository = undefined
|
||
}
|
||
|
||
/**
|
||
* Re-append cached session metadata (custom title, tag) to the end of the
|
||
* transcript file. Call this after compaction so the metadata stays within
|
||
* the 16KB tail window that readLiteMetadata reads during progressive loading.
|
||
* Without this, enough post-compaction messages can push the metadata entry
|
||
* out of the window, causing `--resume` to show the auto-generated firstPrompt
|
||
* instead of the user-set session name.
|
||
*/
|
||
export function reAppendSessionMetadata(): void {
|
||
getProject().reAppendSessionMetadata()
|
||
}
|
||
|
||
export async function saveAgentName(
|
||
sessionId: UUID,
|
||
agentName: string,
|
||
fullPath?: string,
|
||
source: 'user' | 'auto' = 'user',
|
||
) {
|
||
const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
|
||
appendEntryToFile(resolvedPath, { type: 'agent-name', agentName, sessionId })
|
||
// Cache for current session only (for immediate visibility)
|
||
if (sessionId === getSessionId()) {
|
||
getProject().currentSessionAgentName = agentName
|
||
void updateSessionName(agentName)
|
||
}
|
||
logEvent('tengu_agent_name_set', {
|
||
source:
|
||
source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||
})
|
||
}
|
||
|
||
export async function saveAgentColor(
|
||
sessionId: UUID,
|
||
agentColor: string,
|
||
fullPath?: string,
|
||
) {
|
||
const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
|
||
appendEntryToFile(resolvedPath, {
|
||
type: 'agent-color',
|
||
agentColor,
|
||
sessionId,
|
||
})
|
||
// Cache for current session only (for immediate visibility)
|
||
if (sessionId === getSessionId()) {
|
||
getProject().currentSessionAgentColor = agentColor
|
||
}
|
||
logEvent('tengu_agent_color_set', {})
|
||
}
|
||
|
||
/**
|
||
* Cache the session agent setting. Written to disk by materializeSessionFile
|
||
* on the first user message, and re-stamped by reAppendSessionMetadata on exit.
|
||
* Cache-only here to avoid creating metadata-only session files at startup.
|
||
*/
|
||
export function saveAgentSetting(agentSetting: string): void {
|
||
getProject().currentSessionAgentSetting = agentSetting
|
||
}
|
||
|
||
/**
|
||
* Cache a session title set at startup (--name). Written to disk by
|
||
* materializeSessionFile on the first user message. Cache-only here so no
|
||
* orphan metadata-only file is created before the session ID is finalized.
|
||
*/
|
||
export function cacheSessionTitle(customTitle: string): void {
|
||
getProject().currentSessionTitle = customTitle
|
||
}
|
||
|
||
/**
|
||
* Cache the session mode. Written to disk by materializeSessionFile on the
|
||
* first user message, and re-stamped by reAppendSessionMetadata on exit.
|
||
* Cache-only here to avoid creating metadata-only session files at startup.
|
||
*/
|
||
export function saveMode(mode: 'coordinator' | 'normal'): void {
|
||
getProject().currentSessionMode = mode
|
||
}
|
||
|
||
/**
|
||
* Record the session's worktree state for --resume. Written to disk by
|
||
* materializeSessionFile on the first user message and re-stamped by
|
||
* reAppendSessionMetadata on exit. Pass null when exiting a worktree
|
||
* so --resume knows not to cd back into it.
|
||
*/
|
||
export function saveWorktreeState(
|
||
worktreeSession: PersistedWorktreeSession | null,
|
||
): void {
|
||
// Strip ephemeral fields (creationDurationMs, usedSparsePaths) that callers
|
||
// may pass via full WorktreeSession objects — TypeScript structural typing
|
||
// allows this, but we don't want them serialized to the transcript.
|
||
const stripped: PersistedWorktreeSession | null = worktreeSession
|
||
? {
|
||
originalCwd: worktreeSession.originalCwd,
|
||
worktreePath: worktreeSession.worktreePath,
|
||
worktreeName: worktreeSession.worktreeName,
|
||
worktreeBranch: worktreeSession.worktreeBranch,
|
||
originalBranch: worktreeSession.originalBranch,
|
||
originalHeadCommit: worktreeSession.originalHeadCommit,
|
||
sessionId: worktreeSession.sessionId,
|
||
tmuxSessionName: worktreeSession.tmuxSessionName,
|
||
hookBased: worktreeSession.hookBased,
|
||
}
|
||
: null
|
||
const project = getProject()
|
||
project.currentSessionWorktree = stripped
|
||
// Write eagerly when the file already exists (mid-session enter/exit).
|
||
// For --worktree startup, sessionFile is null — materializeSessionFile
|
||
// will write it on the first message via reAppendSessionMetadata.
|
||
if (project.sessionFile) {
|
||
appendEntryToFile(project.sessionFile, {
|
||
type: 'worktree-state',
|
||
worktreeSession: stripped,
|
||
sessionId: getSessionId(),
|
||
})
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Extracts the session ID from a log.
|
||
* For lite logs, uses the sessionId field directly.
|
||
* For full logs, extracts from the first message.
|
||
*/
|
||
export function getSessionIdFromLog(log: LogOption): UUID | undefined {
|
||
// For lite logs, use the direct sessionId field
|
||
if (log.sessionId) {
|
||
return log.sessionId as UUID
|
||
}
|
||
// Fall back to extracting from first message (full logs)
|
||
return log.messages[0]?.sessionId as UUID | undefined
|
||
}
|
||
|
||
/**
|
||
* Checks if a log is a lite log that needs full loading.
|
||
* Lite logs have messages: [] and sessionId set.
|
||
*/
|
||
export function isLiteLog(log: LogOption): boolean {
|
||
return log.messages.length === 0 && log.sessionId !== undefined
|
||
}
|
||
|
||
/**
 * Loads full messages for a lite log by reading its JSONL file.
 * Returns a new LogOption with populated messages array.
 * If the log is already full or loading fails, returns the original log.
 */
export async function loadFullLog(log: LogOption): Promise<LogOption> {
  // If already full, return as-is
  if (!isLiteLog(log)) {
    return log
  }

  // Use the fullPath from the index entry directly
  const sessionFile = log.fullPath
  if (!sessionFile) {
    return log
  }

  try {
    const {
      messages,
      summaries,
      customTitles,
      tags,
      agentNames,
      agentColors,
      agentSettings,
      prNumbers,
      prUrls,
      prRepositories,
      modes,
      worktreeStates,
      fileHistorySnapshots,
      attributionSnapshots,
      contentReplacements,
      contextCollapseCommits,
      contextCollapseSnapshot,
      leafUuids,
    } = await loadTranscriptFile(sessionFile)

    // An empty transcript means there is nothing to hydrate from.
    if (messages.size === 0) {
      return log
    }

    // Find the most recent user/assistant leaf message from the transcript
    const mostRecentLeaf = findLatestMessage(
      messages.values(),
      msg =>
        leafUuids.has(msg.uuid) &&
        (msg.type === 'user' || msg.type === 'assistant'),
    )
    if (!mostRecentLeaf) {
      return log
    }

    // Build the conversation chain from this leaf
    const transcript = buildConversationChain(messages, mostRecentLeaf)
    // Leaf's sessionId — forked sessions copy chain[0] from the source, but
    // metadata entries (custom-title etc.) are keyed by the current session.
    const sessionId = mostRecentLeaf.sessionId as UUID | undefined
    // Per-field pattern below: when the transcript yielded a sessionId, take
    // the value recorded in the file; otherwise keep whatever the lite log
    // already had. NOTE: mostRecentLeaf is guaranteed truthy here (guarded
    // above), so the `mostRecentLeaf ? ... : ...` ternary and the `?.`
    // chains on it below never take their fallback arms.
    return {
      ...log,
      messages: removeExtraFields(transcript),
      firstPrompt: extractFirstPrompt(transcript),
      messageCount: countVisibleMessages(transcript),
      summary: mostRecentLeaf
        ? summaries.get(mostRecentLeaf.uuid)
        : log.summary,
      customTitle: sessionId ? customTitles.get(sessionId) : log.customTitle,
      tag: sessionId ? tags.get(sessionId) : log.tag,
      agentName: sessionId ? agentNames.get(sessionId) : log.agentName,
      agentColor: sessionId ? agentColors.get(sessionId) : log.agentColor,
      agentSetting: sessionId ? agentSettings.get(sessionId) : log.agentSetting,
      mode: sessionId ? (modes.get(sessionId) as LogOption['mode']) : log.mode,
      // `has` check (not just `get`): a stored null means "worktree exited"
      // and must override the lite log's value, which plain get-or-fallback
      // would conflate with "no entry".
      worktreeSession:
        sessionId && worktreeStates.has(sessionId)
          ? worktreeStates.get(sessionId)
          : log.worktreeSession,
      prNumber: sessionId ? prNumbers.get(sessionId) : log.prNumber,
      prUrl: sessionId ? prUrls.get(sessionId) : log.prUrl,
      prRepository: sessionId
        ? prRepositories.get(sessionId)
        : log.prRepository,
      gitBranch: mostRecentLeaf?.gitBranch ?? log.gitBranch,
      isSidechain: transcript[0]?.isSidechain ?? log.isSidechain,
      teamName: transcript[0]?.teamName ?? log.teamName,
      leafUuid: mostRecentLeaf?.uuid ?? log.leafUuid,
      fileHistorySnapshots: buildFileHistorySnapshotChain(
        fileHistorySnapshots,
        transcript,
      ),
      attributionSnapshots: buildAttributionSnapshotChain(
        attributionSnapshots,
        transcript,
      ),
      contentReplacements: sessionId
        ? (contentReplacements.get(sessionId) ?? [])
        : log.contentReplacements,
      // Filter to the resumed session's entries. loadTranscriptFile reads
      // the file sequentially so the array is already in commit order;
      // filter preserves that.
      contextCollapseCommits: sessionId
        ? contextCollapseCommits.filter(e => e.sessionId === sessionId)
        : undefined,
      contextCollapseSnapshot:
        sessionId && contextCollapseSnapshot?.sessionId === sessionId
          ? contextCollapseSnapshot
          : undefined,
    }
  } catch {
    // If loading fails, return the original log
    return log
  }
}
/**
|
||
* Searches for sessions by custom title match.
|
||
* Returns matches sorted by recency (newest first).
|
||
* Uses case-insensitive matching for better UX.
|
||
* Deduplicates by sessionId (keeps most recent per session).
|
||
* Searches across same-repo worktrees by default.
|
||
*/
|
||
export async function searchSessionsByCustomTitle(
|
||
query: string,
|
||
options?: { limit?: number; exact?: boolean },
|
||
): Promise<LogOption[]> {
|
||
const { limit, exact } = options || {}
|
||
// Use worktree-aware loading to search across same-repo sessions
|
||
const worktreePaths = await getWorktreePaths(getOriginalCwd())
|
||
const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths)
|
||
// Enrich all logs to access customTitle metadata
|
||
const { logs } = await enrichLogs(allStatLogs, 0, allStatLogs.length)
|
||
const normalizedQuery = query.toLowerCase().trim()
|
||
|
||
const matchingLogs = logs.filter(log => {
|
||
const title = log.customTitle?.toLowerCase().trim()
|
||
if (!title) return false
|
||
return exact ? title === normalizedQuery : title.includes(normalizedQuery)
|
||
})
|
||
|
||
// Deduplicate by sessionId - multiple logs can have the same sessionId
|
||
// if they're different branches of the same conversation. Keep most recent.
|
||
const sessionIdToLog = new Map<UUID, LogOption>()
|
||
for (const log of matchingLogs) {
|
||
const sessionId = getSessionIdFromLog(log)
|
||
if (sessionId) {
|
||
const existing = sessionIdToLog.get(sessionId)
|
||
if (!existing || log.modified > existing.modified) {
|
||
sessionIdToLog.set(sessionId, log)
|
||
}
|
||
}
|
||
}
|
||
const deduplicated = Array.from(sessionIdToLog.values())
|
||
|
||
// Sort by recency
|
||
deduplicated.sort((a, b) => b.modified.getTime() - a.modified.getTime())
|
||
|
||
// Apply limit if specified
|
||
if (limit) {
|
||
return deduplicated.slice(0, limit)
|
||
}
|
||
|
||
return deduplicated
|
||
}
|
||
|
||
/**
 * Metadata entry types that can appear before a compact boundary but must
 * still be loaded (they're session-scoped, not message-scoped).
 * Kept as raw JSON string markers for cheap line filtering during streaming.
 */
const METADATA_TYPE_MARKERS = [
  '"type":"summary"',
  '"type":"custom-title"',
  '"type":"tag"',
  '"type":"agent-name"',
  '"type":"agent-color"',
  '"type":"agent-setting"',
  '"type":"mode"',
  '"type":"worktree-state"',
  '"type":"pr-link"',
]
// Pre-encoded once so the scan loops compare raw bytes, never strings.
const METADATA_MARKER_BUFS = METADATA_TYPE_MARKERS.map(m => Buffer.from(m))
// Longest marker is '"type":"worktree-state"' (23 bytes); +1 for the
// leading `{` = 24, and 25 leaves one byte of slack. A carry at least this
// long whose bytes 1..N match no marker is provably not a metadata line.
const METADATA_PREFIX_BOUND = 25
// null = carry spans whole chunk. Skips concat when carry provably isn't
|
||
// a metadata line (markers sit at byte 1 after `{`).
|
||
function resolveMetadataBuf(
|
||
carry: Buffer | null,
|
||
chunkBuf: Buffer,
|
||
): Buffer | null {
|
||
if (carry === null || carry.length === 0) return chunkBuf
|
||
if (carry.length < METADATA_PREFIX_BOUND) {
|
||
return Buffer.concat([carry, chunkBuf])
|
||
}
|
||
if (carry[0] === 0x7b /* { */) {
|
||
for (const m of METADATA_MARKER_BUFS) {
|
||
if (carry.compare(m, 0, m.length, 1, 1 + m.length) === 0) {
|
||
return Buffer.concat([carry, chunkBuf])
|
||
}
|
||
}
|
||
}
|
||
const firstNl = chunkBuf.indexOf(0x0a)
|
||
return firstNl === -1 ? null : chunkBuf.subarray(firstNl + 1)
|
||
}
|
||
|
||
/**
 * Lightweight forward scan of [0, endOffset) collecting only metadata-entry lines.
 * Uses raw Buffer chunks and byte-level marker matching — no readline, no per-line
 * string conversion for the ~99% of lines that are message content.
 *
 * Fast path: if a chunk contains zero markers (the common case — metadata entries
 * are <50 per session), the entire chunk is skipped without line splitting.
 *
 * NOTE(review): `end` is inclusive, so this assumes endOffset >= 1 — an
 * endOffset of 0 would pass end: -1 to createReadStream; confirm callers
 * never pass 0.
 */
async function scanPreBoundaryMetadata(
  filePath: string,
  endOffset: number,
): Promise<string[]> {
  const { createReadStream } = await import('fs')
  const NEWLINE = 0x0a

  // end is inclusive, hence endOffset - 1 to read exactly [0, endOffset).
  const stream = createReadStream(filePath, { end: endOffset - 1 })
  const metadataLines: string[] = []
  // Incomplete trailing line from the previous chunk; null = nothing carried
  // (or the carry was provably non-metadata and got dropped).
  let carry: Buffer | null = null

  for await (const chunk of stream) {
    const chunkBuf = chunk as Buffer
    const buf = resolveMetadataBuf(carry, chunkBuf)
    if (buf === null) {
      // Carry + chunk is one giant non-metadata line with no newline yet.
      carry = null
      continue
    }

    // Fast path: most chunks contain zero metadata markers. Skip line splitting.
    let hasAnyMarker = false
    for (const m of METADATA_MARKER_BUFS) {
      if (buf.includes(m)) {
        hasAnyMarker = true
        break
      }
    }

    if (hasAnyMarker) {
      let lineStart = 0
      let nl = buf.indexOf(NEWLINE)
      while (nl !== -1) {
        // Bounded marker check: only look within this line's byte range.
        // NOTE(review): this is a substring match anywhere in the line, so
        // message content that literally contains a marker string would be
        // collected too — presumably filtered by the JSON parse downstream.
        for (const m of METADATA_MARKER_BUFS) {
          const mIdx = buf.indexOf(m, lineStart)
          if (mIdx !== -1 && mIdx < nl) {
            metadataLines.push(buf.toString('utf-8', lineStart, nl))
            break
          }
        }
        lineStart = nl + 1
        nl = buf.indexOf(NEWLINE, lineStart)
      }
      carry = buf.subarray(lineStart)
    } else {
      // No markers in this chunk — just preserve the incomplete trailing line
      const lastNl = buf.lastIndexOf(NEWLINE)
      carry = lastNl >= 0 ? buf.subarray(lastNl + 1) : buf
    }

    // Guard against quadratic carry growth for pathological huge lines
    // (e.g., a 10 MB tool-output line with no newline). Real metadata entries
    // are <1 KB, so if carry exceeds this we're mid-message-content — drop it.
    if (carry.length > 64 * 1024) carry = null
  }

  // Final incomplete line (no trailing newline at endOffset)
  if (carry !== null && carry.length > 0) {
    for (const m of METADATA_MARKER_BUFS) {
      if (carry.includes(m)) {
        metadataLines.push(carry.toString('utf-8'))
        break
      }
    }
  }

  return metadataLines
}
/**
|
||
* Byte-level pre-filter that excises dead fork branches before parseJSONL.
|
||
*
|
||
* Every rewind/ctrl-z leaves an orphaned chain branch in the append-only
|
||
* JSONL forever. buildConversationChain walks parentUuid from the latest leaf
|
||
* and discards everything else, but by then parseJSONL has already paid to
|
||
* JSON.parse all of it. Measured on fork-heavy sessions:
|
||
*
|
||
* 41 MB, 99% dead: parseJSONL 56.0 ms -> 3.9 ms (-93%)
|
||
* 151 MB, 92% dead: 47.3 ms -> 9.4 ms (-80%)
|
||
*
|
||
* Sessions with few dead branches (5-7%) see a small win from the overhead of
|
||
* the index pass roughly canceling the parse savings, so this is gated on
|
||
* buffer size (same threshold as SKIP_PRECOMPACT_THRESHOLD).
|
||
*
|
||
* Relies on two invariants verified across 25k+ message lines in local
|
||
* sessions (0 violations):
|
||
*
|
||
* 1. Transcript messages always serialize with parentUuid as the first key.
|
||
* JSON.stringify emits keys in insertion order and recordTranscript's
|
||
* object literal puts parentUuid first. So `{"parentUuid":` is a stable
|
||
* line prefix that distinguishes transcript messages from metadata.
|
||
*
|
||
* 2. Top-level uuid detection is handled by a suffix check + depth check
|
||
* (see inline comment in the scan loop). toolUseResult/mcpMeta serialize
|
||
* AFTER uuid with arbitrary server-controlled objects, and agent_progress
|
||
* entries serialize a nested Message in data BEFORE uuid — both can
|
||
* produce nested `"uuid":"<36>","timestamp":"` bytes, so suffix alone
|
||
* is insufficient. When multiple suffix matches exist, a brace-depth
|
||
* scan disambiguates.
|
||
*
|
||
* The append-only write discipline guarantees parents appear at earlier file
|
||
* offsets than children, so walking backward from EOF always finds them.
|
||
*/
|
||
|
||
/**
|
||
* Disambiguate multiple `"uuid":"<36>","timestamp":"` matches in one line by
|
||
* finding the one at JSON nesting depth 1. String-aware brace counter:
|
||
* `{`/`}` inside string values don't count; `\"` and `\\` inside strings are
|
||
* handled. Candidates is sorted ascending (the scan loop produces them in
|
||
* byte order). Returns the first depth-1 candidate, or the last candidate if
|
||
* none are at depth 1 (shouldn't happen for well-formed JSONL — depth-1 is
|
||
* where the top-level object's fields live).
|
||
*
|
||
* Only called when ≥2 suffix matches exist (agent_progress with a nested
|
||
* Message, or mcpMeta with a coincidentally-suffixed object). Cost is
|
||
* O(max(candidates) - lineStart) — one forward byte pass, stopping at the
|
||
* first depth-1 hit.
|
||
*/
|
||
function pickDepthOneUuidCandidate(
|
||
buf: Buffer,
|
||
lineStart: number,
|
||
candidates: number[],
|
||
): number {
|
||
const QUOTE = 0x22
|
||
const BACKSLASH = 0x5c
|
||
const OPEN_BRACE = 0x7b
|
||
const CLOSE_BRACE = 0x7d
|
||
let depth = 0
|
||
let inString = false
|
||
let escapeNext = false
|
||
let ci = 0
|
||
for (let i = lineStart; ci < candidates.length; i++) {
|
||
if (i === candidates[ci]) {
|
||
if (depth === 1 && !inString) return candidates[ci]!
|
||
ci++
|
||
}
|
||
const b = buf[i]!
|
||
if (escapeNext) {
|
||
escapeNext = false
|
||
} else if (inString) {
|
||
if (b === BACKSLASH) escapeNext = true
|
||
else if (b === QUOTE) inString = false
|
||
} else if (b === QUOTE) inString = true
|
||
else if (b === OPEN_BRACE) depth++
|
||
else if (b === CLOSE_BRACE) depth--
|
||
}
|
||
return candidates.at(-1)!
|
||
}
|
||
|
||
/**
 * Byte-level dead-branch excision pass (see the large comment block above
 * for motivation and measured wins). Indexes every line of the JSONL
 * buffer, walks the parentUuid chain backward from the latest
 * non-sidechain leaf, and returns a stitched buffer containing only the
 * live chain plus all metadata lines — or the original buffer unchanged
 * when stitching wouldn't drop at least half the bytes.
 */
function walkChainBeforeParse(buf: Buffer): Buffer {
  const NEWLINE = 0x0a
  const OPEN_BRACE = 0x7b
  const QUOTE = 0x22
  const PARENT_PREFIX = Buffer.from('{"parentUuid":')
  const UUID_KEY = Buffer.from('"uuid":"')
  const SIDECHAIN_TRUE = Buffer.from('"isSidechain":true')
  const UUID_LEN = 36
  const TS_SUFFIX = Buffer.from('","timestamp":"')
  const TS_SUFFIX_LEN = TS_SUFFIX.length
  const PREFIX_LEN = PARENT_PREFIX.length
  const KEY_LEN = UUID_KEY.length

  // Stride-3 flat index of transcript messages: [lineStart, lineEnd, parentStart].
  // parentStart is the byte offset of the parent uuid's first char, or -1 for null.
  // Metadata lines (summary, mode, file-history-snapshot, etc.) go in metaRanges
  // unfiltered - they lack the parentUuid prefix and downstream needs all of them.
  const msgIdx: number[] = []
  const metaRanges: number[] = []
  const uuidToSlot = new Map<string, number>()

  let pos = 0
  const len = buf.length
  while (pos < len) {
    const nl = buf.indexOf(NEWLINE, pos)
    const lineEnd = nl === -1 ? len : nl + 1
    if (
      lineEnd - pos > PREFIX_LEN &&
      buf[pos] === OPEN_BRACE &&
      buf.compare(PARENT_PREFIX, 0, PREFIX_LEN, pos, pos + PREFIX_LEN) === 0
    ) {
      // `{"parentUuid":null,` or `{"parentUuid":"<36 chars>",`
      const parentStart =
        buf[pos + PREFIX_LEN] === QUOTE ? pos + PREFIX_LEN + 1 : -1
      // The top-level uuid is immediately followed by `","timestamp":"` in
      // user/assistant/attachment entries (the create* helpers put them
      // adjacent; both always defined). But the suffix is NOT unique:
      // - agent_progress entries carry a nested Message in data.message,
      //   serialized BEFORE top-level uuid — that inner Message has its
      //   own uuid,timestamp adjacent, so its bytes also satisfy the
      //   suffix check.
      // - mcpMeta/toolUseResult come AFTER top-level uuid and hold
      //   server-controlled Record<string,unknown> — a server returning
      //   {uuid:"<36>",timestamp:"..."} would also match.
      // Collect all suffix matches; a single one is unambiguous (common
      // case), multiple need a brace-depth check to pick the one at
      // JSON nesting depth 1. Entries with NO suffix match (some progress
      // variants put timestamp BEFORE uuid → `"uuid":"<36>"}` at EOL)
      // have only one `"uuid":"` and the first-match fallback is sound.
      let firstAny = -1
      let suffix0 = -1
      let suffixN: number[] | undefined
      let from = pos
      for (;;) {
        const next = buf.indexOf(UUID_KEY, from)
        if (next < 0 || next >= lineEnd) break
        if (firstAny < 0) firstAny = next
        const after = next + KEY_LEN + UUID_LEN
        if (
          after + TS_SUFFIX_LEN <= lineEnd &&
          buf.compare(
            TS_SUFFIX,
            0,
            TS_SUFFIX_LEN,
            after,
            after + TS_SUFFIX_LEN,
          ) === 0
        ) {
          // Lazily allocate suffixN only when a second match appears.
          if (suffix0 < 0) suffix0 = next
          else (suffixN ??= [suffix0]).push(next)
        }
        from = next + KEY_LEN
      }
      // uk = the chosen top-level uuid key offset, or -1 for no match.
      const uk = suffixN
        ? pickDepthOneUuidCandidate(buf, pos, suffixN)
        : suffix0 >= 0
          ? suffix0
          : firstAny
      if (uk >= 0) {
        const uuidStart = uk + KEY_LEN
        // UUIDs are pure ASCII so latin1 avoids UTF-8 decode overhead.
        const uuid = buf.toString('latin1', uuidStart, uuidStart + UUID_LEN)
        uuidToSlot.set(uuid, msgIdx.length)
        msgIdx.push(pos, lineEnd, parentStart)
      } else {
        // parentUuid prefix but no uuid key at all — treat as metadata.
        metaRanges.push(pos, lineEnd)
      }
    } else {
      metaRanges.push(pos, lineEnd)
    }
    pos = lineEnd
  }

  // Leaf = last non-sidechain entry. isSidechain is the 2nd or 3rd key
  // (after parentUuid, maybe logicalParentUuid) so indexOf from lineStart
  // finds it within a few dozen bytes when present; when absent it spills
  // into the next line, caught by the bounds check.
  let leafSlot = -1
  for (let i = msgIdx.length - 3; i >= 0; i -= 3) {
    const sc = buf.indexOf(SIDECHAIN_TRUE, msgIdx[i]!)
    if (sc === -1 || sc >= msgIdx[i + 1]!) {
      leafSlot = i
      break
    }
  }
  // No usable leaf (e.g. every entry is a sidechain): nothing to excise.
  if (leafSlot < 0) return buf

  // Walk parentUuid to root. Collect kept-message line starts and sum their
  // byte lengths so we can decide whether the concat is worth it. A dangling
  // parent (uuid not in file) is the normal termination for forked sessions
  // and post-boundary chains -- same semantics as buildConversationChain.
  // Correctness against index poisoning rests on the timestamp suffix check
  // above: a nested `"uuid":"` match without the suffix never becomes uk.
  const seen = new Set<number>()
  const chain = new Set<number>()
  let chainBytes = 0
  let slot: number | undefined = leafSlot
  while (slot !== undefined) {
    // Cycle guard: a malformed file with a parent loop must still terminate.
    if (seen.has(slot)) break
    seen.add(slot)
    chain.add(msgIdx[slot]!)
    chainBytes += msgIdx[slot + 1]! - msgIdx[slot]!
    const parentStart = msgIdx[slot + 2]!
    if (parentStart < 0) break
    const parent = buf.toString('latin1', parentStart, parentStart + UUID_LEN)
    slot = uuidToSlot.get(parent)
  }

  // parseJSONL cost scales with bytes, not entry count. A session can have
  // thousands of dead entries by count but only single-digit-% of bytes if
  // the dead branches are short turns and the live chain holds the fat
  // assistant responses (measured: 107 MB session, 69% dead entries, 30%
  // dead bytes - index+concat overhead exceeded parse savings). Gate on
  // bytes: only stitch if we would drop at least half the buffer. Metadata
  // is tiny so len - chainBytes approximates dead bytes closely enough.
  // Near break-even the concat memcpy (copying chainBytes into a fresh
  // allocation) dominates, so a conservative 50% gate stays safely on the
  // winning side.
  if (len - chainBytes < len >> 1) return buf

  // Merge chain entries with metadata in original file order. Both msgIdx and
  // metaRanges are already sorted by offset; interleave them into subarray
  // views and concat once.
  const parts: Buffer[] = []
  let m = 0
  for (let i = 0; i < msgIdx.length; i += 3) {
    const start = msgIdx[i]!
    while (m < metaRanges.length && metaRanges[m]! < start) {
      parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
      m += 2
    }
    if (chain.has(start)) {
      parts.push(buf.subarray(start, msgIdx[i + 1]!))
    }
  }
  // Flush metadata lines that trail the last transcript message.
  while (m < metaRanges.length) {
    parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
    m += 2
  }
  return Buffer.concat(parts)
}
/**
 * Loads all messages, summaries, and file history snapshots from a transcript file.
 * Returns the messages, summaries, custom titles, tags, file history snapshots, and attribution snapshots.
 *
 * @param filePath absolute path to the `.jsonl` transcript
 * @param opts.keepAllLeaves when true, skips the pre-parse dead-branch prune so
 *   every fork leaf survives (needed by callers that pick a branch themselves)
 * @returns every session-scoped map plus the set of resumable leaf UUIDs; a
 *   missing/unreadable file yields empty collections (the read is best-effort)
 */
export async function loadTranscriptFile(
  filePath: string,
  opts?: { keepAllLeaves?: boolean },
): Promise<{
  messages: Map<UUID, TranscriptMessage>
  summaries: Map<UUID, string>
  customTitles: Map<UUID, string>
  tags: Map<UUID, string>
  agentNames: Map<UUID, string>
  agentColors: Map<UUID, string>
  agentSettings: Map<UUID, string>
  prNumbers: Map<UUID, number>
  prUrls: Map<UUID, string>
  prRepositories: Map<UUID, string>
  modes: Map<UUID, string>
  worktreeStates: Map<UUID, PersistedWorktreeSession | null>
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
  attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
  contentReplacements: Map<UUID, ContentReplacementRecord[]>
  agentContentReplacements: Map<AgentId, ContentReplacementRecord[]>
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  leafUuids: Set<UUID>
}> {
  const messages = new Map<UUID, TranscriptMessage>()
  const summaries = new Map<UUID, string>()
  const customTitles = new Map<UUID, string>()
  const tags = new Map<UUID, string>()
  const agentNames = new Map<UUID, string>()
  const agentColors = new Map<UUID, string>()
  const agentSettings = new Map<UUID, string>()
  const prNumbers = new Map<UUID, number>()
  const prUrls = new Map<UUID, string>()
  const prRepositories = new Map<UUID, string>()
  const modes = new Map<UUID, string>()
  const worktreeStates = new Map<UUID, PersistedWorktreeSession | null>()
  const fileHistorySnapshots = new Map<UUID, FileHistorySnapshotMessage>()
  const attributionSnapshots = new Map<UUID, AttributionSnapshotMessage>()
  const contentReplacements = new Map<UUID, ContentReplacementRecord[]>()
  const agentContentReplacements = new Map<
    AgentId,
    ContentReplacementRecord[]
  >()
  // Array, not Map — commit order matters (nested collapses).
  const contextCollapseCommits: ContextCollapseCommitEntry[] = []
  // Last-wins — later entries supersede.
  let contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined

  try {
    // For large transcripts, avoid materializing megabytes of stale content.
    // Single forward chunked read: attribution-snapshot lines are skipped at
    // the fd level (never buffered), compact boundaries truncate the
    // accumulator in-stream. Peak allocation is the OUTPUT size, not the
    // file size — a 151 MB session that is 84% stale attr-snaps allocates
    // ~32 MB instead of 159+64 MB. This matters because mimalloc does not
    // return those pages to the OS even after JS-level GC frees the backing
    // buffers (measured: arrayBuffers=0 after Bun.gc(true) but RSS stuck at
    // ~316 MB on the old scan+strip path vs ~155 MB here).
    //
    // Pre-boundary metadata (agent-setting, mode, pr-link, etc.) is recovered
    // via a cheap byte-level forward scan of [0, boundary).
    let buf: Buffer | null = null
    let metadataLines: string[] | null = null
    let hasPreservedSegment = false
    if (!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP)) {
      const { size } = await stat(filePath)
      if (size > SKIP_PRECOMPACT_THRESHOLD) {
        const scan = await readTranscriptForLoad(filePath, size)
        buf = scan.postBoundaryBuf
        hasPreservedSegment = scan.hasPreservedSegment
        // >0 means we truncated pre-boundary bytes and must recover
        // session-scoped metadata from that range. A preservedSegment
        // boundary does not truncate (preserved messages are physically
        // pre-boundary), so offset stays 0 unless an EARLIER non-preserved
        // boundary already truncated — in which case the preserved messages
        // for the later boundary are post-that-earlier-boundary and were
        // kept, and we still want the metadata scan.
        if (scan.boundaryStartOffset > 0) {
          metadataLines = await scanPreBoundaryMetadata(
            filePath,
            scan.boundaryStartOffset,
          )
        }
      }
    }
    // Small-file path (or kill switch active): plain full read.
    buf ??= await readFile(filePath)
    // For large buffers (which here means readTranscriptForLoad output with
    // attr-snaps already stripped at the fd level — the <5MB readFile path
    // falls through the size gate below), the dominant cost is parsing dead
    // fork branches that buildConversationChain would discard anyway. Skip
    // when the caller needs all
    // leaves (loadAllLogsFromSessionFile for /insights picks the branch with
    // most user messages, not the latest), when the boundary has a
    // preservedSegment (those messages keep their pre-compact parentUuid on
    // disk -- applyPreservedSegmentRelinks splices them in-memory AFTER
    // parse, so a pre-parse chain walk would drop them as orphans), and when
    // CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP is set (that kill switch means
    // "load everything, skip nothing"; this is another skip-before-parse
    // optimization and the scan it depends on for hasPreservedSegment did
    // not run).
    if (
      !opts?.keepAllLeaves &&
      !hasPreservedSegment &&
      !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP) &&
      buf.length > SKIP_PRECOMPACT_THRESHOLD
    ) {
      buf = walkChainBeforeParse(buf)
    }

    // First pass: process metadata-only lines collected during the boundary scan.
    // These populate the session-scoped maps (agentSettings, modes, prNumbers,
    // etc.) for entries written before the compact boundary. Any overlap with
    // the post-boundary buffer is harmless — later values overwrite earlier ones.
    if (metadataLines && metadataLines.length > 0) {
      const metaEntries = parseJSONL<Entry>(
        Buffer.from(metadataLines.join('\n')),
      )
      for (const entry of metaEntries) {
        if (entry.type === 'summary' && entry.leafUuid) {
          summaries.set(entry.leafUuid, entry.summary)
        } else if (entry.type === 'custom-title' && entry.sessionId) {
          customTitles.set(entry.sessionId, entry.customTitle)
        } else if (entry.type === 'tag' && entry.sessionId) {
          tags.set(entry.sessionId, entry.tag)
        } else if (entry.type === 'agent-name' && entry.sessionId) {
          agentNames.set(entry.sessionId, entry.agentName)
        } else if (entry.type === 'agent-color' && entry.sessionId) {
          agentColors.set(entry.sessionId, entry.agentColor)
        } else if (entry.type === 'agent-setting' && entry.sessionId) {
          agentSettings.set(entry.sessionId, entry.agentSetting)
        } else if (entry.type === 'mode' && entry.sessionId) {
          modes.set(entry.sessionId, entry.mode)
        } else if (entry.type === 'worktree-state' && entry.sessionId) {
          worktreeStates.set(entry.sessionId, entry.worktreeSession)
        } else if (entry.type === 'pr-link' && entry.sessionId) {
          prNumbers.set(entry.sessionId, entry.prNumber)
          prUrls.set(entry.sessionId, entry.prUrl)
          prRepositories.set(entry.sessionId, entry.prRepository)
        }
      }
    }

    const entries = parseJSONL<Entry>(buf)

    // Bridge map for legacy progress entries: progress_uuid → progress_parent_uuid.
    // PR #24099 removed progress from isTranscriptMessage, so old transcripts with
    // progress in the parentUuid chain would truncate at buildConversationChain
    // when messages.get(progressUuid) returns undefined. Since transcripts are
    // append-only (parents before children), we record each progress→parent link
    // as we see it, chain-resolving through consecutive progress entries, then
    // rewrite any subsequent message whose parentUuid lands in the bridge.
    const progressBridge = new Map<UUID, UUID | null>()

    for (const entry of entries) {
      // Legacy progress check runs before the Entry-typed else-if chain —
      // progress is not in the Entry union, so checking it after TypeScript
      // has narrowed `entry` intersects to `never`.
      if (isLegacyProgressEntry(entry)) {
        // Chain-resolve through consecutive progress entries so a later
        // message pointing at the tail of a progress run bridges to the
        // nearest non-progress ancestor in one lookup.
        const parent = entry.parentUuid
        progressBridge.set(
          entry.uuid,
          parent && progressBridge.has(parent)
            ? (progressBridge.get(parent) ?? null)
            : parent,
        )
        continue
      }
      if (isTranscriptMessage(entry)) {
        if (entry.parentUuid && progressBridge.has(entry.parentUuid)) {
          entry.parentUuid = progressBridge.get(entry.parentUuid) ?? null
        }
        messages.set(entry.uuid, entry)
        // Compact boundary: prior marble-origami-commit entries reference
        // messages that won't be in the post-boundary chain. The >5MB
        // backward-scan path discards them naturally by never reading the
        // pre-boundary bytes; the <5MB path reads everything, so discard
        // here. Without this, getStats().collapsedSpans in /context
        // overcounts (projectView silently skips the stale commits but
        // they're still in the log).
        if (isCompactBoundaryMessage(entry)) {
          contextCollapseCommits.length = 0
          contextCollapseSnapshot = undefined
        }
      } else if (entry.type === 'summary' && entry.leafUuid) {
        // Session-scoped metadata below mirrors the pre-boundary chain
        // above; later entries overwrite earlier ones (last write wins).
        summaries.set(entry.leafUuid, entry.summary)
      } else if (entry.type === 'custom-title' && entry.sessionId) {
        customTitles.set(entry.sessionId, entry.customTitle)
      } else if (entry.type === 'tag' && entry.sessionId) {
        tags.set(entry.sessionId, entry.tag)
      } else if (entry.type === 'agent-name' && entry.sessionId) {
        agentNames.set(entry.sessionId, entry.agentName)
      } else if (entry.type === 'agent-color' && entry.sessionId) {
        agentColors.set(entry.sessionId, entry.agentColor)
      } else if (entry.type === 'agent-setting' && entry.sessionId) {
        agentSettings.set(entry.sessionId, entry.agentSetting)
      } else if (entry.type === 'mode' && entry.sessionId) {
        modes.set(entry.sessionId, entry.mode)
      } else if (entry.type === 'worktree-state' && entry.sessionId) {
        worktreeStates.set(entry.sessionId, entry.worktreeSession)
      } else if (entry.type === 'pr-link' && entry.sessionId) {
        prNumbers.set(entry.sessionId, entry.prNumber)
        prUrls.set(entry.sessionId, entry.prUrl)
        prRepositories.set(entry.sessionId, entry.prRepository)
      } else if (entry.type === 'file-history-snapshot') {
        fileHistorySnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'attribution-snapshot') {
        attributionSnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'content-replacement') {
        // Subagent decisions key by agentId (sidechain resume); main-thread
        // decisions key by sessionId (/resume).
        if (entry.agentId) {
          const existing = agentContentReplacements.get(entry.agentId) ?? []
          agentContentReplacements.set(entry.agentId, existing)
          existing.push(...entry.replacements)
        } else {
          const existing = contentReplacements.get(entry.sessionId) ?? []
          contentReplacements.set(entry.sessionId, existing)
          existing.push(...entry.replacements)
        }
      } else if (entry.type === 'marble-origami-commit') {
        contextCollapseCommits.push(entry)
      } else if (entry.type === 'marble-origami-snapshot') {
        contextCollapseSnapshot = entry
      }
    }
  } catch {
    // File doesn't exist or can't be read
  }

  applyPreservedSegmentRelinks(messages)
  applySnipRemovals(messages)

  // Compute leaf UUIDs once at load time
  // Only user/assistant messages should be considered as leaves for anchoring resume.
  // Other message types (system, attachment) are metadata or auxiliary and shouldn't
  // anchor a conversation chain.
  //
  // We use standard parent relationship for main chain detection, but also need to
  // handle cases where the last message is a system/metadata message.
  // For each conversation chain (identified by following parent links), the leaf
  // is the most recent user/assistant message.
  const allMessages = [...messages.values()]

  // Standard leaf computation using parent relationships
  const parentUuids = new Set(
    allMessages
      .map(msg => msg.parentUuid)
      .filter((uuid): uuid is UUID => uuid !== null),
  )

  // Find all terminal messages (messages with no children)
  const terminalMessages = allMessages.filter(msg => !parentUuids.has(msg.uuid))

  const leafUuids = new Set<UUID>()
  let hasCycle = false

  if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_pebble_leaf_prune', false)) {
    // Build a set of UUIDs that have user/assistant children
    // (these are mid-conversation nodes, not dead ends)
    const hasUserAssistantChild = new Set<UUID>()
    for (const msg of allMessages) {
      if (msg.parentUuid && (msg.type === 'user' || msg.type === 'assistant')) {
        hasUserAssistantChild.add(msg.parentUuid)
      }
    }

    // For each terminal message, walk back to find the nearest user/assistant ancestor.
    // Skip ancestors that already have user/assistant children - those are mid-conversation
    // nodes where the conversation continued (e.g., an assistant tool_use message whose
    // progress child is terminal, but whose tool_result child continues the conversation).
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          if (!hasUserAssistantChild.has(current.uuid)) {
            leafUuids.add(current.uuid)
          }
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  } else {
    // Original leaf computation: walk back from terminal messages to find
    // the nearest user/assistant ancestor unconditionally
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          leafUuids.add(current.uuid)
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  }

  if (hasCycle) {
    logEvent('tengu_transcript_parent_cycle', {})
  }

  return {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    agentContentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
    leafUuids,
  }
}
|
||
|
||
/**
|
||
* Loads all messages, summaries, file history snapshots, and attribution snapshots from a specific session file.
|
||
*/
|
||
async function loadSessionFile(sessionId: UUID): Promise<{
|
||
messages: Map<UUID, TranscriptMessage>
|
||
summaries: Map<UUID, string>
|
||
customTitles: Map<UUID, string>
|
||
tags: Map<UUID, string>
|
||
agentSettings: Map<UUID, string>
|
||
worktreeStates: Map<UUID, PersistedWorktreeSession | null>
|
||
fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
|
||
attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
|
||
contentReplacements: Map<UUID, ContentReplacementRecord[]>
|
||
contextCollapseCommits: ContextCollapseCommitEntry[]
|
||
contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
|
||
}> {
|
||
const sessionFile = join(
|
||
getSessionProjectDir() ?? getProjectDir(getOriginalCwd()),
|
||
`${sessionId}.jsonl`,
|
||
)
|
||
return loadTranscriptFile(sessionFile)
|
||
}
|
||
|
||
/**
 * Gets message UUIDs for a specific session without loading all sessions.
 * Memoized to avoid re-reading the same session file multiple times.
 *
 * NOTE: the memo cache is poked directly elsewhere in this file —
 * clearSessionMessagesCache() clears it and getLastSessionLog() primes it
 * with Promise-wrapped sets — so the cache key must remain the raw sessionId
 * and cached values stay promises.
 */
const getSessionMessages = memoize(
  async (sessionId: UUID): Promise<Set<UUID>> => {
    const { messages } = await loadSessionFile(sessionId)
    return new Set(messages.keys())
  },
  // Explicit resolver: cache key is the sessionId itself.
  (sessionId: UUID) => sessionId,
)
|
||
|
||
/**
|
||
* Clear the memoized session messages cache.
|
||
* Call after compaction when old message UUIDs are no longer valid.
|
||
*/
|
||
export function clearSessionMessagesCache(): void {
|
||
getSessionMessages.cache.clear?.()
|
||
}
|
||
|
||
/**
|
||
* Check if a message UUID exists in the session storage
|
||
*/
|
||
export async function doesMessageExistInSession(
|
||
sessionId: UUID,
|
||
messageUuid: UUID,
|
||
): Promise<boolean> {
|
||
const messageSet = await getSessionMessages(sessionId)
|
||
return messageSet.has(messageUuid)
|
||
}
|
||
|
||
/**
 * Loads a single session file and converts its latest non-sidechain chain
 * into a LogOption, or null when the session is empty or unreadable.
 */
export async function getLastSessionLog(
  sessionId: UUID,
): Promise<LogOption | null> {
  // Single read: load all session data at once instead of reading the file twice
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentSettings,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
  } = await loadSessionFile(sessionId)
  if (messages.size === 0) return null
  // Prime getSessionMessages cache so recordTranscript (called after REPL
  // mount on --resume) skips a second full file load. -170~227ms on large sessions.
  // Guard: only prime if cache is empty. Mid-session callers (e.g. IssueFeedback)
  // may call getLastSessionLog on the current session — overwriting a live cache
  // with a stale disk snapshot would lose unflushed UUIDs and break dedup.
  if (!getSessionMessages.cache.has(sessionId)) {
    getSessionMessages.cache.set(
      sessionId,
      // memoize stores the in-flight promise, not the value — match that shape.
      Promise.resolve(new Set(messages.keys())),
    )
  }

  // Find the most recent non-sidechain message
  const lastMessage = findLatestMessage(messages.values(), m => !m.isSidechain)
  if (!lastMessage) return null

  // Build the transcript chain from the last message
  const transcript = buildConversationChain(messages, lastMessage)

  // Summaries key off the leaf message UUID; titles/tags key off the session.
  const summary = summaries.get(lastMessage.uuid)
  const customTitle = customTitles.get(lastMessage.sessionId as UUID)
  const tag = tags.get(lastMessage.sessionId as UUID)
  const agentSetting = agentSettings.get(sessionId)
  return {
    ...convertToLogOption(
      transcript,
      0, // index — renumbered by callers after sorting
      summary,
      customTitle,
      buildFileHistorySnapshotChain(fileHistorySnapshots, transcript),
      tag,
      getTranscriptPathForSession(sessionId),
      buildAttributionSnapshotChain(attributionSnapshots, transcript),
      agentSetting,
      contentReplacements.get(sessionId) ?? [],
    ),
    worktreeSession: worktreeStates.get(sessionId),
    // Collapse bookkeeping is session-scoped — drop entries from other sessions.
    contextCollapseCommits: contextCollapseCommits.filter(
      e => e.sessionId === sessionId,
    ),
    contextCollapseSnapshot:
      contextCollapseSnapshot?.sessionId === sessionId
        ? contextCollapseSnapshot
        : undefined,
  }
}
|
||
|
||
/**
|
||
* Loads the list of message logs
|
||
* @param limit Optional limit on number of session files to load
|
||
* @returns List of message logs sorted by date
|
||
*/
|
||
export async function loadMessageLogs(limit?: number): Promise<LogOption[]> {
|
||
const sessionLogs = await fetchLogs(limit)
|
||
// fetchLogs returns lite (stat-only) logs — enrich them to get metadata.
|
||
// enrichLogs already filters out sidechains, empty sessions, etc.
|
||
const { logs: enriched } = await enrichLogs(
|
||
sessionLogs,
|
||
0,
|
||
sessionLogs.length,
|
||
)
|
||
|
||
// enrichLogs returns fresh unshared objects — mutate in place to avoid
|
||
// re-spreading every 30-field LogOption just to renumber the index.
|
||
const sorted = sortLogs(enriched)
|
||
sorted.forEach((log, i) => {
|
||
log.value = i
|
||
})
|
||
return sorted
|
||
}
|
||
|
||
/**
|
||
* Loads message logs from all project directories.
|
||
* @param limit Optional limit on number of session files to load per project (used when no index exists)
|
||
* @returns List of message logs sorted by date
|
||
*/
|
||
export async function loadAllProjectsMessageLogs(
|
||
limit?: number,
|
||
options?: { skipIndex?: boolean; initialEnrichCount?: number },
|
||
): Promise<LogOption[]> {
|
||
if (options?.skipIndex) {
|
||
// Load all sessions with full message data (e.g. for /insights analysis)
|
||
return loadAllProjectsMessageLogsFull(limit)
|
||
}
|
||
const result = await loadAllProjectsMessageLogsProgressive(
|
||
limit,
|
||
options?.initialEnrichCount ?? INITIAL_ENRICH_COUNT,
|
||
)
|
||
return result.logs
|
||
}
|
||
|
||
async function loadAllProjectsMessageLogsFull(
|
||
limit?: number,
|
||
): Promise<LogOption[]> {
|
||
const projectsDir = getProjectsDir()
|
||
|
||
let dirents: Dirent[]
|
||
try {
|
||
dirents = await readdir(projectsDir, { withFileTypes: true })
|
||
} catch {
|
||
return []
|
||
}
|
||
|
||
const projectDirs = dirents
|
||
.filter(dirent => dirent.isDirectory())
|
||
.map(dirent => join(projectsDir, dirent.name))
|
||
|
||
const logsPerProject = await Promise.all(
|
||
projectDirs.map(projectDir => getLogsWithoutIndex(projectDir, limit)),
|
||
)
|
||
const allLogs = logsPerProject.flat()
|
||
|
||
// Deduplicate — same session+leaf can appear in multiple project dirs.
|
||
// This path creates one LogOption per leaf, so use sessionId+leafUuid key.
|
||
const deduped = new Map<string, LogOption>()
|
||
for (const log of allLogs) {
|
||
const key = `${log.sessionId ?? ''}:${log.leafUuid ?? ''}`
|
||
const existing = deduped.get(key)
|
||
if (!existing || log.modified.getTime() > existing.modified.getTime()) {
|
||
deduped.set(key, log)
|
||
}
|
||
}
|
||
|
||
// deduped values are fresh from getLogsWithoutIndex — safe to mutate
|
||
const sorted = sortLogs([...deduped.values()])
|
||
sorted.forEach((log, i) => {
|
||
log.value = i
|
||
})
|
||
return sorted
|
||
}
|
||
|
||
export async function loadAllProjectsMessageLogsProgressive(
|
||
limit?: number,
|
||
initialEnrichCount: number = INITIAL_ENRICH_COUNT,
|
||
): Promise<SessionLogResult> {
|
||
const projectsDir = getProjectsDir()
|
||
|
||
let dirents: Dirent[]
|
||
try {
|
||
dirents = await readdir(projectsDir, { withFileTypes: true })
|
||
} catch {
|
||
return { logs: [], allStatLogs: [], nextIndex: 0 }
|
||
}
|
||
|
||
const projectDirs = dirents
|
||
.filter(dirent => dirent.isDirectory())
|
||
.map(dirent => join(projectsDir, dirent.name))
|
||
|
||
const rawLogs: LogOption[] = []
|
||
for (const projectDir of projectDirs) {
|
||
rawLogs.push(...(await getSessionFilesLite(projectDir, limit)))
|
||
}
|
||
// Deduplicate — same session can appear in multiple project dirs
|
||
const sorted = deduplicateLogsBySessionId(rawLogs)
|
||
|
||
const { logs, nextIndex } = await enrichLogs(sorted, 0, initialEnrichCount)
|
||
|
||
// enrichLogs returns fresh unshared objects — safe to mutate in place
|
||
logs.forEach((log, i) => {
|
||
log.value = i
|
||
})
|
||
return { logs, allStatLogs: sorted, nextIndex }
|
||
}
|
||
|
||
/**
 * (Documents loadSameRepoMessageLogs, declared below the SessionLogResult type.)
 * Loads message logs from all worktrees of the same git repository.
 * Falls back to loadMessageLogs if no worktrees provided.
 *
 * Uses pure filesystem metadata for fast loading.
 *
 * @param worktreePaths Array of worktree paths (from getWorktreePaths)
 * @param limit Optional limit on number of session files to load per project
 * @returns List of message logs sorted by date
 */
|
||
/**
 * Result of loading session logs with progressive enrichment support.
 * Returned by the *Progressive loaders; pass allStatLogs/nextIndex back
 * into enrichLogs to load further batches.
 */
export type SessionLogResult = {
  /** Enriched logs ready for display */
  logs: LogOption[]
  /** Full stat-only list for progressive loading (call enrichLogs to get more) */
  allStatLogs: LogOption[]
  /** Index into allStatLogs where progressive loading should continue from */
  nextIndex: number
}
|
||
|
||
export async function loadSameRepoMessageLogs(
|
||
worktreePaths: string[],
|
||
limit?: number,
|
||
initialEnrichCount: number = INITIAL_ENRICH_COUNT,
|
||
): Promise<LogOption[]> {
|
||
const result = await loadSameRepoMessageLogsProgressive(
|
||
worktreePaths,
|
||
limit,
|
||
initialEnrichCount,
|
||
)
|
||
return result.logs
|
||
}
|
||
|
||
export async function loadSameRepoMessageLogsProgressive(
|
||
worktreePaths: string[],
|
||
limit?: number,
|
||
initialEnrichCount: number = INITIAL_ENRICH_COUNT,
|
||
): Promise<SessionLogResult> {
|
||
logForDebugging(
|
||
`/resume: loading sessions for cwd=${getOriginalCwd()}, worktrees=[${worktreePaths.join(', ')}]`,
|
||
)
|
||
const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths, limit)
|
||
logForDebugging(`/resume: found ${allStatLogs.length} session files on disk`)
|
||
|
||
const { logs, nextIndex } = await enrichLogs(
|
||
allStatLogs,
|
||
0,
|
||
initialEnrichCount,
|
||
)
|
||
|
||
// enrichLogs returns fresh unshared objects — safe to mutate in place
|
||
logs.forEach((log, i) => {
|
||
log.value = i
|
||
})
|
||
return { logs, allStatLogs, nextIndex }
|
||
}
|
||
|
||
/**
 * Gets stat-only logs for worktree paths (no file reads).
 *
 * @param worktreePaths worktree roots whose project dirs should be scanned
 * @param limit optional cap on session files (only applied on fallback paths)
 * @returns stat-only LogOptions, deduplicated by sessionId
 */
async function getStatOnlyLogsForWorktrees(
  worktreePaths: string[],
  limit?: number,
): Promise<LogOption[]> {
  const projectsDir = getProjectsDir()

  if (worktreePaths.length <= 1) {
    const cwd = getOriginalCwd()
    const projectDir = getProjectDir(cwd)
    // NOTE(review): passes undefined rather than `limit` here — presumably
    // the single-project case is cheap enough to list fully; confirm intent.
    return getSessionFilesLite(projectDir, undefined, cwd)
  }

  // On Windows, drive letter case can differ between git worktree list
  // output (e.g. C:/Users/...) and how paths were stored in project
  // directories (e.g. c:/Users/...). Use case-insensitive comparison.
  const caseInsensitive = process.platform === 'win32'

  // Sort worktree paths by sanitized prefix length (longest first) so
  // more specific matches take priority over shorter ones. Without this,
  // a short prefix like -code-myrepo could match -code-myrepo-worktree1
  // before the longer, more specific prefix gets a chance.
  const indexed = worktreePaths.map(wt => {
    const sanitized = sanitizePath(wt)
    return {
      path: wt,
      prefix: caseInsensitive ? sanitized.toLowerCase() : sanitized,
    }
  })
  indexed.sort((a, b) => b.prefix.length - a.prefix.length)

  const allLogs: LogOption[] = []
  const seenDirs = new Set<string>()

  let allDirents: Dirent[]
  try {
    allDirents = await readdir(projectsDir, { withFileTypes: true })
  } catch (e) {
    // Fall back to current project
    logForDebugging(
      `Failed to read projects dir ${projectsDir}, falling back to current project: ${e}`,
    )
    const projectDir = getProjectDir(getOriginalCwd())
    return getSessionFilesLite(projectDir, limit, getOriginalCwd())
  }

  for (const dirent of allDirents) {
    if (!dirent.isDirectory()) continue
    const dirName = caseInsensitive ? dirent.name.toLowerCase() : dirent.name
    if (seenDirs.has(dirName)) continue

    // First (i.e. longest) matching worktree prefix claims the directory;
    // the `prefix + '-'` form avoids matching sibling dirs that merely
    // share a raw string prefix.
    for (const { path: wtPath, prefix } of indexed) {
      if (dirName === prefix || dirName.startsWith(prefix + '-')) {
        seenDirs.add(dirName)
        allLogs.push(
          ...(await getSessionFilesLite(
            join(projectsDir, dirent.name),
            undefined,
            wtPath,
          )),
        )
        break
      }
    }
  }

  // Deduplicate by sessionId — the same session can appear in multiple
  // worktree project dirs. Keep the entry with the newest modified time.
  return deduplicateLogsBySessionId(allLogs)
}
|
||
|
||
/**
|
||
* Retrieves the transcript for a specific agent by agentId.
|
||
* Directly loads the agent-specific transcript file.
|
||
* @param agentId The agent ID to search for
|
||
* @returns The conversation chain and budget replacement records for the agent,
|
||
* or null if not found
|
||
*/
|
||
export async function getAgentTranscript(agentId: AgentId): Promise<{
|
||
messages: Message[]
|
||
contentReplacements: ContentReplacementRecord[]
|
||
} | null> {
|
||
const agentFile = getAgentTranscriptPath(agentId)
|
||
|
||
try {
|
||
const { messages, agentContentReplacements } =
|
||
await loadTranscriptFile(agentFile)
|
||
|
||
// Find messages with matching agentId
|
||
const agentMessages = Array.from(messages.values()).filter(
|
||
msg => msg.agentId === agentId && msg.isSidechain,
|
||
)
|
||
|
||
if (agentMessages.length === 0) {
|
||
return null
|
||
}
|
||
|
||
// Find the most recent leaf message with this agentId
|
||
const parentUuids = new Set(agentMessages.map(msg => msg.parentUuid))
|
||
const leafMessage = findLatestMessage(
|
||
agentMessages,
|
||
msg => !parentUuids.has(msg.uuid),
|
||
)
|
||
|
||
if (!leafMessage) {
|
||
return null
|
||
}
|
||
|
||
// Build the conversation chain
|
||
const transcript = buildConversationChain(messages, leafMessage)
|
||
|
||
// Filter to only include messages with this agentId
|
||
const agentTranscript = transcript.filter(msg => msg.agentId === agentId)
|
||
|
||
return {
|
||
// Convert TranscriptMessage[] to Message[]
|
||
messages: agentTranscript.map(
|
||
({ isSidechain, parentUuid, ...msg }) => msg,
|
||
),
|
||
contentReplacements: agentContentReplacements.get(agentId) ?? [],
|
||
}
|
||
} catch {
|
||
return null
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Extract agent IDs from progress messages in the conversation.
|
||
* Agent/skill progress messages have type 'progress' with data.type
|
||
* 'agent_progress' or 'skill_progress' and data.agentId.
|
||
* This captures sync agents that emit progress messages during execution.
|
||
*/
|
||
export function extractAgentIdsFromMessages(messages: Message[]): string[] {
|
||
const agentIds: string[] = []
|
||
|
||
for (const message of messages) {
|
||
if (
|
||
message.type === 'progress' &&
|
||
message.data &&
|
||
typeof message.data === 'object' &&
|
||
'type' in message.data &&
|
||
(message.data.type === 'agent_progress' ||
|
||
message.data.type === 'skill_progress') &&
|
||
'agentId' in message.data &&
|
||
typeof message.data.agentId === 'string'
|
||
) {
|
||
agentIds.push(message.data.agentId)
|
||
}
|
||
}
|
||
|
||
return uniq(agentIds)
|
||
}
|
||
|
||
/**
|
||
* Extract teammate transcripts directly from AppState tasks.
|
||
* In-process teammates store their messages in task.messages,
|
||
* which is more reliable than loading from disk since each teammate turn
|
||
* uses a random agentId for transcript storage.
|
||
*/
|
||
export function extractTeammateTranscriptsFromTasks(tasks: {
|
||
[taskId: string]: {
|
||
type: string
|
||
identity?: { agentId: string }
|
||
messages?: Message[]
|
||
}
|
||
}): { [agentId: string]: Message[] } {
|
||
const transcripts: { [agentId: string]: Message[] } = {}
|
||
|
||
for (const task of Object.values(tasks)) {
|
||
if (
|
||
task.type === 'in_process_teammate' &&
|
||
task.identity?.agentId &&
|
||
task.messages &&
|
||
task.messages.length > 0
|
||
) {
|
||
transcripts[task.identity.agentId] = task.messages
|
||
}
|
||
}
|
||
|
||
return transcripts
|
||
}
|
||
|
||
/**
|
||
* Load subagent transcripts for the given agent IDs
|
||
*/
|
||
export async function loadSubagentTranscripts(
|
||
agentIds: string[],
|
||
): Promise<{ [agentId: string]: Message[] }> {
|
||
const results = await Promise.all(
|
||
agentIds.map(async agentId => {
|
||
try {
|
||
const result = await getAgentTranscript(asAgentId(agentId))
|
||
if (result && result.messages.length > 0) {
|
||
return { agentId, transcript: result.messages }
|
||
}
|
||
return null
|
||
} catch {
|
||
// Skip if transcript can't be loaded
|
||
return null
|
||
}
|
||
}),
|
||
)
|
||
|
||
const transcripts: { [agentId: string]: Message[] } = {}
|
||
for (const result of results) {
|
||
if (result) {
|
||
transcripts[result.agentId] = result.transcript
|
||
}
|
||
}
|
||
return transcripts
|
||
}
|
||
|
||
// Globs the session's subagents dir directly — unlike AppState.tasks, this survives task eviction.
|
||
export async function loadAllSubagentTranscriptsFromDisk(): Promise<{
|
||
[agentId: string]: Message[]
|
||
}> {
|
||
const subagentsDir = join(
|
||
getSessionProjectDir() ?? getProjectDir(getOriginalCwd()),
|
||
getSessionId(),
|
||
'subagents',
|
||
)
|
||
let entries: Dirent[]
|
||
try {
|
||
entries = await readdir(subagentsDir, { withFileTypes: true })
|
||
} catch {
|
||
return {}
|
||
}
|
||
// Filename format is the inverse of getAgentTranscriptPath() — keep in sync.
|
||
const agentIds = entries
|
||
.filter(
|
||
d =>
|
||
d.isFile() && d.name.startsWith('agent-') && d.name.endsWith('.jsonl'),
|
||
)
|
||
.map(d => d.name.slice('agent-'.length, -'.jsonl'.length))
|
||
return loadSubagentTranscripts(agentIds)
|
||
}
|
||
|
||
// Exported so useLogMessages can sync-compute the last loggable uuid
|
||
// without awaiting recordTranscript's return value (race-free hint tracking).
|
||
export function isLoggableMessage(m: Message): boolean {
|
||
if (m.type === 'progress') return false
|
||
// IMPORTANT: We deliberately filter out most attachments for non-ants because
|
||
// they have sensitive info for training that we don't want exposed to the public.
|
||
// When enabled, we allow hook_additional_context through since it contains
|
||
// user-configured hook output that is useful for session context on resume.
|
||
if (m.type === 'attachment' && getUserType() !== 'ant') {
|
||
if (
|
||
m.attachment.type === 'hook_additional_context' &&
|
||
isEnvTruthy(process.env.CLAUDE_CODE_SAVE_HOOK_ADDITIONAL_CONTEXT)
|
||
) {
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
return true
|
||
}
|
||
|
||
function collectReplIds(messages: readonly Message[]): Set<string> {
|
||
const ids = new Set<string>()
|
||
for (const m of messages) {
|
||
if (m.type === 'assistant' && Array.isArray(m.message.content)) {
|
||
for (const b of m.message.content) {
|
||
if (b.type === 'tool_use' && b.name === REPL_TOOL_NAME) {
|
||
ids.add(b.id)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return ids
|
||
}
|
||
|
||
/**
 * For external users, make REPL invisible in the persisted transcript: strip
 * REPL tool_use/tool_result pairs and promote isVirtual messages to real. On
 * --resume the model then sees a coherent native-tool-call history (assistant
 * called Bash, got result, called Read, got result) without the REPL wrapper.
 * Ant transcripts keep the wrapper so /share training data sees REPL usage.
 *
 * replIds is pre-collected from the FULL session array, not the slice being
 * transformed — recordTranscript receives incremental slices where the REPL
 * tool_use (earlier render) and its tool_result (later render, after async
 * execution) land in separate calls. A fresh per-call Set would miss the id
 * and leave an orphaned tool_result on disk.
 */
function transformMessagesForExternalTranscript(
  messages: Transcript,
  replIds: Set<string>,
): Transcript {
  return messages.flatMap(m => {
    if (m.type === 'assistant' && Array.isArray(m.message.content)) {
      const content = m.message.content
      // Pre-scan so untouched messages keep their ORIGINAL content array —
      // the `filtered !== content` identity check below depends on this to
      // avoid cloning messages that need no change.
      const hasRepl = content.some(
        b => b.type === 'tool_use' && b.name === REPL_TOOL_NAME,
      )
      const filtered = hasRepl
        ? content.filter(
            b => !(b.type === 'tool_use' && b.name === REPL_TOOL_NAME),
          )
        : content
      // Message consisted solely of REPL blocks — drop it entirely.
      if (filtered.length === 0) return []
      if (m.isVirtual) {
        // Promote virtual → real: drop the flag, keep (possibly filtered) content.
        const { isVirtual: _omit, ...rest } = m
        return [{ ...rest, message: { ...m.message, content: filtered } }]
      }
      if (filtered !== content) {
        return [{ ...m, message: { ...m.message, content: filtered } }]
      }
      return [m]
    }
    if (m.type === 'user' && Array.isArray(m.message.content)) {
      // Mirror of the assistant branch, keyed on tool_result blocks whose
      // tool_use_id belongs to a REPL invocation (from the pre-collected set).
      const content = m.message.content
      const hasRepl = content.some(
        b => b.type === 'tool_result' && replIds.has(b.tool_use_id),
      )
      const filtered = hasRepl
        ? content.filter(
            b => !(b.type === 'tool_result' && replIds.has(b.tool_use_id)),
          )
        : content
      if (filtered.length === 0) return []
      if (m.isVirtual) {
        const { isVirtual: _omit, ...rest } = m
        return [{ ...rest, message: { ...m.message, content: filtered } }]
      }
      if (filtered !== content) {
        return [{ ...m, message: { ...m.message, content: filtered } }]
      }
      return [m]
    }
    // string-content user, system, attachment: no REPL blocks possible, but
    // virtual messages are still promoted to real.
    if ('isVirtual' in m && m.isVirtual) {
      const { isVirtual: _omit, ...rest } = m
      return [rest]
    }
    return [m]
  }) as Transcript
}
|
||
|
||
export function cleanMessagesForLogging(
|
||
messages: Message[],
|
||
allMessages: readonly Message[] = messages,
|
||
): Transcript {
|
||
const filtered = messages.filter(isLoggableMessage) as Transcript
|
||
return getUserType() !== 'ant'
|
||
? transformMessagesForExternalTranscript(
|
||
filtered,
|
||
collectReplIds(allMessages),
|
||
)
|
||
: filtered
|
||
}
|
||
|
||
/**
|
||
* Gets a log by its index
|
||
* @param index Index in the sorted list of logs (0-based)
|
||
* @returns Log data or null if not found
|
||
*/
|
||
export async function getLogByIndex(index: number): Promise<LogOption | null> {
|
||
const logs = await loadMessageLogs()
|
||
return logs[index] || null
|
||
}
|
||
|
||
/**
 * Looks up unresolved tool uses in the transcript by tool_use_id.
 * Returns the assistant message containing the tool_use, or null if not found
 * or the tool call already has a tool_result.
 *
 * @param toolUseId The tool_use block id to search for.
 * @returns The assistant message holding the matching tool_use, or null when
 *   the id is absent, already resolved, or the transcript can't be read.
 */
export async function findUnresolvedToolUse(
  toolUseId: string,
): Promise<AssistantMessage | null> {
  try {
    const transcriptPath = getTranscriptPath()
    const { messages } = await loadTranscriptFile(transcriptPath)

    let toolUseMessage = null

    // Find the tool use but make sure there's not also a result.
    // NOTE: the outer scan deliberately keeps going after the tool_use is
    // found (the `break` only exits the inner block loop) — a LATER user
    // message may carry the matching tool_result, which makes the tool use
    // resolved and the correct answer null.
    for (const message of messages.values()) {
      if (message.type === 'assistant') {
        const content = message.message.content
        if (Array.isArray(content)) {
          for (const block of content) {
            if (block.type === 'tool_use' && block.id === toolUseId) {
              toolUseMessage = message
              break
            }
          }
        }
      } else if (message.type === 'user') {
        const content = message.message.content
        if (Array.isArray(content)) {
          for (const block of content) {
            if (
              block.type === 'tool_result' &&
              block.tool_use_id === toolUseId
            ) {
              // Found tool result, bail out
              return null
            }
          }
        }
      }
    }

    return toolUseMessage
  } catch {
    // Missing/unreadable transcript — treat as "no unresolved tool use"
    return null
  }
}
|
||
|
||
/**
|
||
* Gets all session JSONL files in a project directory with their stats.
|
||
* Returns a map of sessionId → {path, mtime, ctime, size}.
|
||
* Stats are batched via Promise.all to avoid serial syscalls in the hot loop.
|
||
*/
|
||
export async function getSessionFilesWithMtime(
|
||
projectDir: string,
|
||
): Promise<
|
||
Map<string, { path: string; mtime: number; ctime: number; size: number }>
|
||
> {
|
||
const sessionFilesMap = new Map<
|
||
string,
|
||
{ path: string; mtime: number; ctime: number; size: number }
|
||
>()
|
||
|
||
let dirents: Dirent[]
|
||
try {
|
||
dirents = await readdir(projectDir, { withFileTypes: true })
|
||
} catch {
|
||
// Directory doesn't exist - return empty map
|
||
return sessionFilesMap
|
||
}
|
||
|
||
const candidates: Array<{ sessionId: string; filePath: string }> = []
|
||
for (const dirent of dirents) {
|
||
if (!dirent.isFile() || !dirent.name.endsWith('.jsonl')) continue
|
||
const sessionId = validateUuid(basename(dirent.name, '.jsonl'))
|
||
if (!sessionId) continue
|
||
candidates.push({ sessionId, filePath: join(projectDir, dirent.name) })
|
||
}
|
||
|
||
await Promise.all(
|
||
candidates.map(async ({ sessionId, filePath }) => {
|
||
try {
|
||
const st = await stat(filePath)
|
||
sessionFilesMap.set(sessionId, {
|
||
path: filePath,
|
||
mtime: st.mtime.getTime(),
|
||
ctime: st.birthtime.getTime(),
|
||
size: st.size,
|
||
})
|
||
} catch {
|
||
logForDebugging(`Failed to stat session file: ${filePath}`)
|
||
}
|
||
}),
|
||
)
|
||
|
||
return sessionFilesMap
|
||
}
|
||
|
||
/**
 * Number of sessions to enrich on the initial load of the resume picker.
 * Each enrichment reads up to 128 KB per file (head + tail), so 50 sessions
 * means ~6.4 MB of I/O — fast on any modern filesystem while giving users
 * a much better initial view than the previous default of 10.
 */
const INITIAL_ENRICH_COUNT = 50

// Metadata scraped from the head/tail of a session JSONL file without a full
// parse (see readLiteMetadata for which end each field is taken from).
type LiteMetadata = {
  firstPrompt: string // first meaningful user prompt; '' when none was found
  gitBranch?: string // latest branch seen (tail occurrence wins over head)
  isSidechain: boolean // head marks the session as a sidechain
  projectPath?: string // "cwd" field from the first line
  teamName?: string // team sessions are filtered out of /resume
  customTitle?: string // user-set title; falls back to AI-generated title
  summary?: string
  tag?: string
  agentSetting?: string
  prNumber?: number // linked-PR metadata, when present in the tail
  prUrl?: string
  prRepository?: string
}
|
||
|
||
/**
 * Loads all logs from a single session file with full message data.
 * Builds a LogOption for each leaf message in the file.
 *
 * @param sessionFile Path to the session JSONL file.
 * @param projectPathOverride When provided, used as the projectPath for every
 *   log instead of the first message's cwd.
 * @returns One LogOption per leaf (conversation branch tip); empty array when
 *   the file has no messages.
 */
export async function loadAllLogsFromSessionFile(
  sessionFile: string,
  projectPathOverride?: string,
): Promise<LogOption[]> {
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    leafUuids,
  } = await loadTranscriptFile(sessionFile, { keepAllLeaves: true })

  if (messages.size === 0) return []

  const leafMessages: TranscriptMessage[] = []
  // Build parentUuid → children index once (O(n)), so trailing-message lookup is O(1) per leaf
  const childrenByParent = new Map<UUID, TranscriptMessage[]>()
  for (const msg of messages.values()) {
    if (leafUuids.has(msg.uuid)) {
      leafMessages.push(msg)
    } else if (msg.parentUuid) {
      const siblings = childrenByParent.get(msg.parentUuid)
      if (siblings) {
        siblings.push(msg)
      } else {
        childrenByParent.set(msg.parentUuid, [msg])
      }
    }
  }

  const logs: LogOption[] = []

  for (const leafMessage of leafMessages) {
    // Walk leaf → root to reconstruct this branch's linear conversation.
    const chain = buildConversationChain(messages, leafMessage)
    if (chain.length === 0) continue

    // Append trailing messages that are children of the leaf
    const trailingMessages = childrenByParent.get(leafMessage.uuid)
    if (trailingMessages) {
      // ISO-8601 UTC timestamps are lexically sortable
      trailingMessages.sort((a, b) =>
        a.timestamp < b.timestamp ? -1 : a.timestamp > b.timestamp ? 1 : 0,
      )
      chain.push(...trailingMessages)
    }

    const firstMessage = chain[0]!
    const sessionId = leafMessage.sessionId as UUID

    // Per-session metadata is keyed by sessionId; per-branch metadata
    // (summary) is keyed by the leaf's uuid.
    logs.push({
      date: leafMessage.timestamp,
      messages: removeExtraFields(chain),
      fullPath: sessionFile,
      value: 0, // sequential index assigned later by the caller/sorter
      created: new Date(firstMessage.timestamp),
      modified: new Date(leafMessage.timestamp),
      firstPrompt: extractFirstPrompt(chain),
      messageCount: countVisibleMessages(chain),
      isSidechain: firstMessage.isSidechain ?? false,
      sessionId,
      leafUuid: leafMessage.uuid,
      summary: summaries.get(leafMessage.uuid),
      customTitle: customTitles.get(sessionId),
      tag: tags.get(sessionId),
      agentName: agentNames.get(sessionId),
      agentColor: agentColors.get(sessionId),
      agentSetting: agentSettings.get(sessionId),
      mode: modes.get(sessionId) as LogOption['mode'],
      prNumber: prNumbers.get(sessionId),
      prUrl: prUrls.get(sessionId),
      prRepository: prRepositories.get(sessionId),
      gitBranch: leafMessage.gitBranch,
      projectPath: projectPathOverride ?? firstMessage.cwd,
      fileHistorySnapshots: buildFileHistorySnapshotChain(
        fileHistorySnapshots,
        chain,
      ),
      attributionSnapshots: buildAttributionSnapshotChain(
        attributionSnapshots,
        chain,
      ),
      contentReplacements: contentReplacements.get(sessionId) ?? [],
    })
  }

  return logs
}
|
||
|
||
/**
|
||
* Gets logs by loading all session files fully, bypassing the session index.
|
||
* Use this when you need full message data (e.g., for /insights analysis).
|
||
|
||
*/
|
||
async function getLogsWithoutIndex(
|
||
projectDir: string,
|
||
limit?: number,
|
||
): Promise<LogOption[]> {
|
||
const sessionFilesMap = await getSessionFilesWithMtime(projectDir)
|
||
if (sessionFilesMap.size === 0) return []
|
||
|
||
// If limit specified, only load N most recent files by mtime
|
||
let filesToProcess: Array<{ path: string; mtime: number }>
|
||
if (limit && sessionFilesMap.size > limit) {
|
||
filesToProcess = [...sessionFilesMap.values()]
|
||
.sort((a, b) => b.mtime - a.mtime)
|
||
.slice(0, limit)
|
||
} else {
|
||
filesToProcess = [...sessionFilesMap.values()]
|
||
}
|
||
|
||
const logs: LogOption[] = []
|
||
for (const fileInfo of filesToProcess) {
|
||
try {
|
||
const fileLogOptions = await loadAllLogsFromSessionFile(fileInfo.path)
|
||
logs.push(...fileLogOptions)
|
||
} catch {
|
||
logForDebugging(`Failed to load session file: ${fileInfo.path}`)
|
||
}
|
||
}
|
||
|
||
return logs
|
||
}
|
||
|
||
/**
 * Reads the first and last ~64KB of a JSONL file and extracts lite metadata.
 *
 * Head (first 64KB): isSidechain, projectPath, teamName, firstPrompt.
 * Tail (last 64KB): customTitle, tag, PR link, latest gitBranch.
 *
 * Accepts a shared buffer to avoid per-file allocation overhead.
 *
 * @param filePath Session JSONL file to scrape.
 * @param fileSize Size of the file in bytes (from a prior stat).
 * @param buf Reusable read buffer shared across calls in a batch.
 */
async function readLiteMetadata(
  filePath: string,
  fileSize: number,
  buf: Buffer,
): Promise<LiteMetadata> {
  const { head, tail } = await readHeadAndTail(filePath, fileSize, buf)
  if (!head) return { firstPrompt: '', isSidechain: false }

  // Extract stable metadata from the first line via string search.
  // Works even when the first line is truncated (>64KB message).
  const isSidechain =
    head.includes('"isSidechain":true') || head.includes('"isSidechain": true')
  const projectPath = extractJsonStringField(head, 'cwd')
  const teamName = extractJsonStringField(head, 'teamName')
  const agentSetting = extractJsonStringField(head, 'agentSetting')

  // Prefer the last-prompt tail entry — captured by extractFirstPrompt at
  // write time (filtered, authoritative) and shows what the user was most
  // recently doing. Head scan is the fallback for sessions written before
  // last-prompt entries existed. Raw string scrapes of head are last resort
  // and catch array-format content blocks (VS Code <ide_selection> metadata).
  const firstPrompt =
    extractLastJsonStringField(tail, 'lastPrompt') ||
    extractFirstPromptFromChunk(head) ||
    extractJsonStringFieldPrefix(head, 'content', 200) ||
    extractJsonStringFieldPrefix(head, 'text', 200) ||
    ''

  // Extract tail metadata via string search (last occurrence wins).
  // User titles (customTitle field, from custom-title entries) win over
  // AI titles (aiTitle field, from ai-title entries). The distinct field
  // names mean extractLastJsonStringField naturally disambiguates.
  const customTitle =
    extractLastJsonStringField(tail, 'customTitle') ??
    extractLastJsonStringField(head, 'customTitle') ??
    extractLastJsonStringField(tail, 'aiTitle') ??
    extractLastJsonStringField(head, 'aiTitle')
  const summary = extractLastJsonStringField(tail, 'summary')
  const tag = extractLastJsonStringField(tail, 'tag')
  const gitBranch =
    extractLastJsonStringField(tail, 'gitBranch') ??
    extractJsonStringField(head, 'gitBranch')

  // PR link fields — prNumber is a number not a string, so try both
  const prUrl = extractLastJsonStringField(tail, 'prUrl')
  const prRepository = extractLastJsonStringField(tail, 'prRepository')
  let prNumber: number | undefined
  const prNumStr = extractLastJsonStringField(tail, 'prNumber')
  if (prNumStr) {
    prNumber = parseInt(prNumStr, 10) || undefined
  }
  if (!prNumber) {
    // Numeric fallback: 11 is the length of '"prNumber":'; the 14-char
    // window after the colon is plenty for any realistic PR number.
    const prNumMatch = tail.lastIndexOf('"prNumber":')
    if (prNumMatch >= 0) {
      const afterColon = tail.slice(prNumMatch + 11, prNumMatch + 25)
      const num = parseInt(afterColon.trim(), 10)
      if (num > 0) prNumber = num
    }
  }

  return {
    firstPrompt,
    gitBranch,
    isSidechain,
    projectPath,
    teamName,
    customTitle,
    summary,
    tag,
    agentSetting,
    prNumber,
    prUrl,
    prRepository,
  }
}
|
||
|
||
/**
 * Scans a chunk of text for the first meaningful user prompt.
 *
 * Walks the chunk line-by-line (each line is one JSONL entry), cheaply
 * pre-filtering with substring checks before paying for a JSON parse.
 * Returns '' when no meaningful prompt is found.
 */
function extractFirstPromptFromChunk(chunk: string): string {
  let start = 0
  let hasTickMessages = false
  let firstCommandFallback = ''
  while (start < chunk.length) {
    const newlineIdx = chunk.indexOf('\n', start)
    // Last line may be unterminated (truncated chunk) — still process it.
    const line =
      newlineIdx >= 0 ? chunk.slice(start, newlineIdx) : chunk.slice(start)
    start = newlineIdx >= 0 ? newlineIdx + 1 : chunk.length

    // Cheap substring pre-filters before the (expensive) JSON parse:
    if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) {
      continue
    }
    if (line.includes('"tool_result"')) continue
    if (line.includes('"isMeta":true') || line.includes('"isMeta": true'))
      continue

    try {
      const entry = jsonParse(line) as Record<string, unknown>
      if (entry.type !== 'user') continue

      const message = entry.message as Record<string, unknown> | undefined
      if (!message) continue

      const content = message.content
      // Collect all text values from the message content. For array content
      // (common in VS Code where IDE metadata tags come before the user's
      // actual prompt), iterate all text blocks so we don't miss the real
      // prompt hidden behind <ide_selection>/<ide_opened_file> blocks.
      const texts: string[] = []
      if (typeof content === 'string') {
        texts.push(content)
      } else if (Array.isArray(content)) {
        for (const block of content) {
          const b = block as Record<string, unknown>
          if (b.type === 'text' && typeof b.text === 'string') {
            texts.push(b.text as string)
          }
        }
      }

      for (const text of texts) {
        if (!text) continue

        let result = text.replace(/\n/g, ' ').trim()

        // Skip command messages (slash commands) but remember the first one
        // as a fallback title. Matches skip logic in
        // getFirstMeaningfulUserMessageTextContent, but instead of discarding
        // command messages entirely, we format them cleanly (e.g. "/clear")
        // so the session still appears in the resume picker.
        const commandNameTag = extractTag(result, COMMAND_NAME_TAG)
        if (commandNameTag) {
          const name = commandNameTag.replace(/^\//, '')
          const commandArgs = extractTag(result, 'command-args')?.trim() || ''
          if (builtInCommandNames().has(name) || !commandArgs) {
            if (!firstCommandFallback) {
              firstCommandFallback = commandNameTag
            }
            continue
          }
          // Custom command with meaningful args — use clean display
          return commandArgs
            ? `${commandNameTag} ${commandArgs}`
            : commandNameTag
        }

        // Format bash input with ! prefix before the generic XML skip
        const bashInput = extractTag(result, 'bash-input')
        if (bashInput) return `! ${bashInput}`

        if (SKIP_FIRST_PROMPT_PATTERN.test(result)) {
          // Track tick-only sessions so the proactive fallback below fires.
          if (
            (feature('PROACTIVE') || feature('KAIROS')) &&
            result.startsWith(`<${TICK_TAG}>`)
          )
            hasTickMessages = true
          continue
        }
        // Cap the display length for the resume picker.
        if (result.length > 200) {
          result = result.slice(0, 200).trim() + '…'
        }
        return result
      }
    } catch {
      // Malformed/truncated JSONL line — skip and keep scanning.
      continue
    }
  }
  // Session started with a slash command but had no subsequent real message —
  // use the clean command name so the session still appears in the resume picker
  if (firstCommandFallback) return firstCommandFallback
  // Proactive sessions have only tick messages — give them a synthetic prompt
  // so they're not filtered out by enrichLogs
  if ((feature('PROACTIVE') || feature('KAIROS')) && hasTickMessages)
    return 'Proactive session'
  return ''
}
|
||
|
||
/**
|
||
* Like extractJsonStringField but returns the first `maxLen` characters of the
|
||
* value even when the closing quote is missing (truncated buffer). Newline
|
||
* escapes are replaced with spaces and the result is trimmed.
|
||
*/
|
||
function extractJsonStringFieldPrefix(
|
||
text: string,
|
||
key: string,
|
||
maxLen: number,
|
||
): string {
|
||
const patterns = [`"${key}":"`, `"${key}": "`]
|
||
for (const pattern of patterns) {
|
||
const idx = text.indexOf(pattern)
|
||
if (idx < 0) continue
|
||
|
||
const valueStart = idx + pattern.length
|
||
// Grab up to maxLen characters from the value, stopping at closing quote
|
||
let i = valueStart
|
||
let collected = 0
|
||
while (i < text.length && collected < maxLen) {
|
||
if (text[i] === '\\') {
|
||
i += 2 // skip escaped char
|
||
collected++
|
||
continue
|
||
}
|
||
if (text[i] === '"') break
|
||
i++
|
||
collected++
|
||
}
|
||
const raw = text.slice(valueStart, i)
|
||
return raw.replace(/\\n/g, ' ').replace(/\\t/g, ' ').trim()
|
||
}
|
||
return ''
|
||
}
|
||
|
||
/**
|
||
* Deduplicates logs by sessionId, keeping the entry with the newest
|
||
* modified time. Returns sorted logs with sequential value indices.
|
||
*/
|
||
function deduplicateLogsBySessionId(logs: LogOption[]): LogOption[] {
|
||
const deduped = new Map<string, LogOption>()
|
||
for (const log of logs) {
|
||
if (!log.sessionId) continue
|
||
const existing = deduped.get(log.sessionId)
|
||
if (!existing || log.modified.getTime() > existing.modified.getTime()) {
|
||
deduped.set(log.sessionId, log)
|
||
}
|
||
}
|
||
return sortLogs([...deduped.values()]).map((log, i) => ({
|
||
...log,
|
||
value: i,
|
||
}))
|
||
}
|
||
|
||
/**
|
||
* Returns lite LogOption[] from pure filesystem metadata (stat only).
|
||
* No file reads — instant. Call `enrichLogs` to enrich
|
||
* visible sessions with firstPrompt, gitBranch, customTitle, etc.
|
||
*/
|
||
export async function getSessionFilesLite(
|
||
projectDir: string,
|
||
limit?: number,
|
||
projectPath?: string,
|
||
): Promise<LogOption[]> {
|
||
const sessionFilesMap = await getSessionFilesWithMtime(projectDir)
|
||
|
||
// Sort by mtime descending and apply limit
|
||
let entries = [...sessionFilesMap.entries()].sort(
|
||
(a, b) => b[1].mtime - a[1].mtime,
|
||
)
|
||
if (limit && entries.length > limit) {
|
||
entries = entries.slice(0, limit)
|
||
}
|
||
|
||
const logs: LogOption[] = []
|
||
|
||
for (const [sessionId, fileInfo] of entries) {
|
||
logs.push({
|
||
date: new Date(fileInfo.mtime).toISOString(),
|
||
messages: [],
|
||
isLite: true,
|
||
fullPath: fileInfo.path,
|
||
value: 0,
|
||
created: new Date(fileInfo.ctime),
|
||
modified: new Date(fileInfo.mtime),
|
||
firstPrompt: '',
|
||
messageCount: 0,
|
||
fileSize: fileInfo.size,
|
||
isSidechain: false,
|
||
sessionId,
|
||
projectPath,
|
||
})
|
||
}
|
||
|
||
// logs are freshly pushed above — safe to mutate in place
|
||
const sorted = sortLogs(logs)
|
||
sorted.forEach((log, i) => {
|
||
log.value = i
|
||
})
|
||
return sorted
|
||
}
|
||
|
||
/**
|
||
* Enriches a lite log with metadata from its JSONL file.
|
||
* Returns the enriched log, or null if the log has no meaningful content
|
||
* (no firstPrompt, no customTitle — e.g., metadata-only session files).
|
||
*/
|
||
async function enrichLog(
|
||
log: LogOption,
|
||
readBuf: Buffer,
|
||
): Promise<LogOption | null> {
|
||
if (!log.isLite || !log.fullPath) return log
|
||
|
||
const meta = await readLiteMetadata(log.fullPath, log.fileSize ?? 0, readBuf)
|
||
|
||
const enriched: LogOption = {
|
||
...log,
|
||
isLite: false,
|
||
firstPrompt: meta.firstPrompt,
|
||
gitBranch: meta.gitBranch,
|
||
isSidechain: meta.isSidechain,
|
||
teamName: meta.teamName,
|
||
customTitle: meta.customTitle,
|
||
summary: meta.summary,
|
||
tag: meta.tag,
|
||
agentSetting: meta.agentSetting,
|
||
prNumber: meta.prNumber,
|
||
prUrl: meta.prUrl,
|
||
prRepository: meta.prRepository,
|
||
projectPath: meta.projectPath ?? log.projectPath,
|
||
}
|
||
|
||
// Provide a fallback title for sessions where we couldn't extract the first
|
||
// prompt (e.g., large first messages that exceed the 16KB read buffer).
|
||
// Previously these sessions were silently dropped, making them inaccessible
|
||
// via /resume after crashes or large-context sessions.
|
||
if (!enriched.firstPrompt && !enriched.customTitle) {
|
||
enriched.firstPrompt = '(session)'
|
||
}
|
||
// Filter: skip sidechains and agent sessions
|
||
if (enriched.isSidechain) {
|
||
logForDebugging(
|
||
`Session ${log.sessionId} filtered from /resume: isSidechain=true`,
|
||
)
|
||
return null
|
||
}
|
||
if (enriched.teamName) {
|
||
logForDebugging(
|
||
`Session ${log.sessionId} filtered from /resume: teamName=${enriched.teamName}`,
|
||
)
|
||
return null
|
||
}
|
||
|
||
return enriched
|
||
}
|
||
|
||
/**
|
||
* Enriches enough lite logs from `allLogs` (starting at `startIndex`) to
|
||
* produce `count` valid results. Returns the valid enriched logs and the
|
||
* index where scanning stopped (for progressive loading to continue from).
|
||
*/
|
||
export async function enrichLogs(
|
||
allLogs: LogOption[],
|
||
startIndex: number,
|
||
count: number,
|
||
): Promise<{ logs: LogOption[]; nextIndex: number }> {
|
||
const result: LogOption[] = []
|
||
const readBuf = Buffer.alloc(LITE_READ_BUF_SIZE)
|
||
let i = startIndex
|
||
|
||
while (i < allLogs.length && result.length < count) {
|
||
const log = allLogs[i]!
|
||
i++
|
||
|
||
const enriched = await enrichLog(log, readBuf)
|
||
if (enriched) {
|
||
result.push(enriched)
|
||
}
|
||
}
|
||
|
||
const scanned = i - startIndex
|
||
const filtered = scanned - result.length
|
||
if (filtered > 0) {
|
||
logForDebugging(
|
||
`/resume: enriched ${scanned} sessions, ${filtered} filtered out, ${result.length} visible (${allLogs.length - i} remaining on disk)`,
|
||
)
|
||
}
|
||
|
||
return { logs: result, nextIndex: i }
|
||
}
|