import { execFileSync } from 'child_process';
import { tmpdir } from 'os';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
  existsSync as existsSyncEvents,
  mkdirSync,
  mkdtempSync,
  readFileSync,
  rmSync,
  writeFileSync,
} from 'fs';
import { join } from 'path';
import type { MovementResult } from './agent-loop.js';
import type { PieceDef } from './piece-runner.js';

// Swap the agent loop for a mock so every test can script movement results.
vi.mock('./agent-loop.js', () => ({
  executeMovement: vi.fn(),
}));

import { executeMovement } from './agent-loop.js';
import {
  loadPiece,
  runPiece,
  normalizeRequiredMcp,
  validatePieceDef,
  validateAllowedSshConnections,
} from './piece-runner.js';

const executeMovementMock = vi.mocked(executeMovement);

/**
 * Builds a fresh in-memory piece fixture that starts at `execute`.
 *
 * Every movement uses its own name as instruction and has no tools or rules,
 * so transitions are driven by `default_next` and by the mocked results.
 */
function makePiece(): PieceDef {
  type Movement = PieceDef['movements'][number];

  const movement = (name: string, edit: boolean, persona: string, defaultNext: string): Movement => ({
    name,
    edit,
    persona,
    instruction: name,
    allowed_tools: [],
    rules: [],
    default_next: defaultNext,
  });

  return {
    name: 'test-piece',
    description: 'test',
    max_movements: 10,
    initial_movement: 'execute',
    // NOTE(review): 'verify' is listed twice (second entry routes to 'plan') —
    // confirm the duplicate name is intentional.
    movements: [
      movement('execute', true, 'worker', 'verify'),
      movement('verify', false, 'reviewer', 'execute'),
      movement('analyze', true, 'analyst', 'COMPLETE'),
      movement('plan', false, 'planner', 'analyze'),
      movement('verify', false, 'reviewer', 'plan'),
    ],
  };
}

/** Creates a unique temporary directory to act as a job workspace. */
function makeWorkspace(): string {
  const prefix = join(tmpdir(), 'piece-runner-test-');
  return mkdtempSync(prefix);
}

/** Creates a temporary workspace that is a git repository with a committed README.md. */
function makeGitWorkspace(): string {
  const workspace = makeWorkspace();
  const setupCommands = [
    ['init', '--initial-branch=main'],
    ['config', 'user.name', 'Test User'],
    ['config', 'user.email', 'test@example.com'],
  ];
  for (const args of setupCommands) {
    execFileSync('git', args, { cwd: workspace });
  }
  writeFileSync(join(workspace, 'README.md'), 'line1\nline2\n', 'utf-8');
execFileSync('git', ['add', 'README.md'], { cwd: workspace });
  execFileSync('git', ['commit', '-m', 'init'], { cwd: workspace });
  return workspace;
}

// Checks what runPiece passes to executeMovement: the instruction text across
// verify loops and the owner/user ids in the movement context.
describe('piece-runner review feedback flow', () => {
  let workspacePath = '';

  beforeEach(() => {
    executeMovementMock.mockReset();
  });

  // Remove the per-test temp workspace, if the test created one.
  afterEach(() => {
    if (!workspacePath) return;
    rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  it('carries cumulative verify feedback into later execute/analyze movements', async () => {
    workspacePath = makeWorkspace();
    const seenInstructions: string[] = [];
    // Results are handed out in call order, one per executeMovement invocation.
    const scripted: MovementResult[] = [
      { next: 'verify', output: 'first draft', toolsUsed: [] },
      { next: 'execute', output: 'review 1: fix title', toolsUsed: [] },
      { next: 'verify', output: 'second draft', toolsUsed: [] },
      { next: 'analyze', output: 'review 2: add conclusion', toolsUsed: [] },
      { next: 'COMPLETE', output: 'done', toolsUsed: [] },
    ];

    executeMovementMock.mockImplementation(async (_movement, instruction) => {
      seenInstructions.push(instruction);
      const step = scripted.shift();
      if (!step) throw new Error('no mock result left');
      return step;
    });

    const outcome = await runPiece(makePiece(), 'TASK', {} as never, workspacePath);

    expect(outcome.status).toBe('completed');
    expect(seenInstructions[0]).toBe('TASK');

    // Third call: only the first review has happened so far.
    const afterFirstReview = seenInstructions[2];
    expect(afterFirstReview).toContain('これまでのレビュー指摘');
    expect(afterFirstReview).toContain('review 1: fix title');
    expect(afterFirstReview).not.toContain('review 2: add conclusion');

    // Fifth call: both reviews must be present.
    const afterSecondReview = seenInstructions[4];
    expect(afterSecondReview).toContain('review 1: fix title');
    expect(afterSecondReview).toContain('review 2: add conclusion');
  });

  it('defaults ownerId and userId to "local" for owner-less (no-auth) jobs', async () => {
    workspacePath = makeWorkspace();
    let capturedCtx: { ownerId?: unknown; userId?: unknown } | undefined;

    executeMovementMock.mockImplementation(async (_movement, _instruction, _client, ctx) => {
      capturedCtx = ctx as typeof capturedCtx;
      return { next: 'COMPLETE', output: 'done', toolsUsed: [] };
    });

    // options omit ownerId/userId entirely — a no-auth job has no owner.
    // (runPiece args: piece, instruction, client, workspace, callbacks, toolsConfig, options)
    await runPiece(makePiece(), 'TASK', {} as never, workspacePath, undefined, undefined, {});

    expect(capturedCtx?.ownerId).toBe('local');
    expect(capturedCtx?.userId).toBe('local');
  });

  it('preserves a real ownerId/userId when the job is owned (auth mode)', async () => {
    workspacePath = makeWorkspace();
    let capturedCtx: { ownerId?: unknown; userId?: unknown } | undefined;

    executeMovementMock.mockImplementation(async (_movement, _instruction, _client, ctx) => {
      capturedCtx = ctx as typeof capturedCtx;
      return { next: 'COMPLETE', output: 'done', toolsUsed: [] };
    });

    const owned = { ownerId: 'alice', userId: 'alice' };
    await runPiece(makePiece(), 'TASK', {} as never, workspacePath, undefined, undefined, owned);

    expect(capturedCtx?.ownerId).toBe('alice');
    expect(capturedCtx?.userId).toBe('alice');
  });

  it('appends safe git status and diff context after verify loops', async () => {
    workspacePath = makeGitWorkspace();
    const instructions: string[] = [];
    let callIndex = 0;

    executeMovementMock.mockImplementation(async (_movement, instruction) => {
      instructions.push(instruction);

      if (callIndex === 0) {
        // First call: modify a tracked file and create files under output/, input/ and logs/.
        for (const dir of ['output', 'input', 'logs']) {
          mkdirSync(join(workspacePath, dir), { recursive: true });
        }
        writeFileSync(join(workspacePath, 'README.md'), 'line1\nline2 changed\n', 'utf-8');
        writeFileSync(join(workspacePath, 'output', 'report.md'), '# report\n', 'utf-8');
        writeFileSync(join(workspacePath, 'input', 'noise.txt'), 'ignore me\n', 'utf-8');
        writeFileSync(join(workspacePath, 'logs', 'runtime.log'), 'ignore me too\n', 'utf-8');
        callIndex++;
        return { next: 'verify', output: 'draft ready', toolsUsed: [] };
      }

      if (callIndex === 1) {
        // Second call: a structured "needs_fix" result routed back to execute.
        callIndex++;
        return {
          next: 'execute',
          output: '[判定] needs_fix\n## 問題点\n- README.md: wording\n## 期待する修正\n- tighten wording',
          toolsUsed: [],
        };
      }

      callIndex++;
return { next: 'COMPLETE', output: 'done', toolsUsed: [] };
    });

    const result = await runPiece(makePiece(), 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('completed');
    // The third instruction carries review feedback plus git status/diff sections.
    expect(instructions[2]).toContain('これまでのレビュー指摘');
    expect(instructions[2]).toContain('## 現在の変更状況');
    expect(instructions[2]).toContain('## 変更差分(抜粋)');
    expect(instructions[2]).toContain('README.md');
    expect(instructions[2]).toContain('output/report.md');
    // Files under input/ and logs/ must not leak into the prompt.
    expect(instructions[2]).not.toContain('input/noise.txt');
    expect(instructions[2]).not.toContain('logs/runtime.log');
  });

  it('aborts when loop detection fires due to consecutive revisits', async () => {
    workspacePath = makeWorkspace();
    // Movement always transitions back to itself: execute→execute→execute...
    // This triggers the consecutive visit counter
    executeMovementMock.mockResolvedValue({ next: 'execute', output: 'still going', toolsUsed: [] });

    const piece = makePiece();
    // Set a low max_consecutive_revisits so the test triggers quickly.
    // The field is part of the movement type, so no `any` cast is needed.
    for (const movement of piece.movements) {
      movement.max_consecutive_revisits = 2;
    }

    const result = await runPiece(piece, 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('loop_detected');
    expect(result.finalOutput).toContain('Loop detected');
  });

  // NOTE(review): the title and the scripted results describe the fallback
  // path, but the assertions only pin the first ASK being returned to the
  // caller as waiting_human — confirm whether fallback coverage is intended here.
  it('falls back to default_next when ASK limit is reached', async () => {
    workspacePath = makeWorkspace();
    // First call: ASK, Second call: ASK (limit reached), Third call: COMPLETE from fallback movement
    const results: MovementResult[] = [
      { next: 'ASK', output: 'Need info 1', toolsUsed: [] },
      { next: 'ASK', output: 'Need info 2', toolsUsed: [] },
      // After ASK limit, piece-runner should fall back to default_next ("verify")
      // verify returns COMPLETE
      { next: 'COMPLETE', output: 'done from fallback', toolsUsed: [] },
    ];

    executeMovementMock.mockImplementation(async () => {
      const next = results.shift();
      if (!next) throw new Error('no mock result left');
      return next;
    });

    const result = await runPiece(
      makePiece(),
      'TASK',
      {} as never,
      workspacePath,
      undefined,
      undefined,
      { askCount: 0, maxAskPerJob: 1 },
    );

    // First ASK (askCount=1, maxAsk=1): returned to caller as waiting_human
    expect(result.status).toBe('waiting_human');
    expect(result.finalOutput).toBe('Need info 1');
  });

  it('aborts when ASK limit reached and no fallback transition exists', async () => {
    workspacePath = makeWorkspace();
    // Piece with a single movement that has no default_next and no rules with forward transitions
    const singlePiece: PieceDef = {
      name: 'test-single',
      description: 'test',
      max_movements: 10,
      initial_movement: 'execute',
      movements: [
        {
          name: 'execute',
          edit: true,
          persona: 'worker',
          instruction: 'do work',
          allowed_tools: [],
          rules: [],
          // No default_next
        },
      ],
    };

    executeMovementMock.mockResolvedValue({ next: 'ASK', output: 'Need info', toolsUsed: [] });

    const result = await runPiece(
      singlePiece,
      'TASK',
      {} as never,
      workspacePath,
      undefined,
      undefined,
      { askCount: 1, maxAskPerJob: 1 }, // Already at limit
    );

    // askCount starts at 1, +1 = 2 which > maxAsk=1, so it should try to find fallback
    // No fallback exists, so it aborts
    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('ask_limit_reached');
  });

  it('keeps piece YAML review prompts structured and plan-aware', () => {
    // Loads the bundled piece definitions from <cwd>/pieces.
    const piecesDir = join(process.cwd(), 'pieces');
    const general = loadPiece('general', piecesDir);
    const office = loadPiece('office-process', piecesDir);
    const research = loadPiece('research', piecesDir);

    expect(general.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 問題点');
    expect(general.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 合格基準');
    expect(office.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 期待する修正');
    expect(office.movements.find((m) => m.name === 'process')?.instruction).toContain('合格基準');
    expect(research.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 問題点');
expect(research.movements.find((m) => m.name === 'analyze')?.instruction).toContain('合格基準');
    // After Phase 6a-2: verify has a single rule (analyze fallback) since
    // COMPLETE / ABORT / ASK terminals moved to the `complete` tool.
    expect(research.movements.find((m) => m.name === 'verify')?.rules[0]?.next).toBe('analyze');
  });
});

// Exercises loadPiece against YAML written to a temp directory (passed as the
// third loadPiece argument) and against the bundled pieces under <cwd>/pieces.
describe('loadPiece terminal-rule validation (Phase 6b)', () => {
  let tempDir: string;

  beforeEach(() => {
    tempDir = mkdtempSync(join(tmpdir(), 'phase6b-loadpiece-'));
  });

  afterEach(() => {
    rmSync(tempDir, { recursive: true, force: true });
  });

  /** Writes `<name>.yaml` with the given body into the per-test temp directory. */
  function writePiece(name: string, body: string): void {
    writeFileSync(join(tempDir, `${name}.yaml`), body, 'utf-8');
  }

  // The YAML fixtures below are written as block mappings with one key per
  // line; a fixture collapsed onto a single line is not a parseable mapping.
  it('rejects custom piece with rules[].next: COMPLETE', () => {
    writePiece('bad', `name: bad
description: terminal in rules
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules:
      - condition: done
        next: COMPLETE
`);
    expect(() => loadPiece('bad', 'pieces', tempDir)).toThrow(/reserved terminal next values/);
  });

  it('rejects rules[].next: ABORT and ASK with the same error', () => {
    writePiece('bad-abort', `name: bad-abort
description: x
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    rules:
      - condition: fail
        next: ABORT
`);
    expect(() => loadPiece('bad-abort', 'pieces', tempDir)).toThrow(/rule\.next="ABORT"/);

    writePiece('bad-ask', `name: bad-ask
description: x
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    rules:
      - condition: ask
        next: ASK
`);
    expect(() => loadPiece('bad-ask', 'pieces', tempDir)).toThrow(/rule\.next="ASK"/);
  });

  it('accepts default_next: COMPLETE (engine-internal sentinel)', () => {
    writePiece('good', `name: good
description: terminal only via default_next
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules: []
`);
    const piece = loadPiece('good', 'pieces', tempDir);
    expect(piece.movements[0]?.default_next).toBe('COMPLETE');
  });

  it('accepts movement-to-movement rules + WAIT_SUBTASKS sentinel', () => {
    writePiece('multi', `name: multi
description: x
max_movements: 1
initial_movement: a
movements:
  - name: a
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: b
    rules:
      - condition: spawn done
        next: WAIT_SUBTASKS
      - condition: do next
        next: b
  - name: b
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules: []
`);
    expect(() => loadPiece('multi', 'pieces', tempDir)).not.toThrow();
  });

  it('all 12 bundled pieces load without validation errors', () => {
    const piecesDir = join(process.cwd(), 'pieces');
    const names = [
      'brainstorming',
      'chat',
      'data-process',
      'general',
      'office-process',
      'piece-builder',
      'research',
      'slide',
      'sns-research',
      'ssh-console',
      'ssh-ops',
      'x-ai-digest',
    ];
    for (const name of names) {
      expect(() => loadPiece(name, piecesDir)).not.toThrow();
    }
  });

  it('ssh-console piece declares SshConsole* tools and wildcard allowed_ssh_connections', () => {
    const piece = loadPiece('ssh-console', join(process.cwd(), 'pieces'));
    expect(piece.name).toBe('ssh-console');
    expect(piece.movements).toHaveLength(1);

    const interact = piece.movements[0]!;
    expect(interact.name).toBe('interact');
    expect(interact.allowed_tools).toEqual(expect.arrayContaining([
      'SshConsoleEnsure',
      'SshConsoleSend',
      'SshConsoleSnapshot',
    ]));
    expect(interact.allowed_ssh_connections).toEqual(['*']);
    expect(interact.default_next).toBe('COMPLETE');
  });

  it('ssh-ops piece declares SSH tools and wildcard allowed_ssh_connections', () => {
    const piece = loadPiece('ssh-ops', join(process.cwd(), 'pieces'));
    const execute = piece.movements.find((m) => m.name === 'execute');
    expect(execute).toBeDefined();
    expect(execute!.allowed_tools).toEqual(expect.arrayContaining(['SshExec',
'SshUpload', 'SshDownload']));
    expect(execute!.allowed_ssh_connections).toEqual(['*']);

    const verify = piece.movements.find((m) => m.name === 'verify');
    expect(verify).toBeDefined();
    // verify has no SSH tools, so allowed_ssh_connections is optional and omitted.
    expect(verify!.allowed_ssh_connections).toBeUndefined();
  });
});

// Guards the runner against a piece whose max_movements is missing or zero.
describe('runPiece max_movements defensive default', () => {
  let workspace = '';

  beforeEach(() => {
    executeMovementMock.mockReset();
    workspace = mkdtempSync(join(tmpdir(), 'mm-default-'));
  });

  afterEach(() => {
    if (workspace) {
      rmSync(workspace, { recursive: true, force: true });
      workspace = '';
    }
  });

  // Regression: a piece YAML missing max_movements (e.g. an LLM-corrupted
  // override) used to make `while (steps < undefined)` false on the first
  // iteration, aborting instantly with "Exceeded max movements (undefined)"
  // before any movement ran.
  it('still iterates when piece.max_movements is missing (falls back to default)', async () => {
    executeMovementMock.mockResolvedValue({ next: 'COMPLETE', output: 'ok', toolsUsed: [] });

    const piece = makePiece();
    // `delete` needs the property to be optional, hence the Partial<PieceDef> view.
    delete (piece as Partial<PieceDef>).max_movements;

    const result = await runPiece(piece, 'TASK', {} as never, workspace);

    expect(result.status).toBe('completed');
    expect(executeMovementMock).toHaveBeenCalled();
  });

  it('still iterates when piece.max_movements is 0 or negative', async () => {
    executeMovementMock.mockResolvedValue({ next: 'COMPLETE', output: 'ok', toolsUsed: [] });

    const piece = makePiece();
    piece.max_movements = 0;

    const result = await runPiece(piece, 'TASK', {} as never, workspace);

    expect(result.status).toBe('completed');
    expect(executeMovementMock).toHaveBeenCalled();
  });
});

import { buildFollowupNotice } from './piece-runner.js';

// buildFollowupNotice(workspace) returns '' unless the workspace already holds prior work.
describe('buildFollowupNotice (option C)', () => {
  let workspace: string;

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'followup-test-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
  });
it('returns empty string for a fresh workspace (no follow-up signal)', () => {
    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });

  it('returns empty when output/ exists but is empty', () => {
    const outputDir = join(workspace, 'output');
    mkdirSync(outputDir, { recursive: true });

    expect(buildFollowupNotice(workspace)).toBe('');
  });

  it('detects follow-up when output/ has any non-hidden file', () => {
    const outputDir = join(workspace, 'output');
    mkdirSync(outputDir, { recursive: true });
    writeFileSync(join(outputDir, 'report.md'), 'prior work', 'utf-8');

    const notice = buildFollowupNotice(workspace);

    // The notice must contain all three marker strings.
    for (const marker of ['【継続タスク】', 'CreateChecklist', '2 回目以降']) {
      expect(notice).toContain(marker);
    }
  });

  it('detects follow-up when subtasks/ has content (multi-stage flows)', () => {
    const subtaskDir = join(workspace, 'subtasks', '1');
    mkdirSync(subtaskDir, { recursive: true });
    writeFileSync(join(subtaskDir, 'placeholder.txt'), 'x', 'utf-8');

    expect(buildFollowupNotice(workspace)).toContain('【継続タスク】');
  });

  it('ignores hidden / engine-internal files', () => {
    mkdirSync(join(workspace, 'output'), { recursive: true });
    // Phase 5 engine-internal artifacts must NOT count as follow-up signal,
    // otherwise the very first run would incorrectly self-flag.
writeFileSync(join(workspace, 'output', 'memory-delta.json'), '{}', 'utf-8');
    writeFileSync(join(workspace, 'output', '.gitkeep'), '', 'utf-8');
    expect(buildFollowupNotice(workspace)).toBe('');
  });
});

// ============================================================
// Traceability T-2 — handoff / delta / followup / context_action
// ============================================================

import { createFileEventLogger, parseEventLine, type EventBase } from '../progress/event-log.js';
import type { OpenAICompatClient, LLMEvent } from '../llm/openai-compat.js';

/**
 * Reads `<workspace>/logs/events.jsonl` and returns its events in file order.
 *
 * Returns an empty array when the log file does not exist. Blank lines are
 * skipped; a line that `parseEventLine` does not report as `ok` makes the
 * helper throw so a malformed log fails the test loudly.
 */
function readAllEvents(workspacePath: string): EventBase[] {
  const logPath = join(workspacePath, 'logs', 'events.jsonl');
  if (!existsSyncEvents(logPath)) return [];

  const events: EventBase[] = [];
  for (const line of readFileSync(logPath, 'utf-8').trim().split('\n')) {
    if (!line) continue;
    const parsed = parseEventLine(line);
    if (parsed.kind !== 'ok') throw new Error(`bad event: ${line}`);
    events.push(parsed.event);
  }
  return events;
}

// Runs single-movement pieces and inspects the events written to logs/events.jsonl.
describe('Traceability T-2: piece-runner emission for subtask boundary + followup', () => {
  let workspace: string;

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'trace-t2-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
    vi.mocked(executeMovement).mockReset();
  });

  it('emits memory_handoff_read when a parent handoff exists at startup', async () => {
    // Simulate a parent handoff already in the workspace.
mkdirSync(join(workspace, 'input'), { recursive: true });
    const handoff = {
      version: 1,
      handoffId: 'h-1',
      parentJobId: 'parent-job-1',
      parentWorkspaceRelative: '../..',
      createdAt: '2026-05-02T00:00:00.000Z',
      facts: [{
        claim: 'parent X',
        confidence: 'high',
        evidencePaths: [],
        evidenceUrls: [],
        observedAt: '2026-05-02T00:00:00.000Z',
        portability: 'portable',
        evidenceKind: 'none',
        lineage: [],
      }],
      decisions: [],
      openQuestions: [],
      doNotRepeat: [],
    };
    writeFileSync(join(workspace, 'input', 'memory-handoff.json'), JSON.stringify(handoff), 'utf-8');
    stubImmediateCompletion();

    const fakeClient = {} as OpenAICompatClient;
    await runPiece(singleMovementPiece(), 'task', fakeClient, workspace);

    const handoffRead = readAllEvents(workspace).find((e) => e.kind === 'memory_handoff_read');
    expect(handoffRead).toBeDefined();
    const payload = handoffRead?.payload as { parentJobId: string };
    expect(payload.parentJobId).toBe('parent-job-1');
  });

  /** Makes the mocked movement finish the run on its first invocation. */
  function stubImmediateCompletion(): void {
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'COMPLETE',
      output: 'done',
      toolsUsed: [],
    });
  }

  /** Builds a fresh piece with a single movement `m` and max_movements of 1. */
  function singleMovementPiece(): PieceDef {
    return {
      name: 'tester',
      description: 'd',
      max_movements: 1,
      initial_movement: 'm',
      movements: [{
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      }],
    };
  }

  it('emits followup_detected when output/ has prior content', async () => {
    mkdirSync(join(workspace, 'output'), { recursive: true });
    writeFileSync(join(workspace, 'output', 'prior.md'), 'previous turn output', 'utf-8');
    stubImmediateCompletion();

    await runPiece(singleMovementPiece(), 'task', {} as OpenAICompatClient, workspace);

    const kinds = readAllEvents(workspace).map((e) => e.kind);
    expect(kinds.includes('followup_detected')).toBe(true);
  });

  it('emits memory_delta_absorb (skipped_already_absorbed) when re-resuming', async () => {
    // Pre-seed a child delta + an absorbed-deltas log saying it's already done.
    const childWs = join(workspace, 'subtasks', '1');
    mkdirSync(join(childWs, 'output'), { recursive: true });
    const delta = {
      version: 1,
      deltaId: 'd-1',
      childJobId: 'child-1',
      childWorkspaceRelative: 'subtasks/1',
      childStatus: 'success',
      partial: false,
      createdAt: '2026-05-02T00:00:00.000Z',
      facts: [{
        claim: 'child finding',
        confidence: 'high',
        evidencePaths: [],
        evidenceUrls: [],
        observedAt: '2026-05-02T00:00:00.000Z',
        portability: 'portable',
        evidenceKind: 'none',
        lineage: [],
      }],
      decisions: [],
      openQuestions: [],
      doNotRepeat: [],
    };
    writeFileSync(join(childWs, 'output', 'memory-delta.json'), JSON.stringify(delta), 'utf-8');

    mkdirSync(join(workspace, 'logs'), { recursive: true });
    writeFileSync(
      join(workspace, 'logs', 'absorbed-deltas.json'),
      JSON.stringify({ version: 1, ids: ['d-1'] }),
      'utf-8',
    );
    stubImmediateCompletion();

    await runPiece(singleMovementPiece(), 'task', {} as OpenAICompatClient, workspace);

    const absorb = readAllEvents(workspace).find((e) => e.kind === 'memory_delta_absorb');
    expect(absorb).toBeDefined();
    const payload = absorb?.payload as { outcome: string };
    expect(payload.outcome).toBe('skipped_already_absorbed');
  });

  it('emits memory_delta_absorb (merged) and counts when a fresh delta is found', async () => {
    const childWs = join(workspace, 'subtasks', '1');
    mkdirSync(join(childWs, 'output'), { recursive: true });
    const delta = {
      version: 1,
      deltaId: 'd-2',
      childJobId: 'child-2',
      childWorkspaceRelative: 'subtasks/1',
      childStatus: 'success',
      partial: false,
      createdAt: '2026-05-02T00:00:00.000Z',
      facts: [{
        claim: 'child A',
        confidence: 'high',
        evidencePaths: ['output/a.ts'],
        evidenceUrls: [],
        observedAt: '2026-05-02T00:00:00.000Z',
        portability: 'workspace_local',
        evidenceKind: 'local_path',
        lineage: [],
      }],
      decisions: [],
      openQuestions: [],
      doNotRepeat: [],
    };
    writeFileSync(join(childWs, 'output', 'memory-delta.json'), JSON.stringify(delta), 'utf-8');
    stubImmediateCompletion();

    await runPiece(singleMovementPiece(), 'task', {} as OpenAICompatClient, workspace);

    const absorb = readAllEvents(workspace).find(
      (e) => e.kind === 'memory_delta_absorb' && (e.payload as { outcome: string }).outcome === 'merged',
    );
    expect(absorb).toBeDefined();
    const payload = absorb?.payload as { counts: { factsAdded: number } };
    expect(payload.counts.factsAdded).toBe(1);
  });

  it('emits run_start and run_complete bookending each piece run', async () => {
    stubImmediateCompletion();

    await runPiece(singleMovementPiece(), 'task', {} as OpenAICompatClient, workspace);

    const events = readAllEvents(workspace);
    const first = events[0]!;
    const last = events[events.length - 1]!;
    expect(first.kind).toBe('run_start');
    expect(last.kind).toBe('run_complete');
    const completePayload = last.payload as { status: string };
    expect(completePayload.status).toBe('completed');
  });
});

import { existsSync as existsSyncSnapshot, readdirSync as readdirSnapshot } from 'fs';

describe('Cancel-traceability PR1: memory snapshot on terminal non-success', () => {
  let workspace: string;
beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'cancel-snap-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
    vi.mocked(executeMovement).mockReset();
  });

  /** Lists `memory-snapshot-*.json` file names in `<workspace>/logs` ([] if the directory is absent). */
  function findSnapshotFiles(): string[] {
    const logsDir = join(workspace, 'logs');
    if (!existsSyncSnapshot(logsDir)) return [];
    const isSnapshot = (file: string): boolean =>
      file.startsWith('memory-snapshot-') && file.endsWith('.json');
    return readdirSnapshot(logsDir).filter(isSnapshot);
  }

  it('writes snapshot + meta-event when cancelled before any movement', async () => {
    const piece: PieceDef = {
      name: 'tester',
      description: 'd',
      max_movements: 3,
      initial_movement: 'm',
      movements: [{
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      }],
    };

    // cancel BEFORE movement runs: cancelCheck returns true on first guard.
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace, undefined, undefined, {
      cancelCheck: () => true,
    });

    expect(result.status).toBe('cancelled');
    expect(result.memorySnapshotPath).toBeDefined();
    expect(result.memorySnapshotPath).toMatch(/^logs\/memory-snapshot-cancelled-/);

    // Exactly one snapshot file is written, and it is named after the status.
    const snapshotFiles = findSnapshotFiles();
    expect(snapshotFiles.length).toBe(1);
    expect(snapshotFiles[0]).toMatch(/^memory-snapshot-cancelled-.*\.json$/);

    const snapshot = JSON.parse(readFileSync(join(workspace, 'logs', snapshotFiles[0]!), 'utf-8'));
    expect(snapshot.schemaVersion).toBe(2);
    expect(snapshot.status).toBe('cancelled');
    expect(snapshot.memory).toBeDefined();
    expect(snapshot.memory.facts).toEqual([]);
    expect(snapshot.runId).toBeDefined();
    // v2 forensics fields
    expect(snapshot.finalOutput).toBeDefined();
    for (const listField of ['movementHistory', 'lessons', 'contextActions']) {
      expect(Array.isArray(snapshot[listField])).toBe(true);
    }
    expect(snapshot.stats).toBeDefined();
    expect(typeof snapshot.stats.totalSteps).toBe('number');
    expect(snapshot.eventsLogRelative).toBe('logs/events.jsonl');

    // The event log records the snapshot and ends with run_complete.
    const events = readAllEvents(workspace);
    const written = events.find((e) => e.kind === 'memory_snapshot_written');
    expect(written).toBeDefined();
    const writtenPayload = written!.payload as { status: string; path: string };
    expect(writtenPayload.status).toBe('cancelled');
    expect(writtenPayload.path).toMatch(/^logs\/memory-snapshot-cancelled-/);

    const runComplete = events[events.length - 1]!;
    expect(runComplete.kind).toBe('run_complete');
    const payload = runComplete.payload as {
      status: string;
      cancel?: { phase: string; snapshotPath: string };
      memorySnapshotPath?: string;
    };
    expect(payload.status).toBe('cancelled');
    expect(payload.memorySnapshotPath).toBeDefined();
    expect(payload.cancel?.phase).toBe('before_movement');
    expect(payload.cancel?.snapshotPath).toBe(payload.memorySnapshotPath);
  });

  it('writes snapshot when cancelled mid-movement (ABORT with cancelled output)', async () => {
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'ABORT',
      output: 'Job was cancelled by user request',
      toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester',
      description: 'd',
      max_movements: 3,
      initial_movement: 'm',
      movements: [{
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      }],
    };

    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);

    expect(result.status).toBe('cancelled');
    expect(result.memorySnapshotPath).toBeDefined();

    const events = readAllEvents(workspace);
    const runComplete = events[events.length - 1]!;
    expect(runComplete.kind).toBe('run_complete');
    const payload = runComplete.payload as { cancel?: { phase: string; movement: string } };
    expect(payload.cancel?.phase).toBe('mid_movement');
    expect(payload.cancel?.movement).toBe('m');
  });

  it('writes snapshot on aborted (max_movements exceeded)', async () => {
    // Always return next='m' to bounce back, hitting max_movements.
vi.mocked(executeMovement).mockResolvedValue({
      next: 'm',
      output: 'still working',
      toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester',
      description: 'd',
      max_movements: 1,
      initial_movement: 'm',
      movements: [{
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [{ condition: 'always', next: 'm' }],
        default_next: 'COMPLETE',
        max_consecutive_revisits: 100,
      }],
    };

    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);

    expect(result.status).toBe('aborted');
    expect(result.memorySnapshotPath).toBeDefined();
    expect(result.memorySnapshotPath).toMatch(/^logs\/memory-snapshot-aborted-/);

    const snapshotEvent = readAllEvents(workspace).find((e) => e.kind === 'memory_snapshot_written');
    expect(snapshotEvent).toBeDefined();
    const snapshotPayload = snapshotEvent!.payload as { status: string };
    expect(snapshotPayload.status).toBe('aborted');
  });

  it('v2 snapshot captures finalOutput / movementHistory / lessons on agent-self-abort', async () => {
    // Simulate `complete({status:'aborted', abort_reason:'...'})` — the
    // forensics gap that motivated schemaVersion=2. The LLM's abort_reason
    // surfaces as MovementResult.output and PieceRunResult.finalOutput; v1
    // dropped it entirely from the snapshot.
    const abortReasonText = 'Cannot proceed: required input file is missing and user is unavailable';
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'ABORT',
      output: abortReasonText,
      toolsUsed: ['Read', 'Glob'],
      lessons: 'Lesson: validate input presence before plan phase',
    });
    const piece: PieceDef = {
      name: 'tester',
      description: 'd',
      max_movements: 3,
      initial_movement: 'respond',
      movements: [{
        name: 'respond',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      }],
    };

    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);

    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('movement_abort');
    expect(result.memorySnapshotPath).toBeDefined();

    const files = findSnapshotFiles();
    expect(files.length).toBe(1);
    const snapshotPath = join(workspace, 'logs', files[0]!);
    const fileContent = JSON.parse(readFileSync(snapshotPath, 'utf-8'));

    expect(fileContent.schemaVersion).toBe(2);
    expect(fileContent.status).toBe('aborted');
    expect(fileContent.abortReason).toBe('movement_abort');
    expect(fileContent.currentMovement).toBe('respond');
    // The LLM's abort_reason text — the most important forensic field — is
    // now preserved verbatim at top level.
    expect(fileContent.finalOutput).toBe(abortReasonText);
    // movementHistory shows the path taken with per-step tool usage.
    expect(Array.isArray(fileContent.movementHistory)).toBe(true);
    expect(fileContent.movementHistory.length).toBe(1);
    expect(fileContent.movementHistory[0]).toMatchObject({
      name: 'respond',
      next: 'ABORT',
      toolsUsed: ['Read', 'Glob'],
      outputPreview: abortReasonText,
      outputTruncated: false,
      hasLessons: true,
    });
    // lessons accumulated from movement results are persisted.
expect(Array.isArray(fileContent.lessons)).toBe(true); expect(fileContent.lessons.length).toBe(1); expect(fileContent.lessons[0].movement).toBe('respond'); expect(fileContent.lessons[0].lessons).toContain('validate input presence'); expect(fileContent.stats.totalSteps).toBe(1); expect(fileContent.stats.movementCount).toBe(1); expect(fileContent.eventsLogRelative).toBe('logs/events.jsonl'); }); it('v2 snapshot truncates long movement outputs but preserves finalOutput in full', async () => { const longOutput = 'x'.repeat(2_000); vi.mocked(executeMovement).mockResolvedValue({ next: 'ABORT', output: longOutput, toolsUsed: [], }); const piece: PieceDef = { name: 'tester', description: 'd', max_movements: 3, initial_movement: 'm', movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE', }], }; const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace); expect(result.status).toBe('aborted'); const files = findSnapshotFiles(); const fileContent = JSON.parse(readFileSync(join(workspace, 'logs', files[0]!), 'utf-8')); expect(fileContent.movementHistory[0].outputPreview.length).toBe(500); expect(fileContent.movementHistory[0].outputTruncated).toBe(true); // finalOutput keeps the full text uncapped so the LLM's reasoning isn't lost. 
expect(fileContent.finalOutput).toBe(longOutput); }); it('does NOT write snapshot on successful completion', async () => { vi.mocked(executeMovement).mockResolvedValue({ next: 'COMPLETE', output: 'done', toolsUsed: [], }); const piece: PieceDef = { name: 'tester', description: 'd', max_movements: 1, initial_movement: 'm', movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }], }; const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace); expect(result.status).toBe('completed'); expect(result.memorySnapshotPath).toBeUndefined(); expect(findSnapshotFiles()).toEqual([]); const events = readAllEvents(workspace); expect(events.find((e) => e.kind === 'memory_snapshot_written')).toBeUndefined(); }); it('does NOT write snapshot on waiting_subtasks (transient pause)', async () => { vi.mocked(executeMovement).mockResolvedValue({ next: 'WAIT_SUBTASKS', output: 'spawned children', toolsUsed: [], }); const piece: PieceDef = { name: 'tester', description: 'd', max_movements: 1, initial_movement: 'm', movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [{ condition: 'spawned', next: 'WAIT_SUBTASKS' }], default_next: 'COMPLETE', }], }; const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace); expect(result.status).toBe('waiting_subtasks'); expect(result.memorySnapshotPath).toBeUndefined(); expect(findSnapshotFiles()).toEqual([]); }); }); describe('piece required_mcp parsing', () => { function makePieceWithMcp(required_mcp: unknown): PieceDef { return { name: 'mcp-test', description: 'test', max_movements: 1, initial_movement: 'm', required_mcp: required_mcp as string[], movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [] }], }; } it('retains valid required_mcp slugs', () => { const piece = makePieceWithMcp(['canva', 'notion']); normalizeRequiredMcp(piece, 'mcp-test'); 
expect(piece.required_mcp).toEqual(['canva', 'notion']); }); it('drops invalid slugs and keeps only valid ones', () => { const piece = makePieceWithMcp(['canva', 'BAD!!', 123]); normalizeRequiredMcp(piece, 'mcp-test'); expect(piece.required_mcp).toEqual(['canva']); }); it('leaves required_mcp undefined when field is absent', () => { const piece: PieceDef = { name: 'mcp-test', description: 'test', max_movements: 1, initial_movement: 'm', movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [] }], }; normalizeRequiredMcp(piece, 'mcp-test'); expect(piece.required_mcp).toBeUndefined(); }); it('normalizes required_mcp to empty array when field is not an array', () => { const piece = makePieceWithMcp('not-an-array' as unknown as string[]); normalizeRequiredMcp(piece, 'mcp-test'); expect(piece.required_mcp).toEqual([]); }); }); // Phase 4: per-movement SSH connection allowlist validation. describe('allowed_ssh_connections validation (Phase 4)', () => { function makeMovement(overrides: Partial = {}): PieceDef['movements'][number] { return { name: 'm1', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], ...overrides, }; } function makePiece(movements: PieceDef['movements']): PieceDef { return { name: 'ssh-test', description: 'test', max_movements: 1, initial_movement: movements[0]?.name ?? 
'm1', movements, }; } it('passes when no SSH tools and no allowlist', () => { const piece = makePiece([makeMovement({ allowed_tools: ['Read'] })]); expect(validateAllowedSshConnections(piece)).toEqual([]); expect(() => validatePieceDef(piece)).not.toThrow(); }); it('passes when SSH tool present and allowlist declared (UUID)', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshExec', 'Read'], allowed_ssh_connections: ['6f9619ff-8b86-d011-b42d-00c04fc964ff'], }), ]); expect(validateAllowedSshConnections(piece)).toEqual([]); }); it('passes when SSH tool present and allowlist declared (empty array = explicit deny)', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshExec'], allowed_ssh_connections: [] }), ]); expect(validateAllowedSshConnections(piece)).toEqual([]); }); it('passes when allowlist is wildcard ["*"]', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshUpload'], allowed_ssh_connections: ['*'] }), ]); expect(validateAllowedSshConnections(piece)).toEqual([]); }); it('rejects when SSH tool present but allowlist missing', () => { const piece = makePiece([makeMovement({ allowed_tools: ['SshExec'] })]); const errors = validateAllowedSshConnections(piece); expect(errors).toHaveLength(1); expect(errors[0]).toContain('uses SSH tool(s) but allowed_ssh_connections is not declared'); expect(() => validatePieceDef(piece)).toThrow(/allowed_ssh_connections/); }); it('rejects SshUpload without allowlist', () => { const piece = makePiece([makeMovement({ allowed_tools: ['SshUpload'] })]); expect(validateAllowedSshConnections(piece)).toHaveLength(1); }); it('rejects SshDownload without allowlist', () => { const piece = makePiece([makeMovement({ allowed_tools: ['SshDownload'] })]); expect(validateAllowedSshConnections(piece)).toHaveLength(1); }); it('rejects non-array allowlist', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshExec'], allowed_ssh_connections: 'not-an-array' as unknown as string[], }), 
]); const errors = validateAllowedSshConnections(piece); expect(errors[0]).toMatch(/must be an array/); }); it('rejects non-string entries', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshExec'], allowed_ssh_connections: [123 as unknown as string], }), ]); const errors = validateAllowedSshConnections(piece); expect(errors[0]).toMatch(/must be a string/); }); it('rejects entries that are neither wildcard nor valid id format', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshExec'], allowed_ssh_connections: ['short'], }), ]); const errors = validateAllowedSshConnections(piece); expect(errors[0]).toMatch(/must be '\*' or a lowercase hex/); }); it('rejects uppercase / non-hex characters in ids', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['SshExec'], allowed_ssh_connections: ['ZZZZZZZZ-not-hex'], }), ]); expect(validateAllowedSshConnections(piece)).toHaveLength(1); }); it('allowlist without SSH tool is allowed (no-op, future-proofing)', () => { const piece = makePiece([ makeMovement({ allowed_tools: ['Read'], allowed_ssh_connections: ['6f9619ff-8b86-d011-b42d-00c04fc964ff'], }), ]); expect(validateAllowedSshConnections(piece)).toEqual([]); }); it('reports offenders across multiple movements', () => { const piece = makePiece([ makeMovement({ name: 'm1', allowed_tools: ['SshExec'] }), makeMovement({ name: 'm2', allowed_tools: ['Read'] }), makeMovement({ name: 'm3', allowed_tools: ['SshDownload'], allowed_ssh_connections: ['*'] }), makeMovement({ name: 'm4', allowed_tools: ['SshUpload'], allowed_ssh_connections: ['BAD_ID'] }), ]); const errors = validateAllowedSshConnections(piece); expect(errors).toHaveLength(2); expect(errors[0]).toContain('movement="m1"'); expect(errors[1]).toContain('movement="m4"'); }); it('validatePieceDef composes error message with piece name', () => { const piece = makePiece([makeMovement({ allowed_tools: ['SshExec'] })]); expect(() => validatePieceDef(piece)).toThrow(/Piece 
"ssh-test" has invalid allowed_ssh_connections/); }); }); // --- Task 1: loadPiece multi-dir support --- describe('loadPiece multi-dir (string | string[])', () => { it('resolves from a list of custom dirs (per-user wins over builtin name miss)', () => { const dirA = mkdtempSync(join(tmpdir(), 'pa-')); // empty const dirB = mkdtempSync(join(tmpdir(), 'pb-')); writeFileSync( join(dirB, 'mycustom.yaml'), `name: mycustom\ndescription: d\nmax_movements: 1\ninitial_movement: go\nmovements:\n - name: go\n edit: false\n persona: w\n instruction: x\n allowed_tools: []\n rules: []\n default_next: COMPLETE\n`, ); // array form: searches dirA then dirB then builtin const p = loadPiece('mycustom', 'pieces', [dirA, dirB]); expect(p.name).toBe('mycustom'); // builtin still resolvable when not in any custom dir expect(() => loadPiece('chat', 'pieces', [dirA, dirB])).not.toThrow(); rmSync(dirA, { recursive: true }); rmSync(dirB, { recursive: true }); }); it('first dir wins when same name appears in two custom dirs', () => { const dirA = mkdtempSync(join(tmpdir(), 'pa-')); const dirB = mkdtempSync(join(tmpdir(), 'pb-')); writeFileSync( join(dirA, 'dup.yaml'), `name: dup\ndescription: from-a\nmax_movements: 1\ninitial_movement: go\nmovements:\n - name: go\n edit: false\n persona: w\n instruction: x\n allowed_tools: []\n rules: []\n default_next: COMPLETE\n`, ); writeFileSync( join(dirB, 'dup.yaml'), `name: dup\ndescription: from-b\nmax_movements: 1\ninitial_movement: go\nmovements:\n - name: go\n edit: false\n persona: w\n instruction: x\n allowed_tools: []\n rules: []\n default_next: COMPLETE\n`, ); const p = loadPiece('dup', 'pieces', [dirA, dirB]); expect(p.description).toBe('from-a'); rmSync(dirA, { recursive: true }); rmSync(dirB, { recursive: true }); }); it('string form still works (backward compat)', () => { const dir = mkdtempSync(join(tmpdir(), 'pc-')); writeFileSync( join(dir, 'strcompat.yaml'), `name: strcompat\ndescription: str\nmax_movements: 1\ninitial_movement: 
go\nmovements:\n - name: go\n edit: false\n persona: w\n instruction: x\n allowed_tools: []\n rules: []\n default_next: COMPLETE\n`, ); const p = loadPiece('strcompat', 'pieces', dir); expect(p.name).toBe('strcompat'); rmSync(dir, { recursive: true }); }); });