// maestro/src/engine/piece-runner.test.ts

import { execFileSync } from 'child_process';
import { tmpdir } from 'os';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs';
import { join } from 'path';
import type { MovementResult } from './agent-loop.js';
import type { PieceDef } from './piece-runner.js';

// Replace the agent-loop module with a stub whose only export is a bare
// `vi.fn()` for `executeMovement`, so each test can script movement results
// instead of running a real agent loop.
// NOTE(review): this relies on Vitest hoisting `vi.mock` above the imports
// that follow — confirm against the Vitest version in use.
vi.mock('./agent-loop.js', () => ({
  executeMovement: vi.fn(),
}));

import { executeMovement } from './agent-loop.js';
import { loadPiece, runPiece, normalizeRequiredMcp, validatePieceDef, validateAllowedSshConnections } from './piece-runner.js';

// Typed handle on the mocked `executeMovement`, used by the suites below to
// script results (`mockImplementation` / `mockResolvedValue`) and to reset state.
const executeMovementMock = vi.mocked(executeMovement);

/**
 * Builds a fresh piece fixture for the runner tests.
 *
 * The piece starts at `execute` and declares four uniquely named movements:
 * `execute` -> `verify` -> `execute` (review loop via `default_next`), plus
 * `plan` -> `analyze` -> `COMPLETE`. Every movement has empty `rules` and
 * `allowed_tools`, so transitions in the tests are driven by the `next` value
 * the mocked `executeMovement` returns.
 *
 * A new object is returned on every call so tests may mutate it freely.
 *
 * Movement names must be unique: an earlier version of this fixture declared
 * `verify` twice with different `default_next` values, which made any lookup
 * by name ambiguous.
 */
function makePiece(): PieceDef {
  return {
    name: 'test-piece',
    description: 'test',
    max_movements: 10,
    initial_movement: 'execute',
    movements: [
      {
        name: 'execute',
        edit: true,
        persona: 'worker',
        instruction: 'execute',
        allowed_tools: [],
        rules: [],
        default_next: 'verify',
      },
      {
        name: 'verify',
        edit: false,
        persona: 'reviewer',
        instruction: 'verify',
        allowed_tools: [],
        rules: [],
        default_next: 'execute',
      },
      {
        name: 'analyze',
        edit: true,
        persona: 'analyst',
        instruction: 'analyze',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      },
      {
        name: 'plan',
        edit: false,
        persona: 'planner',
        instruction: 'plan',
        allowed_tools: [],
        rules: [],
        default_next: 'analyze',
      },
    ],
  };
}

/**
 * Creates a uniquely named directory under the OS temp dir (prefix
 * `piece-runner-test-`) and returns its path. The caller removes it.
 */
function makeWorkspace(): string {
  const prefix = join(tmpdir(), 'piece-runner-test-');
  return mkdtempSync(prefix);
}

/**
 * Creates a temporary workspace that is also a git repository on branch
 * `main`, with a local test identity configured and a single commit
 * containing a two-line README.md. Returns the workspace path.
 */
function makeGitWorkspace(): string {
  const repoDir = makeWorkspace();

  // Run one git subcommand inside the new repository.
  const git = (...args: string[]): void => {
    execFileSync('git', args, { cwd: repoDir });
  };

  git('init', '--initial-branch=main');
  git('config', 'user.name', 'Test User');
  git('config', 'user.email', 'test@example.com');

  const readmePath = join(repoDir, 'README.md');
  writeFileSync(readmePath, 'line1\nline2\n', 'utf-8');

  git('add', 'README.md');
  git('commit', '-m', 'init');

  return repoDir;
}

/**
 * End-to-end behaviour of `runPiece` with a mocked `executeMovement`:
 * review-feedback accumulation, git context injection, loop detection,
 * ASK handling, and sanity checks on the bundled piece YAML prompts.
 */
describe('piece-runner review feedback flow', () => {
  // Temporary workspace of the test currently running; '' means nothing to clean up.
  let workspacePath = '';

  beforeEach(() => {
    // Drop scripted results and recorded calls left over from the previous test.
    executeMovementMock.mockReset();
  });

  afterEach(() => {
    if (workspacePath) {
      rmSync(workspacePath, { recursive: true, force: true });
      workspacePath = '';
    }
  });

  it('carries cumulative verify feedback into later execute/analyze movements', async () => {
    workspacePath = makeWorkspace();
    const instructions: string[] = [];
    // Scripted `next` values, consumed one per executeMovement call:
    // verify, execute, verify, analyze, COMPLETE.
    const results: MovementResult[] = [
      { next: 'verify', output: 'first draft', toolsUsed: [] },
      { next: 'execute', output: 'review 1: fix title', toolsUsed: [] },
      { next: 'verify', output: 'second draft', toolsUsed: [] },
      { next: 'analyze', output: 'review 2: add conclusion', toolsUsed: [] },
      { next: 'COMPLETE', output: 'done', toolsUsed: [] },
    ];

    // Record the instruction handed to each movement, then replay the script.
    executeMovementMock.mockImplementation(async (_movement, instruction) => {
      instructions.push(instruction);
      const next = results.shift();
      if (!next) throw new Error('no mock result left');
      return next;
    });

    const result = await runPiece(makePiece(), 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('completed');
    expect(instructions[0]).toBe('TASK');
    // Third call: only the first review has happened so far.
    expect(instructions[2]).toContain('これまでのレビュー指摘');
    expect(instructions[2]).toContain('review 1: fix title');
    expect(instructions[2]).not.toContain('review 2: add conclusion');
    // Fifth call: both reviews must be present.
    expect(instructions[4]).toContain('review 1: fix title');
    expect(instructions[4]).toContain('review 2: add conclusion');
  });

  it('appends safe git status and diff context after verify loops', async () => {
    workspacePath = makeGitWorkspace();
    const instructions: string[] = [];
    let callIndex = 0;

    executeMovementMock.mockImplementation(async (_movement, instruction) => {
      instructions.push(instruction);

      if (callIndex === 0) {
        // First call: modify a tracked file and add files under output/,
        // input/ and logs/ so the later instruction can be checked for which
        // of them are reported.
        mkdirSync(join(workspacePath, 'output'), { recursive: true });
        mkdirSync(join(workspacePath, 'input'), { recursive: true });
        mkdirSync(join(workspacePath, 'logs'), { recursive: true });
        writeFileSync(join(workspacePath, 'README.md'), 'line1\nline2 changed\n', 'utf-8');
        writeFileSync(join(workspacePath, 'output', 'report.md'), '# report\n', 'utf-8');
        writeFileSync(join(workspacePath, 'input', 'noise.txt'), 'ignore me\n', 'utf-8');
        writeFileSync(join(workspacePath, 'logs', 'runtime.log'), 'ignore me too\n', 'utf-8');
        callIndex++;
        return { next: 'verify', output: 'draft ready', toolsUsed: [] };
      }

      if (callIndex === 1) {
        // Second call: the review sends the flow back to `execute`.
        callIndex++;
        return { next: 'execute', output: '[判定] needs_fix\n## 問題点\n- README.md: wording\n## 期待する修正\n- tighten wording', toolsUsed: [] };
      }

      callIndex++;
      return { next: 'COMPLETE', output: 'done', toolsUsed: [] };
    });

    const result = await runPiece(makePiece(), 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('completed');
    expect(instructions[2]).toContain('これまでのレビュー指摘');
    expect(instructions[2]).toContain('## 現在の変更状況');
    expect(instructions[2]).toContain('## 変更差分（抜粋）');
    expect(instructions[2]).toContain('README.md');
    expect(instructions[2]).toContain('output/report.md');
    // Files under input/ and logs/ must not show up in the injected context.
    expect(instructions[2]).not.toContain('input/noise.txt');
    expect(instructions[2]).not.toContain('logs/runtime.log');
  });

  it('aborts when loop detection fires due to consecutive revisits', async () => {
    workspacePath = makeWorkspace();

    // Movement always transitions back to itself: execute→execute→execute...
    // This triggers the consecutive visit counter
    executeMovementMock.mockResolvedValue({ next: 'execute', output: 'still going', toolsUsed: [] });

    const piece = makePiece();
    // Set a low max_consecutive_revisits so the test triggers quickly.
    // Object.assign performs the same property write without an `any` cast.
    for (const m of piece.movements) {
      Object.assign(m, { max_consecutive_revisits: 2 });
    }

    const result = await runPiece(piece, 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('loop_detected');
    expect(result.finalOutput).toContain('Loop detected');
  });

  // This test was previously titled as a default_next fallback test, but its
  // assertions only cover the first, still-within-limit ASK; the title now
  // states what is actually verified.
  it('returns waiting_human for the first ASK while still within the ASK limit', async () => {
    workspacePath = makeWorkspace();

    // Only the first entry is expected to be consumed: with askCount=0 and
    // maxAskPerJob=1 the first ASK is within the limit and is handed back to
    // the caller. The remaining entries are spare results in case the runner
    // keeps going.
    const results: MovementResult[] = [
      { next: 'ASK', output: 'Need info 1', toolsUsed: [] },
      { next: 'ASK', output: 'Need info 2', toolsUsed: [] },
      { next: 'COMPLETE', output: 'done from fallback', toolsUsed: [] },
    ];

    executeMovementMock.mockImplementation(async () => {
      const next = results.shift();
      if (!next) throw new Error('no mock result left');
      return next;
    });

    const result = await runPiece(
      makePiece(),
      'TASK',
      {} as never,
      workspacePath,
      undefined,
      undefined,
      { askCount: 0, maxAskPerJob: 1 },
    );

    // First ASK (askCount=1, maxAsk=1): returned to caller as waiting_human
    expect(result.status).toBe('waiting_human');
    expect(result.finalOutput).toBe('Need info 1');
  });

  it('aborts when ASK limit reached and no fallback transition exists', async () => {
    workspacePath = makeWorkspace();

    // Piece with a single movement that has no default_next and no rules with forward transitions
    const singlePiece: PieceDef = {
      name: 'test-single',
      description: 'test',
      max_movements: 10,
      initial_movement: 'execute',
      movements: [
        {
          name: 'execute',
          edit: true,
          persona: 'worker',
          instruction: 'do work',
          allowed_tools: [],
          rules: [],
          // No default_next
        },
      ],
    };

    executeMovementMock.mockResolvedValue({ next: 'ASK', output: 'Need info', toolsUsed: [] });

    const result = await runPiece(
      singlePiece,
      'TASK',
      {} as never,
      workspacePath,
      undefined,
      undefined,
      { askCount: 1, maxAskPerJob: 1 }, // Already at limit
    );

    // askCount starts at 1, +1 = 2 which > maxAsk=1, so it should try to find fallback
    // No fallback exists, so it aborts
    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('ask_limit_reached');
  });

  it('keeps piece YAML review prompts structured and plan-aware', () => {
    const piecesDir = join(process.cwd(), 'pieces');

    const general = loadPiece('general', piecesDir);
    const office = loadPiece('office-process', piecesDir);
    const research = loadPiece('research', piecesDir);

    expect(general.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 問題点');
    expect(general.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 合格基準');
    expect(office.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 期待する修正');
    expect(office.movements.find((m) => m.name === 'process')?.instruction).toContain('合格基準');
    expect(research.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 問題点');
    expect(research.movements.find((m) => m.name === 'analyze')?.instruction).toContain('合格基準');
    // After Phase 6a-2: verify has a single rule (analyze fallback) since
    // COMPLETE / ABORT / ASK terminals moved to the `complete` tool.
    expect(research.movements.find((m) => m.name === 'verify')?.rules[0]?.next).toBe('analyze');
  });
});

/**
 * `loadPiece` validation of reserved terminal values (COMPLETE / ABORT / ASK)
 * inside `rules[].next`, plus smoke checks on the bundled piece definitions.
 */
describe('loadPiece terminal-rule validation (Phase 6b)', () => {
  // Scratch directory holding the custom piece YAML files written by each test.
  let tempDir: string;

  // Writes `<name>.yaml` with the given body into the scratch directory.
  const writePiece = (name: string, body: string): void => {
    const target = join(tempDir, `${name}.yaml`);
    writeFileSync(target, body, 'utf-8');
  };

  // Loads a piece with the scratch directory passed as the third argument.
  const loadCustom = (name: string) => loadPiece(name, 'pieces', tempDir);

  // Directory of the pieces shipped with the project (relative to the cwd).
  const bundledPiecesDir = (): string => join(process.cwd(), 'pieces');

  beforeEach(() => {
    tempDir = mkdtempSync(join(tmpdir(), 'phase6b-loadpiece-'));
  });

  afterEach(() => {
    rmSync(tempDir, { recursive: true, force: true });
  });

  it('rejects custom piece with rules[].next: COMPLETE', () => {
    const yaml = `name: bad
description: terminal in rules
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules:
      - condition: done
        next: COMPLETE
`;
    writePiece('bad', yaml);

    expect(() => loadCustom('bad')).toThrow(/reserved terminal next values/);
  });

  it('rejects rules[].next: ABORT and ASK with the same error', () => {
    // Same piece shape for both cases; only name, condition and terminal differ.
    const cases = [
      { pieceName: 'bad-abort', condition: 'fail', terminal: 'ABORT' },
      { pieceName: 'bad-ask', condition: 'ask', terminal: 'ASK' },
    ];

    for (const { pieceName, condition, terminal } of cases) {
      writePiece(pieceName, `name: ${pieceName}
description: x
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    rules:
      - condition: ${condition}
        next: ${terminal}
`);
      const expectedMessage = new RegExp(`rule\\.next="${terminal}"`);
      expect(() => loadCustom(pieceName)).toThrow(expectedMessage);
    }
  });

  it('accepts default_next: COMPLETE (engine-internal sentinel)', () => {
    const yaml = `name: good
description: terminal only via default_next
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules: []
`;
    writePiece('good', yaml);

    const loaded = loadCustom('good');
    expect(loaded.movements[0]?.default_next).toBe('COMPLETE');
  });

  it('accepts movement-to-movement rules + WAIT_SUBTASKS sentinel', () => {
    const yaml = `name: multi
description: x
max_movements: 1
initial_movement: a
movements:
  - name: a
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: b
    rules:
      - condition: spawn done
        next: WAIT_SUBTASKS
      - condition: do next
        next: b
  - name: b
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules: []
`;
    writePiece('multi', yaml);

    expect(() => loadCustom('multi')).not.toThrow();
  });

  it('all 12 bundled pieces load without validation errors', () => {
    const piecesDir = bundledPiecesDir();
    const bundledNames = [
      'brainstorming',
      'chat',
      'data-process',
      'general',
      'office-process',
      'piece-builder',
      'research',
      'slide',
      'sns-research',
      'ssh-console',
      'ssh-ops',
      'x-ai-digest',
    ];

    bundledNames.forEach((pieceName) => {
      expect(() => loadPiece(pieceName, piecesDir)).not.toThrow();
    });
  });

  it('ssh-console piece declares SshConsole* tools and wildcard allowed_ssh_connections', () => {
    const sshConsole = loadPiece('ssh-console', bundledPiecesDir());
    expect(sshConsole.name).toBe('ssh-console');
    expect(sshConsole.movements).toHaveLength(1);

    const onlyMovement = sshConsole.movements[0]!;
    const requiredTools = ['SshConsoleEnsure', 'SshConsoleSend', 'SshConsoleSnapshot'];
    expect(onlyMovement.name).toBe('interact');
    expect(onlyMovement.allowed_tools).toEqual(expect.arrayContaining(requiredTools));
    expect(onlyMovement.allowed_ssh_connections).toEqual(['*']);
    expect(onlyMovement.default_next).toBe('COMPLETE');
  });

  it('ssh-ops piece declares SSH tools and wildcard allowed_ssh_connections', () => {
    const sshOps = loadPiece('ssh-ops', bundledPiecesDir());

    const executeMovementDef = sshOps.movements.find((m) => m.name === 'execute');
    expect(executeMovementDef).toBeDefined();
    expect(executeMovementDef!.allowed_tools).toEqual(
      expect.arrayContaining(['SshExec', 'SshUpload', 'SshDownload']),
    );
    expect(executeMovementDef!.allowed_ssh_connections).toEqual(['*']);

    const verifyMovementDef = sshOps.movements.find((m) => m.name === 'verify');
    expect(verifyMovementDef).toBeDefined();
    // verify has no SSH tools, so allowed_ssh_connections is optional and omitted.
    expect(verifyMovementDef!.allowed_ssh_connections).toBeUndefined();
  });
});

/**
 * Guards against `runPiece` aborting before the first movement when the
 * piece's `max_movements` is missing or not usable as a loop bound.
 */
describe('runPiece max_movements defensive default', () => {
  let workspace = '';

  // Every movement completes the run immediately.
  const stubImmediateCompletion = (): void => {
    executeMovementMock.mockResolvedValue({ next: 'COMPLETE', output: 'ok', toolsUsed: [] });
  };

  beforeEach(() => {
    executeMovementMock.mockReset();
    workspace = mkdtempSync(join(tmpdir(), 'mm-default-'));
  });

  afterEach(() => {
    if (!workspace) return;
    rmSync(workspace, { recursive: true, force: true });
    workspace = '';
  });

  // Regression: a piece YAML missing max_movements (e.g. an LLM-corrupted
  // override) used to make `while (steps < undefined)` false on the first
  // iteration, aborting instantly with "Exceeded max movements (undefined)"
  // before any movement ran.
  it('still iterates when piece.max_movements is missing (falls back to default)', async () => {
    stubImmediateCompletion();
    const withoutLimit = makePiece();
    delete (withoutLimit as Partial<PieceDef>).max_movements;

    const outcome = await runPiece(withoutLimit as PieceDef, 'TASK', {} as never, workspace);

    expect(outcome.status).toBe('completed');
    expect(executeMovementMock).toHaveBeenCalled();
  });

  it('still iterates when piece.max_movements is 0 or negative', async () => {
    stubImmediateCompletion();
    const zeroLimit = makePiece();
    zeroLimit.max_movements = 0;

    const outcome = await runPiece(zeroLimit, 'TASK', {} as never, workspace);

    expect(outcome.status).toBe('completed');
    expect(executeMovementMock).toHaveBeenCalled();
  });
});

import { buildFollowupNotice } from './piece-runner.js';

/**
 * `buildFollowupNotice` returns '' for a workspace without prior user-visible
 * artifacts, and a notice containing 【継続タスク】 once output/ or subtasks/
 * holds such content.
 */
describe('buildFollowupNotice (option C)', () => {
  let workspace: string;

  // Creates `<workspace>/<dirParts...>` and returns its path.
  const ensureDir = (...dirParts: string[]): string => {
    const dir = join(workspace, ...dirParts);
    mkdirSync(dir, { recursive: true });
    return dir;
  };

  // Writes a UTF-8 file inside `<workspace>/<dirParts...>`, creating the directory first.
  const seedFile = (dirParts: string[], fileName: string, content: string): void => {
    const dir = ensureDir(...dirParts);
    writeFileSync(join(dir, fileName), content, 'utf-8');
  };

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'followup-test-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
  });

  it('returns empty string for a fresh workspace (no follow-up signal)', () => {
    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });

  it('returns empty when output/ exists but is empty', () => {
    ensureDir('output');
    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });

  it('detects follow-up when output/ has any non-hidden file', () => {
    seedFile(['output'], 'report.md', 'prior work');

    const notice = buildFollowupNotice(workspace);

    for (const fragment of ['【継続タスク】', 'CreateChecklist', '2 回目以降']) {
      expect(notice).toContain(fragment);
    }
  });

  it('detects follow-up when subtasks/ has content (multi-stage flows)', () => {
    seedFile(['subtasks', '1'], 'placeholder.txt', 'x');
    const notice = buildFollowupNotice(workspace);
    expect(notice).toContain('【継続タスク】');
  });

  it('ignores hidden / engine-internal files', () => {
    // Phase 5 engine-internal artifacts must NOT count as follow-up signal,
    // otherwise the very first run would incorrectly self-flag.
    seedFile(['output'], 'memory-delta.json', '{}');
    seedFile(['output'], '.gitkeep', '');

    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });
});

// ============================================================
// Traceability T-2 — handoff / delta / followup / context_action
// ============================================================

import { runPiece } from './piece-runner.js';
import { readFileSync } from 'fs';
import { createFileEventLogger, parseEventLine, type EventBase } from '../progress/event-log.js';
import type { OpenAICompatClient, LLMEvent } from '../llm/openai-compat.js';

// NOTE(review): this registers the same module path with the same factory as
// the `vi.mock('./agent-loop.js', ...)` call near the top of this file, so it
// looks redundant — confirm and consider removing one of the two.
vi.mock('./agent-loop.js', () => ({
  executeMovement: vi.fn(),
}));

/**
 * Reads `<workspacePath>/logs/events.jsonl` and returns its events in file order.
 *
 * Returns an empty array when the file does not exist. Blank lines are
 * skipped; every other line must parse with `parseEventLine`.
 *
 * @throws Error (`bad event: <line>`) when a line does not parse with kind `ok`.
 */
function readAllEvents(workspacePath: string): EventBase[] {
  const logFile = join(workspacePath, 'logs', 'events.jsonl');
  if (!existsSyncEvents(logFile)) {
    return [];
  }

  const rawLines = readFileSync(logFile, 'utf-8').trim().split('\n');
  const events: EventBase[] = [];
  for (const line of rawLines) {
    if (!line) continue; // skip empty lines, e.g. from an empty file
    const parsed = parseEventLine(line);
    if (parsed.kind !== 'ok') {
      throw new Error(`bad event: ${line}`);
    }
    events.push(parsed.event);
  }
  return events;
}

import { existsSync as existsSyncEvents } from 'fs';

/**
 * Checks which events `runPiece` appends to `logs/events.jsonl` around
 * subtask boundaries (handoff read, delta absorb), follow-up detection, and
 * the run_start / run_complete bookends.
 */
describe('Traceability T-2: piece-runner emission for subtask boundary + followup', () => {
  let workspace: string;

  // Timestamp shared by every seeded fixture document.
  const FIXTURE_TIME = '2026-05-02T00:00:00.000Z';

  // Minimal piece: one movement `m` (edit: false) whose default_next is COMPLETE.
  const singleMovementPiece = (): PieceDef => ({
    name: 'tester',
    description: 'd',
    max_movements: 1,
    initial_movement: 'm',
    movements: [
      { name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' },
    ],
  });

  // Creates `<workspace>/<dirParts...>` and writes `value` there as JSON.
  const writeJson = (dirParts: string[], fileName: string, value: unknown): void => {
    const dir = join(workspace, ...dirParts);
    mkdirSync(dir, { recursive: true });
    writeFileSync(join(dir, fileName), JSON.stringify(value), 'utf-8');
  };

  // Stubs the movement to complete at once, runs the piece, returns the parsed event log.
  const runAndReadEvents = async (): Promise<EventBase[]> => {
    executeMovementMock.mockResolvedValue({
      next: 'COMPLETE', output: 'done', toolsUsed: [],
    });
    const client = {} as OpenAICompatClient;
    await runPiece(singleMovementPiece(), 'task', client, workspace);
    return readAllEvents(workspace);
  };

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'trace-t2-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
    executeMovementMock.mockReset();
  });

  it('emits memory_handoff_read when a parent handoff exists at startup', async () => {
    // Simulate a parent handoff already in the workspace.
    writeJson(['input'], 'memory-handoff.json', {
      version: 1,
      handoffId: 'h-1',
      parentJobId: 'parent-job-1',
      parentWorkspaceRelative: '../..',
      createdAt: FIXTURE_TIME,
      facts: [{ claim: 'parent X', confidence: 'high', evidencePaths: [], evidenceUrls: [], observedAt: FIXTURE_TIME, portability: 'portable', evidenceKind: 'none', lineage: [] }],
      decisions: [],
      openQuestions: [],
      doNotRepeat: [],
    });

    const events = await runAndReadEvents();

    const handoffRead = events.find((e) => e.kind === 'memory_handoff_read');
    expect(handoffRead).toBeDefined();
    const payload = handoffRead?.payload as { parentJobId: string };
    expect(payload.parentJobId).toBe('parent-job-1');
  });

  it('emits followup_detected when output/ has prior content', async () => {
    const outputDir = join(workspace, 'output');
    mkdirSync(outputDir, { recursive: true });
    writeFileSync(join(outputDir, 'prior.md'), 'previous turn output', 'utf-8');

    const events = await runAndReadEvents();

    const sawFollowup = events.some((e) => e.kind === 'followup_detected');
    expect(sawFollowup).toBe(true);
  });

  it('emits memory_delta_absorb (skipped_already_absorbed) when re-resuming', async () => {
    // Pre-seed a child delta + an absorbed-deltas log saying it's already done.
    writeJson(['subtasks', '1', 'output'], 'memory-delta.json', {
      version: 1,
      deltaId: 'd-1',
      childJobId: 'child-1',
      childWorkspaceRelative: 'subtasks/1',
      childStatus: 'success',
      partial: false,
      createdAt: FIXTURE_TIME,
      facts: [{ claim: 'child finding', confidence: 'high', evidencePaths: [], evidenceUrls: [], observedAt: FIXTURE_TIME, portability: 'portable', evidenceKind: 'none', lineage: [] }],
      decisions: [], openQuestions: [], doNotRepeat: [],
    });
    writeJson(['logs'], 'absorbed-deltas.json', { version: 1, ids: ['d-1'] });

    const events = await runAndReadEvents();

    const absorb = events.find((e) => e.kind === 'memory_delta_absorb');
    expect(absorb).toBeDefined();
    const payload = absorb?.payload as { outcome: string };
    expect(payload.outcome).toBe('skipped_already_absorbed');
  });

  it('emits memory_delta_absorb (merged) and counts when a fresh delta is found', async () => {
    writeJson(['subtasks', '1', 'output'], 'memory-delta.json', {
      version: 1,
      deltaId: 'd-2',
      childJobId: 'child-2',
      childWorkspaceRelative: 'subtasks/1',
      childStatus: 'success',
      partial: false,
      createdAt: FIXTURE_TIME,
      facts: [{ claim: 'child A', confidence: 'high', evidencePaths: ['output/a.ts'], evidenceUrls: [], observedAt: FIXTURE_TIME, portability: 'workspace_local', evidenceKind: 'local_path', lineage: [] }],
      decisions: [], openQuestions: [], doNotRepeat: [],
    });

    const events = await runAndReadEvents();

    const isMergedAbsorb = (e: EventBase): boolean =>
      e.kind === 'memory_delta_absorb' && (e.payload as { outcome: string }).outcome === 'merged';
    const absorb = events.find(isMergedAbsorb);
    expect(absorb).toBeDefined();
    const payload = absorb?.payload as { counts: { factsAdded: number } };
    expect(payload.counts.factsAdded).toBe(1);
  });

  it('emits run_start and run_complete bookending each piece run', async () => {
    const events = await runAndReadEvents();

    const firstEvent = events[0]!;
    const lastEvent = events[events.length - 1]!;
    expect(firstEvent.kind).toBe('run_start');
    expect(lastEvent.kind).toBe('run_complete');
    const completePayload = lastEvent.payload as { status: string };
    expect(completePayload.status).toBe('completed');
  });
});

import { existsSync as existsSyncSnapshot, readdirSync as readdirSnapshot } from 'fs';

/**
 * Memory-snapshot behaviour of `runPiece` for terminal outcomes.
 *
 * Every test runs a single-movement piece against a fresh temp workspace
 * (with `executeMovement` mocked where a movement is expected to run) and
 * then inspects `<workspace>/logs` for `memory-snapshot-*.json` files and the
 * events returned by `readAllEvents`. The suite pins that cancelled / aborted
 * runs produce a snapshot while completed / waiting_subtasks runs do not.
 */
describe('Cancel-traceability PR1: memory snapshot on terminal non-success', () => {
  let workspace: string;

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'cancel-snap-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
    // Drop per-test mock results so they cannot bleed into the next test.
    vi.mocked(executeMovement).mockReset();
  });

  /** Lists `memory-snapshot-*.json` file names under `<workspace>/logs` (empty when the dir is absent). */
  function findSnapshotFiles(): string[] {
    const dir = join(workspace, 'logs');
    if (!existsSyncSnapshot(dir)) return [];
    return readdirSnapshot(dir).filter((f) => f.startsWith('memory-snapshot-') && f.endsWith('.json'));
  }

  it('writes snapshot + meta-event when cancelled before any movement', async () => {
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 3, initial_movement: 'm',
      movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }],
    };

    // cancel BEFORE movement runs: cancelCheck returns true on first guard.
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace, undefined, undefined, { cancelCheck: () => true });
    expect(result.status).toBe('cancelled');
    expect(result.memorySnapshotPath).toBeDefined();
    expect(result.memorySnapshotPath).toMatch(/^logs\/memory-snapshot-cancelled-/);

    const files = findSnapshotFiles();
    expect(files.length).toBe(1);
    expect(files[0]).toMatch(/^memory-snapshot-cancelled-.*\.json$/);

    const fileContent = JSON.parse(readFileSync(join(workspace, 'logs', files[0]!), 'utf-8'));
    expect(fileContent.schemaVersion).toBe(2);
    expect(fileContent.status).toBe('cancelled');
    expect(fileContent.memory).toBeDefined();
    expect(fileContent.memory.facts).toEqual([]);
    expect(fileContent.runId).toBeDefined();
    // v2 forensics fields
    expect(fileContent.finalOutput).toBeDefined();
    expect(Array.isArray(fileContent.movementHistory)).toBe(true);
    expect(Array.isArray(fileContent.lessons)).toBe(true);
    expect(Array.isArray(fileContent.contextActions)).toBe(true);
    expect(fileContent.stats).toBeDefined();
    expect(typeof fileContent.stats.totalSteps).toBe('number');
    expect(fileContent.eventsLogRelative).toBe('logs/events.jsonl');

    // The event log must record the snapshot write ...
    const events = readAllEvents(workspace);
    const written = events.find((e) => e.kind === 'memory_snapshot_written');
    expect(written).toBeDefined();
    expect((written!.payload as { status: string }).status).toBe('cancelled');
    expect((written!.payload as { path: string }).path).toMatch(/^logs\/memory-snapshot-cancelled-/);

    // ... and the closing run_complete event must point at the same snapshot.
    const runComplete = events[events.length - 1]!;
    expect(runComplete.kind).toBe('run_complete');
    const payload = runComplete.payload as { status: string; cancel?: { phase: string; snapshotPath: string }; memorySnapshotPath?: string };
    expect(payload.status).toBe('cancelled');
    expect(payload.memorySnapshotPath).toBeDefined();
    expect(payload.cancel?.phase).toBe('before_movement');
    expect(payload.cancel?.snapshotPath).toBe(payload.memorySnapshotPath);
  });

  it('writes snapshot when cancelled mid-movement (ABORT with cancelled output)', async () => {
    // The movement itself reports ABORT with a cancellation message as output.
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'ABORT', output: 'Job was cancelled by user request', toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 3, initial_movement: 'm',
      movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }],
    };
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
    expect(result.status).toBe('cancelled');
    expect(result.memorySnapshotPath).toBeDefined();

    const events = readAllEvents(workspace);
    const runComplete = events[events.length - 1]!;
    expect(runComplete.kind).toBe('run_complete');
    const payload = runComplete.payload as { cancel?: { phase: string; movement: string } };
    expect(payload.cancel?.phase).toBe('mid_movement');
    expect(payload.cancel?.movement).toBe('m');
  });

  it('writes snapshot on aborted (max_movements exceeded)', async () => {
    // Always return next='m' to bounce back, hitting max_movements.
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'm', output: 'still working', toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 1, initial_movement: 'm',
      movements: [{
        name: 'm', edit: false, persona: 'p', instruction: 'i',
        allowed_tools: [],
        rules: [{ condition: 'always', next: 'm' }],
        default_next: 'COMPLETE',
        max_consecutive_revisits: 100,
      }],
    };
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
    expect(result.status).toBe('aborted');
    expect(result.memorySnapshotPath).toBeDefined();
    expect(result.memorySnapshotPath).toMatch(/^logs\/memory-snapshot-aborted-/);

    const events = readAllEvents(workspace);
    const written = events.find((e) => e.kind === 'memory_snapshot_written');
    expect(written).toBeDefined();
    expect((written!.payload as { status: string }).status).toBe('aborted');
  });

  it('v2 snapshot captures finalOutput / movementHistory / lessons on agent-self-abort', async () => {
    // Simulate `complete({status:'aborted', abort_reason:'...'})` — the
    // forensics gap that motivated schemaVersion=2. The LLM's abort_reason
    // surfaces as MovementResult.output and PieceRunResult.finalOutput; v1
    // dropped it entirely from the snapshot.
    const abortReasonText = 'Cannot proceed: required input file is missing and user is unavailable';
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'ABORT',
      output: abortReasonText,
      toolsUsed: ['Read', 'Glob'],
      lessons: 'Lesson: validate input presence before plan phase',
    });
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 3, initial_movement: 'respond',
      movements: [{
        name: 'respond', edit: false, persona: 'p', instruction: 'i',
        allowed_tools: [], rules: [], default_next: 'COMPLETE',
      }],
    };
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('movement_abort');
    expect(result.memorySnapshotPath).toBeDefined();

    const files = findSnapshotFiles();
    expect(files.length).toBe(1);
    const fileContent = JSON.parse(readFileSync(join(workspace, 'logs', files[0]!), 'utf-8'));

    expect(fileContent.schemaVersion).toBe(2);
    expect(fileContent.status).toBe('aborted');
    expect(fileContent.abortReason).toBe('movement_abort');
    expect(fileContent.currentMovement).toBe('respond');

    // The LLM's abort_reason text — the most important forensic field — is
    // now preserved verbatim at top level.
    expect(fileContent.finalOutput).toBe(abortReasonText);

    // movementHistory shows the path taken with per-step tool usage.
    expect(Array.isArray(fileContent.movementHistory)).toBe(true);
    expect(fileContent.movementHistory.length).toBe(1);
    expect(fileContent.movementHistory[0]).toMatchObject({
      name: 'respond',
      next: 'ABORT',
      toolsUsed: ['Read', 'Glob'],
      outputPreview: abortReasonText,
      outputTruncated: false,
      hasLessons: true,
    });

    // lessons accumulated from movement results are persisted.
    expect(Array.isArray(fileContent.lessons)).toBe(true);
    expect(fileContent.lessons.length).toBe(1);
    expect(fileContent.lessons[0].movement).toBe('respond');
    expect(fileContent.lessons[0].lessons).toContain('validate input presence');

    expect(fileContent.stats.totalSteps).toBe(1);
    expect(fileContent.stats.movementCount).toBe(1);
    expect(fileContent.eventsLogRelative).toBe('logs/events.jsonl');
  });

  it('v2 snapshot truncates long movement outputs but preserves finalOutput in full', async () => {
    const longOutput = 'x'.repeat(2_000);
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'ABORT',
      output: longOutput,
      toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 3, initial_movement: 'm',
      movements: [{
        name: 'm', edit: false, persona: 'p', instruction: 'i',
        allowed_tools: [], rules: [], default_next: 'COMPLETE',
      }],
    };
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
    expect(result.status).toBe('aborted');

    const files = findSnapshotFiles();
    // Fail with a clear assertion (not an opaque path TypeError on files[0])
    // when the snapshot is missing or duplicated.
    expect(files.length).toBe(1);
    const fileContent = JSON.parse(readFileSync(join(workspace, 'logs', files[0]!), 'utf-8'));
    expect(fileContent.movementHistory[0].outputPreview.length).toBe(500);
    expect(fileContent.movementHistory[0].outputTruncated).toBe(true);
    // finalOutput keeps the full text uncapped so the LLM's reasoning isn't lost.
    expect(fileContent.finalOutput).toBe(longOutput);
  });

  it('does NOT write snapshot on successful completion', async () => {
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'COMPLETE', output: 'done', toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 1, initial_movement: 'm',
      movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }],
    };
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
    expect(result.status).toBe('completed');
    expect(result.memorySnapshotPath).toBeUndefined();
    expect(findSnapshotFiles()).toEqual([]);

    // No snapshot file and no snapshot event either.
    const events = readAllEvents(workspace);
    expect(events.find((e) => e.kind === 'memory_snapshot_written')).toBeUndefined();
  });

  it('does NOT write snapshot on waiting_subtasks (transient pause)', async () => {
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'WAIT_SUBTASKS', output: 'spawned children', toolsUsed: [],
    });
    const piece: PieceDef = {
      name: 'tester', description: 'd', max_movements: 1, initial_movement: 'm',
      movements: [{
        name: 'm', edit: false, persona: 'p', instruction: 'i',
        allowed_tools: [],
        rules: [{ condition: 'spawned', next: 'WAIT_SUBTASKS' }],
        default_next: 'COMPLETE',
      }],
    };
    const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
    expect(result.status).toBe('waiting_subtasks');
    expect(result.memorySnapshotPath).toBeUndefined();
    expect(findSnapshotFiles()).toEqual([]);
  });
});

/**
 * `normalizeRequiredMcp` rewrites `piece.required_mcp` in place; each case
 * builds a one-movement piece, normalizes it, and checks the resulting field.
 */
describe('piece required_mcp parsing', () => {
  const PIECE_NAME = 'mcp-test';

  /** The single movement every fixture in this suite carries. */
  const soleMovement = (): PieceDef['movements'][number] => ({
    name: 'm',
    edit: false,
    persona: 'p',
    instruction: 'i',
    allowed_tools: [],
    rules: [],
  });

  /** Builds a piece whose `required_mcp` is `declared` verbatim, bypassing the static type. */
  const pieceDeclaring = (declared: unknown): PieceDef => ({
    name: PIECE_NAME,
    description: 'test',
    max_movements: 1,
    initial_movement: 'm',
    required_mcp: declared as string[],
    movements: [soleMovement()],
  });

  /** Normalizes `piece` in place and hands back its `required_mcp`. */
  const normalized = (piece: PieceDef): PieceDef['required_mcp'] => {
    normalizeRequiredMcp(piece, PIECE_NAME);
    return piece.required_mcp;
  };

  it('retains valid required_mcp slugs', () => {
    const slugs = normalized(pieceDeclaring(['canva', 'notion']));
    expect(slugs).toEqual(['canva', 'notion']);
  });

  it('drops invalid slugs and keeps only valid ones', () => {
    const slugs = normalized(pieceDeclaring(['canva', 'BAD!!', 123]));
    expect(slugs).toEqual(['canva']);
  });

  it('leaves required_mcp undefined when field is absent', () => {
    // Built by hand so the `required_mcp` key is genuinely missing.
    const bare: PieceDef = {
      name: PIECE_NAME,
      description: 'test',
      max_movements: 1,
      initial_movement: 'm',
      movements: [soleMovement()],
    };
    expect(normalized(bare)).toBeUndefined();
  });

  it('normalizes required_mcp to empty array when field is not an array', () => {
    const slugs = normalized(pieceDeclaring('not-an-array'));
    expect(slugs).toEqual([]);
  });
});

// Phase 4: per-movement SSH connection allowlist validation.
/**
 * Exercises `validateAllowedSshConnections` (returns a list of error strings)
 * and `validatePieceDef` (throws) against pieces built from small fixtures.
 */
describe('allowed_ssh_connections validation (Phase 4)', () => {
  type Movement = PieceDef['movements'][number];

  /** Well-formed connection id reused by the positive cases. */
  const SAMPLE_CONNECTION_ID = '6f9619ff-8b86-d011-b42d-00c04fc964ff';

  /** Minimal movement named "m1"; fields in `patch` override the defaults. */
  const movementWith = (patch: Partial<Movement> = {}): Movement => ({
    name: 'm1',
    edit: false,
    persona: 'p',
    instruction: 'i',
    allowed_tools: [],
    rules: [],
    ...patch,
  });

  /** Wraps `steps` in a piece named "ssh-test" that starts at the first step. */
  const pieceOf = (...steps: Movement[]): PieceDef => ({
    name: 'ssh-test',
    description: 'test',
    max_movements: 1,
    initial_movement: steps[0]?.name ?? 'm1',
    movements: steps,
  });

  /** Shorthand for a piece with exactly one patched movement. */
  const singleMovementPiece = (patch: Partial<Movement>): PieceDef => pieceOf(movementWith(patch));

  it('passes when no SSH tools and no allowlist', () => {
    const subject = singleMovementPiece({ allowed_tools: ['Read'] });
    expect(validateAllowedSshConnections(subject)).toEqual([]);
    expect(() => validatePieceDef(subject)).not.toThrow();
  });

  it('passes when SSH tool present and allowlist declared (UUID)', () => {
    const subject = singleMovementPiece({
      allowed_tools: ['SshExec', 'Read'],
      allowed_ssh_connections: [SAMPLE_CONNECTION_ID],
    });
    expect(validateAllowedSshConnections(subject)).toEqual([]);
  });

  it('passes when SSH tool present and allowlist declared (empty array = explicit deny)', () => {
    const subject = singleMovementPiece({ allowed_tools: ['SshExec'], allowed_ssh_connections: [] });
    expect(validateAllowedSshConnections(subject)).toEqual([]);
  });

  it('passes when allowlist is wildcard ["*"]', () => {
    const subject = singleMovementPiece({ allowed_tools: ['SshUpload'], allowed_ssh_connections: ['*'] });
    expect(validateAllowedSshConnections(subject)).toEqual([]);
  });

  it('rejects when SSH tool present but allowlist missing', () => {
    const subject = singleMovementPiece({ allowed_tools: ['SshExec'] });
    const problems = validateAllowedSshConnections(subject);
    expect(problems).toHaveLength(1);
    expect(problems[0]).toContain('uses SSH tool(s) but allowed_ssh_connections is not declared');
    expect(() => validatePieceDef(subject)).toThrow(/allowed_ssh_connections/);
  });

  it('rejects SshUpload without allowlist', () => {
    const problems = validateAllowedSshConnections(singleMovementPiece({ allowed_tools: ['SshUpload'] }));
    expect(problems).toHaveLength(1);
  });

  it('rejects SshDownload without allowlist', () => {
    const problems = validateAllowedSshConnections(singleMovementPiece({ allowed_tools: ['SshDownload'] }));
    expect(problems).toHaveLength(1);
  });

  it('rejects non-array allowlist', () => {
    // Cast smuggles a string past the type checker to hit the runtime guard.
    const subject = singleMovementPiece({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: 'not-an-array' as unknown as string[],
    });
    const [firstProblem] = validateAllowedSshConnections(subject);
    expect(firstProblem).toMatch(/must be an array/);
  });

  it('rejects non-string entries', () => {
    const subject = singleMovementPiece({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: [123 as unknown as string],
    });
    const [firstProblem] = validateAllowedSshConnections(subject);
    expect(firstProblem).toMatch(/must be a string/);
  });

  it('rejects entries that are neither wildcard nor valid id format', () => {
    const subject = singleMovementPiece({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: ['short'],
    });
    const [firstProblem] = validateAllowedSshConnections(subject);
    expect(firstProblem).toMatch(/must be '\*' or a lowercase hex/);
  });

  it('rejects uppercase / non-hex characters in ids', () => {
    const subject = singleMovementPiece({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: ['ZZZZZZZZ-not-hex'],
    });
    expect(validateAllowedSshConnections(subject)).toHaveLength(1);
  });

  it('allowlist without SSH tool is allowed (no-op, future-proofing)', () => {
    const subject = singleMovementPiece({
      allowed_tools: ['Read'],
      allowed_ssh_connections: [SAMPLE_CONNECTION_ID],
    });
    expect(validateAllowedSshConnections(subject)).toEqual([]);
  });

  it('reports offenders across multiple movements', () => {
    // m1 (no allowlist) and m4 (malformed id) are the expected offenders.
    const subject = pieceOf(
      movementWith({ name: 'm1', allowed_tools: ['SshExec'] }),
      movementWith({ name: 'm2', allowed_tools: ['Read'] }),
      movementWith({ name: 'm3', allowed_tools: ['SshDownload'], allowed_ssh_connections: ['*'] }),
      movementWith({ name: 'm4', allowed_tools: ['SshUpload'], allowed_ssh_connections: ['BAD_ID'] }),
    );
    const problems = validateAllowedSshConnections(subject);
    expect(problems).toHaveLength(2);
    expect(problems[0]).toContain('movement="m1"');
    expect(problems[1]).toContain('movement="m4"');
  });

  it('validatePieceDef composes error message with piece name', () => {
    const subject = singleMovementPiece({ allowed_tools: ['SshExec'] });
    expect(() => validatePieceDef(subject)).toThrow(/Piece "ssh-test" has invalid allowed_ssh_connections/);
  });
});

// --- Task 1: loadPiece multi-dir support ---
/**
 * `loadPiece` accepts either a single custom directory or an ordered list of
 * them as its third argument. Each test writes minimal piece YAML files into
 * throwaway temp directories and checks which definition gets loaded.
 */
describe('loadPiece multi-dir (string | string[])', () => {
  /** Temp dirs created by the running test; emptied by the afterEach hook. */
  const tempDirs: string[] = [];

  afterEach(() => {
    // Cleanup lives in a hook so directories are removed even when an
    // assertion throws mid-test; splice(0) drains the registry as it iterates.
    for (const dir of tempDirs.splice(0)) {
      rmSync(dir, { recursive: true, force: true });
    }
  });

  /** Creates a temp directory with the given name prefix and registers it for cleanup. */
  function makeTempDir(prefix: string): string {
    const dir = mkdtempSync(join(tmpdir(), prefix));
    tempDirs.push(dir);
    return dir;
  }

  /** Writes `<dir>/<name>.yaml` holding a one-movement piece with the given name and description. */
  function writePieceYaml(dir: string, name: string, description: string): void {
    writeFileSync(
      join(dir, `${name}.yaml`),
      `name: ${name}\ndescription: ${description}\nmax_movements: 1\ninitial_movement: go\nmovements:\n  - name: go\n    edit: false\n    persona: w\n    instruction: x\n    allowed_tools: []\n    rules: []\n    default_next: COMPLETE\n`,
    );
  }

  it('resolves from a list of custom dirs (per-user wins over builtin name miss)', () => {
    const dirA = makeTempDir('pa-'); // empty
    const dirB = makeTempDir('pb-');
    writePieceYaml(dirB, 'mycustom', 'd');
    // array form: searches dirA then dirB then builtin
    const p = loadPiece('mycustom', 'pieces', [dirA, dirB]);
    expect(p.name).toBe('mycustom');
    // builtin still resolvable when not in any custom dir
    expect(() => loadPiece('chat', 'pieces', [dirA, dirB])).not.toThrow();
  });

  it('first dir wins when same name appears in two custom dirs', () => {
    const dirA = makeTempDir('pa-');
    const dirB = makeTempDir('pb-');
    // Same piece name in both dirs; only the description tells them apart.
    writePieceYaml(dirA, 'dup', 'from-a');
    writePieceYaml(dirB, 'dup', 'from-b');
    const p = loadPiece('dup', 'pieces', [dirA, dirB]);
    expect(p.description).toBe('from-a');
  });

  it('string form still works (backward compat)', () => {
    const dir = makeTempDir('pc-');
    writePieceYaml(dir, 'strcompat', 'str');
    const p = loadPiece('strcompat', 'pieces', dir);
    expect(p.name).toBe('strcompat');
  });
});