1161 lines
45 KiB
TypeScript
1161 lines
45 KiB
TypeScript
import { execFileSync } from 'child_process';
|
|
import { tmpdir } from 'os';
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs';
|
|
import { join } from 'path';
|
|
import type { MovementResult } from './agent-loop.js';
|
|
import type { PieceDef } from './piece-runner.js';
|
|
|
|
vi.mock('./agent-loop.js', () => ({
|
|
executeMovement: vi.fn(),
|
|
}));
|
|
|
|
import { executeMovement } from './agent-loop.js';
|
|
import { loadPiece, runPiece, normalizeRequiredMcp, validatePieceDef, validateAllowedSshConnections } from './piece-runner.js';
|
|
|
|
const executeMovementMock = vi.mocked(executeMovement);
|
|
|
|
/**
 * Builds the multi-movement piece fixture shared by the runPiece tests.
 *
 * A fresh object is returned on every call so individual tests can mutate
 * the result (for example to tweak per-movement limits) without leaking
 * state into other tests.
 *
 * Movement names are unique: each name appears exactly once, so resolving a
 * `next` / `default_next` target by name is unambiguous.
 *
 * @returns A piece starting at `execute` with the movements
 *          `execute`, `verify`, `analyze` and `plan`.
 */
function makePiece(): PieceDef {
  return {
    name: 'test-piece',
    description: 'test',
    max_movements: 10,
    initial_movement: 'execute',
    movements: [
      {
        name: 'execute',
        edit: true,
        persona: 'worker',
        instruction: 'execute',
        allowed_tools: [],
        rules: [],
        default_next: 'verify',
      },
      {
        name: 'verify',
        edit: false,
        persona: 'reviewer',
        instruction: 'verify',
        allowed_tools: [],
        rules: [],
        default_next: 'execute',
      },
      {
        name: 'analyze',
        edit: true,
        persona: 'analyst',
        instruction: 'analyze',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      },
      {
        name: 'plan',
        edit: false,
        persona: 'planner',
        instruction: 'plan',
        allowed_tools: [],
        rules: [],
        default_next: 'analyze',
      },
    ],
  };
}
|
|
|
|
/**
 * Creates a uniquely named scratch directory under the OS temp directory.
 *
 * @returns Path of the new directory; its name starts with
 *          `piece-runner-test-`. The caller is responsible for removing it.
 */
function makeWorkspace(): string {
  const prefix = join(tmpdir(), 'piece-runner-test-');
  const createdDir = mkdtempSync(prefix);
  return createdDir;
}
|
|
|
|
/**
 * Creates a scratch workspace that is also a git repository with one commit.
 *
 * The repository is initialised on branch `main`, gets a repo-local test
 * identity, and contains a committed `README.md` with two lines so later
 * edits show up in `git status` / `git diff`.
 *
 * @returns Path of the workspace directory.
 */
function makeGitWorkspace(): string {
  const repoDir = makeWorkspace();

  // Runs one git command inside the new workspace.
  const git = (...args: string[]): void => {
    execFileSync('git', args, { cwd: repoDir });
  };

  git('init', '--initial-branch=main');
  git('config', 'user.name', 'Test User');
  git('config', 'user.email', 'test@example.com');

  writeFileSync(join(repoDir, 'README.md'), 'line1\nline2\n', 'utf-8');
  git('add', 'README.md');
  git('commit', '-m', 'init');

  return repoDir;
}
|
|
|
|
/**
 * Exercises runPiece with a mocked executeMovement: review feedback carried
 * between movements, git context appended after verify loops, loop
 * detection, ASK handling, and the structure of the bundled piece prompts.
 */
describe('piece-runner review feedback flow', () => {
  // Workspace created by the running test; '' means there is nothing to remove.
  let workspacePath = '';

  beforeEach(() => {
    executeMovementMock.mockReset();
  });

  afterEach(() => {
    if (workspacePath) {
      rmSync(workspacePath, { recursive: true, force: true });
      workspacePath = '';
    }
  });

  it('carries cumulative verify feedback into later execute/analyze movements', async () => {
    workspacePath = makeWorkspace();
    const instructions: string[] = [];
    // Scripted results, consumed one per movement call in order.
    const results: MovementResult[] = [
      { next: 'verify', output: 'first draft', toolsUsed: [] },
      { next: 'execute', output: 'review 1: fix title', toolsUsed: [] },
      { next: 'verify', output: 'second draft', toolsUsed: [] },
      { next: 'analyze', output: 'review 2: add conclusion', toolsUsed: [] },
      { next: 'COMPLETE', output: 'done', toolsUsed: [] },
    ];

    executeMovementMock.mockImplementation(async (_movement, instruction) => {
      instructions.push(instruction);
      const next = results.shift();
      if (!next) throw new Error('no mock result left');
      return next;
    });

    const result = await runPiece(makePiece(), 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('completed');
    // Call 0 sees the bare task; call 2 sees only review 1; call 4 sees both reviews.
    expect(instructions[0]).toBe('TASK');
    expect(instructions[2]).toContain('これまでのレビュー指摘');
    expect(instructions[2]).toContain('review 1: fix title');
    expect(instructions[2]).not.toContain('review 2: add conclusion');
    expect(instructions[4]).toContain('review 1: fix title');
    expect(instructions[4]).toContain('review 2: add conclusion');
  });

  it('appends safe git status and diff context after verify loops', async () => {
    workspacePath = makeGitWorkspace();
    const instructions: string[] = [];
    let callIndex = 0;

    executeMovementMock.mockImplementation(async (_movement, instruction) => {
      instructions.push(instruction);

      if (callIndex === 0) {
        // First call: modify a tracked file and add files under output/, input/ and logs/.
        mkdirSync(join(workspacePath, 'output'), { recursive: true });
        mkdirSync(join(workspacePath, 'input'), { recursive: true });
        mkdirSync(join(workspacePath, 'logs'), { recursive: true });
        writeFileSync(join(workspacePath, 'README.md'), 'line1\nline2 changed\n', 'utf-8');
        writeFileSync(join(workspacePath, 'output', 'report.md'), '# report\n', 'utf-8');
        writeFileSync(join(workspacePath, 'input', 'noise.txt'), 'ignore me\n', 'utf-8');
        writeFileSync(join(workspacePath, 'logs', 'runtime.log'), 'ignore me too\n', 'utf-8');
        callIndex++;
        return { next: 'verify', output: 'draft ready', toolsUsed: [] };
      }

      if (callIndex === 1) {
        // Second call: the reviewer sends the work back to execute.
        callIndex++;
        return { next: 'execute', output: '[判定] needs_fix\n## 問題点\n- README.md: wording\n## 期待する修正\n- tighten wording', toolsUsed: [] };
      }

      callIndex++;
      return { next: 'COMPLETE', output: 'done', toolsUsed: [] };
    });

    const result = await runPiece(makePiece(), 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('completed');
    expect(instructions[2]).toContain('これまでのレビュー指摘');
    expect(instructions[2]).toContain('## 現在の変更状況');
    expect(instructions[2]).toContain('## 変更差分(抜粋)');
    expect(instructions[2]).toContain('README.md');
    expect(instructions[2]).toContain('output/report.md');
    // Files under input/ and logs/ must not appear in the appended context.
    expect(instructions[2]).not.toContain('input/noise.txt');
    expect(instructions[2]).not.toContain('logs/runtime.log');
  });

  it('aborts when loop detection fires due to consecutive revisits', async () => {
    workspacePath = makeWorkspace();

    // Movement always transitions back to itself: execute→execute→execute...
    // This triggers the consecutive visit counter.
    executeMovementMock.mockResolvedValue({ next: 'execute', output: 'still going', toolsUsed: [] });

    const piece = makePiece();
    // Set a low max_consecutive_revisits so the test triggers quickly.
    for (const movement of piece.movements) {
      movement.max_consecutive_revisits = 2;
    }

    const result = await runPiece(piece, 'TASK', {} as never, workspacePath);

    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('loop_detected');
    expect(result.finalOutput).toContain('Loop detected');
  });

  it('returns waiting_human for the first ASK while the ASK limit is not exceeded', async () => {
    workspacePath = makeWorkspace();

    // Only the first scripted result is expected to be consumed: the first ASK
    // is handed back to the caller. The remaining entries describe what a
    // continued run would receive.
    const results: MovementResult[] = [
      { next: 'ASK', output: 'Need info 1', toolsUsed: [] },
      { next: 'ASK', output: 'Need info 2', toolsUsed: [] },
      { next: 'COMPLETE', output: 'done from fallback', toolsUsed: [] },
    ];

    executeMovementMock.mockImplementation(async () => {
      const next = results.shift();
      if (!next) throw new Error('no mock result left');
      return next;
    });

    const result = await runPiece(
      makePiece(),
      'TASK',
      {} as never,
      workspacePath,
      undefined,
      undefined,
      { askCount: 0, maxAskPerJob: 1 },
    );

    // askCount starts at 0, +1 = 1 which does not exceed maxAsk=1, so the
    // question is returned to the caller as waiting_human.
    expect(result.status).toBe('waiting_human');
    expect(result.finalOutput).toBe('Need info 1');
  });

  it('aborts when ASK limit reached and no fallback transition exists', async () => {
    workspacePath = makeWorkspace();

    // Piece with a single movement that has no default_next and no rules with forward transitions.
    const singlePiece: PieceDef = {
      name: 'test-single',
      description: 'test',
      max_movements: 10,
      initial_movement: 'execute',
      movements: [
        {
          name: 'execute',
          edit: true,
          persona: 'worker',
          instruction: 'do work',
          allowed_tools: [],
          rules: [],
          // No default_next
        },
      ],
    };

    executeMovementMock.mockResolvedValue({ next: 'ASK', output: 'Need info', toolsUsed: [] });

    const result = await runPiece(
      singlePiece,
      'TASK',
      {} as never,
      workspacePath,
      undefined,
      undefined,
      { askCount: 1, maxAskPerJob: 1 }, // Already at limit
    );

    // askCount starts at 1, +1 = 2 which > maxAsk=1, so it should try to find fallback.
    // No fallback exists, so it aborts.
    expect(result.status).toBe('aborted');
    expect(result.abortReason).toBe('ask_limit_reached');
  });

  it('keeps piece YAML review prompts structured and plan-aware', () => {
    const piecesDir = join(process.cwd(), 'pieces');

    const general = loadPiece('general', piecesDir);
    const office = loadPiece('office-process', piecesDir);
    const research = loadPiece('research', piecesDir);

    expect(general.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 問題点');
    expect(general.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 合格基準');
    expect(office.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 期待する修正');
    expect(office.movements.find((m) => m.name === 'process')?.instruction).toContain('合格基準');
    expect(research.movements.find((m) => m.name === 'verify')?.instruction).toContain('## 問題点');
    expect(research.movements.find((m) => m.name === 'analyze')?.instruction).toContain('合格基準');
    // After Phase 6a-2: verify has a single rule (analyze fallback) since
    // COMPLETE / ABORT / ASK terminals moved to the `complete` tool.
    expect(research.movements.find((m) => m.name === 'verify')?.rules[0]?.next).toBe('analyze');
  });
});
|
|
|
|
/**
 * loadPiece validation of terminal `next` values.
 *
 * Custom pieces are written as YAML into a temp directory and loaded through
 * loadPiece. `rules[].next` must not be COMPLETE / ABORT / ASK, while
 * `default_next: COMPLETE` and `rules[].next: WAIT_SUBTASKS` are accepted.
 * The bundled pieces under `<cwd>/pieces` are loaded as a smoke test.
 *
 * The YAML bodies are kept flush-left inside the template literals so the
 * nesting (movement fields under `- name:`, rule fields under `rules:`) is
 * exactly what the parser sees.
 */
describe('loadPiece terminal-rule validation (Phase 6b)', () => {
  let tempDir: string;

  beforeEach(() => {
    tempDir = mkdtempSync(join(tmpdir(), 'phase6b-loadpiece-'));
  });

  afterEach(() => {
    rmSync(tempDir, { recursive: true, force: true });
  });

  // Writes `<tempDir>/<name>.yaml` with the given YAML body.
  function writePiece(name: string, body: string): void {
    writeFileSync(join(tempDir, `${name}.yaml`), body, 'utf-8');
  }

  it('rejects custom piece with rules[].next: COMPLETE', () => {
    writePiece('bad', `name: bad
description: terminal in rules
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules:
      - condition: done
        next: COMPLETE
`);
    expect(() => loadPiece('bad', 'pieces', tempDir)).toThrow(/reserved terminal next values/);
  });

  it('rejects rules[].next: ABORT and ASK with the same error', () => {
    writePiece('bad-abort', `name: bad-abort
description: x
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    rules:
      - condition: fail
        next: ABORT
`);
    expect(() => loadPiece('bad-abort', 'pieces', tempDir)).toThrow(/rule\.next="ABORT"/);

    writePiece('bad-ask', `name: bad-ask
description: x
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    rules:
      - condition: ask
        next: ASK
`);
    expect(() => loadPiece('bad-ask', 'pieces', tempDir)).toThrow(/rule\.next="ASK"/);
  });

  it('accepts default_next: COMPLETE (engine-internal sentinel)', () => {
    writePiece('good', `name: good
description: terminal only via default_next
max_movements: 1
initial_movement: only
movements:
  - name: only
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules: []
`);
    const piece = loadPiece('good', 'pieces', tempDir);
    expect(piece.movements[0]?.default_next).toBe('COMPLETE');
  });

  it('accepts movement-to-movement rules + WAIT_SUBTASKS sentinel', () => {
    writePiece('multi', `name: multi
description: x
max_movements: 1
initial_movement: a
movements:
  - name: a
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: b
    rules:
      - condition: spawn done
        next: WAIT_SUBTASKS
      - condition: do next
        next: b
  - name: b
    edit: false
    persona: p
    instruction: i
    allowed_tools: [Read]
    default_next: COMPLETE
    rules: []
`);
    expect(() => loadPiece('multi', 'pieces', tempDir)).not.toThrow();
  });

  it('all 12 bundled pieces load without validation errors', () => {
    const piecesDir = join(process.cwd(), 'pieces');
    const names = ['brainstorming', 'chat', 'data-process', 'general',
      'office-process', 'piece-builder', 'research', 'slide', 'sns-research',
      'ssh-console', 'ssh-ops', 'x-ai-digest'];
    for (const name of names) {
      expect(() => loadPiece(name, piecesDir)).not.toThrow();
    }
  });

  it('ssh-console piece declares SshConsole* tools and wildcard allowed_ssh_connections', () => {
    const piece = loadPiece('ssh-console', join(process.cwd(), 'pieces'));
    expect(piece.name).toBe('ssh-console');
    expect(piece.movements).toHaveLength(1);
    const interact = piece.movements[0]!;
    expect(interact.name).toBe('interact');
    expect(interact.allowed_tools).toEqual(expect.arrayContaining([
      'SshConsoleEnsure', 'SshConsoleSend', 'SshConsoleSnapshot',
    ]));
    expect(interact.allowed_ssh_connections).toEqual(['*']);
    expect(interact.default_next).toBe('COMPLETE');
  });

  it('ssh-ops piece declares SSH tools and wildcard allowed_ssh_connections', () => {
    const piece = loadPiece('ssh-ops', join(process.cwd(), 'pieces'));
    const execute = piece.movements.find((m) => m.name === 'execute');
    expect(execute).toBeDefined();
    expect(execute!.allowed_tools).toEqual(expect.arrayContaining(['SshExec', 'SshUpload', 'SshDownload']));
    expect(execute!.allowed_ssh_connections).toEqual(['*']);
    const verify = piece.movements.find((m) => m.name === 'verify');
    expect(verify).toBeDefined();
    // verify has no SSH tools, so allowed_ssh_connections is optional and omitted.
    expect(verify!.allowed_ssh_connections).toBeUndefined();
  });
});
|
|
|
|
/**
 * runPiece must still execute movements when `max_movements` is unusable.
 */
describe('runPiece max_movements defensive default', () => {
  let workspace = '';

  beforeEach(() => {
    executeMovementMock.mockReset();
    workspace = mkdtempSync(join(tmpdir(), 'mm-default-'));
  });

  afterEach(() => {
    if (!workspace) return;
    rmSync(workspace, { recursive: true, force: true });
    workspace = '';
  });

  // Runs the piece against a mock that completes immediately and checks that
  // the run finished as completed after calling the movement executor.
  async function expectRunCompletes(piece: PieceDef): Promise<void> {
    executeMovementMock.mockResolvedValue({ next: 'COMPLETE', output: 'ok', toolsUsed: [] });
    const outcome = await runPiece(piece, 'TASK', {} as never, workspace);
    expect(outcome.status).toBe('completed');
    expect(executeMovementMock).toHaveBeenCalled();
  }

  // Regression: a piece YAML missing max_movements (e.g. an LLM-corrupted
  // override) used to make `while (steps < undefined)` false on the first
  // iteration, aborting instantly with "Exceeded max movements (undefined)"
  // before any movement ran.
  it('still iterates when piece.max_movements is missing (falls back to default)', async () => {
    const withoutLimit = makePiece();
    delete (withoutLimit as Partial<PieceDef>).max_movements;
    await expectRunCompletes(withoutLimit);
  });

  it('still iterates when piece.max_movements is 0 or negative', async () => {
    const zeroLimit = makePiece();
    zeroLimit.max_movements = 0;
    await expectRunCompletes(zeroLimit);
  });
});
|
|
|
|
import { buildFollowupNotice } from './piece-runner.js';
|
|
|
|
/**
 * buildFollowupNotice(workspace): returns '' when the workspace shows no
 * prior work, and a notice containing the follow-up marker when output/ or
 * subtasks/ already holds user-visible files.
 */
describe('buildFollowupNotice (option C)', () => {
  let workspace: string;

  // Creates `<workspace>/<dirParts...>/` and, when a file name is given,
  // writes that file into it.
  const seed = (dirParts: string[], fileName?: string, content = ''): void => {
    mkdirSync(join(workspace, ...dirParts), { recursive: true });
    if (fileName !== undefined) {
      writeFileSync(join(workspace, ...dirParts, fileName), content, 'utf-8');
    }
  };

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'followup-test-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
  });

  it('returns empty string for a fresh workspace (no follow-up signal)', () => {
    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });

  it('returns empty when output/ exists but is empty', () => {
    seed(['output']);
    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });

  it('detects follow-up when output/ has any non-hidden file', () => {
    seed(['output'], 'report.md', 'prior work');
    const notice = buildFollowupNotice(workspace);
    for (const expected of ['【継続タスク】', 'CreateChecklist', '2 回目以降']) {
      expect(notice).toContain(expected);
    }
  });

  it('detects follow-up when subtasks/ has content (multi-stage flows)', () => {
    seed(['subtasks', '1'], 'placeholder.txt', 'x');
    const notice = buildFollowupNotice(workspace);
    expect(notice).toContain('【継続タスク】');
  });

  it('ignores hidden / engine-internal files', () => {
    // Phase 5 engine-internal artifacts must NOT count as follow-up signal,
    // otherwise the very first run would incorrectly self-flag.
    seed(['output'], 'memory-delta.json', '{}');
    seed(['output'], '.gitkeep', '');
    const notice = buildFollowupNotice(workspace);
    expect(notice).toBe('');
  });
});
|
|
|
|
// ============================================================
|
|
// Traceability T-2 — handoff / delta / followup / context_action
|
|
// ============================================================
|
|
|
|
import { runPiece } from './piece-runner.js';
|
|
import { readFileSync } from 'fs';
|
|
import { createFileEventLogger, parseEventLine, type EventBase } from '../progress/event-log.js';
|
|
import type { OpenAICompatClient, LLMEvent } from '../llm/openai-compat.js';
|
|
|
|
vi.mock('./agent-loop.js', () => ({
|
|
executeMovement: vi.fn(),
|
|
}));
|
|
|
|
/**
 * Reads every event recorded in `<workspacePath>/logs/events.jsonl`.
 *
 * @param workspacePath Workspace whose event log should be read.
 * @returns The parsed events in file order, or an empty array when the log
 *          file does not exist.
 * @throws Error when a non-empty line is not parsed as `ok` by parseEventLine.
 */
function readAllEvents(workspacePath: string): EventBase[] {
  const logFile = join(workspacePath, 'logs', 'events.jsonl');
  if (!existsSyncEvents(logFile)) {
    return [];
  }

  const parsedEvents: EventBase[] = [];
  const rawLines = readFileSync(logFile, 'utf-8').trim().split('\n');
  for (const rawLine of rawLines) {
    // Blank lines carry no event.
    if (!rawLine) continue;
    const parsed = parseEventLine(rawLine);
    if (parsed.kind !== 'ok') {
      throw new Error(`bad event: ${rawLine}`);
    }
    parsedEvents.push(parsed.event);
  }
  return parsedEvents;
}
|
|
|
|
import { existsSync as existsSyncEvents } from 'fs';
|
|
|
|
/**
 * Events written to logs/events.jsonl by runPiece around subtask boundaries
 * and follow-up runs: memory_handoff_read, followup_detected,
 * memory_delta_absorb, and the run_start / run_complete bookends.
 */
describe('Traceability T-2: piece-runner emission for subtask boundary + followup', () => {
  let workspace: string;

  const TIMESTAMP = '2026-05-02T00:00:00.000Z';

  // Piece with a single movement `m`, shared by every test in this suite.
  const singleMovementPiece = (): PieceDef => ({
    name: 'tester',
    description: 'd',
    max_movements: 1,
    initial_movement: 'm',
    movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }],
  });

  // Makes the mocked movement executor report COMPLETE, runs the piece in the
  // current workspace, and returns the events that were logged.
  const runAndReadEvents = async (): Promise<EventBase[]> => {
    vi.mocked(executeMovement).mockResolvedValue({
      next: 'COMPLETE', output: 'done', toolsUsed: [],
    });
    await runPiece(singleMovementPiece(), 'task', {} as OpenAICompatClient, workspace);
    return readAllEvents(workspace);
  };

  // Writes subtasks/1/output/memory-delta.json carrying exactly one fact.
  const writeChildDelta = (deltaId: string, childJobId: string, fact: Record<string, unknown>): void => {
    const childOutput = join(workspace, 'subtasks', '1', 'output');
    mkdirSync(childOutput, { recursive: true });
    writeFileSync(join(childOutput, 'memory-delta.json'), JSON.stringify({
      version: 1,
      deltaId,
      childJobId,
      childWorkspaceRelative: 'subtasks/1',
      childStatus: 'success',
      partial: false,
      createdAt: TIMESTAMP,
      facts: [fact],
      decisions: [], openQuestions: [], doNotRepeat: [],
    }), 'utf-8');
  };

  // Fact with no evidence attached, marked portable.
  const portableFact = (claim: string): Record<string, unknown> => ({
    claim, confidence: 'high', evidencePaths: [], evidenceUrls: [], observedAt: TIMESTAMP, portability: 'portable', evidenceKind: 'none', lineage: [],
  });

  beforeEach(() => {
    workspace = mkdtempSync(join(tmpdir(), 'trace-t2-'));
  });

  afterEach(() => {
    rmSync(workspace, { recursive: true, force: true });
    vi.mocked(executeMovement).mockReset();
  });

  it('emits memory_handoff_read when a parent handoff exists at startup', async () => {
    // Simulate a parent handoff already in the workspace.
    const inputDir = join(workspace, 'input');
    mkdirSync(inputDir, { recursive: true });
    const handoff = {
      version: 1,
      handoffId: 'h-1',
      parentJobId: 'parent-job-1',
      parentWorkspaceRelative: '../..',
      createdAt: TIMESTAMP,
      facts: [portableFact('parent X')],
      decisions: [],
      openQuestions: [],
      doNotRepeat: [],
    };
    writeFileSync(join(inputDir, 'memory-handoff.json'), JSON.stringify(handoff), 'utf-8');

    const events = await runAndReadEvents();

    const handoffRead = events.find((e) => e.kind === 'memory_handoff_read');
    expect(handoffRead).toBeDefined();
    const payload = handoffRead?.payload as { parentJobId: string };
    expect(payload.parentJobId).toBe('parent-job-1');
  });

  it('emits followup_detected when output/ has prior content', async () => {
    const outputDir = join(workspace, 'output');
    mkdirSync(outputDir, { recursive: true });
    writeFileSync(join(outputDir, 'prior.md'), 'previous turn output', 'utf-8');

    const events = await runAndReadEvents();

    const kinds = events.map((e) => e.kind);
    expect(kinds.includes('followup_detected')).toBe(true);
  });

  it('emits memory_delta_absorb (skipped_already_absorbed) when re-resuming', async () => {
    // Pre-seed a child delta + an absorbed-deltas log saying it's already done.
    writeChildDelta('d-1', 'child-1', portableFact('child finding'));
    const logsDir = join(workspace, 'logs');
    mkdirSync(logsDir, { recursive: true });
    writeFileSync(join(logsDir, 'absorbed-deltas.json'), JSON.stringify({ version: 1, ids: ['d-1'] }), 'utf-8');

    const events = await runAndReadEvents();

    const absorb = events.find((e) => e.kind === 'memory_delta_absorb');
    expect(absorb).toBeDefined();
    const payload = absorb?.payload as { outcome: string };
    expect(payload.outcome).toBe('skipped_already_absorbed');
  });

  it('emits memory_delta_absorb (merged) and counts when a fresh delta is found', async () => {
    writeChildDelta('d-2', 'child-2', {
      claim: 'child A', confidence: 'high', evidencePaths: ['output/a.ts'], evidenceUrls: [], observedAt: TIMESTAMP, portability: 'workspace_local', evidenceKind: 'local_path', lineage: [],
    });

    const events = await runAndReadEvents();

    const isMergedAbsorb = (e: EventBase): boolean =>
      e.kind === 'memory_delta_absorb' && (e.payload as { outcome: string }).outcome === 'merged';
    const absorb = events.find(isMergedAbsorb);
    expect(absorb).toBeDefined();
    const payload = absorb?.payload as { counts: { factsAdded: number } };
    expect(payload.counts.factsAdded).toBe(1);
  });

  it('emits run_start and run_complete bookending each piece run', async () => {
    const events = await runAndReadEvents();

    const firstEvent = events[0]!;
    const lastEvent = events[events.length - 1]!;
    expect(firstEvent.kind).toBe('run_start');
    expect(lastEvent.kind).toBe('run_complete');
    const completePayload = lastEvent.payload as { status: string };
    expect(completePayload.status).toBe('completed');
  });
});
|
|
|
|
import { existsSync as existsSyncSnapshot, readdirSync as readdirSnapshot } from 'fs';
|
|
|
|
describe('Cancel-traceability PR1: memory snapshot on terminal non-success', () => {
|
|
let workspace: string;
|
|
|
|
// Each test gets its own temp workspace under the OS temp directory.
beforeEach(() => {
  const prefix = join(tmpdir(), 'cancel-snap-');
  workspace = mkdtempSync(prefix);
});

// Remove the workspace first, then reset the shared movement mock.
afterEach(() => {
  rmSync(workspace, { force: true, recursive: true });
  const movementMock = vi.mocked(executeMovement);
  movementMock.mockReset();
});
|
|
|
|
/**
 * Lists the memory snapshot files in the current workspace's logs/ directory.
 *
 * @returns File names (not paths) that start with `memory-snapshot-` and end
 *          with `.json`; an empty array when logs/ does not exist.
 */
function findSnapshotFiles(): string[] {
  const logsDir = join(workspace, 'logs');
  if (!existsSyncSnapshot(logsDir)) {
    return [];
  }

  const snapshotNames: string[] = [];
  for (const entry of readdirSnapshot(logsDir)) {
    const looksLikeSnapshot = entry.startsWith('memory-snapshot-') && entry.endsWith('.json');
    if (looksLikeSnapshot) {
      snapshotNames.push(entry);
    }
  }
  return snapshotNames;
}
|
|
|
|
// Cancellation observed before the first movement: the run must end as
// `cancelled`, write one v2 snapshot file, log memory_snapshot_written, and
// finish the event log with a run_complete carrying the cancel details.
it('writes snapshot + meta-event when cancelled before any movement', async () => {
  const piece: PieceDef = {
    name: 'tester',
    description: 'd',
    max_movements: 3,
    initial_movement: 'm',
    movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }],
  };

  // cancel BEFORE movement runs: cancelCheck returns true on first guard.
  const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace, undefined, undefined, { cancelCheck: () => true });
  expect(result.status).toBe('cancelled');
  expect(result.memorySnapshotPath).toBeDefined();
  expect(result.memorySnapshotPath).toMatch(/^logs\/memory-snapshot-cancelled-/);

  const snapshotFiles = findSnapshotFiles();
  expect(snapshotFiles.length).toBe(1);
  expect(snapshotFiles[0]).toMatch(/^memory-snapshot-cancelled-.*\.json$/);

  const snapshotPath = join(workspace, 'logs', snapshotFiles[0]!);
  const snapshot = JSON.parse(readFileSync(snapshotPath, 'utf-8'));
  expect(snapshot.schemaVersion).toBe(2);
  expect(snapshot.status).toBe('cancelled');
  expect(snapshot.memory).toBeDefined();
  expect(snapshot.memory.facts).toEqual([]);
  expect(snapshot.runId).toBeDefined();
  // v2 forensics fields
  expect(snapshot.finalOutput).toBeDefined();
  for (const listField of ['movementHistory', 'lessons', 'contextActions']) {
    expect(Array.isArray(snapshot[listField])).toBe(true);
  }
  expect(snapshot.stats).toBeDefined();
  expect(typeof snapshot.stats.totalSteps).toBe('number');
  expect(snapshot.eventsLogRelative).toBe('logs/events.jsonl');

  const events = readAllEvents(workspace);
  const written = events.find((e) => e.kind === 'memory_snapshot_written');
  expect(written).toBeDefined();
  const writtenPayload = written!.payload as { status: string; path: string };
  expect(writtenPayload.status).toBe('cancelled');
  expect(writtenPayload.path).toMatch(/^logs\/memory-snapshot-cancelled-/);

  const runComplete = events[events.length - 1]!;
  expect(runComplete.kind).toBe('run_complete');
  const payload = runComplete.payload as { status: string; cancel?: { phase: string; snapshotPath: string }; memorySnapshotPath?: string };
  expect(payload.status).toBe('cancelled');
  expect(payload.memorySnapshotPath).toBeDefined();
  expect(payload.cancel?.phase).toBe('before_movement');
  expect(payload.cancel?.snapshotPath).toBe(payload.memorySnapshotPath);
});
|
|
|
|
// A movement that returns ABORT with the cancellation message is reported as
// a cancelled run whose run_complete event names the phase and the movement.
it('writes snapshot when cancelled mid-movement (ABORT with cancelled output)', async () => {
  const movementMock = vi.mocked(executeMovement);
  movementMock.mockResolvedValue({
    next: 'ABORT',
    output: 'Job was cancelled by user request',
    toolsUsed: [],
  });

  const piece: PieceDef = {
    name: 'tester',
    description: 'd',
    max_movements: 3,
    initial_movement: 'm',
    movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [], default_next: 'COMPLETE' }],
  };

  const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
  expect(result.status).toBe('cancelled');
  expect(result.memorySnapshotPath).toBeDefined();

  const loggedEvents = readAllEvents(workspace);
  const lastEvent = loggedEvents[loggedEvents.length - 1]!;
  expect(lastEvent.kind).toBe('run_complete');

  const { cancel } = lastEvent.payload as { cancel?: { phase: string; movement: string } };
  expect(cancel?.phase).toBe('mid_movement');
  expect(cancel?.movement).toBe('m');
});
|
|
|
|
// Running out of movements ends the run as `aborted` and still produces a
// snapshot plus the matching memory_snapshot_written event.
it('writes snapshot on aborted (max_movements exceeded)', async () => {
  // Always return next='m' to bounce back, hitting max_movements.
  const movementMock = vi.mocked(executeMovement);
  movementMock.mockResolvedValue({ next: 'm', output: 'still working', toolsUsed: [] });

  const piece: PieceDef = {
    name: 'tester',
    description: 'd',
    max_movements: 1,
    initial_movement: 'm',
    movements: [
      {
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [{ condition: 'always', next: 'm' }],
        default_next: 'COMPLETE',
        max_consecutive_revisits: 100,
      },
    ],
  };

  const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
  expect(result.status).toBe('aborted');
  expect(result.memorySnapshotPath).toBeDefined();
  expect(result.memorySnapshotPath).toMatch(/^logs\/memory-snapshot-aborted-/);

  const snapshotEvent = readAllEvents(workspace).find((e) => e.kind === 'memory_snapshot_written');
  expect(snapshotEvent).toBeDefined();
  const { status } = snapshotEvent!.payload as { status: string };
  expect(status).toBe('aborted');
});
|
|
|
|
it('v2 snapshot captures finalOutput / movementHistory / lessons on agent-self-abort', async () => {
  // Models an agent-initiated abort, i.e. `complete({status:'aborted', abort_reason:'...'})`.
  // This is the forensics gap that motivated schemaVersion=2: the LLM's
  // abort_reason arrives as MovementResult.output and PieceRunResult.finalOutput,
  // but v1 snapshots dropped it entirely.
  const abortReasonText = 'Cannot proceed: required input file is missing and user is unavailable';

  const movementMock = vi.mocked(executeMovement);
  movementMock.mockResolvedValue({
    next: 'ABORT',
    output: abortReasonText,
    toolsUsed: ['Read', 'Glob'],
    lessons: 'Lesson: validate input presence before plan phase',
  });

  const abortingPiece: PieceDef = {
    name: 'tester',
    description: 'd',
    max_movements: 3,
    initial_movement: 'respond',
    movements: [
      {
        name: 'respond',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      },
    ],
  };

  const outcome = await runPiece(abortingPiece, 'task', {} as OpenAICompatClient, workspace);
  expect(outcome.status).toBe('aborted');
  expect(outcome.abortReason).toBe('movement_abort');
  expect(outcome.memorySnapshotPath).toBeDefined();

  // Exactly one snapshot file is expected; parse it for the field-level checks.
  const snapshotFiles = findSnapshotFiles();
  expect(snapshotFiles.length).toBe(1);
  const snapshotPath = join(workspace, 'logs', snapshotFiles[0]!);
  const snapshot = JSON.parse(readFileSync(snapshotPath, 'utf-8'));

  // Header fields.
  expect(snapshot.schemaVersion).toBe(2);
  expect(snapshot.status).toBe('aborted');
  expect(snapshot.abortReason).toBe('movement_abort');
  expect(snapshot.currentMovement).toBe('respond');

  // The abort_reason text — the most important forensic field — must be kept
  // verbatim at the top level of the snapshot.
  expect(snapshot.finalOutput).toBe(abortReasonText);

  // movementHistory records the path taken, including per-step tool usage.
  expect(Array.isArray(snapshot.movementHistory)).toBe(true);
  expect(snapshot.movementHistory.length).toBe(1);
  expect(snapshot.movementHistory[0]).toMatchObject({
    name: 'respond',
    next: 'ABORT',
    toolsUsed: ['Read', 'Glob'],
    outputPreview: abortReasonText,
    outputTruncated: false,
    hasLessons: true,
  });

  // Lessons returned by movement results are persisted alongside the history.
  expect(Array.isArray(snapshot.lessons)).toBe(true);
  expect(snapshot.lessons.length).toBe(1);
  const firstLesson = snapshot.lessons[0];
  expect(firstLesson.movement).toBe('respond');
  expect(firstLesson.lessons).toContain('validate input presence');

  // Aggregate stats and the pointer to the event log.
  expect(snapshot.stats.totalSteps).toBe(1);
  expect(snapshot.stats.movementCount).toBe(1);
  expect(snapshot.eventsLogRelative).toBe('logs/events.jsonl');
});
|
|
|
|
it('v2 snapshot truncates long movement outputs but preserves finalOutput in full', async () => {
  // 2,000 characters: longer than the 500-character preview asserted below.
  const longOutput = 'x'.repeat(2_000);
  vi.mocked(executeMovement).mockResolvedValue({
    next: 'ABORT',
    output: longOutput,
    toolsUsed: [],
  });
  const piece: PieceDef = {
    name: 'tester', description: 'd', max_movements: 3, initial_movement: 'm',
    movements: [{
      name: 'm', edit: false, persona: 'p', instruction: 'i',
      allowed_tools: [], rules: [], default_next: 'COMPLETE',
    }],
  };
  const result = await runPiece(piece, 'task', {} as OpenAICompatClient, workspace);
  expect(result.status).toBe('aborted');

  const files = findSnapshotFiles();
  // Guard before indexing: if no snapshot was written, fail with a clear
  // assertion instead of an opaque error from join()/readFileSync on undefined.
  expect(files.length).toBe(1);
  const fileContent = JSON.parse(readFileSync(join(workspace, 'logs', files[0]!), 'utf-8'));

  // Per-movement previews are capped and flagged as truncated.
  expect(fileContent.movementHistory[0].outputPreview.length).toBe(500);
  expect(fileContent.movementHistory[0].outputTruncated).toBe(true);
  // finalOutput keeps the full text uncapped so the LLM's reasoning isn't lost.
  expect(fileContent.finalOutput).toBe(longOutput);
});
|
|
|
|
it('does NOT write snapshot on successful completion', async () => {
  // The single movement finishes the piece on its first step.
  const movementMock = vi.mocked(executeMovement);
  movementMock.mockResolvedValue({ next: 'COMPLETE', output: 'done', toolsUsed: [] });

  const completingPiece: PieceDef = {
    name: 'tester',
    description: 'd',
    max_movements: 1,
    initial_movement: 'm',
    movements: [
      {
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [],
        default_next: 'COMPLETE',
      },
    ],
  };

  const outcome = await runPiece(completingPiece, 'task', {} as OpenAICompatClient, workspace);

  // No snapshot path on the result and no snapshot file on disk.
  expect(outcome.status).toBe('completed');
  expect(outcome.memorySnapshotPath).toBeUndefined();
  expect(findSnapshotFiles()).toEqual([]);

  // The event log must not mention a snapshot either.
  const snapshotEvent = readAllEvents(workspace).find((e) => e.kind === 'memory_snapshot_written');
  expect(snapshotEvent).toBeUndefined();
});
|
|
|
|
it('does NOT write snapshot on waiting_subtasks (transient pause)', async () => {
  // The movement reports WAIT_SUBTASKS, which the run surfaces as 'waiting_subtasks'.
  const movementMock = vi.mocked(executeMovement);
  movementMock.mockResolvedValue({
    next: 'WAIT_SUBTASKS',
    output: 'spawned children',
    toolsUsed: [],
  });

  const pausingPiece: PieceDef = {
    name: 'tester',
    description: 'd',
    max_movements: 1,
    initial_movement: 'm',
    movements: [
      {
        name: 'm',
        edit: false,
        persona: 'p',
        instruction: 'i',
        allowed_tools: [],
        rules: [{ condition: 'spawned', next: 'WAIT_SUBTASKS' }],
        default_next: 'COMPLETE',
      },
    ],
  };

  const outcome = await runPiece(pausingPiece, 'task', {} as OpenAICompatClient, workspace);

  // A pause is not a terminal failure: no snapshot path and no snapshot file.
  expect(outcome.status).toBe('waiting_subtasks');
  expect(outcome.memorySnapshotPath).toBeUndefined();
  expect(findSnapshotFiles()).toEqual([]);
});
|
|
});
|
|
|
|
describe('piece required_mcp parsing', () => {
  /**
   * Builds a minimal single-movement piece whose `required_mcp` field holds the
   * given raw value.
   *
   * The parameter is typed `unknown` so tests can pass malformed input directly;
   * the single cast inside the literal exists only to satisfy `PieceDef`.
   */
  function makePieceWithMcp(required_mcp: unknown): PieceDef {
    return {
      name: 'mcp-test',
      description: 'test',
      max_movements: 1,
      initial_movement: 'm',
      required_mcp: required_mcp as string[],
      movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [] }],
    };
  }

  it('retains valid required_mcp slugs', () => {
    const piece = makePieceWithMcp(['canva', 'notion']);
    normalizeRequiredMcp(piece, 'mcp-test');
    expect(piece.required_mcp).toEqual(['canva', 'notion']);
  });

  it('drops invalid slugs and keeps only valid ones', () => {
    // Mixes one accepted slug with a malformed string and a non-string entry.
    const piece = makePieceWithMcp(['canva', 'BAD!!', 123]);
    normalizeRequiredMcp(piece, 'mcp-test');
    expect(piece.required_mcp).toEqual(['canva']);
  });

  it('leaves required_mcp undefined when field is absent', () => {
    // Built by hand (not via makePieceWithMcp) so the `required_mcp` key is
    // truly missing rather than present with an undefined value.
    const piece: PieceDef = {
      name: 'mcp-test',
      description: 'test',
      max_movements: 1,
      initial_movement: 'm',
      movements: [{ name: 'm', edit: false, persona: 'p', instruction: 'i', allowed_tools: [], rules: [] }],
    };
    normalizeRequiredMcp(piece, 'mcp-test');
    expect(piece.required_mcp).toBeUndefined();
  });

  it('normalizes required_mcp to empty array when field is not an array', () => {
    // The helper already accepts `unknown`, so no assertion is needed at the call site.
    const piece = makePieceWithMcp('not-an-array');
    normalizeRequiredMcp(piece, 'mcp-test');
    expect(piece.required_mcp).toEqual([]);
  });
});
|
|
|
|
// Phase 4: per-movement SSH connection allowlist validation.
|
|
describe('allowed_ssh_connections validation (Phase 4)', () => {
  type Movement = PieceDef['movements'][number];

  // Connection id used by the tests that expect a declared allowlist entry to be accepted.
  const ACCEPTED_CONNECTION_ID = '6f9619ff-8b86-d011-b42d-00c04fc964ff';

  /** Returns a baseline movement named 'm1', with any supplied fields overriding the defaults. */
  const movementWith = (overrides: Partial<Movement> = {}): Movement => ({
    name: 'm1',
    edit: false,
    persona: 'p',
    instruction: 'i',
    allowed_tools: [],
    rules: [],
    ...overrides,
  });

  /** Wraps the movements in a piece named 'ssh-test' that starts at the first movement (or 'm1' if none). */
  const pieceOf = (movements: PieceDef['movements']): PieceDef => ({
    name: 'ssh-test',
    description: 'test',
    max_movements: 1,
    initial_movement: movements[0]?.name ?? 'm1',
    movements,
  });

  it('passes when no SSH tools and no allowlist', () => {
    const piece = pieceOf([movementWith({ allowed_tools: ['Read'] })]);

    expect(validateAllowedSshConnections(piece)).toEqual([]);
    expect(() => validatePieceDef(piece)).not.toThrow();
  });

  it('passes when SSH tool present and allowlist declared (UUID)', () => {
    const movement = movementWith({
      allowed_tools: ['SshExec', 'Read'],
      allowed_ssh_connections: [ACCEPTED_CONNECTION_ID],
    });

    expect(validateAllowedSshConnections(pieceOf([movement]))).toEqual([]);
  });

  it('passes when SSH tool present and allowlist declared (empty array = explicit deny)', () => {
    const movement = movementWith({ allowed_tools: ['SshExec'], allowed_ssh_connections: [] });

    expect(validateAllowedSshConnections(pieceOf([movement]))).toEqual([]);
  });

  it('passes when allowlist is wildcard ["*"]', () => {
    const movement = movementWith({ allowed_tools: ['SshUpload'], allowed_ssh_connections: ['*'] });

    expect(validateAllowedSshConnections(pieceOf([movement]))).toEqual([]);
  });

  it('rejects when SSH tool present but allowlist missing', () => {
    const piece = pieceOf([movementWith({ allowed_tools: ['SshExec'] })]);

    const problems = validateAllowedSshConnections(piece);
    expect(problems).toHaveLength(1);
    expect(problems[0]).toContain('uses SSH tool(s) but allowed_ssh_connections is not declared');
    expect(() => validatePieceDef(piece)).toThrow(/allowed_ssh_connections/);
  });

  // Same check for the remaining SSH tools; titles expand to
  // 'rejects SshUpload without allowlist' and 'rejects SshDownload without allowlist'.
  it.each(['SshUpload', 'SshDownload'])('rejects %s without allowlist', (sshTool) => {
    const piece = pieceOf([movementWith({ allowed_tools: [sshTool] })]);

    expect(validateAllowedSshConnections(piece)).toHaveLength(1);
  });

  it('rejects non-array allowlist', () => {
    const movement = movementWith({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: 'not-an-array' as unknown as string[],
    });

    const problems = validateAllowedSshConnections(pieceOf([movement]));
    expect(problems[0]).toMatch(/must be an array/);
  });

  it('rejects non-string entries', () => {
    const movement = movementWith({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: [123 as unknown as string],
    });

    const problems = validateAllowedSshConnections(pieceOf([movement]));
    expect(problems[0]).toMatch(/must be a string/);
  });

  it('rejects entries that are neither wildcard nor valid id format', () => {
    const movement = movementWith({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: ['short'],
    });

    const problems = validateAllowedSshConnections(pieceOf([movement]));
    expect(problems[0]).toMatch(/must be '\*' or a lowercase hex/);
  });

  it('rejects uppercase / non-hex characters in ids', () => {
    const movement = movementWith({
      allowed_tools: ['SshExec'],
      allowed_ssh_connections: ['ZZZZZZZZ-not-hex'],
    });

    expect(validateAllowedSshConnections(pieceOf([movement]))).toHaveLength(1);
  });

  it('allowlist without SSH tool is allowed (no-op, future-proofing)', () => {
    const movement = movementWith({
      allowed_tools: ['Read'],
      allowed_ssh_connections: [ACCEPTED_CONNECTION_ID],
    });

    expect(validateAllowedSshConnections(pieceOf([movement]))).toEqual([]);
  });

  it('reports offenders across multiple movements', () => {
    // Only m1 (no allowlist) and m4 (malformed id) are asserted to be reported, in that order.
    const piece = pieceOf([
      movementWith({ name: 'm1', allowed_tools: ['SshExec'] }),
      movementWith({ name: 'm2', allowed_tools: ['Read'] }),
      movementWith({ name: 'm3', allowed_tools: ['SshDownload'], allowed_ssh_connections: ['*'] }),
      movementWith({ name: 'm4', allowed_tools: ['SshUpload'], allowed_ssh_connections: ['BAD_ID'] }),
    ]);

    const problems = validateAllowedSshConnections(piece);
    expect(problems).toHaveLength(2);
    expect(problems[0]).toContain('movement="m1"');
    expect(problems[1]).toContain('movement="m4"');
  });

  it('validatePieceDef composes error message with piece name', () => {
    const piece = pieceOf([movementWith({ allowed_tools: ['SshExec'] })]);

    expect(() => validatePieceDef(piece)).toThrow(/Piece "ssh-test" has invalid allowed_ssh_connections/);
  });
});
|
|
|
|
// --- Task 1: loadPiece multi-dir support ---
|
|
describe('loadPiece multi-dir (string | string[])', () => {
  /**
   * Writes `<dir>/<name>.yaml` containing a minimal one-movement piece
   * definition with the given name and description.
   *
   * NOTE(review): whitespace inside the template literal is copied verbatim
   * from the existing fixtures — confirm the nested indentation under
   * `movements:` parses as intended.
   */
  function writePieceYaml(dir: string, name: string, description: string): void {
    writeFileSync(
      join(dir, `${name}.yaml`),
      `name: ${name}\ndescription: ${description}\nmax_movements: 1\ninitial_movement: go\nmovements:\n - name: go\n edit: false\n persona: w\n instruction: x\n allowed_tools: []\n rules: []\n default_next: COMPLETE\n`,
    );
  }

  /** Deletes a temp directory; `force` keeps cleanup from throwing if it is already gone. */
  function removeDir(dir: string): void {
    rmSync(dir, { recursive: true, force: true });
  }

  it('resolves from a list of custom dirs (per-user wins over builtin name miss)', () => {
    const dirA = mkdtempSync(join(tmpdir(), 'pa-')); // empty
    const dirB = mkdtempSync(join(tmpdir(), 'pb-'));
    // try/finally so the temp dirs are removed even when an assertion fails.
    try {
      writePieceYaml(dirB, 'mycustom', 'd');
      // array form: searches dirA then dirB then builtin
      const p = loadPiece('mycustom', 'pieces', [dirA, dirB]);
      expect(p.name).toBe('mycustom');
      // builtin still resolvable when not in any custom dir
      expect(() => loadPiece('chat', 'pieces', [dirA, dirB])).not.toThrow();
    } finally {
      removeDir(dirA);
      removeDir(dirB);
    }
  });

  it('first dir wins when same name appears in two custom dirs', () => {
    const dirA = mkdtempSync(join(tmpdir(), 'pa-'));
    const dirB = mkdtempSync(join(tmpdir(), 'pb-'));
    try {
      // Same piece name in both dirs; only the description tells them apart.
      writePieceYaml(dirA, 'dup', 'from-a');
      writePieceYaml(dirB, 'dup', 'from-b');
      const p = loadPiece('dup', 'pieces', [dirA, dirB]);
      expect(p.description).toBe('from-a');
    } finally {
      removeDir(dirA);
      removeDir(dirB);
    }
  });

  it('string form still works (backward compat)', () => {
    const dir = mkdtempSync(join(tmpdir(), 'pc-'));
    try {
      writePieceYaml(dir, 'strcompat', 'str');
      // A single directory passed as a plain string rather than an array.
      const p = loadPiece('strcompat', 'pieces', dir);
      expect(p.name).toBe('strcompat');
    } finally {
      removeDir(dir);
    }
  });
});
|