/*
 * maestro/src/engine/reflection/reflection-runner.test.ts
 * oss-sync b5831943a4
 * Some checks failed
 * CI / build-and-test (push) Has been cancelled
 * sync: update from private repo (8bc400c)
 * 2026-06-10 10:08:28 +00:00
 *
 * 185 lines
 * 6.8 KiB
 * TypeScript
 */

import { describe, it, expect, vi, beforeEach } from 'vitest';
import type { Job, Repository } from '../../db/repository.js';
import type { AppConfig } from '../../config.js';
// Module stubs for the tests in this file. vitest hoists vi.mock calls above
// the import statements, so the imports that follow receive these stubs.
// NOTE(review): presumably these are the modules reflection-runner.js depends
// on — confirm against the runner implementation.

// loadReflectionInputs is a bare mock; its resolved value is set in beforeEach.
vi.mock('./load-inputs.js', () => ({ loadReflectionInputs: vi.fn() }));
// Both prompt builders return fixed marker strings.
vi.mock('./reflection-prompt.js', () => ({
buildSystemPrompt: vi.fn().mockReturnValue('SYSTEM'),
buildUserPrompt: vi.fn().mockReturnValue('USER'),
}));
// Bare mocks; resolved or rejected values are configured in beforeEach and in individual tests.
vi.mock('./llm-client.js', () => ({ callReflectionLlm: vi.fn() }));
vi.mock('./applier.js', () => ({ applyReflectionUnlocked: vi.fn() }));
vi.mock('./snapshot.js', () => ({ writeSnapshot: vi.fn() }));
// The lock stub ignores the directory and user arguments and simply invokes
// the callback, returning whatever promise it produces.
vi.mock('./user-lock.js', () => ({
withUserLock: vi.fn(async (_dir: string, _user: string, fn: () => Promise<void>) => fn()),
}));
// PieceCatalog is replaced by a bare mock function.
vi.mock('../piece-catalog.js', () => ({ PieceCatalog: vi.fn() }));
import { loadReflectionInputs } from './load-inputs.js';
import { callReflectionLlm } from './llm-client.js';
import { applyReflectionUnlocked } from './applier.js';
import { writeSnapshot } from './snapshot.js';
import { withUserLock } from './user-lock.js';
import { runReflectionJob } from './reflection-runner.js';
/** Default reflection payload; `makeJob()` serialises it when no payload is passed. */
const PAYLOAD = {
  originalJobId: 'orig-1',
  userId: 'user-1',
  pieceName: 'chat',
  outcome: 'succeeded' as const,
};

/**
 * Builds a minimal reflection job fixture with id `refl-1`.
 *
 * @param payload Value stored JSON-encoded on the job. Pass `null` to get a
 *   job whose `payload` is `null`; omit it to use {@link PAYLOAD}.
 * @returns An object carrying only `id` and `payload`, cast to `Job`.
 */
function makeJob(payload: unknown = PAYLOAD): Job {
  let serialized: string | null = null;
  if (payload !== null) {
    serialized = JSON.stringify(payload);
  }

  const job = { id: 'refl-1', payload: serialized };
  return job as unknown as Job;
}
/**
 * Builds the dependency bundle handed to `runReflectionJob` in these tests.
 *
 * `repo` exposes only a mocked `recordReflectionMetric`, and `config` carries
 * only `reflection` and `userFolderRoot`; both are cast to their full project
 * types. A fresh mock is created on every call.
 */
function makeDeps() {
  const recordReflectionMetric = vi.fn();
  const repo = { recordReflectionMetric } as unknown as Repository;
  const config = { reflection: {}, userFolderRoot: 'data/users' } as unknown as AppConfig;

  return {
    repo,
    config,
    llmEndpoint: 'http://localhost:1',
    llmApiKey: 'sk-reflection-test',
    llmModel: 'test-model',
  };
}
/** Value the mocked `loadReflectionInputs` resolves with by default. */
const INPUT = {
  memoryEntries: [],
  pieceYaml: 'yaml',
};

/** Value the mocked `callReflectionLlm` resolves with by default. */
const LLM_RESULT = {
  parsed: { reasoning: 'because' },
  tokensIn: 100,
  tokensOut: 50,
  durationMs: 5,
};
/**
 * Builds the value the mocked applier resolves with.
 *
 * The defaults describe an `applied` outcome with one accepted memory change
 * (`fact-1`), one change rejected with code `too_vague` (`fact-2`), and no
 * piece edit.
 *
 * @param overrides Properties shallow-merged over the defaults.
 * @returns A freshly built result object on every call.
 */
function applierResult(overrides: Record<string, unknown> = {}) {
  // Both default decisions propose the same kind of change; only the name differs.
  const addFact = (name: string) => ({
    op: 'add',
    name,
    description: 'd',
    type: 'project',
    body: 'b',
  });

  const defaults = {
    outcome: 'applied',
    memoryDecisions: [
      { accepted: true, change: addFact('fact-1') },
      { accepted: false, code: 'too_vague', change: addFact('fact-2') },
    ],
    pieceApplied: false,
  };

  return { ...defaults, ...overrides };
}
// Shared setup: every test starts from a clean call history and a happy path
// in which each mocked step resolves successfully. Individual tests override
// a single step to exercise a failure or an alternative outcome.
beforeEach(() => {
  vi.clearAllMocks();

  vi.mocked(loadReflectionInputs).mockResolvedValue(INPUT as never);
  vi.mocked(callReflectionLlm).mockResolvedValue(LLM_RESULT as never);
  vi.mocked(applyReflectionUnlocked).mockResolvedValue(applierResult() as never);
  vi.mocked(writeSnapshot).mockResolvedValue({ dir: '/snap/dir' } as never);
});
// Outcome and metric-recording tests for runReflectionJob, run against the
// mocked modules: one test per failing step, plus the applied and abstained
// paths and a non-fatal snapshot failure.
describe('runReflectionJob', () => {
  /** Asserts that the mocked repo recorded a metric containing at least `fields`. */
  function expectMetricContaining(
    deps: ReturnType<typeof makeDeps>,
    fields: Record<string, unknown>,
  ): void {
    expect(deps.repo.recordReflectionMetric).toHaveBeenCalledWith(expect.objectContaining(fields));
  }

  it('returns failed for a job without payload and records no metric', async () => {
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob(null));

    expect(result).toBe('failed');
    expect(deps.repo.recordReflectionMetric).not.toHaveBeenCalled();
    expect(loadReflectionInputs).not.toHaveBeenCalled();
  });

  it('records a failed metric when input loading throws', async () => {
    vi.mocked(loadReflectionInputs).mockRejectedValue(new Error('db gone'));
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob());

    expect(result).toBe('failed');
    expectMetricContaining(deps, { outcome: 'failed', tokens_in: 0, tokens_out: 0 });
    expect(callReflectionLlm).not.toHaveBeenCalled();
  });

  it('records a failed metric when the LLM call throws', async () => {
    vi.mocked(callReflectionLlm).mockRejectedValue(new Error('timeout'));
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob());

    expect(result).toBe('failed');
    expectMetricContaining(deps, { outcome: 'failed', tokens_in: 0 });
    expect(applyReflectionUnlocked).not.toHaveBeenCalled();
  });

  it('applies, snapshots inside the user lock and records an applied metric', async () => {
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob());

    expect(result).toBe('applied');
    expect(withUserLock).toHaveBeenCalledWith('data/users', 'user-1', expect.any(Function));

    // The snapshot metadata is read from the fourth argument of the first writeSnapshot call.
    expect(writeSnapshot).toHaveBeenCalledTimes(1);
    const snapshotMeta = vi.mocked(writeSnapshot).mock.calls[0]?.[3] as Record<string, unknown>;
    expect(snapshotMeta).toMatchObject({
      originalJobId: 'orig-1',
      userId: 'user-1',
      pieceName: 'chat',
      outcome: 'applied',
      memoryChanges: 1,
      rejections: [{ code: 'too_vague', name: 'fact-2' }],
    });

    expectMetricContaining(deps, {
      reflection_job_id: 'refl-1',
      original_job_id: 'orig-1',
      user_id: 'user-1',
      piece_name: 'chat',
      outcome: 'applied',
      memory_changes: 1,
      piece_edited: 0,
      tokens_in: 100,
      tokens_out: 50,
    });
  });

  it('propagates an abstained outcome from the applier', async () => {
    const abstained = applierResult({ outcome: 'abstained', memoryDecisions: [] });
    vi.mocked(applyReflectionUnlocked).mockResolvedValue(abstained as never);
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob());

    expect(result).toBe('abstained');
    expectMetricContaining(deps, { outcome: 'abstained', memory_changes: 0 });
  });

  it('records a failed metric with LLM tokens when apply throws inside the lock', async () => {
    vi.mocked(applyReflectionUnlocked).mockRejectedValue(new Error('lock contention'));
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob());

    expect(result).toBe('failed');
    expectMetricContaining(deps, { outcome: 'failed', tokens_in: 100, tokens_out: 50 });
  });

  it('treats a snapshot failure as non-fatal', async () => {
    vi.mocked(writeSnapshot).mockRejectedValue(new Error('disk full'));
    const deps = makeDeps();

    const result = await runReflectionJob(deps, makeJob());

    expect(result).toBe('applied');
    expectMetricContaining(deps, { outcome: 'applied' });
  });
});
// Regression guard: the `llmApiKey` supplied in the deps must reach the first
// argument of callReflectionLlm as `apiKey`.
describe('LLM credential propagation', () => {
  it('passes the worker apiKey into the reflection LLM config (401 regression)', async () => {
    vi.mocked(callReflectionLlm).mockResolvedValue(LLM_RESULT as never);
    const deps = makeDeps();

    await runReflectionJob(deps, makeJob());

    expect(callReflectionLlm).toHaveBeenCalled();
    const firstCall = vi.mocked(callReflectionLlm).mock.calls[0];
    const llmConfig = firstCall?.[0] as { apiKey?: string };
    expect(llmConfig.apiKey).toBe('sk-reflection-test');
  });
});