import { afterEach, describe, expect, it, vi } from 'vitest';
import type { LLMEvent, ToolDef } from '../llm/openai-compat.js';
import type { ToolContext } from './tools/index.js';

// `vi.mock` factories are hoisted above the imports, so the mock functions
// they reference must be created through `vi.hoisted` to exist in time.
const { executeToolMock, getToolDefsMock } = vi.hoisted(() => ({
  executeToolMock: vi.fn(),
  getToolDefsMock: vi.fn(),
}));

// Replace the tool registry so each test scripts the tool defs and results.
vi.mock('./tools/index.js', () => ({
  executeTool: executeToolMock,
  getToolDefs: getToolDefsMock,
}));

import { executeMovement, type Movement } from './agent-loop.js';

/**
 * Builds a minimal `Movement` fixture; only `allowedTools` varies per test.
 *
 * @param allowedTools - Tool names the movement is allowed to call.
 * @returns A movement named `execute` whose rule and default both go to `COMPLETE`.
 */
function makeMovement(allowedTools: string[]): Movement {
  return {
    name: 'execute',
    edit: false,
    persona: 'worker',
    instruction: 'Do the work.',
    allowedTools,
    rules: [{ condition: 'done', next: 'COMPLETE' }],
    defaultNext: 'COMPLETE',
  };
}

/**
 * Builds parameterless function tool definitions, one per name.
 *
 * @param names - Tool names; each is also used as the tool's description.
 * @returns One `ToolDef` per entry in `names`, in the same order.
 */
function makeToolDefs(names: string[]): ToolDef[] {
  return names.map((name) => ({
    type: 'function',
    function: {
      name,
      description: name,
      parameters: { type: 'object', properties: {}, required: [] },
    },
  }));
}

/** Builds a read-only tool context pointing at a throwaway workspace path. */
function makeContext(): ToolContext {
  return {
    workspacePath: '/tmp/agent-loop-tool-loop-test',
    editAllowed: false,
  };
}

/**
 * Scripted stand-in for the LLM client.
 *
 * Each `chat()` call replays the next scripted batch of events. Once the
 * script is exhausted, every further call yields a single `done` event.
 */
class FakeClient {
  private index = 0;

  constructor(private readonly responses: LLMEvent[][]) {}

  async *chat(_messages: unknown, _tools?: unknown): AsyncGenerator<LLMEvent> {
    // Declared (not asserted) so the compiler checks the fallback's shape.
    const fallback: LLMEvent[] = [{ type: 'done' }];
    const response = this.responses[this.index++] ?? fallback;
    for (const event of response) yield event;
  }
}

/** One iteration that calls Read on the same path with the same args. */
function identicalReadBatch(): LLMEvent[] {
  return [
    { type: 'tool_use', id: 'read', name: 'Read', input: { file_path: 'input/a.txt' } },
    { type: 'done' },
  ];
}

describe('executeMovement tool-call loop detection', () => {
  // Reset both call history and scripted results so tests stay independent.
  afterEach(() => {
    executeToolMock.mockReset();
    getToolDefsMock.mockReset();
  });

  it('aborts when the identical tool-call batch repeats up to the limit', async () => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output: 'same content', isError: false });

    // 6 identical batches available, but the default limit (5) should fire
    // on the 5th before dispatching it → only 4 Read executions.
    const client = new FakeClient(Array.from({ length: 6 }, identicalReadBatch));

    const result = await executeMovement(makeMovement(['Read']), 'task', client as never, makeContext());

    expect(result.next).toBe('ABORT');
    expect(result.abortCode).toBe('tool_loop_detected');
    expect(executeToolMock).toHaveBeenCalledTimes(4);
  });

  it('honours a custom maxToolLoopRepeats from safetyConfig', async () => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output: 'same content', isError: false });

    const client = new FakeClient(Array.from({ length: 6 }, identicalReadBatch));

    const result = await executeMovement(
      makeMovement(['Read']),
      'task',
      client as never,
      makeContext(),
      { safetyConfig: { maxToolLoopRepeats: 3 } },
    );

    expect(result.next).toBe('ABORT');
    expect(result.abortCode).toBe('tool_loop_detected');
    // Limit 3 → aborts before the 3rd dispatch → 2 Read executions.
    expect(executeToolMock).toHaveBeenCalledTimes(2);
  });

  it('does not abort when identical calls stay under the limit, then completes', async () => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output: 'same content', isError: false });

    // 4 identical Read batches (under the default limit of 5), then complete.
    const client = new FakeClient([
      ...Array.from({ length: 4 }, identicalReadBatch),
      [
        { type: 'tool_use', id: 'c', name: 'complete', input: { status: 'success', result: 'done' } },
        { type: 'done' },
      ],
    ]);

    const result = await executeMovement(makeMovement(['Read']), 'task', client as never, makeContext());

    expect(result.next).toBe('COMPLETE');
    expect(executeToolMock).toHaveBeenCalledTimes(4);
  });

  it('resets the counter when the tool-call args change (no false positive)', async () => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output: 'content', isError: false });

    // 8 Read calls but each on a different file → fingerprint changes every
    // iteration → never reaches the consecutive-repeat limit. Then complete.
    const varied: LLMEvent[][] = Array.from({ length: 8 }, (_, i) => [
      { type: 'tool_use', id: `read-${i}`, name: 'Read', input: { file_path: `input/file-${i}.txt` } },
      { type: 'done' },
    ]);
    const client = new FakeClient([
      ...varied,
      [
        { type: 'tool_use', id: 'c', name: 'complete', input: { status: 'success', result: 'done' } },
        { type: 'done' },
      ],
    ]);

    const result = await executeMovement(makeMovement(['Read']), 'task', client as never, makeContext());

    expect(result.next).toBe('COMPLETE');
    expect(executeToolMock).toHaveBeenCalledTimes(8);
  });

  it('lets transition/complete win even if it shares a batch with a repeated call', async () => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output: 'same content', isError: false });

    // 4 identical Read batches (warning fires but no abort), then a batch that
    // repeats the same Read AND completes — complete must take precedence.
    const client = new FakeClient([
      ...Array.from({ length: 4 }, identicalReadBatch),
      [
        { type: 'tool_use', id: 'read', name: 'Read', input: { file_path: 'input/a.txt' } },
        { type: 'tool_use', id: 'c', name: 'complete', input: { status: 'success', result: 'done' } },
        { type: 'done' },
      ],
    ]);

    const result = await executeMovement(makeMovement(['Read']), 'task', client as never, makeContext());

    expect(result.next).toBe('COMPLETE');
  });
});