// 163 lines · 5.9 KiB · TypeScript
import { afterEach, describe, expect, it, vi } from 'vitest';
|
|
import type { LLMEvent, ToolDef } from '../llm/openai-compat.js';
|
|
import type { ToolContext } from './tools/index.js';
|
|
|
|
// Create the mock functions through vi.hoisted so they can be referenced
// from the vi.mock factory below.
const { executeToolMock, getToolDefsMock } = vi.hoisted(() => {
  return {
    executeToolMock: vi.fn(),
    getToolDefsMock: vi.fn(),
  };
});

// Replace the tools module so that executeTool / getToolDefs resolve to the
// mock functions above.
vi.mock('./tools/index.js', () => {
  return {
    executeTool: executeToolMock,
    getToolDefs: getToolDefsMock,
  };
});
|
|
|
|
import { executeMovement, type Movement } from './agent-loop.js';
|
|
|
|
/**
 * Builds the `execute` movement fixture used by these tests.
 *
 * @param allowedTools - Tool names stored on the movement's `allowedTools` field.
 * @returns A non-editing `worker` movement whose single rule and default
 *   transition both point at `COMPLETE`.
 */
function makeMovement(allowedTools: string[]): Movement {
  const movement: Movement = {
    name: 'execute',
    edit: false,
    persona: 'worker',
    instruction: 'Do the work.',
    allowedTools,
    rules: [{ condition: 'done', next: 'COMPLETE' }],
    defaultNext: 'COMPLETE',
  };
  return movement;
}
|
|
|
|
/**
 * Builds one function-style tool definition per supplied name.
 *
 * @param names - Tool names; each name is also used as the description.
 * @returns Definitions whose parameter schema is an object with no
 *   properties and no required keys.
 */
function makeToolDefs(names: string[]): ToolDef[] {
  const toDefinition = (toolName: string): ToolDef => {
    return {
      type: 'function',
      function: {
        name: toolName,
        description: toolName,
        parameters: { type: 'object', properties: {}, required: [] },
      },
    };
  };
  return names.map((toolName) => toDefinition(toolName));
}
|
|
|
|
/**
 * Builds the tool context fixture: a fixed workspace path with editing disabled.
 */
function makeContext(): ToolContext {
  const context: ToolContext = {
    workspacePath: '/tmp/agent-loop-tool-loop-test',
    editAllowed: false,
  };
  return context;
}
|
|
|
|
/**
 * Scripted stand-in for the LLM client.
 *
 * Each `chat` call replays the next scripted response, event by event. Once
 * every script has been consumed, a call yields a single `done` event.
 */
class FakeClient {
  private readonly responses: LLMEvent[][];
  private cursor = 0;

  constructor(responses: LLMEvent[][]) {
    this.responses = responses;
  }

  /** Streams the events of the next scripted response; both arguments are ignored. */
  async *chat(_messages: unknown, _tools?: unknown): AsyncGenerator<LLMEvent> {
    const scripted = this.responses[this.cursor];
    this.cursor += 1;
    // Past the end of the script: fall back to a lone `done` event.
    const events = scripted ?? [{ type: 'done' } as LLMEvent];
    for (const event of events) {
      yield event;
    }
  }
}
|
|
|
|
/**
 * One scripted iteration: a `Read` call with fixed id and file path,
 * followed by the `done` event. Every invocation returns equal content.
 */
function identicalReadBatch(): LLMEvent[] {
  const readCall: LLMEvent = {
    type: 'tool_use',
    id: 'read',
    name: 'Read',
    input: { file_path: 'input/a.txt' },
  };
  const endOfTurn: LLMEvent = { type: 'done' };
  return [readCall, endOfTurn];
}
|
|
|
|
describe('executeMovement tool-call loop detection', () => {
  /** Exposes a lone `Read` tool whose every execution resolves successfully with `output`. */
  const stubReadTool = (output: string): void => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output, isError: false });
  };

  /** Builds `count` consecutive copies of the identical Read batch. */
  const repeatedReads = (count: number): LLMEvent[][] =>
    Array.from({ length: count }, () => identicalReadBatch());

  /** Fresh `complete` tool call reporting success. */
  const completeCall = (): LLMEvent => ({
    type: 'tool_use',
    id: 'c',
    name: 'complete',
    input: { status: 'success', result: 'done' },
  });

  /** Runs the Read-only movement against `client` without the optional fifth argument. */
  const runWithDefaults = (client: FakeClient) =>
    executeMovement(makeMovement(['Read']), 'task', client as never, makeContext());

  // Clear recorded calls and stubbed results so tests do not leak into each other.
  afterEach(() => {
    executeToolMock.mockReset();
    getToolDefsMock.mockReset();
  });

  it('aborts when the identical tool-call batch repeats up to the limit', async () => {
    stubReadTool('same content');

    // Six identical batches are scripted, but the default limit (5) is
    // expected to trip on the 5th before it is dispatched, so only four
    // Read executions happen.
    const client = new FakeClient(repeatedReads(6));

    const outcome = await runWithDefaults(client);

    expect(outcome.next).toBe('ABORT');
    expect(outcome.abortCode).toBe('tool_loop_detected');
    expect(executeToolMock).toHaveBeenCalledTimes(4);
  });

  it('honours a custom maxToolLoopRepeats from safetyConfig', async () => {
    stubReadTool('same content');

    const client = new FakeClient(repeatedReads(6));

    const outcome = await executeMovement(
      makeMovement(['Read']),
      'task',
      client as never,
      makeContext(),
      { safetyConfig: { maxToolLoopRepeats: 3 } },
    );

    expect(outcome.next).toBe('ABORT');
    expect(outcome.abortCode).toBe('tool_loop_detected');
    // With a limit of 3 the abort comes before the 3rd dispatch: two Read executions.
    expect(executeToolMock).toHaveBeenCalledTimes(2);
  });

  it('does not abort when identical calls stay under the limit, then completes', async () => {
    stubReadTool('same content');

    // Four identical Read batches (below the default limit of 5), then a completion.
    const script: LLMEvent[][] = [...repeatedReads(4), [completeCall(), { type: 'done' }]];
    const client = new FakeClient(script);

    const outcome = await runWithDefaults(client);

    expect(outcome.next).toBe('COMPLETE');
    expect(executeToolMock).toHaveBeenCalledTimes(4);
  });

  it('resets the counter when the tool-call args change (no false positive)', async () => {
    stubReadTool('content');

    // Eight Read calls, each on a different file, so the fingerprint changes
    // on every iteration and the consecutive-repeat limit is never reached.
    // The script then completes.
    const script: LLMEvent[][] = [];
    for (let i = 0; i < 8; i += 1) {
      script.push([
        { type: 'tool_use', id: `read-${i}`, name: 'Read', input: { file_path: `input/file-${i}.txt` } },
        { type: 'done' },
      ]);
    }
    script.push([completeCall(), { type: 'done' }]);
    const client = new FakeClient(script);

    const outcome = await runWithDefaults(client);

    expect(outcome.next).toBe('COMPLETE');
    expect(executeToolMock).toHaveBeenCalledTimes(8);
  });

  it('lets transition/complete win even if it shares a batch with a repeated call', async () => {
    stubReadTool('same content');

    // Four identical Read batches (warning fires but no abort), followed by a
    // batch that repeats the same Read AND completes; the completion must
    // take precedence.
    const finalBatch: LLMEvent[] = [
      { type: 'tool_use', id: 'read', name: 'Read', input: { file_path: 'input/a.txt' } },
      completeCall(),
      { type: 'done' },
    ];
    const client = new FakeClient([...repeatedReads(4), finalBatch]);

    const outcome = await runWithDefaults(client);

    expect(outcome.next).toBe('COMPLETE');
  });
});
|