// maestro/src/engine/agent-loop.tool-loop.test.ts

import { afterEach, describe, expect, it, vi } from 'vitest';
import type { LLMEvent, ToolDef } from '../llm/openai-compat.js';
import type { ToolContext } from './tools/index.js';

// Mock functions standing in for the tools module's exports. They are built
// inside `vi.hoisted` so they are available to the `vi.mock` factory for
// './tools/index.js', which hands them out as `executeTool` / `getToolDefs`.
const { executeToolMock, getToolDefsMock } = vi.hoisted(() => {
  const executeToolMock = vi.fn();
  const getToolDefsMock = vi.fn();
  return { executeToolMock, getToolDefsMock };
});

// Replace the tools module with the hoisted mock functions so each test
// decides which tool definitions are advertised and what every tool
// execution returns.
vi.mock('./tools/index.js', () => {
  return {
    executeTool: executeToolMock,
    getToolDefs: getToolDefsMock,
  };
});

import { executeMovement, type Movement } from './agent-loop.js';

/**
 * Builds the movement fixture used by every test: a non-editing `execute`
 * movement for the `worker` persona whose single rule and default transition
 * both lead to `COMPLETE`.
 *
 * @param allowedTools Tool names the movement may use; stored as-is (not copied).
 * @returns A new movement object on each call.
 */
function makeMovement(allowedTools: string[]): Movement {
  const fixture: Movement = {
    name: 'execute',
    edit: false,
    persona: 'worker',
    instruction: 'Do the work.',
    allowedTools,
    rules: [
      {
        condition: 'done',
        next: 'COMPLETE',
      },
    ],
    defaultNext: 'COMPLETE',
  };
  return fixture;
}

/**
 * Turns a list of tool names into function-style tool definitions.
 *
 * Each definition reuses the tool name as its description and declares an
 * object parameter schema with no properties and no required fields.
 *
 * @param names Tool names, one definition is produced per entry, in order.
 * @returns The definitions, in the same order as `names`.
 */
function makeToolDefs(names: string[]): ToolDef[] {
  const describeTool = (toolName: string): ToolDef => {
    return {
      type: 'function',
      function: {
        name: toolName,
        description: toolName,
        parameters: { type: 'object', properties: {}, required: [] },
      },
    };
  };

  return names.map((toolName) => describeTool(toolName));
}

/**
 * Builds the tool context handed to `executeMovement` in these tests: a fixed
 * workspace path with editing not allowed.
 *
 * @returns A new context object on each call.
 */
function makeContext(): ToolContext {
  const context: ToolContext = {
    workspacePath: '/tmp/agent-loop-tool-loop-test',
    editAllowed: false,
  };
  return context;
}

/**
 * Scripted stand-in for the LLM client.
 *
 * Each `chat` call replays the next scripted batch of events in order. Once
 * the scripts run out, every further call yields a single `done` event.
 */
class FakeClient {
  /** Position of the next scripted batch to replay. */
  private cursor = 0;

  constructor(private readonly scripts: LLMEvent[][]) {}

  /**
   * Replays one scripted batch; both arguments are accepted and ignored.
   *
   * The cursor advances when the generator body starts running (first
   * `next()`), not when `chat` is called.
   */
  async *chat(_messages: unknown, _tools?: unknown): AsyncGenerator<LLMEvent> {
    const position = this.cursor;
    this.cursor = position + 1;

    const scripted = this.scripts[position];
    const events = scripted == null ? [{ type: 'done' } as LLMEvent] : scripted;

    for (const event of events) {
      yield event;
    }
  }
}

/**
 * One model turn that issues a single Read call — always the same id, path
 * and arguments — followed by the end-of-turn `done` event.
 */
function identicalReadBatch(): LLMEvent[] {
  const readCall: LLMEvent = {
    type: 'tool_use',
    id: 'read',
    name: 'Read',
    input: { file_path: 'input/a.txt' },
  };
  const endOfTurn: LLMEvent = { type: 'done' };
  return [readCall, endOfTurn];
}

describe('executeMovement tool-call loop detection', () => {
  /** Batch in which the model calls `complete` with a success status. */
  const completeBatch = (): LLMEvent[] => [
    { type: 'tool_use', id: 'c', name: 'complete', input: { status: 'success', result: 'done' } },
    { type: 'done' },
  ];

  /** `count` consecutive batches, each issuing the identical Read call. */
  const repeatedReads = (count: number): LLMEvent[][] =>
    Array.from({ length: count }, () => identicalReadBatch());

  /** Advertises only the Read tool and resolves every tool execution with a non-error `output`. */
  const stubReadTool = (output: string): void => {
    getToolDefsMock.mockResolvedValue(makeToolDefs(['Read']));
    executeToolMock.mockResolvedValue({ output, isError: false });
  };

  afterEach(() => {
    // Reset both mocks so recorded calls and stubbed results do not leak between tests.
    for (const mock of [executeToolMock, getToolDefsMock]) {
      mock.mockReset();
    }
  });

  it('aborts when the identical tool-call batch repeats up to the limit', async () => {
    stubReadTool('same content');

    // The script offers 6 identical batches. The test expects the default
    // limit (5) to trip on the 5th batch before it is dispatched, leaving
    // exactly 4 Read executions.
    const client = new FakeClient(repeatedReads(6));
    const movement = makeMovement(['Read']);

    const result = await executeMovement(movement, 'task', client as never, makeContext());

    expect(result.next).toBe('ABORT');
    expect(result.abortCode).toBe('tool_loop_detected');
    expect(executeToolMock).toHaveBeenCalledTimes(4);
  });

  it('honours a custom maxToolLoopRepeats from safetyConfig', async () => {
    stubReadTool('same content');

    const client = new FakeClient(repeatedReads(6));
    const movement = makeMovement(['Read']);
    const options = { safetyConfig: { maxToolLoopRepeats: 3 } };

    const result = await executeMovement(movement, 'task', client as never, makeContext(), options);

    expect(result.next).toBe('ABORT');
    expect(result.abortCode).toBe('tool_loop_detected');
    // With a limit of 3 the abort is expected before the 3rd dispatch,
    // so only 2 Read executions happen.
    expect(executeToolMock).toHaveBeenCalledTimes(2);
  });

  it('does not abort when identical calls stay under the limit, then completes', async () => {
    stubReadTool('same content');

    // 4 identical Read batches stay under the default limit of 5; the 5th
    // batch completes the movement.
    const script = [...repeatedReads(4), completeBatch()];
    const client = new FakeClient(script);
    const movement = makeMovement(['Read']);

    const result = await executeMovement(movement, 'task', client as never, makeContext());

    expect(result.next).toBe('COMPLETE');
    expect(executeToolMock).toHaveBeenCalledTimes(4);
  });

  it('resets the counter when the tool-call args change (no false positive)', async () => {
    stubReadTool('content');

    // 8 Read calls, each targeting a different file: the call fingerprint
    // changes on every iteration, so the consecutive-repeat limit is never
    // reached. The final batch completes the movement.
    const distinctReads: LLMEvent[][] = Array.from({ length: 8 }, (_, fileNo) => [
      { type: 'tool_use', id: `read-${fileNo}`, name: 'Read', input: { file_path: `input/file-${fileNo}.txt` } },
      { type: 'done' },
    ]);
    const script = [...distinctReads, completeBatch()];
    const client = new FakeClient(script);
    const movement = makeMovement(['Read']);

    const result = await executeMovement(movement, 'task', client as never, makeContext());

    expect(result.next).toBe('COMPLETE');
    expect(executeToolMock).toHaveBeenCalledTimes(8);
  });

  it('lets transition/complete win even if it shares a batch with a repeated call', async () => {
    stubReadTool('same content');

    // After 4 identical Read batches (enough for a warning, not an abort),
    // the last batch repeats the same Read AND calls complete in one turn.
    // The completion must take precedence over loop detection.
    const readAndComplete: LLMEvent[] = [
      { type: 'tool_use', id: 'read', name: 'Read', input: { file_path: 'input/a.txt' } },
      { type: 'tool_use', id: 'c', name: 'complete', input: { status: 'success', result: 'done' } },
      { type: 'done' },
    ];
    const script = [...repeatedReads(4), readAndComplete];
    const client = new FakeClient(script);
    const movement = makeMovement(['Read']);

    const result = await executeMovement(movement, 'task', client as never, makeContext());

    expect(result.next).toBe('COMPLETE');
  });
});