sync: update from private repo (5091989)

2026-06-10 10:15:49 +00:00 · 2026-06-10 10:15:49 +00:00 · 000a2474aa
commit 000a2474aa
parent b5831943a4
2 changed files with 154 additions and 46 deletions
--- a/src/engine/reflection/llm-client.test.ts
+++ b/src/engine/reflection/llm-client.test.ts
@ -1,5 +1,5 @@
-import { describe, it, expect, vi, afterEach } from 'vitest';
-import { callReflectionLlm } from './llm-client.js';
+import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
+import { callReflectionLlm, setReflectionRetrySleep } from './llm-client.js';
 import type { ReflectionLlmConfig } from './llm-client.js';

 const cfg: ReflectionLlmConfig = {
@ -7,18 +7,15 @@ const cfg: ReflectionLlmConfig = {
  model: 'test-model',
 };

-afterEach(() => {
-  vi.unstubAllGlobals();
-});
-
-describe('callReflectionLlm', () => {
-  it('happy path: parses tool_call arguments and extracts token usage', async () => {
 const validResult = {
  memory_changes: [],
  piece_changes: { should_edit: false },
  reasoning: 'x',
 };
-    const mockResponse = {
+
+const okResponse = {
+  ok: true,
+  json: () => Promise.resolve({
    choices: [
      {
        message: {
@ -33,16 +30,22 @@ describe('callReflectionLlm', () => {
        },
      },
    ],
-      usage: {
-        prompt_tokens: 42,
-        completion_tokens: 17,
-      },
+    usage: { prompt_tokens: 42, completion_tokens: 17 },
+  }),
 };

-    vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
-      ok: true,
-      json: () => Promise.resolve(mockResponse),
-    }));
+beforeEach(() => {
+  // No real backoff sleeps in tests.
+  setReflectionRetrySleep(async () => {});
+});
+
+afterEach(() => {
+  vi.unstubAllGlobals();
+});
+
+describe('callReflectionLlm', () => {
+  it('happy path: parses tool_call arguments and extracts token usage', async () => {
+    vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse));

    const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt');

@ -54,21 +57,70 @@ describe('callReflectionLlm', () => {
    expect(result.durationMs).toBeGreaterThanOrEqual(0);
  });

-  it('error path: throws when no tool_calls present', async () => {
-    const mockResponse = {
-      choices: [
-        {
-          message: {},
-        },
-      ],
-    };
+  it('retries a 5xx (backend tool-call parse failure) and succeeds on resample', async () => {
+    const fetchMock = vi.fn()
+      .mockResolvedValueOnce({
+        ok: false,
+        status: 500,
+        text: () => Promise.resolve('{"error":{"message":"Failed to parse input at pos 41: <tool_call>..."}}'),
+      })
+      .mockResolvedValueOnce(okResponse);
+    vi.stubGlobal('fetch', fetchMock);

-    vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
+    const result = await callReflectionLlm(cfg, 's', 'u');
+    expect(result.parsed.reasoning).toBe('x');
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+  });
+
+  it('gives up after 3 attempts of persistent 5xx', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: false,
+      status: 500,
+      text: () => Promise.resolve('parse error'),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 500');
+    expect(fetchMock).toHaveBeenCalledTimes(3);
+  });
+
+  it('does NOT retry a 4xx (deterministic config error, e.g. invalid api key)', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: false,
+      status: 401,
+      text: () => Promise.resolve('invalid api key'),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 401');
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+
+  it('retries when no tool_calls present, then throws after exhaustion', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
-      json: () => Promise.resolve(mockResponse),
-    }));
+      json: () => Promise.resolve({ choices: [{ message: {} }] }),
+    });
+    vi.stubGlobal('fetch', fetchMock);

    await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt'))
      .rejects.toThrow('no tool_call');
+    expect(fetchMock).toHaveBeenCalledTimes(3);
+  });
+
+  it('retries malformed tool_call arguments JSON', async () => {
+    const fetchMock = vi.fn()
+      .mockResolvedValueOnce({
+        ok: true,
+        json: () => Promise.resolve({
+          choices: [{ message: { tool_calls: [{ function: { name: 'submit_reflection', arguments: '{broken' } }] } }],
+        }),
+      })
+      .mockResolvedValueOnce(okResponse);
+    vi.stubGlobal('fetch', fetchMock);
+
+    const result = await callReflectionLlm(cfg, 's', 'u');
+    expect(result.parsed.reasoning).toBe('x');
+    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
 });
--- a/src/engine/reflection/llm-client.ts
+++ b/src/engine/reflection/llm-client.ts
@ -16,12 +16,58 @@ export interface ReflectionLlmResult {
  raw: unknown;
 }

+/** Total attempts (1 initial + retries) for resample-worthy failures. */
+const MAX_ATTEMPTS = 3;
+/** Backoff before attempt 2 and 3. Injectable for tests. */
+const RETRY_DELAYS_MS = [500, 1500];
+
+let sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));
+/** Test hook: replace the backoff sleeper (avoids real timers in vitest). */
+export function setReflectionRetrySleep(fn: (ms: number) => Promise<void>): void {
+  sleep = fn;
+}
+
+/**
+ * Errors worth a resample: small reflection models occasionally emit
+ * malformed tool-call markup, which strict backends (e.g. llama-server's
+ * tool parser) reject with a 5xx like
+ * "Failed to parse input at pos 41: <tool_call>...". The sampling is
+ * stochastic (temperature 0.2), so simply asking again usually succeeds.
+ * 4xx (bad key, bad request shape) is deterministic config error — fail fast.
+ */
+class RetryableLlmError extends Error {}
+
 export async function callReflectionLlm(
  cfg: ReflectionLlmConfig,
  systemPrompt: string,
  userPrompt: string
 ): Promise<ReflectionLlmResult> {
  const start = Date.now();
+  let lastErr: Error | null = null;
+  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+    try {
+      return await callOnce(cfg, systemPrompt, userPrompt, start);
+    } catch (e) {
+      if (!(e instanceof RetryableLlmError)) throw e;
+      lastErr = e;
+      if (attempt < MAX_ATTEMPTS) {
+        const delay = RETRY_DELAYS_MS[attempt - 1] ?? 1500;
+        logger.warn(
+          `[reflection-llm] attempt ${attempt}/${MAX_ATTEMPTS} failed (${e.message.slice(0, 200)}); retrying in ${delay}ms`,
+        );
+        await sleep(delay);
+      }
+    }
+  }
+  throw lastErr ?? new Error('reflection LLM failed');
+}
+
+async function callOnce(
+  cfg: ReflectionLlmConfig,
+  systemPrompt: string,
+  userPrompt: string,
+  start: number,
+): Promise<ReflectionLlmResult> {
  const body: Record<string, unknown> = {
    messages: [
      { role: 'system', content: systemPrompt },
@ -43,12 +89,22 @@ export async function callReflectionLlm(
    body: JSON.stringify(body),
  });
  if (!resp.ok) {
-    throw new Error(`reflection LLM HTTP ${resp.status}: ${await resp.text()}`);
+    const text = await resp.text();
+    const msg = `reflection LLM HTTP ${resp.status}: ${text}`;
+    // 5xx: backend-side failure (incl. tool-call parse errors on malformed
+    // model output) — resample. 4xx: deterministic config error — fail fast.
+    if (resp.status >= 500) throw new RetryableLlmError(msg);
+    throw new Error(msg);
  }
  const data = await resp.json() as any;
  const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
-  if (!toolCall) throw new Error('reflection LLM returned no tool_call');
-  const parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
+  if (!toolCall) throw new RetryableLlmError('reflection LLM returned no tool_call');
+  let parsed: ReflectionResult;
+  try {
+    parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
+  } catch {
+    throw new RetryableLlmError('reflection LLM tool_call arguments were not valid JSON');
+  }
  return {
    parsed,
    tokensIn: data.usage?.prompt_tokens ?? 0,