sync: update from private repo (5091989)
Some checks failed
CI / build-and-test (push) Has been cancelled

This commit is contained in:
oss-sync 2026-06-10 10:15:49 +00:00
parent b5831943a4
commit 000a2474aa
2 changed files with 154 additions and 46 deletions

View File

@ -1,5 +1,5 @@
import { describe, it, expect, vi, afterEach } from 'vitest'; import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
import { callReflectionLlm } from './llm-client.js'; import { callReflectionLlm, setReflectionRetrySleep } from './llm-client.js';
import type { ReflectionLlmConfig } from './llm-client.js'; import type { ReflectionLlmConfig } from './llm-client.js';
const cfg: ReflectionLlmConfig = { const cfg: ReflectionLlmConfig = {
@ -7,42 +7,45 @@ const cfg: ReflectionLlmConfig = {
model: 'test-model', model: 'test-model',
}; };
/** Minimal reflection payload that the mocked model submits through its tool call. */
const validResult = {
  memory_changes: [],
  piece_changes: { should_edit: false },
  reasoning: 'x',
};

/**
 * Stand-in for a successful `fetch` response: `ok` is true and `json()`
 * resolves to a chat-completion body with one `submit_reflection` tool call
 * (whose arguments are `validResult` serialized as JSON) plus token usage.
 * The body is rebuilt on every `json()` call.
 */
const okResponse = {
  ok: true,
  json: () => {
    const submitCall = {
      function: {
        name: 'submit_reflection',
        arguments: JSON.stringify(validResult),
      },
    };
    return Promise.resolve({
      choices: [{ message: { tool_calls: [submitCall] } }],
      usage: { prompt_tokens: 42, completion_tokens: 17 },
    });
  },
};
beforeEach(() => {
  // Replace the retry backoff with a sleeper that resolves immediately,
  // so retry tests never wait on real timers.
  const instantSleep = (): Promise<void> => Promise.resolve();
  setReflectionRetrySleep(instantSleep);
});
afterEach(() => { afterEach(() => {
vi.unstubAllGlobals(); vi.unstubAllGlobals();
}); });
describe('callReflectionLlm', () => { describe('callReflectionLlm', () => {
it('happy path: parses tool_call arguments and extracts token usage', async () => { it('happy path: parses tool_call arguments and extracts token usage', async () => {
const validResult = { vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse));
memory_changes: [],
piece_changes: { should_edit: false },
reasoning: 'x',
};
const mockResponse = {
choices: [
{
message: {
tool_calls: [
{
function: {
name: 'submit_reflection',
arguments: JSON.stringify(validResult),
},
},
],
},
},
],
usage: {
prompt_tokens: 42,
completion_tokens: 17,
},
};
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
ok: true,
json: () => Promise.resolve(mockResponse),
}));
const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt'); const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt');
@ -54,21 +57,70 @@ describe('callReflectionLlm', () => {
expect(result.durationMs).toBeGreaterThanOrEqual(0); expect(result.durationMs).toBeGreaterThanOrEqual(0);
}); });
it('error path: throws when no tool_calls present', async () => { it('retries a 5xx (backend tool-call parse failure) and succeeds on resample', async () => {
const mockResponse = { const fetchMock = vi.fn()
choices: [ .mockResolvedValueOnce({
{ ok: false,
message: {}, status: 500,
}, text: () => Promise.resolve('{"error":{"message":"Failed to parse input at pos 41: <tool_call>..."}}'),
], })
}; .mockResolvedValueOnce(okResponse);
vi.stubGlobal('fetch', fetchMock);
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ const result = await callReflectionLlm(cfg, 's', 'u');
expect(result.parsed.reasoning).toBe('x');
expect(fetchMock).toHaveBeenCalledTimes(2);
});
it('gives up after 3 attempts of persistent 5xx', async () => {
  // Every call resolves to the same HTTP 500, so each resample fails too.
  const serverError = {
    ok: false,
    status: 500,
    text: () => Promise.resolve('parse error'),
  };
  const fetchMock = vi.fn().mockResolvedValue(serverError);
  vi.stubGlobal('fetch', fetchMock);

  const pending = callReflectionLlm(cfg, 's', 'u');

  await expect(pending).rejects.toThrow('HTTP 500');
  // One initial attempt plus two retries.
  expect(fetchMock).toHaveBeenCalledTimes(3);
});
it('does NOT retry a 4xx (deterministic config error, e.g. invalid api key)', async () => {
  // A 401 would fail identically on every attempt, so a single call is expected.
  const unauthorized = {
    ok: false,
    status: 401,
    text: () => Promise.resolve('invalid api key'),
  };
  const fetchMock = vi.fn().mockResolvedValue(unauthorized);
  vi.stubGlobal('fetch', fetchMock);

  const pending = callReflectionLlm(cfg, 's', 'u');

  await expect(pending).rejects.toThrow('HTTP 401');
  expect(fetchMock).toHaveBeenCalledTimes(1);
});
it('retries when no tool_calls present, then throws after exhaustion', async () => {
const fetchMock = vi.fn().mockResolvedValue({
ok: true, ok: true,
json: () => Promise.resolve(mockResponse), json: () => Promise.resolve({ choices: [{ message: {} }] }),
})); });
vi.stubGlobal('fetch', fetchMock);
await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt')) await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt'))
.rejects.toThrow('no tool_call'); .rejects.toThrow('no tool_call');
expect(fetchMock).toHaveBeenCalledTimes(3);
});
it('retries malformed tool_call arguments JSON', async () => {
const fetchMock = vi.fn()
.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({
choices: [{ message: { tool_calls: [{ function: { name: 'submit_reflection', arguments: '{broken' } }] } }],
}),
})
.mockResolvedValueOnce(okResponse);
vi.stubGlobal('fetch', fetchMock);
const result = await callReflectionLlm(cfg, 's', 'u');
expect(result.parsed.reasoning).toBe('x');
expect(fetchMock).toHaveBeenCalledTimes(2);
}); });
}); });

View File

@ -16,12 +16,58 @@ export interface ReflectionLlmResult {
raw: unknown; raw: unknown;
} }
/** Upper bound on calls per request: the initial attempt plus any retries. */
const MAX_ATTEMPTS: number = 3;

/**
 * Backoff in milliseconds, indexed by the attempt that just failed minus one:
 * entry 0 is waited before attempt 2, entry 1 before attempt 3.
 */
const RETRY_DELAYS_MS: number[] = [500, 1500];
/** Backoff sleeper: resolves after `ms` milliseconds. Replaceable via the test hook below. */
let sleep = (ms: number) =>
  new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });

/**
 * Test hook: swaps the sleeper used for retry backoff, so tests need not wait
 * on real timers. The replacement stays in effect until this is called again.
 *
 * @param fn - Sleeper to use in place of the current one; receives the delay in ms.
 */
export function setReflectionRetrySleep(fn: (ms: number) => Promise<void>): void {
  sleep = fn;
}
/**
 * Marks a failure that is worth a resample. Small reflection models
 * occasionally emit malformed tool-call markup, which strict backends (e.g.
 * llama-server's tool parser) reject with a 5xx such as
 * "Failed to parse input at pos 41: <tool_call>...". Sampling is stochastic
 * (temperature 0.2), so simply asking again usually succeeds.
 * A 4xx (bad key, bad request shape) is a deterministic config error — fail
 * fast; those are thrown as plain `Error`, not as this class.
 */
class RetryableLlmError extends Error {
  constructor(message?: string) {
    super(message);
    // Without this, instances print as plain "Error" in logs and stack traces.
    this.name = 'RetryableLlmError';
  }
}
/**
 * Calls the reflection LLM, resampling when a failure is marked retryable.
 *
 * Makes up to `MAX_ATTEMPTS` calls to `callOnce`. A `RetryableLlmError`
 * triggers a logged warning and a backoff sleep before the next attempt;
 * any other error is rethrown immediately without retrying.
 *
 * @param cfg - Connection and model settings, forwarded to `callOnce`.
 * @param systemPrompt - System message, forwarded to `callOnce`.
 * @param userPrompt - User message, forwarded to `callOnce`.
 * @returns The result of the first attempt that succeeds.
 * @throws The last `RetryableLlmError` once all attempts are exhausted, or
 *   the first non-retryable error.
 */
export async function callReflectionLlm(
  cfg: ReflectionLlmConfig,
  systemPrompt: string,
  userPrompt: string
): Promise<ReflectionLlmResult> {
  // Taken once and handed to every attempt — presumably so the reported
  // duration spans retries and backoff; confirm against callOnce.
  const start = Date.now();
  let lastErr: Error | null = null;
  let attempt = 1;

  while (attempt <= MAX_ATTEMPTS) {
    try {
      return await callOnce(cfg, systemPrompt, userPrompt, start);
    } catch (err) {
      if (!(err instanceof RetryableLlmError)) throw err;
      lastErr = err;

      // No point sleeping after the final attempt; fall through to the throw.
      const hasAttemptsLeft = attempt < MAX_ATTEMPTS;
      if (hasAttemptsLeft) {
        const delay = RETRY_DELAYS_MS[attempt - 1] ?? 1500;
        // Backend error bodies can be long; keep the log line bounded.
        const reason = err.message.slice(0, 200);
        logger.warn(
          `[reflection-llm] attempt ${attempt}/${MAX_ATTEMPTS} failed (${reason}); retrying in ${delay}ms`,
        );
        await sleep(delay);
      }
    }
    attempt += 1;
  }

  throw lastErr ?? new Error('reflection LLM failed');
}
async function callOnce(
cfg: ReflectionLlmConfig,
systemPrompt: string,
userPrompt: string,
start: number,
): Promise<ReflectionLlmResult> {
const body: Record<string, unknown> = { const body: Record<string, unknown> = {
messages: [ messages: [
{ role: 'system', content: systemPrompt }, { role: 'system', content: systemPrompt },
@ -43,12 +89,22 @@ export async function callReflectionLlm(
body: JSON.stringify(body), body: JSON.stringify(body),
}); });
if (!resp.ok) { if (!resp.ok) {
throw new Error(`reflection LLM HTTP ${resp.status}: ${await resp.text()}`); const text = await resp.text();
const msg = `reflection LLM HTTP ${resp.status}: ${text}`;
// 5xx: backend-side failure (incl. tool-call parse errors on malformed
// model output) — resample. 4xx: deterministic config error — fail fast.
if (resp.status >= 500) throw new RetryableLlmError(msg);
throw new Error(msg);
} }
const data = await resp.json() as any; const data = await resp.json() as any;
const toolCall = data.choices?.[0]?.message?.tool_calls?.[0]; const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
if (!toolCall) throw new Error('reflection LLM returned no tool_call'); if (!toolCall) throw new RetryableLlmError('reflection LLM returned no tool_call');
const parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult; let parsed: ReflectionResult;
try {
parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
} catch {
throw new RetryableLlmError('reflection LLM tool_call arguments were not valid JSON');
}
return { return {
parsed, parsed,
tokensIn: data.usage?.prompt_tokens ?? 0, tokensIn: data.usage?.prompt_tokens ?? 0,