sync: update from private repo (5091989)
Some checks failed
CI / build-and-test (push) Has been cancelled
Some checks failed
CI / build-and-test (push) Has been cancelled
This commit is contained in:
parent
b5831943a4
commit
000a2474aa
@ -1,5 +1,5 @@
|
||||
import { describe, it, expect, vi, afterEach } from 'vitest';
|
||||
import { callReflectionLlm } from './llm-client.js';
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
|
||||
import { callReflectionLlm, setReflectionRetrySleep } from './llm-client.js';
|
||||
import type { ReflectionLlmConfig } from './llm-client.js';
|
||||
|
||||
const cfg: ReflectionLlmConfig = {
|
||||
@ -7,18 +7,15 @@ const cfg: ReflectionLlmConfig = {
|
||||
model: 'test-model',
|
||||
};
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
describe('callReflectionLlm', () => {
|
||||
it('happy path: parses tool_call arguments and extracts token usage', async () => {
|
||||
const validResult = {
|
||||
memory_changes: [],
|
||||
piece_changes: { should_edit: false },
|
||||
reasoning: 'x',
|
||||
};
|
||||
const mockResponse = {
|
||||
|
||||
const okResponse = {
|
||||
ok: true,
|
||||
json: () => Promise.resolve({
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
@ -33,16 +30,22 @@ describe('callReflectionLlm', () => {
|
||||
},
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
prompt_tokens: 42,
|
||||
completion_tokens: 17,
|
||||
},
|
||||
usage: { prompt_tokens: 42, completion_tokens: 17 },
|
||||
}),
|
||||
};
|
||||
|
||||
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockResponse),
|
||||
}));
|
||||
beforeEach(() => {
|
||||
// No real backoff sleeps in tests.
|
||||
setReflectionRetrySleep(async () => {});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
describe('callReflectionLlm', () => {
|
||||
it('happy path: parses tool_call arguments and extracts token usage', async () => {
|
||||
vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse));
|
||||
|
||||
const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt');
|
||||
|
||||
@ -54,21 +57,70 @@ describe('callReflectionLlm', () => {
|
||||
expect(result.durationMs).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('error path: throws when no tool_calls present', async () => {
|
||||
const mockResponse = {
|
||||
choices: [
|
||||
{
|
||||
message: {},
|
||||
},
|
||||
],
|
||||
};
|
||||
it('retries a 5xx (backend tool-call parse failure) and succeeds on resample', async () => {
|
||||
const fetchMock = vi.fn()
|
||||
.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 500,
|
||||
text: () => Promise.resolve('{"error":{"message":"Failed to parse input at pos 41: <tool_call>..."}}'),
|
||||
})
|
||||
.mockResolvedValueOnce(okResponse);
|
||||
vi.stubGlobal('fetch', fetchMock);
|
||||
|
||||
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
|
||||
const result = await callReflectionLlm(cfg, 's', 'u');
|
||||
expect(result.parsed.reasoning).toBe('x');
|
||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('gives up after 3 attempts of persistent 5xx', async () => {
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: false,
|
||||
status: 500,
|
||||
text: () => Promise.resolve('parse error'),
|
||||
});
|
||||
vi.stubGlobal('fetch', fetchMock);
|
||||
|
||||
await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 500');
|
||||
expect(fetchMock).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('does NOT retry a 4xx (deterministic config error, e.g. invalid api key)', async () => {
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: false,
|
||||
status: 401,
|
||||
text: () => Promise.resolve('invalid api key'),
|
||||
});
|
||||
vi.stubGlobal('fetch', fetchMock);
|
||||
|
||||
await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 401');
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('retries when no tool_calls present, then throws after exhaustion', async () => {
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockResponse),
|
||||
}));
|
||||
json: () => Promise.resolve({ choices: [{ message: {} }] }),
|
||||
});
|
||||
vi.stubGlobal('fetch', fetchMock);
|
||||
|
||||
await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt'))
|
||||
.rejects.toThrow('no tool_call');
|
||||
expect(fetchMock).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('retries malformed tool_call arguments JSON', async () => {
|
||||
const fetchMock = vi.fn()
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({
|
||||
choices: [{ message: { tool_calls: [{ function: { name: 'submit_reflection', arguments: '{broken' } }] } }],
|
||||
}),
|
||||
})
|
||||
.mockResolvedValueOnce(okResponse);
|
||||
vi.stubGlobal('fetch', fetchMock);
|
||||
|
||||
const result = await callReflectionLlm(cfg, 's', 'u');
|
||||
expect(result.parsed.reasoning).toBe('x');
|
||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
@ -16,12 +16,58 @@ export interface ReflectionLlmResult {
|
||||
raw: unknown;
|
||||
}
|
||||
|
||||
/** Total attempts (1 initial + retries) for resample-worthy failures. */
|
||||
const MAX_ATTEMPTS = 3;
|
||||
/** Backoff before attempt 2 and 3. Injectable for tests. */
|
||||
const RETRY_DELAYS_MS = [500, 1500];
|
||||
|
||||
let sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));
|
||||
/** Test hook: replace the backoff sleeper (avoids real timers in vitest). */
|
||||
export function setReflectionRetrySleep(fn: (ms: number) => Promise<void>): void {
|
||||
sleep = fn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Errors worth a resample: small reflection models occasionally emit
|
||||
* malformed tool-call markup, which strict backends (e.g. llama-server's
|
||||
* tool parser) reject with a 5xx like
|
||||
* "Failed to parse input at pos 41: <tool_call>...". The sampling is
|
||||
* stochastic (temperature 0.2), so simply asking again usually succeeds.
|
||||
* 4xx (bad key, bad request shape) is deterministic config error — fail fast.
|
||||
*/
|
||||
class RetryableLlmError extends Error {}
|
||||
|
||||
export async function callReflectionLlm(
|
||||
cfg: ReflectionLlmConfig,
|
||||
systemPrompt: string,
|
||||
userPrompt: string
|
||||
): Promise<ReflectionLlmResult> {
|
||||
const start = Date.now();
|
||||
let lastErr: Error | null = null;
|
||||
for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
|
||||
try {
|
||||
return await callOnce(cfg, systemPrompt, userPrompt, start);
|
||||
} catch (e) {
|
||||
if (!(e instanceof RetryableLlmError)) throw e;
|
||||
lastErr = e;
|
||||
if (attempt < MAX_ATTEMPTS) {
|
||||
const delay = RETRY_DELAYS_MS[attempt - 1] ?? 1500;
|
||||
logger.warn(
|
||||
`[reflection-llm] attempt ${attempt}/${MAX_ATTEMPTS} failed (${e.message.slice(0, 200)}); retrying in ${delay}ms`,
|
||||
);
|
||||
await sleep(delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw lastErr ?? new Error('reflection LLM failed');
|
||||
}
|
||||
|
||||
async function callOnce(
|
||||
cfg: ReflectionLlmConfig,
|
||||
systemPrompt: string,
|
||||
userPrompt: string,
|
||||
start: number,
|
||||
): Promise<ReflectionLlmResult> {
|
||||
const body: Record<string, unknown> = {
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
@ -43,12 +89,22 @@ export async function callReflectionLlm(
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!resp.ok) {
|
||||
throw new Error(`reflection LLM HTTP ${resp.status}: ${await resp.text()}`);
|
||||
const text = await resp.text();
|
||||
const msg = `reflection LLM HTTP ${resp.status}: ${text}`;
|
||||
// 5xx: backend-side failure (incl. tool-call parse errors on malformed
|
||||
// model output) — resample. 4xx: deterministic config error — fail fast.
|
||||
if (resp.status >= 500) throw new RetryableLlmError(msg);
|
||||
throw new Error(msg);
|
||||
}
|
||||
const data = await resp.json() as any;
|
||||
const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
|
||||
if (!toolCall) throw new Error('reflection LLM returned no tool_call');
|
||||
const parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
|
||||
if (!toolCall) throw new RetryableLlmError('reflection LLM returned no tool_call');
|
||||
let parsed: ReflectionResult;
|
||||
try {
|
||||
parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
|
||||
} catch {
|
||||
throw new RetryableLlmError('reflection LLM tool_call arguments were not valid JSON');
|
||||
}
|
||||
return {
|
||||
parsed,
|
||||
tokensIn: data.usage?.prompt_tokens ?? 0,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user