sync: update from private repo (5091989)
Some checks failed
CI / build-and-test (push) Has been cancelled
Some checks failed
CI / build-and-test (push) Has been cancelled
This commit is contained in:
parent
b5831943a4
commit
000a2474aa
@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect, vi, afterEach } from 'vitest';
|
import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
|
||||||
import { callReflectionLlm } from './llm-client.js';
|
import { callReflectionLlm, setReflectionRetrySleep } from './llm-client.js';
|
||||||
import type { ReflectionLlmConfig } from './llm-client.js';
|
import type { ReflectionLlmConfig } from './llm-client.js';
|
||||||
|
|
||||||
const cfg: ReflectionLlmConfig = {
|
const cfg: ReflectionLlmConfig = {
|
||||||
@ -7,18 +7,15 @@ const cfg: ReflectionLlmConfig = {
|
|||||||
model: 'test-model',
|
model: 'test-model',
|
||||||
};
|
};
|
||||||
|
|
||||||
afterEach(() => {
|
const validResult = {
|
||||||
vi.unstubAllGlobals();
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('callReflectionLlm', () => {
|
|
||||||
it('happy path: parses tool_call arguments and extracts token usage', async () => {
|
|
||||||
const validResult = {
|
|
||||||
memory_changes: [],
|
memory_changes: [],
|
||||||
piece_changes: { should_edit: false },
|
piece_changes: { should_edit: false },
|
||||||
reasoning: 'x',
|
reasoning: 'x',
|
||||||
};
|
};
|
||||||
const mockResponse = {
|
|
||||||
|
const okResponse = {
|
||||||
|
ok: true,
|
||||||
|
json: () => Promise.resolve({
|
||||||
choices: [
|
choices: [
|
||||||
{
|
{
|
||||||
message: {
|
message: {
|
||||||
@ -33,16 +30,22 @@ describe('callReflectionLlm', () => {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
usage: {
|
usage: { prompt_tokens: 42, completion_tokens: 17 },
|
||||||
prompt_tokens: 42,
|
}),
|
||||||
completion_tokens: 17,
|
};
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
|
beforeEach(() => {
|
||||||
ok: true,
|
// No real backoff sleeps in tests.
|
||||||
json: () => Promise.resolve(mockResponse),
|
setReflectionRetrySleep(async () => {});
|
||||||
}));
|
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
vi.unstubAllGlobals();
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('callReflectionLlm', () => {
|
||||||
|
it('happy path: parses tool_call arguments and extracts token usage', async () => {
|
||||||
|
vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse));
|
||||||
|
|
||||||
const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt');
|
const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt');
|
||||||
|
|
||||||
@ -54,21 +57,70 @@ describe('callReflectionLlm', () => {
|
|||||||
expect(result.durationMs).toBeGreaterThanOrEqual(0);
|
expect(result.durationMs).toBeGreaterThanOrEqual(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('error path: throws when no tool_calls present', async () => {
|
it('retries a 5xx (backend tool-call parse failure) and succeeds on resample', async () => {
|
||||||
const mockResponse = {
|
const fetchMock = vi.fn()
|
||||||
choices: [
|
.mockResolvedValueOnce({
|
||||||
{
|
ok: false,
|
||||||
message: {},
|
status: 500,
|
||||||
},
|
text: () => Promise.resolve('{"error":{"message":"Failed to parse input at pos 41: <tool_call>..."}}'),
|
||||||
],
|
})
|
||||||
};
|
.mockResolvedValueOnce(okResponse);
|
||||||
|
vi.stubGlobal('fetch', fetchMock);
|
||||||
|
|
||||||
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
|
const result = await callReflectionLlm(cfg, 's', 'u');
|
||||||
|
expect(result.parsed.reasoning).toBe('x');
|
||||||
|
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('gives up after 3 attempts of persistent 5xx', async () => {
|
||||||
|
const fetchMock = vi.fn().mockResolvedValue({
|
||||||
|
ok: false,
|
||||||
|
status: 500,
|
||||||
|
text: () => Promise.resolve('parse error'),
|
||||||
|
});
|
||||||
|
vi.stubGlobal('fetch', fetchMock);
|
||||||
|
|
||||||
|
await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 500');
|
||||||
|
expect(fetchMock).toHaveBeenCalledTimes(3);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does NOT retry a 4xx (deterministic config error, e.g. invalid api key)', async () => {
|
||||||
|
const fetchMock = vi.fn().mockResolvedValue({
|
||||||
|
ok: false,
|
||||||
|
status: 401,
|
||||||
|
text: () => Promise.resolve('invalid api key'),
|
||||||
|
});
|
||||||
|
vi.stubGlobal('fetch', fetchMock);
|
||||||
|
|
||||||
|
await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 401');
|
||||||
|
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('retries when no tool_calls present, then throws after exhaustion', async () => {
|
||||||
|
const fetchMock = vi.fn().mockResolvedValue({
|
||||||
ok: true,
|
ok: true,
|
||||||
json: () => Promise.resolve(mockResponse),
|
json: () => Promise.resolve({ choices: [{ message: {} }] }),
|
||||||
}));
|
});
|
||||||
|
vi.stubGlobal('fetch', fetchMock);
|
||||||
|
|
||||||
await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt'))
|
await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt'))
|
||||||
.rejects.toThrow('no tool_call');
|
.rejects.toThrow('no tool_call');
|
||||||
|
expect(fetchMock).toHaveBeenCalledTimes(3);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('retries malformed tool_call arguments JSON', async () => {
|
||||||
|
const fetchMock = vi.fn()
|
||||||
|
.mockResolvedValueOnce({
|
||||||
|
ok: true,
|
||||||
|
json: () => Promise.resolve({
|
||||||
|
choices: [{ message: { tool_calls: [{ function: { name: 'submit_reflection', arguments: '{broken' } }] } }],
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
.mockResolvedValueOnce(okResponse);
|
||||||
|
vi.stubGlobal('fetch', fetchMock);
|
||||||
|
|
||||||
|
const result = await callReflectionLlm(cfg, 's', 'u');
|
||||||
|
expect(result.parsed.reasoning).toBe('x');
|
||||||
|
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -16,12 +16,58 @@ export interface ReflectionLlmResult {
|
|||||||
raw: unknown;
|
raw: unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Total attempts (1 initial + retries) for resample-worthy failures. */
|
||||||
|
const MAX_ATTEMPTS = 3;
|
||||||
|
/** Backoff delays in ms before attempts 2 and 3. Tests avoid the real wait by replacing the sleeper via setReflectionRetrySleep. */
|
||||||
|
const RETRY_DELAYS_MS = [500, 1500];
|
||||||
|
|
||||||
|
let sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));
|
||||||
|
/** Test hook: replace the backoff sleeper (avoids real timers in vitest). */
|
||||||
|
export function setReflectionRetrySleep(fn: (ms: number) => Promise<void>): void {
|
||||||
|
sleep = fn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Errors worth a resample: small reflection models occasionally emit
|
||||||
|
* malformed tool-call markup, which strict backends (e.g. llama-server's
|
||||||
|
* tool parser) reject with a 5xx like
|
||||||
|
* "Failed to parse input at pos 41: <tool_call>...". The sampling is
|
||||||
|
* stochastic (temperature 0.2), so simply asking again usually succeeds.
|
||||||
|
* 4xx (bad key, bad request shape) is a deterministic config error — fail fast.
|
||||||
|
*/
|
||||||
|
class RetryableLlmError extends Error {}
|
||||||
|
|
||||||
export async function callReflectionLlm(
|
export async function callReflectionLlm(
|
||||||
cfg: ReflectionLlmConfig,
|
cfg: ReflectionLlmConfig,
|
||||||
systemPrompt: string,
|
systemPrompt: string,
|
||||||
userPrompt: string
|
userPrompt: string
|
||||||
): Promise<ReflectionLlmResult> {
|
): Promise<ReflectionLlmResult> {
|
||||||
const start = Date.now();
|
const start = Date.now();
|
||||||
|
let lastErr: Error | null = null;
|
||||||
|
for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
|
||||||
|
try {
|
||||||
|
return await callOnce(cfg, systemPrompt, userPrompt, start);
|
||||||
|
} catch (e) {
|
||||||
|
if (!(e instanceof RetryableLlmError)) throw e;
|
||||||
|
lastErr = e;
|
||||||
|
if (attempt < MAX_ATTEMPTS) {
|
||||||
|
const delay = RETRY_DELAYS_MS[attempt - 1] ?? 1500;
|
||||||
|
logger.warn(
|
||||||
|
`[reflection-llm] attempt ${attempt}/${MAX_ATTEMPTS} failed (${e.message.slice(0, 200)}); retrying in ${delay}ms`,
|
||||||
|
);
|
||||||
|
await sleep(delay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw lastErr ?? new Error('reflection LLM failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
async function callOnce(
|
||||||
|
cfg: ReflectionLlmConfig,
|
||||||
|
systemPrompt: string,
|
||||||
|
userPrompt: string,
|
||||||
|
start: number,
|
||||||
|
): Promise<ReflectionLlmResult> {
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
messages: [
|
messages: [
|
||||||
{ role: 'system', content: systemPrompt },
|
{ role: 'system', content: systemPrompt },
|
||||||
@ -43,12 +89,22 @@ export async function callReflectionLlm(
|
|||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
});
|
});
|
||||||
if (!resp.ok) {
|
if (!resp.ok) {
|
||||||
throw new Error(`reflection LLM HTTP ${resp.status}: ${await resp.text()}`);
|
const text = await resp.text();
|
||||||
|
const msg = `reflection LLM HTTP ${resp.status}: ${text}`;
|
||||||
|
// 5xx: backend-side failure (incl. tool-call parse errors on malformed
|
||||||
|
// model output) — resample. 4xx: deterministic config error — fail fast.
|
||||||
|
if (resp.status >= 500) throw new RetryableLlmError(msg);
|
||||||
|
throw new Error(msg);
|
||||||
}
|
}
|
||||||
const data = await resp.json() as any;
|
const data = await resp.json() as any;
|
||||||
const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
|
const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
|
||||||
if (!toolCall) throw new Error('reflection LLM returned no tool_call');
|
if (!toolCall) throw new RetryableLlmError('reflection LLM returned no tool_call');
|
||||||
const parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
|
let parsed: ReflectionResult;
|
||||||
|
try {
|
||||||
|
parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult;
|
||||||
|
} catch {
|
||||||
|
throw new RetryableLlmError('reflection LLM tool_call arguments were not valid JSON');
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
parsed,
|
parsed,
|
||||||
tokensIn: data.usage?.prompt_tokens ?? 0,
|
tokensIn: data.usage?.prompt_tokens ?? 0,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user