/*
 * maestro/src/engine/backend-probes.test.ts
 * 2026-06-03 05:08:00 +00:00
 *
 * 453 lines
 * 20 KiB
 * TypeScript
 */

import { describe, it, expect, vi } from 'vitest';
import {
buildDirectProbe,
buildProxyProbe,
parseLlamaSlots,
parseLiteLLMHealth,
parseLlamaMetricsThroughput,
normalizeWorkerBase,
} from './backend-probes.js';
import type { WorkerDef } from '../config.js';
/**
 * Builds a minimal stand-in for a fetch `Response`, exposing only `ok`,
 * `status`, `json()` and `text()`.
 *
 * @param opts.status      HTTP status to report; 200 when omitted.
 * @param opts.ok          Explicit `ok` flag; when omitted it is derived from
 *                         the status (true for 2xx or when no status is given).
 * @param opts.jsonBody    Value that `json()` resolves with.
 * @param opts.textBody    Value that `text()` resolves with; `''` when omitted.
 * @param opts.throwOnJson When truthy, `json()` rejects with
 *                         `Error('json parse failed')`.
 * @returns The stub object, cast to `Response`.
 */
function fakeResponse(opts: { status?: number; ok?: boolean; jsonBody?: unknown; textBody?: string; throwOnJson?: boolean }): Response {
  const statusCode = opts.status ?? 200;
  // A missing status defaults to 200 above, so the range check alone covers
  // the "no status given" case.
  const derivedOk = statusCode >= 200 && statusCode < 300;

  const stub = {
    ok: opts.ok ?? derivedOk,
    status: statusCode,
    async json(): Promise<unknown> {
      if (opts.throwOnJson) {
        throw new Error('json parse failed');
      }
      return opts.jsonBody;
    },
    async text(): Promise<string> {
      return opts.textBody ?? '';
    },
  };

  // Only the members above exist on the stub, hence the double cast.
  return stub as unknown as Response;
}
// normalizeWorkerBase: trailing slashes and a trailing `/v1` are removed;
// any other path is returned unchanged.
describe('normalizeWorkerBase', () => {
  it('strips trailing slashes', () => {
    for (const raw of ['http://x/', 'http://x///']) {
      expect(normalizeWorkerBase(raw)).toBe('http://x');
    }
  });

  it('strips a single trailing /v1', () => {
    for (const raw of ['http://x/v1', 'http://x/v1/']) {
      expect(normalizeWorkerBase(raw)).toBe('http://x');
    }
  });

  it('leaves other paths alone', () => {
    const untouched = 'http://x/api';
    expect(normalizeWorkerBase(untouched)).toBe(untouched);
  });
});
// parseLlamaSlots: accepts a bare slot array, a `{ slots: [...] }` envelope,
// or garbage, and summarises it as busySlots / totalSlots / loadedModel.
describe('parseLlamaSlots', () => {
  it('counts processing slots and picks the first model', () => {
    const slots = [
      { id: 0, is_processing: true, model: 'qwen3:8b' },
      { id: 1, is_processing: false, model: 'qwen3:8b' },
      { id: 2, is_processing: true, model: 'qwen3:8b' },
    ];

    const summary = parseLlamaSlots(slots);

    expect(summary).toEqual({ busySlots: 2, totalSlots: 3, loadedModel: 'qwen3:8b' });
  });

  it('handles legacy state-number shape', () => {
    // Legacy shape: busy-ness is carried by a numeric `state` field rather
    // than an `is_processing` flag.
    const legacySlots = [
      { id: 0, state: 1, model: 'm' },
      { id: 1, state: 0, model: 'm' },
    ];

    const { busySlots, totalSlots } = parseLlamaSlots(legacySlots);

    expect(busySlots).toBe(1);
    expect(totalSlots).toBe(2);
  });

  it('handles envelope { slots: [...] }', () => {
    const envelope = { slots: [{ id: 0, is_processing: false }] };

    const { totalSlots } = parseLlamaSlots(envelope);

    expect(totalSlots).toBe(1);
  });

  it('returns zeros on garbage', () => {
    expect(parseLlamaSlots(null)).toEqual({ busySlots: 0, totalSlots: 0, loadedModel: null });
  });
});
// parseLiteLLMHealth: turns a LiteLLM /health payload into one status entry
// per deployment, tagged with the worker id and probe timestamp passed in.
describe('parseLiteLLMHealth', () => {
  it('returns one NodeStatus per healthy + unhealthy deployment', () => {
    const probedAt = '2026-05-18T00:00:00.000Z';
    const statuses = parseLiteLLMHealth(
      {
        healthy_endpoints: [{ model: 'gpu-a' }, { litellm_params: { model: 'gpu-b' } }],
        unhealthy_endpoints: [{ model: 'gpu-down', error: 'timeout' }],
      },
      'pool',
      probedAt,
    );

    // The Phase C dedup policy (see parseLiteLLMHealth) emits unhealthy
    // entries first, but that ordering is not something consumers should
    // depend on, so compare the sorted id set rather than positions.
    const nodeIds = statuses.map((s) => s.nodeId).sort();
    expect(nodeIds).toEqual(['gpu-a', 'gpu-b', 'gpu-down']);

    const down = statuses.find((s) => s.nodeId === 'gpu-down')!;
    expect(down.online).toBe(false);
    expect(down.lastProbeError).toBe('timeout');

    expect(statuses.every(({ workerId }) => workerId === 'pool')).toBe(true);
    expect(statuses.every(({ source }) => source === 'proxy')).toBe(true);
  });

  it('dedupes by deployment id with unhealthy winning precedence (flap detection)', () => {
    const probedAt = 't';
    const statuses = parseLiteLLMHealth(
      {
        healthy_endpoints: [{ model: 'gpu-a' }],
        unhealthy_endpoints: [{ model: 'gpu-a', error: 'flap' }],
      },
      'pool',
      probedAt,
    );

    // Phase C: a deployment listed as both healthy and unhealthy must be
    // reported as unhealthy, otherwise operators would see a misleading
    // green icon for a backend that is flapping.
    expect(statuses).toHaveLength(1);
    const only = statuses[0]!;
    expect(only.online).toBe(false);
    expect(only.lastProbeError).toBe('flap');
  });

  it('preserves unhealthy-only entries (regression)', () => {
    const probedAt = 't';
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-down', error: 'unreachable' }] },
      'pool',
      probedAt,
    );

    expect(statuses).toHaveLength(1);
    const only = statuses[0]!;
    expect(only.online).toBe(false);
    expect(only.lastProbeError).toBe('unreachable');
  });

  it('skips entries with no deployment id', () => {
    // The single healthy entry is an empty object, so there is no id to key on.
    const statuses = parseLiteLLMHealth({ healthy_endpoints: [{}] }, 'pool', 't');

    expect(statuses).toEqual([]);
  });

  it('extracts .message from LiteLLM post-1.40 object-form error', () => {
    // From LiteLLM 1.40 onward errors arrive as { message, type } objects.
    // The earlier string-only guard dropped that form silently, leaving
    // lastProbeError undefined — i.e. "red icon, no reason".
    const statuses = parseLiteLLMHealth(
      {
        unhealthy_endpoints: [
          {
            model: 'gpu-down',
            error: { message: 'Timeout', type: 'Timeout' },
          },
        ],
      },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    const only = statuses[0]!;
    expect(only.online).toBe(false);
    expect(only.lastProbeError).toBe('Timeout');
  });

  it('falls back to JSON.stringify for arbitrary object errors (no .message)', () => {
    // Certain LiteLLM forks/middleware emit structured errors that have
    // neither a `message` nor a string form. Surfacing the raw JSON lets
    // operators diagnose the failure without reading the code.
    const statuses = parseLiteLLMHealth(
      {
        unhealthy_endpoints: [
          {
            model: 'gpu-x',
            error: { code: 503, retryAfter: 30 },
          },
        ],
      },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    expect(statuses[0]!.lastProbeError).toBe('{"code":503,"retryAfter":30}');
  });

  it('leaves lastProbeError undefined when error is null/undefined (regression)', () => {
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-z', error: null }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    expect(statuses[0]!.lastProbeError).toBeUndefined();
  });

  it('still preserves string-form errors for pre-1.40 LiteLLM (regression)', () => {
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-old', error: 'classic string error' }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    expect(statuses[0]!.lastProbeError).toBe('classic string error');
  });

  describe('AAO Gateway extension (aao_busy_slots / aao_total_slots)', () => {
    it('inherits the gateway-aggregated busy view when aao_* fields are present', () => {
      // Multi-client sharing: the gateway aggregates /slots over every AAO
      // talking to it and publishes the totals on /health. Each client
      // AAO's local registry picks up that union view here, so the
      // dashboard tree can show "some other AAO is using GPU X right now"
      // even while this AAO is idle.
      const statuses = parseLiteLLMHealth(
        {
          healthy_endpoints: [
            { model: 'gpu-a', litellm_params: { model: 'gpu-a' }, aao_busy_slots: 3, aao_total_slots: 4 },
            { model: 'gpu-b', litellm_params: { model: 'gpu-b' }, aao_busy_slots: 0, aao_total_slots: 4 },
          ],
        },
        'gw',
        't',
      );

      expect(statuses).toHaveLength(2);
      const byNode = (id: string) => statuses.find((entry) => entry.nodeId === id)!;
      const gpuA = byNode('gpu-a');
      const gpuB = byNode('gpu-b');
      expect(gpuA).toMatchObject({ busy: true, busySlots: 3, totalSlots: 4, online: true });
      expect(gpuB).toMatchObject({ busy: false, busySlots: 0, totalSlots: 4, online: true });
    });

    it('treats missing aao_* fields as zero (vanilla LiteLLM compat)', () => {
      const statuses = parseLiteLLMHealth(
        { healthy_endpoints: [{ model: 'm1', litellm_params: { model: 'm1' } }] },
        'gw',
        't',
      );

      expect(statuses[0]).toMatchObject({ busy: false, busySlots: 0, totalSlots: 0 });
    });

    it('coerces malformed aao_busy_slots to 0 without throwing', () => {
      // A non-numeric busy count and a negative total are both fed in.
      const statuses = parseLiteLLMHealth(
        {
          healthy_endpoints: [
            { model: 'm1', litellm_params: { model: 'm1' }, aao_busy_slots: 'three', aao_total_slots: -2 },
          ],
        },
        'gw',
        't',
      );

      expect(statuses[0]).toMatchObject({ busy: false, busySlots: 0, totalSlots: 0 });
    });

    it('floors fractional aao_busy_slots', () => {
      const statuses = parseLiteLLMHealth(
        {
          healthy_endpoints: [
            { model: 'm1', litellm_params: { model: 'm1' }, aao_busy_slots: 2.9, aao_total_slots: 4 },
          ],
        },
        'gw',
        't',
      );

      expect(statuses[0]!.busySlots).toBe(2);
    });
  });
});
// parseLlamaMetricsThroughput: extracts a throughput gauge from a
// Prometheus-style /metrics text body, or null when none is usable.
describe('parseLlamaMetricsThroughput', () => {
  /** Joins metric lines with newlines; no trailing newline is added. */
  const metricsText = (...rows: string[]): string => rows.join('\n');

  it('reads llamacpp:tokens_per_second when present', () => {
    const body = metricsText(
      '# HELP llamacpp:tokens_per_second current generation throughput',
      '# TYPE llamacpp:tokens_per_second gauge',
      'llamacpp:tokens_per_second 42.5',
    );

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(42.5);
  });

  it('falls back to prompt_tokens_seconds when tokens_per_second is absent', () => {
    const tps = parseLlamaMetricsThroughput('llamacpp:prompt_tokens_seconds 123.4\n');

    expect(tps).toBeCloseTo(123.4);
  });

  it('tolerates label sets in the metric line', () => {
    const tps = parseLlamaMetricsThroughput('llamacpp:tokens_per_second{model="qwen3:8b"} 99\n');

    expect(tps).toBe(99);
  });

  it('returns null when no recognised gauge appears', () => {
    for (const body of ['# nothing useful here\nfoo 1\n', '']) {
      expect(parseLlamaMetricsThroughput(body)).toBeNull();
    }
  });

  it('rejects negative or non-finite values', () => {
    for (const body of ['llamacpp:tokens_per_second -1\n', 'llamacpp:tokens_per_second NaN\n']) {
      expect(parseLlamaMetricsThroughput(body)).toBeNull();
    }
  });

  it('does not match the metric name appearing inside another line', () => {
    // The regex is anchored, so the metric name has to sit at the start
    // of the line; here it only appears inside a comment.
    const commentOnly = '# llamacpp:tokens_per_second 9999 (in a comment)\n';

    expect(parseLlamaMetricsThroughput(commentOnly)).toBeNull();
  });

  it('returns max across multi-label series within the same metric family', () => {
    // A multi-model llama-server (one process serving several slots with
    // distinct `{model="..."}` labels) writes one line per label set.
    // The maximum has to be taken — stopping at the first match would
    // silently drop the faster sibling series.
    const body =
      metricsText(
        'llamacpp:tokens_per_second{model="qwen3:8b"} 5.0',
        'llamacpp:tokens_per_second{model="qwen3:32b"} 80.0',
        'llamacpp:tokens_per_second{model="qwen3:14b"} 42.0',
      ) + '\n';

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(80.0);
  });

  it('still returns the single value for single-label series (regression)', () => {
    const tps = parseLlamaMetricsThroughput('llamacpp:tokens_per_second{model="qwen3:8b"} 42.5\n');

    expect(tps).toBeCloseTo(42.5);
  });

  it('ignores NaN/negative values in multi-label series and returns the max of the valid ones', () => {
    const body =
      metricsText(
        'llamacpp:tokens_per_second{model="a"} NaN',
        'llamacpp:tokens_per_second{model="b"} -1',
        'llamacpp:tokens_per_second{model="c"} 7.5',
        'llamacpp:tokens_per_second{model="d"} 3.2',
      ) + '\n';

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(7.5);
  });

  it('still prefers tokens_per_second over prompt_tokens_seconds (regression)', () => {
    // When both families are present tokens_per_second wins; the max
    // must not be taken across families.
    const body =
      metricsText(
        'llamacpp:tokens_per_second{model="a"} 10.0',
        'llamacpp:prompt_tokens_seconds{model="a"} 9999.0',
      ) + '\n';

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(10.0);
  });
});
// buildDirectProbe: probes a single worker directly, using an injected fetch
// implementation and clock so every test is deterministic and offline.
describe('buildDirectProbe', () => {
  const ts = '2026-05-18T00:00:00.000Z';

  /** Builds a probe wired to the given fetch stub and the fixed timestamp. */
  const probeWith = (fetchImpl: typeof fetch) => buildDirectProbe({ fetchImpl, now: () => ts });

  /** Fetch stub that resolves with each reply once, in the order given. */
  const queuedFetch = (...replies: Response[]): typeof fetch => {
    const mock = vi.fn();
    for (const reply of replies) {
      mock.mockResolvedValueOnce(reply);
    }
    return mock as unknown as typeof fetch;
  };

  /** Argument lists recorded by a vi.fn()-backed fetch stub. */
  const recordedCalls = (fetchImpl: typeof fetch): unknown[][] =>
    (fetchImpl as unknown as { mock: { calls: unknown[][] } }).mock.calls;

  it('returns busySlots / totalSlots when /slots responds', async () => {
    const slotsReply = fakeResponse({
      jsonBody: [
        { id: 0, is_processing: true, model: 'qwen3:8b' },
        { id: 1, is_processing: false, model: 'qwen3:8b' },
      ],
    });
    // /metrics is best-effort; an empty body keeps throughput at null.
    const metricsReply = fakeResponse({ textBody: '' });
    const fetchImpl = queuedFetch(slotsReply, metricsReply);

    const status = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1/v1' });

    expect(status.online).toBe(true);
    expect(status.busySlots).toBe(1);
    expect(status.totalSlots).toBe(2);
    expect(status.busy).toBe(true);
    expect(status.loadedModel).toBe('qwen3:8b');

    const calls = recordedCalls(fetchImpl);
    // The endpoint was normalised (no /v1) and /slots was hit first ...
    expect(calls[0]![0]).toBe('http://w1/slots');
    // ... followed by /metrics, likewise at the root.
    expect(calls[1]![0]).toBe('http://w1/metrics');
  });

  it('reports throughputTps when /metrics surfaces a gauge', async () => {
    const fetchImpl = queuedFetch(
      fakeResponse({ jsonBody: [{ id: 0, is_processing: true, model: 'qwen' }] }),
      fakeResponse({ textBody: 'llamacpp:tokens_per_second 87.5\n' }),
    );

    const { throughputTps } = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(throughputTps).toBeCloseTo(87.5);
  });

  it('leaves throughputTps null when /metrics 404s (--metrics opt-in)', async () => {
    const fetchImpl = queuedFetch(
      fakeResponse({ jsonBody: [] }),
      fakeResponse({ status: 404, ok: false }),
    );

    const status = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(true);
    expect(status.throughputTps).toBeNull();
  });

  it('does not demote online state when /metrics throws', async () => {
    // First call (/slots) succeeds, second call (/metrics) rejects.
    const fetchImpl = vi
      .fn()
      .mockResolvedValueOnce(fakeResponse({ jsonBody: [] }))
      .mockRejectedValueOnce(new Error('econnreset')) as unknown as typeof fetch;

    const status = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(true);
    expect(status.throughputTps).toBeNull();
  });

  it('does NOT forward worker.apiKey on direct probes (would leak LiteLLM virtual key)', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({ jsonBody: [] })) as unknown as typeof fetch;

    await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1', apiKey: 'sk-litellm-virtual-tok' });

    const firstInit = recordedCalls(fetchImpl)[0]![1] as RequestInit;
    const sentHeaders = firstInit.headers as Record<string, string>;
    expect(sentHeaders.Authorization).toBeUndefined();
    // The key must not have slipped out under some other header name either.
    for (const headerValue of Object.values(sentHeaders)) {
      expect(headerValue).not.toContain('sk-litellm-virtual-tok');
    }
  });

  it('also omits Authorization on /health fallback (no apiKey on direct path)', async () => {
    // First reply is a 404 (triggering the fallback), second is the 200
    // answered to the follow-up request.
    const fetchImpl = queuedFetch(
      fakeResponse({ status: 404, ok: false }),
      fakeResponse({ status: 200, ok: true, jsonBody: { status: 'ok' } }),
    );

    await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1', apiKey: 'sk-virtual' });

    const healthInit = recordedCalls(fetchImpl)[1]![1] as RequestInit;
    const healthHeaders = healthInit.headers as Record<string, string>;
    expect(healthHeaders.Authorization).toBeUndefined();
  });

  it('falls back to /health on 404 (--no-slots disabled)', async () => {
    const fetchImpl = queuedFetch(
      fakeResponse({ status: 404, ok: false }),
      fakeResponse({ status: 200, ok: true, jsonBody: { status: 'ok' } }),
    );

    const status = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1', model: 'qwen' });

    expect(status.online).toBe(true);
    expect(status.totalSlots).toBe(0);
    expect(status.loadedModel).toBe('qwen');
  });

  it('reports offline + error message when fetch rejects', async () => {
    const fetchImpl = vi.fn().mockRejectedValue(new Error('econnrefused')) as unknown as typeof fetch;

    const status = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(false);
    expect(status.lastProbeError).toBe('econnrefused');
  });

  it('reports offline on non-fallback non-OK HTTP status', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({ status: 500, ok: false })) as unknown as typeof fetch;

    const status = await probeWith(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(false);
    expect(status.lastProbeError).toContain('500');
  });

  it('aborts after timeoutMs', async () => {
    // Never settles on its own: the promise only rejects once the abort
    // signal handed to fetch fires.
    const hangUntilAborted = (_url: string, init: RequestInit): Promise<Response> =>
      new Promise<Response>((_resolve, reject) => {
        init.signal!.addEventListener('abort', () => reject(new Error('aborted')));
      });
    const fetchImpl = vi.fn().mockImplementation(hangUntilAborted) as unknown as typeof fetch;

    const probe = buildDirectProbe({ fetchImpl, timeoutMs: 10, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(false);
    expect(status.lastProbeError).toBe('aborted');
  });
});
// buildProxyProbe: probes a proxy worker through its /health endpoint and
// yields an array of statuses.
describe('buildProxyProbe', () => {
  const ts = '2026-05-18T00:00:00.000Z';

  /** Builds a proxy probe wired to the given fetch stub and the fixed timestamp. */
  const probeWith = (fetchImpl: typeof fetch) => buildProxyProbe({ fetchImpl, now: () => ts });

  /** Fetch stub that resolves every call with a response carrying `jsonBody`. */
  const fetchReturningJson = (jsonBody: unknown): typeof fetch =>
    vi.fn().mockResolvedValue(fakeResponse({ jsonBody })) as unknown as typeof fetch;

  /** Argument list of the first call recorded by a vi.fn()-backed fetch stub. */
  const firstCall = (fetchImpl: typeof fetch): unknown[] =>
    (fetchImpl as unknown as { mock: { calls: unknown[][] } }).mock.calls[0]!;

  it('returns one status per deployment on success', async () => {
    const fetchImpl = fetchReturningJson({
      healthy_endpoints: [{ model: 'gpu-a' }, { model: 'gpu-b' }],
    });

    const statuses = await probeWith(fetchImpl)({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true });

    const nodeIds = statuses.map((s) => s.nodeId).sort();
    expect(nodeIds).toEqual(['gpu-a', 'gpu-b']);
    expect(statuses.every((s) => s.online)).toBe(true);
    // The URL was normalised: /v1 removed and /health appended at the root.
    const [requestedUrl] = firstCall(fetchImpl);
    expect(requestedUrl).toBe('http://litellm/health');
  });

  it('forwards Bearer Authorization on proxy probes when apiKey is set', async () => {
    const fetchImpl = fetchReturningJson({ healthy_endpoints: [], unhealthy_endpoints: [] });

    await probeWith(fetchImpl)({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true, apiKey: 'sk-virtual' });

    const init = firstCall(fetchImpl)[1] as RequestInit;
    const sentHeaders = init.headers as Record<string, string>;
    expect(sentHeaders.Authorization).toBe('Bearer sk-virtual');
  });

  it('returns a single offline status when /health is unreachable', async () => {
    const fetchImpl = vi.fn().mockRejectedValue(new Error('econnrefused')) as unknown as typeof fetch;

    const statuses = await probeWith(fetchImpl)({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true });

    expect(statuses).toHaveLength(1);
    const only = statuses[0]!;
    expect(only.online).toBe(false);
    expect(only.nodeId).toBe('pool');
  });

  it('returns a single status when the proxy is alive but reports zero deployments', async () => {
    const fetchImpl = fetchReturningJson({ healthy_endpoints: [], unhealthy_endpoints: [] });

    const statuses = await probeWith(fetchImpl)({ id: 'pool', endpoint: 'http://litellm', proxy: true });

    expect(statuses).toHaveLength(1);
    const only = statuses[0]!;
    expect(only.nodeId).toBe('pool');
    expect(only.online).toBe(true);
  });
});