import { describe, it, expect, vi } from 'vitest';
import {
  buildDirectProbe,
  buildProxyProbe,
  parseLlamaSlots,
  parseLiteLLMHealth,
  parseLlamaMetricsThroughput,
  normalizeWorkerBase,
} from './backend-probes.js';
import type { WorkerDef } from '../config.js';

/**
 * Builds a minimal stand-in for a fetch `Response`.
 *
 * Only `ok`, `status`, `json()` and `text()` are provided; the object is
 * cast to `Response`, so any other member is absent at runtime.
 *
 * @param opts.status      HTTP status to report (defaults to 200).
 * @param opts.ok          Explicit `ok` flag. When omitted it is derived from
 *                         `status`: true if `status` is unset or in 200–299.
 * @param opts.jsonBody    Value resolved by `json()`.
 * @param opts.textBody    Value resolved by `text()` (defaults to `''`).
 * @param opts.throwOnJson When true, `json()` rejects with `'json parse failed'`.
 */
function fakeResponse(opts: {
  status?: number;
  ok?: boolean;
  jsonBody?: unknown;
  textBody?: string;
  throwOnJson?: boolean;
}): Response {
  const ok = opts.ok ?? (opts.status === undefined || (opts.status >= 200 && opts.status < 300));
  return {
    ok,
    status: opts.status ?? 200,
    json: async () => {
      if (opts.throwOnJson) throw new Error('json parse failed');
      return opts.jsonBody;
    },
    text: async () => opts.textBody ?? '',
  } as unknown as Response;
}

/**
 * Returns the argument lists recorded by a `vi.fn()` mock that has been cast
 * to `typeof fetch`. Centralises the double cast needed to reach `.mock.calls`.
 */
function recordedCalls(fetchImpl: typeof fetch): unknown[][] {
  return (fetchImpl as unknown as { mock: { calls: unknown[][] } }).mock.calls;
}

describe('normalizeWorkerBase', () => {
  it('strips trailing slashes', () => {
    expect(normalizeWorkerBase('http://x/')).toBe('http://x');
    expect(normalizeWorkerBase('http://x///')).toBe('http://x');
  });

  it('strips a single trailing /v1', () => {
    expect(normalizeWorkerBase('http://x/v1')).toBe('http://x');
    expect(normalizeWorkerBase('http://x/v1/')).toBe('http://x');
  });

  it('leaves other paths alone', () => {
    expect(normalizeWorkerBase('http://x/api')).toBe('http://x/api');
  });
});

describe('parseLlamaSlots', () => {
  it('counts processing slots and picks the first model', () => {
    const parsed = parseLlamaSlots([
      { id: 0, is_processing: true, model: 'qwen3:8b' },
      { id: 1, is_processing: false, model: 'qwen3:8b' },
      { id: 2, is_processing: true, model: 'qwen3:8b' },
    ]);
    expect(parsed).toEqual({ busySlots: 2, totalSlots: 3, loadedModel: 'qwen3:8b' });
  });

  it('handles legacy state-number shape', () => {
    const parsed = parseLlamaSlots([
      { id: 0, state: 1, model: 'm' },
      { id: 1, state: 0, model: 'm' },
    ]);
    expect(parsed.busySlots).toBe(1);
    expect(parsed.totalSlots).toBe(2);
  });

  it('handles envelope { slots: [...] }', () => {
    const parsed = parseLlamaSlots({ slots: [{ id: 0, is_processing: false }] });
    expect(parsed.totalSlots).toBe(1);
  });

  it('returns zeros on garbage', () => {
    const parsed = parseLlamaSlots(null);
    expect(parsed).toEqual({ busySlots: 0, totalSlots: 0, loadedModel: null });
  });
});

describe('parseLiteLLMHealth', () => {
  it('returns one NodeStatus per healthy + unhealthy deployment', () => {
    const ts = '2026-05-18T00:00:00.000Z';
    const out = parseLiteLLMHealth({
      healthy_endpoints: [{ model: 'gpu-a' }, { litellm_params: { model: 'gpu-b' } }],
      unhealthy_endpoints: [{ model: 'gpu-down', error: 'timeout' }],
    }, 'pool', ts);
    // Order is unhealthy-first per Phase C dedup policy (see
    // parseLiteLLMHealth), but consumers shouldn't rely on it — assert
    // by set membership instead.
    expect(out.map(s => s.nodeId).sort()).toEqual(['gpu-a', 'gpu-b', 'gpu-down']);
    expect(out.find(s => s.nodeId === 'gpu-down')!.online).toBe(false);
    expect(out.find(s => s.nodeId === 'gpu-down')!.lastProbeError).toBe('timeout');
    expect(out.every(s => s.workerId === 'pool')).toBe(true);
    expect(out.every(s => s.source === 'proxy')).toBe(true);
  });

  it('dedupes by deployment id with unhealthy winning precedence (flap detection)', () => {
    const ts = 't';
    const out = parseLiteLLMHealth({
      healthy_endpoints: [{ model: 'gpu-a' }],
      unhealthy_endpoints: [{ model: 'gpu-a', error: 'flap' }],
    }, 'pool', ts);
    // Phase C: when the same deployment appears in both lists, the
    // unhealthy entry must surface so operators don't see a misleading
    // green icon for a flapping backend.
    expect(out).toHaveLength(1);
    expect(out[0]!.online).toBe(false);
    expect(out[0]!.lastProbeError).toBe('flap');
  });

  it('preserves unhealthy-only entries (regression)', () => {
    const ts = 't';
    const out = parseLiteLLMHealth({
      unhealthy_endpoints: [{ model: 'gpu-down', error: 'unreachable' }],
    }, 'pool', ts);
    expect(out).toHaveLength(1);
    expect(out[0]!.online).toBe(false);
    expect(out[0]!.lastProbeError).toBe('unreachable');
  });

  it('skips entries with no deployment id', () => {
    const out = parseLiteLLMHealth({ healthy_endpoints: [{ /* nothing */ }] }, 'pool', 't');
    expect(out).toEqual([]);
  });

  it('extracts .message from LiteLLM post-1.40 object-form error', () => {
    // LiteLLM ≥ 1.40 wraps errors as { message, type } objects. The
    // previous string-only guard silently dropped the object form and
    // left lastProbeError undefined, producing "red icon, no reason".
    const out = parseLiteLLMHealth({
      unhealthy_endpoints: [{
        model: 'gpu-down',
        error: { message: 'Timeout', type: 'Timeout' },
      }],
    }, 'pool', 't');
    expect(out).toHaveLength(1);
    expect(out[0]!.online).toBe(false);
    expect(out[0]!.lastProbeError).toBe('Timeout');
  });

  it('falls back to JSON.stringify for arbitrary object errors (no .message)', () => {
    // Some LiteLLM forks/middleware return structured errors with
    // neither `message` nor a string form. Surfacing the JSON keeps
    // operators able to diagnose without code spelunking.
    const out = parseLiteLLMHealth({
      unhealthy_endpoints: [{
        model: 'gpu-x',
        error: { code: 503, retryAfter: 30 },
      }],
    }, 'pool', 't');
    expect(out).toHaveLength(1);
    expect(out[0]!.lastProbeError).toBe('{"code":503,"retryAfter":30}');
  });

  it('leaves lastProbeError undefined when error is null/undefined (regression)', () => {
    const out = parseLiteLLMHealth({
      unhealthy_endpoints: [{ model: 'gpu-z', error: null }],
    }, 'pool', 't');
    expect(out).toHaveLength(1);
    expect(out[0]!.lastProbeError).toBeUndefined();
  });

  it('still preserves string-form errors for pre-1.40 LiteLLM (regression)', () => {
    const out = parseLiteLLMHealth({
      unhealthy_endpoints: [{ model: 'gpu-old', error: 'classic string error' }],
    }, 'pool', 't');
    expect(out).toHaveLength(1);
    expect(out[0]!.lastProbeError).toBe('classic string error');
  });

  describe('AAO Gateway extension (aao_busy_slots / aao_total_slots)', () => {
    it('inherits the gateway-aggregated busy view when aao_* fields are present', () => {
      // Multi-client sharing: gateway aggregates /slots across every
      // AAO that talks to it, then ships the totals on /health. Each
      // client AAO's local registry inherits the union view here so
      // the dashboard tree reflects "some other AAO is using GPU X
      // right now" even when this AAO isn't.
      const out = parseLiteLLMHealth({
        healthy_endpoints: [
          { model: 'gpu-a', litellm_params: { model: 'gpu-a' }, aao_busy_slots: 3, aao_total_slots: 4 },
          { model: 'gpu-b', litellm_params: { model: 'gpu-b' }, aao_busy_slots: 0, aao_total_slots: 4 },
        ],
      }, 'gw', 't');
      expect(out).toHaveLength(2);
      const a = out.find((x) => x.nodeId === 'gpu-a')!;
      const b = out.find((x) => x.nodeId === 'gpu-b')!;
      expect(a).toMatchObject({ busy: true, busySlots: 3, totalSlots: 4, online: true });
      expect(b).toMatchObject({ busy: false, busySlots: 0, totalSlots: 4, online: true });
    });

    it('treats missing aao_* fields as zero (vanilla LiteLLM compat)', () => {
      const out = parseLiteLLMHealth({
        healthy_endpoints: [{ model: 'm1', litellm_params: { model: 'm1' } }],
      }, 'gw', 't');
      expect(out[0]).toMatchObject({ busy: false, busySlots: 0, totalSlots: 0 });
    });

    it('coerces malformed aao_busy_slots to 0 without throwing', () => {
      const out = parseLiteLLMHealth({
        healthy_endpoints: [
          { model: 'm1', litellm_params: { model: 'm1' }, aao_busy_slots: 'three', aao_total_slots: -2 },
        ],
      }, 'gw', 't');
      expect(out[0]).toMatchObject({ busy: false, busySlots: 0, totalSlots: 0 });
    });

    it('floors fractional aao_busy_slots', () => {
      const out = parseLiteLLMHealth({
        healthy_endpoints: [
          { model: 'm1', litellm_params: { model: 'm1' }, aao_busy_slots: 2.9, aao_total_slots: 4 },
        ],
      }, 'gw', 't');
      expect(out[0]!.busySlots).toBe(2);
    });
  });
});

describe('parseLlamaMetricsThroughput', () => {
  it('reads llamacpp:tokens_per_second when present', () => {
    const body = [
      '# HELP llamacpp:tokens_per_second current generation throughput',
      '# TYPE llamacpp:tokens_per_second gauge',
      'llamacpp:tokens_per_second 42.5',
    ].join('\n');
    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(42.5);
  });

  it('falls back to prompt_tokens_seconds when tokens_per_second is absent', () => {
    const body = 'llamacpp:prompt_tokens_seconds 123.4\n';
    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(123.4);
  });

  it('tolerates label sets in the metric line', () => {
    const body = 'llamacpp:tokens_per_second{model="qwen3:8b"} 99\n';
    expect(parseLlamaMetricsThroughput(body)).toBe(99);
  });

  it('returns null when no recognised gauge appears', () => {
    expect(parseLlamaMetricsThroughput('# nothing useful here\nfoo 1\n')).toBeNull();
    expect(parseLlamaMetricsThroughput('')).toBeNull();
  });

  it('rejects negative or non-finite values', () => {
    expect(parseLlamaMetricsThroughput('llamacpp:tokens_per_second -1\n')).toBeNull();
    expect(parseLlamaMetricsThroughput('llamacpp:tokens_per_second NaN\n')).toBeNull();
  });

  it('does not match the metric name appearing inside another line', () => {
    // The anchored regex requires the metric name at line start.
    const body = '# llamacpp:tokens_per_second 9999 (in a comment)\n';
    expect(parseLlamaMetricsThroughput(body)).toBeNull();
  });

  it('returns max across multi-label series within the same metric family', () => {
    // Multi-model llama-server (1 process serving multiple slots with
    // distinct `{model="..."}` labels) emits one line per label set.
    // We must take the max — first-match would silently drop the
    // faster sibling series.
    const body = [
      'llamacpp:tokens_per_second{model="qwen3:8b"} 5.0',
      'llamacpp:tokens_per_second{model="qwen3:32b"} 80.0',
      'llamacpp:tokens_per_second{model="qwen3:14b"} 42.0',
    ].join('\n') + '\n';
    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(80.0);
  });

  it('still returns the single value for single-label series (regression)', () => {
    const body = 'llamacpp:tokens_per_second{model="qwen3:8b"} 42.5\n';
    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(42.5);
  });

  it('ignores NaN/negative values in multi-label series and returns the max of the valid ones', () => {
    const body = [
      'llamacpp:tokens_per_second{model="a"} NaN',
      'llamacpp:tokens_per_second{model="b"} -1',
      'llamacpp:tokens_per_second{model="c"} 7.5',
      'llamacpp:tokens_per_second{model="d"} 3.2',
    ].join('\n') + '\n';
    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(7.5);
  });

  it('still prefers tokens_per_second over prompt_tokens_seconds (regression)', () => {
    // If both families appear, tokens_per_second wins; we must not
    // mix max-across-families.
    const body = [
      'llamacpp:tokens_per_second{model="a"} 10.0',
      'llamacpp:prompt_tokens_seconds{model="a"} 9999.0',
    ].join('\n') + '\n';
    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(10.0);
  });
});

describe('buildDirectProbe', () => {
  const ts = '2026-05-18T00:00:00.000Z';

  it('returns busySlots / totalSlots when /slots responds', async () => {
    const fetchImpl = vi.fn()
      // /slots
      .mockResolvedValueOnce(fakeResponse({
        jsonBody: [
          { id: 0, is_processing: true, model: 'qwen3:8b' },
          { id: 1, is_processing: false, model: 'qwen3:8b' },
        ],
      }))
      // /metrics — best-effort, returns empty so throughput stays null
      .mockResolvedValueOnce(fakeResponse({ textBody: '' })) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1/v1' });
    expect(status.online).toBe(true);
    expect(status.busySlots).toBe(1);
    expect(status.totalSlots).toBe(2);
    expect(status.busy).toBe(true);
    expect(status.loadedModel).toBe('qwen3:8b');
    // Verify the URL was normalized (no /v1) and reached /slots first:
    const call = recordedCalls(fetchImpl)[0]!;
    expect(call[0]).toBe('http://w1/slots');
    // Second call must be /metrics, also at the root.
    const call2 = recordedCalls(fetchImpl)[1]!;
    expect(call2[0]).toBe('http://w1/metrics');
  });

  it('reports throughputTps when /metrics surfaces a gauge', async () => {
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ jsonBody: [{ id: 0, is_processing: true, model: 'qwen' }] }))
      .mockResolvedValueOnce(fakeResponse({ textBody: 'llamacpp:tokens_per_second 87.5\n' })) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });
    expect(status.throughputTps).toBeCloseTo(87.5);
  });

  it('leaves throughputTps null when /metrics 404s (--metrics opt-in)', async () => {
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ jsonBody: [] }))
      .mockResolvedValueOnce(fakeResponse({ status: 404, ok: false })) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });
    expect(status.online).toBe(true);
    expect(status.throughputTps).toBeNull();
  });

  it('does not demote online state when /metrics throws', async () => {
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ jsonBody: [] }))
      .mockRejectedValueOnce(new Error('econnreset')) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });
    expect(status.online).toBe(true);
    expect(status.throughputTps).toBeNull();
  });

  it('does NOT forward worker.apiKey on direct probes (would leak LiteLLM virtual key)', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({ jsonBody: [] })) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    await probe({ id: 'w1', endpoint: 'http://w1', apiKey: 'sk-litellm-virtual-tok' });
    const init = recordedCalls(fetchImpl)[0]![1] as RequestInit;
    const headers = init.headers as Record<string, string>;
    expect(headers.Authorization).toBeUndefined();
    // Also assert the apiKey never leaked through any other header name.
    for (const v of Object.values(headers)) {
      expect(v).not.toContain('sk-litellm-virtual-tok');
    }
  });

  it('also omits Authorization on /health fallback (no apiKey on direct path)', async () => {
    const mock = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ status: 404, ok: false }))
      .mockResolvedValueOnce(fakeResponse({ status: 200, ok: true, jsonBody: { status: 'ok' } }));
    const fetchImpl = mock as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    await probe({ id: 'w1', endpoint: 'http://w1', apiKey: 'sk-virtual' });
    // Index 1 is the second fetch, i.e. the /health fallback request.
    const healthInit = recordedCalls(fetchImpl)[1]![1] as RequestInit;
    expect((healthInit.headers as Record<string, string>).Authorization).toBeUndefined();
  });

  it('falls back to /health on 404 (--no-slots disabled)', async () => {
    const mock = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ status: 404, ok: false }))
      .mockResolvedValueOnce(fakeResponse({ status: 200, ok: true, jsonBody: { status: 'ok' } }));
    const fetchImpl = mock as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1', model: 'qwen' });
    expect(status.online).toBe(true);
    expect(status.totalSlots).toBe(0);
    expect(status.loadedModel).toBe('qwen');
  });

  it('reports offline + error message when fetch rejects', async () => {
    const fetchImpl = vi.fn().mockRejectedValue(new Error('econnrefused')) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });
    expect(status.online).toBe(false);
    expect(status.lastProbeError).toBe('econnrefused');
  });

  it('reports offline on non-fallback non-OK HTTP status', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({ status: 500, ok: false })) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });
    expect(status.online).toBe(false);
    expect(status.lastProbeError).toContain('500');
  });

  it('aborts after timeoutMs', async () => {
    // The fake fetch never resolves; it only rejects once the signal it was
    // handed fires 'abort'.
    const fetchImpl = vi.fn().mockImplementation((_url: string, init: RequestInit) => {
      return new Promise<Response>((_, reject) => {
        init.signal!.addEventListener('abort', () => reject(new Error('aborted')));
      });
    }) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, timeoutMs: 10, now: () => ts });
    const status = await probe({ id: 'w1', endpoint: 'http://w1' });
    expect(status.online).toBe(false);
    expect(status.lastProbeError).toBe('aborted');
  });
});

describe('buildProxyProbe', () => {
  const ts = '2026-05-18T00:00:00.000Z';

  it('returns one status per deployment on success', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({
      jsonBody: {
        healthy_endpoints: [{ model: 'gpu-a' }, { model: 'gpu-b' }],
      },
    })) as unknown as typeof fetch;
    const probe = buildProxyProbe({ fetchImpl, now: () => ts });
    const statuses = await probe({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true });
    expect(statuses.map(s => s.nodeId).sort()).toEqual(['gpu-a', 'gpu-b']);
    expect(statuses.every(s => s.online)).toBe(true);
    // URL was normalised: /v1 stripped, /health appended at the root.
    const call = recordedCalls(fetchImpl)[0]!;
    expect(call[0]).toBe('http://litellm/health');
  });

  it('forwards Bearer Authorization on proxy probes when apiKey is set', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({
      jsonBody: { healthy_endpoints: [], unhealthy_endpoints: [] },
    })) as unknown as typeof fetch;
    const probe = buildProxyProbe({ fetchImpl, now: () => ts });
    await probe({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true, apiKey: 'sk-virtual' });
    const init = recordedCalls(fetchImpl)[0]![1] as RequestInit;
    expect((init.headers as Record<string, string>).Authorization).toBe('Bearer sk-virtual');
  });

  it('returns a single offline status when /health is unreachable', async () => {
    const fetchImpl = vi.fn().mockRejectedValue(new Error('econnrefused')) as unknown as typeof fetch;
    const probe = buildProxyProbe({ fetchImpl, now: () => ts });
    const statuses = await probe({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true });
    expect(statuses).toHaveLength(1);
    expect(statuses[0]!.online).toBe(false);
    expect(statuses[0]!.nodeId).toBe('pool');
  });

  it('returns a single status when the proxy is alive but reports zero deployments', async () => {
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({
      jsonBody: { healthy_endpoints: [], unhealthy_endpoints: [] },
    })) as unknown as typeof fetch;
    const probe = buildProxyProbe({ fetchImpl, now: () => ts });
    const statuses = await probe({ id: 'pool', endpoint: 'http://litellm', proxy: true });
    expect(statuses).toHaveLength(1);
    expect(statuses[0]!.nodeId).toBe('pool');
    expect(statuses[0]!.online).toBe(true);
  });
});