// maestro/src/engine/backend-probes.test.ts

import { describe, it, expect, vi } from 'vitest';
import {
  buildDirectProbe,
  buildProxyProbe,
  parseLlamaSlots,
  parseLiteLLMHealth,
  parseLlamaMetricsThroughput,
  normalizeWorkerBase,
} from './backend-probes.js';
import type { WorkerDef } from '../config.js';

/**
 * Builds a minimal stand-in for a fetch `Response` that exposes only `ok`,
 * `status`, `json()` and `text()`.
 *
 * @param opts.status      HTTP status to report; 200 when omitted.
 * @param opts.ok          Explicit `ok` flag. When omitted, `ok` is true if
 *                         `status` is omitted or lies in the 200–299 range.
 * @param opts.jsonBody    Value `json()` resolves to.
 * @param opts.textBody    Value `text()` resolves to; `''` when omitted.
 * @param opts.throwOnJson When truthy, `json()` rejects with
 *                         `Error('json parse failed')` instead of resolving.
 * @returns The stub object, cast to `Response`; no other `Response` members exist on it.
 */
function fakeResponse(opts: { status?: number; ok?: boolean; jsonBody?: unknown; textBody?: string; throwOnJson?: boolean }): Response {
  const { status, ok: explicitOk } = opts;
  // An omitted status counts as success, mirroring the 200 default below.
  const statusIsSuccess = status === undefined ? true : status >= 200 && status < 300;

  const stub = {
    ok: explicitOk ?? statusIsSuccess,
    status: status ?? 200,
    async json(): Promise<unknown> {
      // Read lazily so the flag is evaluated when the body is consumed.
      if (opts.throwOnJson) {
        throw new Error('json parse failed');
      }
      return opts.jsonBody;
    },
    async text(): Promise<string> {
      return opts.textBody ?? '';
    },
  };

  return stub as unknown as Response;
}

describe('normalizeWorkerBase', () => {
  // Bare origin that every slash- or `/v1`-suffixed variant below must collapse to.
  const bareOrigin = 'http://x';

  it('strips trailing slashes', () => {
    // A single slash and a run of slashes are both expected to be removed.
    for (const endpoint of ['http://x/', 'http://x///']) {
      expect(normalizeWorkerBase(endpoint)).toBe(bareOrigin);
    }
  });

  it('strips a single trailing /v1', () => {
    // Covers the `/v1` suffix with and without a slash after it.
    for (const endpoint of ['http://x/v1', 'http://x/v1/']) {
      expect(normalizeWorkerBase(endpoint)).toBe(bareOrigin);
    }
  });

  it('leaves other paths alone', () => {
    // A path segment other than `/v1` must come back unchanged.
    const endpoint = 'http://x/api';
    expect(normalizeWorkerBase(endpoint)).toBe(endpoint);
  });
});

describe('parseLlamaSlots', () => {
  it('counts processing slots and picks the first model', () => {
    // Three slots, two flagged `is_processing`, all reporting the same model.
    const summary = parseLlamaSlots([
      { id: 0, is_processing: true, model: 'qwen3:8b' },
      { id: 1, is_processing: false, model: 'qwen3:8b' },
      { id: 2, is_processing: true, model: 'qwen3:8b' },
    ]);

    expect(summary).toEqual({
      busySlots: 2,
      totalSlots: 3,
      loadedModel: 'qwen3:8b',
    });
  });

  it('handles legacy state-number shape', () => {
    // These slots carry a numeric `state` instead of `is_processing`;
    // with one slot at 1 and one at 0, exactly one must count as busy.
    const { busySlots, totalSlots } = parseLlamaSlots([
      { id: 0, state: 1, model: 'm' },
      { id: 1, state: 0, model: 'm' },
    ]);

    expect(busySlots).toBe(1);
    expect(totalSlots).toBe(2);
  });

  it('handles envelope { slots: [...] }', () => {
    // The slot array may arrive wrapped in an object under `slots`.
    const { totalSlots } = parseLlamaSlots({ slots: [{ id: 0, is_processing: false }] });

    expect(totalSlots).toBe(1);
  });

  it('returns zeros on garbage', () => {
    // A non-array, non-envelope input yields the empty summary rather than throwing.
    const emptySummary = { busySlots: 0, totalSlots: 0, loadedModel: null };

    expect(parseLlamaSlots(null)).toEqual(emptySummary);
  });
});

describe('parseLiteLLMHealth', () => {
  it('returns one NodeStatus per healthy + unhealthy deployment', () => {
    const probedAt = '2026-05-18T00:00:00.000Z';
    const statuses = parseLiteLLMHealth(
      {
        healthy_endpoints: [{ model: 'gpu-a' }, { litellm_params: { model: 'gpu-b' } }],
        unhealthy_endpoints: [{ model: 'gpu-down', error: 'timeout' }],
      },
      'pool',
      probedAt,
    );

    // Unhealthy entries come first under the Phase C dedup policy (see
    // parseLiteLLMHealth), but callers should not depend on ordering,
    // so compare the sorted ids rather than positions.
    const nodeIds = statuses.map((status) => status.nodeId).sort();
    expect(nodeIds).toEqual(['gpu-a', 'gpu-b', 'gpu-down']);

    const down = statuses.find((status) => status.nodeId === 'gpu-down')!;
    expect(down.online).toBe(false);
    expect(down.lastProbeError).toBe('timeout');

    const allFromPool = statuses.every((status) => status.workerId === 'pool');
    const allFromProxy = statuses.every((status) => status.source === 'proxy');
    expect(allFromPool).toBe(true);
    expect(allFromProxy).toBe(true);
  });

  it('dedupes by deployment id with unhealthy winning precedence (flap detection)', () => {
    const statuses = parseLiteLLMHealth(
      {
        healthy_endpoints: [{ model: 'gpu-a' }],
        unhealthy_endpoints: [{ model: 'gpu-a', error: 'flap' }],
      },
      'pool',
      't',
    );

    // Phase C: a deployment listed as both healthy and unhealthy must be
    // reported through its unhealthy entry, so a flapping backend never
    // shows up to operators with a misleading green icon.
    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.online).toBe(false);
    expect(only!.lastProbeError).toBe('flap');
  });

  it('preserves unhealthy-only entries (regression)', () => {
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-down', error: 'unreachable' }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.online).toBe(false);
    expect(only!.lastProbeError).toBe('unreachable');
  });

  it('skips entries with no deployment id', () => {
    // An endpoint object with no fields at all gives nothing to key a status on.
    const statuses = parseLiteLLMHealth({ healthy_endpoints: [{}] }, 'pool', 't');

    expect(statuses).toEqual([]);
  });

  it('extracts .message from LiteLLM post-1.40 object-form error', () => {
    // From LiteLLM 1.40 on, errors arrive as { message, type } objects.
    // The earlier string-only guard dropped that form silently, leaving
    // lastProbeError undefined: a red icon with no reason attached.
    const objectFormError = { message: 'Timeout', type: 'Timeout' };
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-down', error: objectFormError }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.online).toBe(false);
    expect(only!.lastProbeError).toBe('Timeout');
  });

  it('falls back to JSON.stringify for arbitrary object errors (no .message)', () => {
    // Certain LiteLLM forks/middleware emit structured errors that have
    // neither a `message` nor a string form. Showing the raw JSON lets
    // operators diagnose the failure without reading the code.
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-x', error: { code: 503, retryAfter: 30 } }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.lastProbeError).toBe('{"code":503,"retryAfter":30}');
  });

  it('leaves lastProbeError undefined when error is null/undefined (regression)', () => {
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-z', error: null }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.lastProbeError).toBeUndefined();
  });

  it('still preserves string-form errors for pre-1.40 LiteLLM (regression)', () => {
    const statuses = parseLiteLLMHealth(
      { unhealthy_endpoints: [{ model: 'gpu-old', error: 'classic string error' }] },
      'pool',
      't',
    );

    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.lastProbeError).toBe('classic string error');
  });

  describe('AAO Gateway extension (aao_busy_slots / aao_total_slots)', () => {
    it('inherits the gateway-aggregated busy view when aao_* fields are present', () => {
      // Multi-client sharing: the gateway sums /slots over every AAO
      // connected to it and publishes the totals on /health. Each client
      // AAO's local registry picks up that combined view here, so the
      // dashboard tree can show "another AAO is using GPU X right now"
      // even while this AAO is idle on it.
      const statuses = parseLiteLLMHealth(
        {
          healthy_endpoints: [
            { model: 'gpu-a', litellm_params: { model: 'gpu-a' }, aao_busy_slots: 3, aao_total_slots: 4 },
            { model: 'gpu-b', litellm_params: { model: 'gpu-b' }, aao_busy_slots: 0, aao_total_slots: 4 },
          ],
        },
        'gw',
        't',
      );
      expect(statuses).toHaveLength(2);

      const statusFor = (nodeId: string) => statuses.find((status) => status.nodeId === nodeId)!;
      const gpuA = statusFor('gpu-a');
      const gpuB = statusFor('gpu-b');
      expect(gpuA).toMatchObject({ busy: true, busySlots: 3, totalSlots: 4, online: true });
      expect(gpuB).toMatchObject({ busy: false, busySlots: 0, totalSlots: 4, online: true });
    });

    it('treats missing aao_* fields as zero (vanilla LiteLLM compat)', () => {
      const statuses = parseLiteLLMHealth(
        { healthy_endpoints: [{ model: 'm1', litellm_params: { model: 'm1' } }] },
        'gw',
        't',
      );

      expect(statuses[0]).toMatchObject({ busy: false, busySlots: 0, totalSlots: 0 });
    });

    it('coerces malformed aao_busy_slots to 0 without throwing', () => {
      // A non-numeric busy count and a negative total must both end up as 0.
      const statuses = parseLiteLLMHealth(
        {
          healthy_endpoints: [
            { model: 'm1', litellm_params: { model: 'm1' }, aao_busy_slots: 'three', aao_total_slots: -2 },
          ],
        },
        'gw',
        't',
      );

      expect(statuses[0]).toMatchObject({ busy: false, busySlots: 0, totalSlots: 0 });
    });

    it('floors fractional aao_busy_slots', () => {
      const statuses = parseLiteLLMHealth(
        {
          healthy_endpoints: [
            { model: 'm1', litellm_params: { model: 'm1' }, aao_busy_slots: 2.9, aao_total_slots: 4 },
          ],
        },
        'gw',
        't',
      );

      const [only] = statuses;
      expect(only!.busySlots).toBe(2);
    });
  });
});

describe('parseLlamaMetricsThroughput', () => {
  // Builds a metrics body in which every line, including the last one,
  // is terminated by a newline.
  const newlineTerminated = (...lines: string[]): string =>
    lines.map((line) => `${line}\n`).join('');

  it('reads llamacpp:tokens_per_second when present', () => {
    // HELP/TYPE comment lines precede the sample; the final line has no trailing newline.
    const body =
      '# HELP llamacpp:tokens_per_second current generation throughput\n' +
      '# TYPE llamacpp:tokens_per_second gauge\n' +
      'llamacpp:tokens_per_second 42.5';

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(42.5);
  });

  it('falls back to prompt_tokens_seconds when tokens_per_second is absent', () => {
    const body = newlineTerminated('llamacpp:prompt_tokens_seconds 123.4');

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(123.4);
  });

  it('tolerates label sets in the metric line', () => {
    const body = newlineTerminated('llamacpp:tokens_per_second{model="qwen3:8b"} 99');

    expect(parseLlamaMetricsThroughput(body)).toBe(99);
  });

  it('returns null when no recognised gauge appears', () => {
    const unrelated = newlineTerminated('# nothing useful here', 'foo 1');

    expect(parseLlamaMetricsThroughput(unrelated)).toBeNull();
    expect(parseLlamaMetricsThroughput('')).toBeNull();
  });

  it('rejects negative or non-finite values', () => {
    for (const badValue of ['-1', 'NaN']) {
      const body = newlineTerminated(`llamacpp:tokens_per_second ${badValue}`);
      expect(parseLlamaMetricsThroughput(body)).toBeNull();
    }
  });

  it('does not match the metric name appearing inside another line', () => {
    // The parser's regex is anchored, so the metric name only counts
    // when it sits at the very start of a line.
    const commentOnly = newlineTerminated('# llamacpp:tokens_per_second 9999 (in a comment)');

    expect(parseLlamaMetricsThroughput(commentOnly)).toBeNull();
  });

  it('returns max across multi-label series within the same metric family', () => {
    // A multi-model llama-server (one process, several slots, each with
    // its own `{model="..."}` label) prints one line per label set.
    // The maximum has to win; taking the first match would silently
    // discard the faster sibling series.
    const body = newlineTerminated(
      'llamacpp:tokens_per_second{model="qwen3:8b"} 5.0',
      'llamacpp:tokens_per_second{model="qwen3:32b"} 80.0',
      'llamacpp:tokens_per_second{model="qwen3:14b"} 42.0',
    );

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(80);
  });

  it('still returns the single value for single-label series (regression)', () => {
    const body = newlineTerminated('llamacpp:tokens_per_second{model="qwen3:8b"} 42.5');

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(42.5);
  });

  it('ignores NaN/negative values in multi-label series and returns the max of the valid ones', () => {
    const body = newlineTerminated(
      'llamacpp:tokens_per_second{model="a"} NaN',
      'llamacpp:tokens_per_second{model="b"} -1',
      'llamacpp:tokens_per_second{model="c"} 7.5',
      'llamacpp:tokens_per_second{model="d"} 3.2',
    );

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(7.5);
  });

  it('still prefers tokens_per_second over prompt_tokens_seconds (regression)', () => {
    // With both families present, tokens_per_second takes priority;
    // the maximum must never be computed across families.
    const body = newlineTerminated(
      'llamacpp:tokens_per_second{model="a"} 10.0',
      'llamacpp:prompt_tokens_seconds{model="a"} 9999.0',
    );

    expect(parseLlamaMetricsThroughput(body)).toBeCloseTo(10);
  });
});

describe('buildDirectProbe', () => {
  const fixedNow = '2026-05-18T00:00:00.000Z';

  // Builds the probe under test around a stubbed fetch, with the clock pinned to `fixedNow`.
  const probeVia = (fetchImpl: typeof fetch) => buildDirectProbe({ fetchImpl, now: () => fixedNow });

  // Exposes the argument lists vitest recorded for a mock that was cast to `typeof fetch`.
  const callsOf = (fetchImpl: typeof fetch): unknown[][] =>
    (fetchImpl as unknown as { mock: { calls: unknown[][] } }).mock.calls;

  it('returns busySlots / totalSlots when /slots responds', async () => {
    const slotsReply = fakeResponse({
      jsonBody: [
        { id: 0, is_processing: true, model: 'qwen3:8b' },
        { id: 1, is_processing: false, model: 'qwen3:8b' },
      ],
    });
    // /metrics is best-effort; an empty body gives it no gauge to report.
    const metricsReply = fakeResponse({ textBody: '' });
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(slotsReply)
      .mockResolvedValueOnce(metricsReply) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1/v1' });

    expect(status.online).toBe(true);
    expect(status.busySlots).toBe(1);
    expect(status.totalSlots).toBe(2);
    expect(status.busy).toBe(true);
    expect(status.loadedModel).toBe('qwen3:8b');

    // The endpoint's /v1 suffix must be gone: /slots is requested first
    // and /metrics second, both at the server root.
    const [slotsCall, metricsCall] = callsOf(fetchImpl);
    expect(slotsCall![0]).toBe('http://w1/slots');
    expect(metricsCall![0]).toBe('http://w1/metrics');
  });

  it('reports throughputTps when /metrics surfaces a gauge', async () => {
    const slotsReply = fakeResponse({ jsonBody: [{ id: 0, is_processing: true, model: 'qwen' }] });
    const metricsReply = fakeResponse({ textBody: 'llamacpp:tokens_per_second 87.5\n' });
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(slotsReply)
      .mockResolvedValueOnce(metricsReply) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.throughputTps).toBeCloseTo(87.5);
  });

  it('leaves throughputTps null when /metrics 404s (--metrics opt-in)', async () => {
    const slotsReply = fakeResponse({ jsonBody: [] });
    const metricsMissing = fakeResponse({ status: 404, ok: false });
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(slotsReply)
      .mockResolvedValueOnce(metricsMissing) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(true);
    expect(status.throughputTps).toBeNull();
  });

  it('does not demote online state when /metrics throws', async () => {
    // /slots succeeds; only the second (metrics) request rejects.
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ jsonBody: [] }))
      .mockRejectedValueOnce(new Error('econnreset')) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(true);
    expect(status.throughputTps).toBeNull();
  });

  it('does NOT forward worker.apiKey on direct probes (would leak LiteLLM virtual key)', async () => {
    const secret = 'sk-litellm-virtual-tok';
    const fetchImpl = vi.fn().mockResolvedValue(fakeResponse({ jsonBody: [] })) as unknown as typeof fetch;

    await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1', apiKey: secret });

    const firstInit = callsOf(fetchImpl)[0]![1] as RequestInit;
    const headers = firstInit.headers as Record<string, string>;
    expect(headers.Authorization).toBeUndefined();
    // The key must not have slipped out under a differently named header either.
    Object.values(headers).forEach((value) => {
      expect(value).not.toContain(secret);
    });
  });

  it('also omits Authorization on /health fallback (no apiKey on direct path)', async () => {
    // First request answers 404, so the second recorded call is the /health fallback.
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(fakeResponse({ status: 404, ok: false }))
      .mockResolvedValueOnce(fakeResponse({ status: 200, ok: true, jsonBody: { status: 'ok' } })) as unknown as typeof fetch;

    await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1', apiKey: 'sk-virtual' });

    const healthInit = callsOf(fetchImpl)[1]![1] as RequestInit;
    const healthHeaders = healthInit.headers as Record<string, string>;
    expect(healthHeaders.Authorization).toBeUndefined();
  });

  it('falls back to /health on 404 (--no-slots disabled)', async () => {
    const slotsMissing = fakeResponse({ status: 404, ok: false });
    const healthOk = fakeResponse({ status: 200, ok: true, jsonBody: { status: 'ok' } });
    const fetchImpl = vi.fn()
      .mockResolvedValueOnce(slotsMissing)
      .mockResolvedValueOnce(healthOk) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1', model: 'qwen' });

    // No slot data is available, so the total is 0 and the model is the worker's configured one.
    expect(status.online).toBe(true);
    expect(status.totalSlots).toBe(0);
    expect(status.loadedModel).toBe('qwen');
  });

  it('reports offline + error message when fetch rejects', async () => {
    const fetchImpl = vi.fn().mockRejectedValue(new Error('econnrefused')) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(false);
    expect(status.lastProbeError).toBe('econnrefused');
  });

  it('reports offline on non-fallback non-OK HTTP status', async () => {
    const serverError = fakeResponse({ status: 500, ok: false });
    const fetchImpl = vi.fn().mockResolvedValue(serverError) as unknown as typeof fetch;

    const status = await probeVia(fetchImpl)({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(false);
    expect(status.lastProbeError).toContain('500');
  });

  it('aborts after timeoutMs', async () => {
    // Never settles by itself: the promise rejects only once the signal
    // handed over in `init` fires `abort`.
    const hangUntilAborted = (_url: string, init: RequestInit): Promise<Response> =>
      new Promise<Response>((_resolve, reject) => {
        init.signal!.addEventListener('abort', () => reject(new Error('aborted')));
      });
    const fetchImpl = vi.fn().mockImplementation(hangUntilAborted) as unknown as typeof fetch;
    const probe = buildDirectProbe({ fetchImpl, timeoutMs: 10, now: () => fixedNow });

    const status = await probe({ id: 'w1', endpoint: 'http://w1' });

    expect(status.online).toBe(false);
    expect(status.lastProbeError).toBe('aborted');
  });
});

describe('buildProxyProbe', () => {
  const fixedNow = '2026-05-18T00:00:00.000Z';

  // Builds the proxy probe under test around a stubbed fetch, with the clock pinned to `fixedNow`.
  const probeVia = (fetchImpl: typeof fetch) => buildProxyProbe({ fetchImpl, now: () => fixedNow });

  // Returns the argument list of the first call vitest recorded on the stubbed fetch.
  const firstCallOf = (fetchImpl: typeof fetch): unknown[] =>
    (fetchImpl as unknown as { mock: { calls: unknown[][] } }).mock.calls[0]!;

  it('returns one status per deployment on success', async () => {
    const healthReply = fakeResponse({
      jsonBody: {
        healthy_endpoints: [{ model: 'gpu-a' }, { model: 'gpu-b' }],
      },
    });
    const fetchImpl = vi.fn().mockResolvedValue(healthReply) as unknown as typeof fetch;

    const statuses = await probeVia(fetchImpl)({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true });

    const nodeIds = statuses.map((status) => status.nodeId).sort();
    expect(nodeIds).toEqual(['gpu-a', 'gpu-b']);
    const allOnline = statuses.every((status) => status.online);
    expect(allOnline).toBe(true);

    // The requested URL has /v1 removed and /health appended at the root.
    const [requestedUrl] = firstCallOf(fetchImpl);
    expect(requestedUrl).toBe('http://litellm/health');
  });

  it('forwards Bearer Authorization on proxy probes when apiKey is set', async () => {
    const emptyHealth = fakeResponse({
      jsonBody: { healthy_endpoints: [], unhealthy_endpoints: [] },
    });
    const fetchImpl = vi.fn().mockResolvedValue(emptyHealth) as unknown as typeof fetch;

    await probeVia(fetchImpl)({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true, apiKey: 'sk-virtual' });

    const init = firstCallOf(fetchImpl)[1] as RequestInit;
    const headers = init.headers as Record<string, string>;
    expect(headers.Authorization).toBe('Bearer sk-virtual');
  });

  it('returns a single offline status when /health is unreachable', async () => {
    const fetchImpl = vi.fn().mockRejectedValue(new Error('econnrefused')) as unknown as typeof fetch;

    const statuses = await probeVia(fetchImpl)({ id: 'pool', endpoint: 'http://litellm/v1', proxy: true });

    // With no deployments known, the lone status is keyed by the worker's own id.
    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.online).toBe(false);
    expect(only!.nodeId).toBe('pool');
  });

  it('returns a single status when the proxy is alive but reports zero deployments', async () => {
    const emptyHealth = fakeResponse({
      jsonBody: { healthy_endpoints: [], unhealthy_endpoints: [] },
    });
    const fetchImpl = vi.fn().mockResolvedValue(emptyHealth) as unknown as typeof fetch;

    const statuses = await probeVia(fetchImpl)({ id: 'pool', endpoint: 'http://litellm', proxy: true });

    expect(statuses).toHaveLength(1);
    const [only] = statuses;
    expect(only!.nodeId).toBe('pool');
    expect(only!.online).toBe(true);
  });
});