// Listing metadata: TypeScript, 122 lines, 4.8 KiB
import type { Repository } from '../db/repository.js';
import type { WorkerDef } from '../config.js';
import type { BackendStatusRegistry, NodeStatus } from '../engine/backend-status-registry.js';

/**
 * One backend behind a proxy worker, as reported by the
 * BackendStatusRegistry. `collectWorkerStatuses` emits these inside
 * `WorkerStatusRow.backends`.
 */
export interface WorkerStatusBackendRow {
  /** Stable identifier from the upstream /health response (deployment id). */
  id: string;
  /** idle = registry says zero busy slots, running = at least one in-flight. */
  state: 'idle' | 'running';
  /** Number of in-flight slots reported by the registry. */
  busySlots: number;
  /** Total slot capacity. 0 when the registry hasn't probed yet. */
  totalSlots: number;
  /** false when the most recent /health probe failed. null when unprobed. */
  online: boolean | null;
}

/**
 * One row of the worker status list produced by `collectWorkerStatuses`.
 */
export interface WorkerStatusRow {
  /** Worker id, copied from the worker definition. */
  id: string;
  /** Display name. `collectWorkerStatuses` currently sets this to the worker id. */
  name: string;
  /** Roles from the worker definition; an empty array when none are configured. */
  roles: string[];
  /**
   * running when the repository reports the worker busy or, for direct
   * workers with a registry row, when that row has at least one busy
   * slot. idle otherwise.
   */
  state: 'idle' | 'running';
  /** True when this row represents a `proxy: true` worker (LiteLLM / AAO Gateway). */
  proxy: boolean;
  /**
   * Slot pressure from the BackendStatusRegistry, populated for direct
   * workers when the registry has seen at least one probe cycle and a
   * matching `nodeId === worker.id` row exists. Proxy workers leave
   * these undefined — the meaningful breakdown lives in `backends[]`
   * (per-backend, since the proxy itself doesn't have its own
   * /slots-style busy figure).
   */
  busySlots?: number;
  totalSlots?: number;
  /** Probe liveness. Same gating as busySlots/totalSlots. */
  online?: boolean;
  /**
   * Per-backend rows for proxy workers — populated when a
   * BackendStatusRegistry is wired and the registry has seen at least
   * one probe cycle for this worker. Omitted (undefined, not empty)
   * for direct workers so the UI can distinguish "no backends because
   * this is a direct worker" from "proxy worker with zero backends
   * reported".
   */
  backends?: WorkerStatusBackendRow[];
}

/**
 * Build the per-worker status list for the Side Info Panel.
 *
 * Proxy workers fan out into a `backends[]` list when a
 * `BackendStatusRegistry` is supplied — the Worker widget renders the
 * tree at the same granularity as the Node Status widget, so an
 * operator can see *which* backend behind a LiteLLM / AAO Gateway
 * front is currently in use rather than just "the proxy is busy".
 *
 * Privacy: returns idle/running booleans + slot counts only. Never job
 * ids, titles, or owners, since the panel is shown to all users in a
 * multi-tenant deployment.
 *
 * @param repo - Source of the job-table busy flag (`isWorkerBusy`).
 * @param workers - Worker definitions to report on. Only read, never
 *   mutated; output order matches input order.
 * @param registry - Optional probe snapshot provider. When `null`
 *   (the default) rows carry only the repository-derived state and no
 *   slot, liveness or backend information.
 * @returns One row per entry in `workers`.
 */
export async function collectWorkerStatuses(
  repo: Repository,
  workers: readonly WorkerDef[],
  registry: Pick<BackendStatusRegistry, 'getAll'> | null = null,
): Promise<WorkerStatusRow[]> {
  // Build a workerId → NodeStatus[] map once per call so we don't
  // rescan the whole registry snapshot for every worker (O(N*M)).
  // registry.getAll() is called exactly once.
  const byWorker = new Map<string, NodeStatus[]>();
  if (registry) {
    for (const ns of registry.getAll()) {
      const list = byWorker.get(ns.workerId);
      if (list) list.push(ns);
      else byWorker.set(ns.workerId, [ns]);
    }
  }

  return workers.map((w) => {
    const isProxy = w.proxy === true;
    const row: WorkerStatusRow = {
      id: w.id,
      name: w.id,
      roles: w.roles ?? [],
      state: repo.isWorkerBusy(w.id) ? 'running' : 'idle',
      proxy: isProxy,
    };

    // Without a registry there is nothing to enrich the row with.
    if (!registry) return row;

    const nodes = byWorker.get(w.id) ?? [];

    if (isProxy) {
      // Filter to backend-source rows only — the registry also stores a
      // self-row for the proxy worker itself (source='proxy', nodeId =
      // workerId) which would otherwise show up duplicated as a child
      // of itself.
      row.backends = nodes
        .filter((ns) => ns.nodeId !== w.id)
        .map(
          (ns): WorkerStatusBackendRow => ({
            id: ns.nodeId,
            state: ns.busySlots > 0 ? 'running' : 'idle',
            busySlots: ns.busySlots,
            totalSlots: ns.totalSlots,
            online: ns.online,
          }),
        );
      return row;
    }

    // Direct workers: the registry stores one row keyed by the
    // worker id (source='direct', populated from llama-server
    // /slots). Surface its slot pressure at the row level so the
    // UI can render `(busy/total)` next to the state badge — same
    // signal proxy backends get, just one level higher in the
    // tree because direct workers have no expansion.
    const selfRow = nodes.find((ns) => ns.nodeId === w.id);
    if (selfRow) {
      row.busySlots = selfRow.busySlots;
      row.totalSlots = selfRow.totalSlots;
      row.online = selfRow.online;
      // Re-derive state from the probe too — it sees in-flight
      // requests that didn't go through the local jobs table
      // (e.g. anything dispatched outside AAO). `repo.isWorkerBusy`
      // alone misses those. Only ever upgrades idle → running.
      if (selfRow.busySlots > 0) row.state = 'running';
    }
    return row;
  });
}