// File-viewer metadata (paste residue): 83 lines · 3.2 KiB · TypeScript
/**
 * Backend tracker for proxy worker jobs.
 *
 * History: originally "first backend wins" (2026-05-18 design, Open Question
 * #3 case 1) to keep the UI pet from flickering while the gateway rebalanced
 * every request. As of 2026-06 the gateway honors `x-aao-preferred-backend`
 * (client-side sticky routing for KV-cache reuse), so backend switches are
 * RARE — they only happen when the preferred backend goes offline or
 * saturates. The tracker therefore now follows the CURRENT backend:
 *
 *   - `jobs.last_backend_id` is updated whenever the resolved backend CHANGES,
 *     so the UI (pet, badges) tracks where the job actually runs.
 *   - Persistence happens via `updateJob({ lastBackendId })`. If that DB write
 *     FAILS, the in-memory value is left unchanged so the next
 *     `onBackendResolved` event retries the persist (a transient DB error must
 *     not permanently lose the worker → backend mapping).
 *
 * This module isolates the "advance only after persist succeeds" invariant
 * from `Worker.buildPieceCallbacks`, which already has a dozen other
 * concerns and is hard to unit-test in isolation.
 */

/**
 * Minimal leveled logger consumed by the backend tracker.
 *
 * Each method receives one fully formatted message string; any return value
 * is ignored.
 */
export interface StickyBackendLogger {
  /** Sink for debug-level messages. */
  debug: (msg: string) => void;
  /** Sink for informational messages. */
  info: (msg: string) => void;
  /** Sink for warnings. */
  warn: (msg: string) => void;
}

/** Payload delivered to the tracker each time a backend has been resolved. */
export interface StickyBackendEvent {
  /** Identifier of the backend that was resolved. */
  backendId: string;
  /**
   * Cache key reported with the resolution, or `null` when there is none.
   * NOTE(review): presumably the KV-cache routing key — confirm with the
   * emitter of this event.
   */
  cacheKey: string | null;
}

/** Handle pairing the event callback with a reader for the tracked backend. */
export interface BackendTracker {
  /** The onBackendResolved callback for the agent loop (fire-and-forget safe). */
  onEvent: (event: StickyBackendEvent) => Promise<void>;
  /**
   * The most recently persisted backend id (or the initial DB value).
   * Used as the `x-aao-preferred-backend` hint on the next LLM request.
   */
  current: () => string | null;
}

/**
 * Build the backend tracker.
 *
 * The tracker holds the backend id it last persisted (seeded from
 * `opts.initial`). Each event whose `backendId` differs from that value
 * triggers `persist(backendId)`; the in-memory value advances only after the
 * write resolves, so a failed write is retried by the next event.
 *
 * @param opts - Tracker configuration:
 *   - `initial`: starting backend id, or `null` when none is known yet.
 *   - `persist`: the DB write (typically
 *     `repo.updateJob(jobId, { lastBackendId })`); it must reject on failure
 *     so the in-memory value stays put for retry.
 *   - `logger`: receives an info line on every successful change and a warn
 *     line on every failed persist.
 *   - `workerId` / `jobId`: used only to label log messages.
 * @returns A tracker whose `onEvent` handles persist failures by logging a
 *   warning instead of rejecting, and whose `current()` reports the last
 *   successfully persisted backend id (or `initial`).
 */
export function createStickyBackendResolver(opts: {
  initial: string | null;
  persist: (backendId: string) => Promise<void>;
  logger: StickyBackendLogger;
  workerId: string;
  jobId: string;
}): BackendTracker {
  const { initial, persist, logger, workerId, jobId } = opts;
  let current: string | null = initial;

  async function onEvent({ backendId, cacheKey }: StickyBackendEvent): Promise<void> {
    if (current === backendId) return; // unchanged — nothing to persist
    try {
      await persist(backendId);
      // Callers may fire-and-forget, so another call for the same backend can
      // have completed while this persist was in flight. The DB already holds
      // the right value; skip the advance so we never log "X → X".
      if (current === backendId) return;
      // Only advance AFTER persist succeeds. If we advanced first and the
      // persist failed, the next identical event would short-circuit on the
      // equality check and the DB would stay stale forever.
      const previous = current;
      current = backendId;
      logger.info(
        previous
          ? `[worker:${workerId}] job ${jobId} backend switched: ${previous} → ${backendId} cache=${cacheKey ?? 'miss'}`
          : `[worker:${workerId}] job ${jobId} backend resolved: ${backendId} cache=${cacheKey ?? 'miss'}`,
      );
    } catch (err) {
      logger.warn(
        `[worker:${workerId}] failed to persist lastBackendId for job ${jobId}: ${err} — keeping ${current ?? 'unset'} for retry`,
      );
      // Intentionally do NOT advance. Next event retries.
    }
  }

  return { onEvent, current: () => current };
}