/**
 * Backend tracker for proxy worker jobs.
 *
 * History: originally "first backend wins" (2026-05-18 design, Open Question
 * #3 case 1) to keep the UI pet from flickering while the gateway rebalanced
 * every request. As of 2026-06 the gateway honors `x-aao-preferred-backend`
 * (client-side sticky routing for KV-cache reuse), so backend switches are
 * RARE — they only happen when the preferred backend goes offline or
 * saturates. The tracker therefore now follows the CURRENT backend:
 *
 * - `jobs.last_backend_id` is updated whenever the resolved backend CHANGES,
 *   so the UI (pet, badges) tracks where the job actually runs.
 * - Persistence happens via `updateJob({ lastBackendId })`. If that DB write
 *   FAILS, the in-memory value is left unchanged so the next
 *   `onBackendResolved` event retries the persist (a transient DB error must
 *   not permanently lose the worker → backend mapping).
 * - Events are applied one at a time, in arrival order. `onEvent` is called
 *   fire-and-forget, so two calls can overlap; without ordering, an older
 *   persist that completes last would leave both the DB and the in-memory
 *   value on a stale backend.
 *
 * This module isolates the "advance only after persist succeeds" invariant
 * from `Worker.buildPieceCallbacks`, which already has a dozen other
 * concerns and is hard to unit-test in isolation.
 */

/** Minimal logging surface the tracker needs; each method takes a preformatted message. */
export interface StickyBackendLogger {
  debug: (msg: string) => void;
  info: (msg: string) => void;
  warn: (msg: string) => void;
}

/** Payload of a single "backend resolved" notification. */
export interface StickyBackendEvent {
  /** Id of the backend that served the request. */
  backendId: string;
  /** Cache key reported with the event; `null` is logged as `miss`. */
  cacheKey: string | null;
}

export interface BackendTracker {
  /** The onBackendResolved callback for the agent loop (fire-and-forget safe). */
  onEvent: (event: StickyBackendEvent) => Promise<void>;
  /**
   * The most recently persisted backend id (or the initial DB value).
   * Used as the `x-aao-preferred-backend` hint on the next LLM request.
   */
  current: () => string | null;
}

/**
 * Build the backend tracker. `persist(backendId)` is the DB write
 * (typically `repo.updateJob(jobId, { lastBackendId })`); it must reject on
 * failure so the in-memory value stays put for retry.
 *
 * @param opts.initial  Backend id already stored for the job, or `null`.
 * @param opts.persist  Stores the new backend id; its resolved value is ignored.
 * @param opts.logger   Receives one `info` line per successful change and one
 *                      `warn` line per failed persist.
 * @param opts.workerId Worker id, used only in log messages.
 * @param opts.jobId    Job id, used only in log messages.
 * @returns A tracker whose `onEvent` never rejects because of a `persist`
 *          failure (the failure is logged and retried on the next event).
 */
export function createStickyBackendResolver(opts: {
  initial: string | null;
  persist: (backendId: string) => Promise<unknown>;
  logger: StickyBackendLogger;
  workerId: string;
  jobId: string;
}): BackendTracker {
  const { initial, persist, logger, workerId, jobId } = opts;
  let current: string | null = initial;

  // Number of events that have been accepted but not yet fully applied.
  let inFlight = 0;
  // Settles (never rejects) once every accepted event has been applied.
  let tail: Promise<void> = Promise.resolve();

  /** Apply one event: persist if the backend changed, then advance `current`. */
  async function applyEvent({ backendId, cacheKey }: StickyBackendEvent): Promise<void> {
    if (current === backendId) return; // unchanged — nothing to persist
    try {
      await persist(backendId);
      // Only advance AFTER persist succeeds. If we advanced first and the
      // persist failed, the next identical event would short-circuit on the
      // equality check and the DB would stay stale forever.
      const previous = current;
      current = backendId;
      logger.info(
        previous
          ? `[worker:${workerId}] job ${jobId} backend switched: ${previous} → ${backendId} cache=${cacheKey ?? 'miss'}`
          : `[worker:${workerId}] job ${jobId} backend resolved: ${backendId} cache=${cacheKey ?? 'miss'}`,
      );
    } catch (err) {
      logger.warn(
        `[worker:${workerId}] failed to persist lastBackendId for job ${jobId}: ${err} — keeping ${current ?? 'unset'} for retry`,
      );
      // Intentionally do NOT advance. Next event retries.
    }
  }

  /**
   * Accept an event. When nothing is in flight the event is applied
   * immediately (persist is invoked synchronously, as before); otherwise it
   * waits for every earlier event so the equality check and the persist
   * always see the state left by its predecessors.
   */
  function onEvent(event: StickyBackendEvent): Promise<void> {
    const run = inFlight === 0 ? applyEvent(event) : tail.then(() => applyEvent(event));
    inFlight += 1;
    const release = (): void => {
      inFlight -= 1;
    };
    // `release` runs on both outcomes so a failing event cannot block the queue.
    tail = run.then(release, release);
    return run;
  }

  return { onEvent, current: () => current };
}