maestro/src/bridge/shutdown.ts
2026-06-03 05:08:00 +00:00

129 lines
4.5 KiB
TypeScript

/**
* shutdown.ts — central registry for process-exit cleanup hooks.
*
* Why this exists
* ───────────────
* Phase B + Phase C each installed their own `process.on('SIGTERM', …)`
* and `process.on('SIGINT', …)` handlers (SSH console teardown, then
* BackendStatusRegistry shutdown). Stacking N independent handlers
* per signal has three issues:
*
* 1. Node's default MaxListeners is 10. Phase D will add more
* subsystems; we'll start emitting MaxListenersExceededWarning.
* 2. No ordering guarantee. If two hooks both touch shared state
* (e.g. a logger flush + a worker drain), interleaving is
* non-deterministic.
* 3. No idempotence guard. Multiple signals (SIGTERM then SIGINT)
* would re-run every hook.
*
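* The registry addresses all three: subsystems register a single hook
* each, the registry installs exactly one listener per signal, hooks
* are started in registration order and then run concurrently
* (Promise.allSettled — one slow hook doesn't gate the others), and a
* `shutdownStarted` flag prevents re-entry. Only the start order is
* deterministic: once a hook awaits, it interleaves with the others,
* so hooks must not rely on each other's completion order.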
*
* Test surface
* ────────────
* `runShutdown` and `__resetShutdownForTests` are exported so unit
* tests can drive the pure logic without raising real signals (which
* would terminate the test runner). `installSignalHandlers` is the
* production entry point and is called once from `startServer`.
*/
import { logger } from '../logger.js';
/** A cleanup callback; it may finish synchronously or return a Promise. */
export type ShutdownHook = () => void | Promise<void>;

/** A hook paired with the subsystem name used in shutdown log lines. */
interface RegisteredHook {
  fn: ShutdownHook;
  name: string;
}

/** Registered hooks, in registration order. */
const hooks: Array<RegisteredHook> = [];

/** Flipped to `true` when `runShutdown` begins; guards against re-entry. */
let shutdownStarted = false;

/** Flipped to `true` when the signal listeners have been attached. */
let signalsInstalled = false;

/** Indirection over `process.exit` so the test-only reset can swap in a stub. */
let exitFn: (code: number) => void = (exitCode) => {
  process.exit(exitCode);
};
/**
 * Add a cleanup hook to the shutdown registry.
 *
 * The hook is appended to the module-level list, so hooks keep their
 * registration order. No de-duplication is performed: registering the
 * same function twice means it runs twice.
 *
 * @param name - Label for the owning subsystem (e.g. `"ssh-console"`,
 *   `"backend-status-registry"`). It appears only in shutdown log
 *   lines, where it makes a stuck or failing hook identifiable.
 * @param fn - The cleanup callback. It may be synchronous or return a
 *   Promise; a throw or rejection is logged by `runShutdown` and does
 *   not prevent the other hooks from running.
 */
export function registerShutdownHook(name: string, fn: ShutdownHook): void {
  const entry = { name, fn };
  hooks.push(entry);
}
/** Default upper bound, in milliseconds, on how long a drain may take. */
const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30000;

/** Largest delay Node's timers honour; anything larger is coerced to 1 ms. */
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Drain all registered hooks and exit the process.
 *
 * Concurrent rather than sequential because the hooks operate on
 * independent subsystems — sequential would just sum their latencies
 * (and BackendStatusRegistry.stop alone can take up to ~3s waiting for
 * in-flight probes to abort). Hooks are started in registration order;
 * a synchronous throw and an asynchronous rejection are reported the
 * same way.
 *
 * Idempotent: if shutdown is already in progress, second calls are
 * silently dropped (no double-drain, no double-exit).
 *
 * Bounded: if `timeoutMs` elapses before every hook has settled, each
 * hook still pending is named in a warning and the process exits
 * anyway. Without the bound a single hook that never settles would
 * wedge the process: the signal listeners replace Node's default
 * terminate-on-signal behaviour, and the re-entry guard above drops
 * every later signal.
 *
 * The exit code is always 0, whether hooks succeeded, rejected, or
 * timed out.
 *
 * @param signal - Name of whatever triggered the shutdown; used in the
 *   log line only.
 * @param timeoutMs - Upper bound on the drain in milliseconds. A
 *   non-finite or non-positive value (e.g. `Infinity`) disables the
 *   bound and waits for every hook indefinitely.
 */
export async function runShutdown(
  signal: string,
  timeoutMs: number = DEFAULT_SHUTDOWN_TIMEOUT_MS,
): Promise<void> {
  if (shutdownStarted) return;
  shutdownStarted = true;

  // Snapshot the registry so each outcome stays paired with the hook
  // that produced it even if the shared array is mutated mid-drain.
  const pending = [...hooks];
  logger.info(`[shutdown] received ${signal}, draining ${pending.length} hook(s)`);

  const failures = new Map<number, unknown>();
  const unfinished = new Set<number>(pending.keys());

  // Each task records its own outcome and never rejects, so `drain`
  // cannot reject and a hook failing after the deadline cannot surface
  // as an unhandled rejection. The `async` wrapper already turns a
  // synchronous throw from `fn` into a rejection.
  const drain = Promise.all(
    pending.map(async (hook, index) => {
      try {
        await hook.fn();
      } catch (e: unknown) {
        failures.set(index, e);
      } finally {
        unfinished.delete(index);
      }
    }),
  );

  const bounded = Number.isFinite(timeoutMs) && timeoutMs > 0;
  const limitMs = bounded ? Math.min(timeoutMs, MAX_TIMER_DELAY_MS) : 0;
  if (bounded) {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<void>((resolve) => {
      timer = setTimeout(resolve, limitMs);
    });
    await Promise.race([drain, deadline]);
    // Always disarm: a live timer would keep the event loop alive when
    // `exitFn` is a test stub that does not actually exit.
    clearTimeout(timer);
  } else {
    await drain;
  }

  // Report in registration order so the log is stable across runs.
  pending.forEach((hook, index) => {
    if (failures.has(index)) {
      const cause = failures.get(index);
      const reason = cause instanceof Error ? cause.message : String(cause);
      logger.warn(`[shutdown] hook ${hook.name} rejected: ${reason}`);
    } else if (unfinished.has(index)) {
      logger.warn(`[shutdown] hook ${hook.name} did not settle within ${limitMs}ms`);
    }
  });

  exitFn(0);
}
/**
 * Listeners this module has attached to `process`, recorded so the
 * test-only reset can detach exactly those and nothing else.
 */
const installedSignalListeners: Array<{
  signal: 'SIGTERM' | 'SIGINT';
  listener: () => void;
}> = [];

/**
 * Install the SIGTERM / SIGINT listeners exactly once.
 *
 * Safe to call multiple times — subsequent calls are no-ops so unit
 * tests and integration paths can both invoke it without doubling
 * the listeners. Each listener starts `runShutdown` with the signal
 * name and deliberately does not await it.
 */
export function installSignalHandlers(): void {
  if (signalsInstalled) return;
  signalsInstalled = true;
  for (const signal of ['SIGTERM', 'SIGINT'] as const) {
    const listener = (): void => {
      void runShutdown(signal);
    };
    process.on(signal, listener);
    installedSignalListeners.push({ signal, listener });
  }
}

/**
 * Test-only reset. Detaches the signal listeners this module
 * attached, then clears registered hooks, the started flag, the
 * installed-signals flag, and the exit function. Real production
 * code must never call this — the singleton state is the entire
 * point of the registry.
 *
 * The listeners are detached because clearing the installed flag
 * alone would let the next `installSignalHandlers` call stack a second
 * pair on `process`, one more per reset/install cycle.
 *
 * @param opts - Optional overrides. `opts.exitFn` replaces the exit
 *   function; when omitted the real `process.exit` is restored.
 */
export function __resetShutdownForTests(opts?: { exitFn?: (code: number) => void }): void {
  for (const { signal, listener } of installedSignalListeners) {
    process.off(signal, listener);
  }
  installedSignalListeners.length = 0;
  hooks.length = 0;
  shutdownStarted = false;
  signalsInstalled = false;
  exitFn = opts?.exitFn ?? ((code) => process.exit(code));
}
/**
 * Test-only accessor: reports how many hooks are in the registry at
 * the moment of the call.
 */
export function __getRegisteredHookCountForTests(): number {
  const { length: registeredCount } = hooks;
  return registeredCount;
}