/**
 * Gateway-mode startup sequence. Invoked from src/main.ts when
 * `AAO_MODE=gateway`. Phase 1 was stateless and deliberately did NOT
 * open the DB (minimal-footprint deploy: gateway-only nodes shouldn't
 * need write access to ./data). Since Phase 2a the DB IS opened by
 * default so virtual keys can be authenticated against it; pass
 * `skipDb` or set `AAO_GATEWAY_NO_DB=1` to keep the stateless path.
 *
 * Order of operations:
 *   1. ConfigManager loads config.yaml (worker-mode path reused — same
 *      file, gateway picks its slice via readGatewayConfig).
 *   2. Validate the gateway block; refuse to start if errors exist or
 *      gateway.enabled !== true (typo guard — running gateway mode on a
 *      worker config would silently 404 every request).
 *   3. Stand up a BackendStatusRegistry over the gateway backends. We
 *      adapt each backend into a WorkerDef-shaped record so we can
 *      reuse buildDirectProbe() unchanged.
 *   4. Build the Express app via createGatewayApp() and listen.
 *   5. Wire SIGTERM / SIGINT through the shared shutdown registry so
 *      both the registry and the http.Server quiesce before exit.
 *
 * Between steps 2 and 3 the bootstrap also builds the Prometheus metrics
 * registry (unless gateway.metrics.enabled is false) and, unless the DB
 * is skipped, opens the Repository, runs migrations and imports the
 * config-defined virtual keys.
 */
import { ConfigManager } from '../config-manager.js'; import { logger } from '../logger.js'; import { createBackendStatusRegistry, type BackendStatusRegistry, } from '../engine/backend-status-registry.js'; import { buildDirectProbe } from '../engine/backend-probes.js'; import type { WorkerDef } from '../config.js'; import { Repository } from '../db/repository.js'; import { runMigrations } from '../db/migrate.js'; import { readGatewayConfig, validateGatewayConfig, lintGatewayConfig, type GatewayBackendConfig, type GatewayConfig, } from './config.js'; import { createGatewayApp } from './server.js'; import { importConfigKeysToDb, logOrphanedConfigImports } from './config-migration.js'; import { registerShutdownHook, installSignalHandlers, } from '../bridge/shutdown.js'; import { createSharedGatewayDependencies } from './shared-dependencies.js'; import { createGatewayRegistry } from '../metrics/registry.js'; import type { GatewayMetrics } from '../metrics/gateway-metrics.js'; import type { Registry as PromRegistry } from 'prom-client'; /** *
* Legacy default for the graceful budget before we force-disconnect
 * outstanding connections. Retained as an export for downstream
 * callers that imported it pre-F8; runtime now reads
 * gateway.shutdown_graceful_sec from config (default 30s — see
 * DEFAULT_GATEWAY_SHUTDOWN_GRACEFUL_SEC in src/gateway/config.ts).
 * The bump from 5s to 30s gives SSE clients a real chance to receive
 * the `gateway_shutdown` event and end cleanly before the force-close.
 *
 * @deprecated use gatewayConfig.shutdownGracefulSec instead.
 */
export const DEFAULT_SHUTDOWN_GRACEFUL_MS = 5_000;

/**
 * Close an http.Server with a hard timeout. If `server.close()` does
 * not call back within `gracefulMs`, we call `server.closeAllConnections()`
 * (Node 18+, optional on the accepted shape) to force-drop any in-flight
 * sockets and resolve.
 *
 * Exported for unit testing — the production wiring lives in start().
 *
 * @param server - anything with an http.Server-like `close(cb)`;
 *   `closeAllConnections` is used only when present.
 * @param gracefulMs - budget in milliseconds before connections are
 *   force-closed.
 * @returns a promise that resolves once the close callback has fired or
 *   the budget has elapsed, whichever comes first.
 * @throws whatever `server.close()` throws synchronously; the watchdog
 *   timer is disarmed first so it cannot fire after the failure.
 */
export async function closeServerWithTimeout(
  server: { close(cb: () => void): unknown; closeAllConnections?: () => void },
  gracefulMs: number,
): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Single exit point for the success paths: whichever of the close
    // callback / watchdog gets here first wins, the other becomes a no-op.
    const settle = (): void => {
      if (settled) return;
      settled = true;
      if (timer !== undefined) clearTimeout(timer);
      resolve();
    };

    timer = setTimeout(() => {
      if (settled) return;
      logger.warn(
        `[gateway-bootstrap] server.close() exceeded ${gracefulMs}ms, force-closing connections`,
      );
      try {
        server.closeAllConnections?.();
      } catch (e) {
        // Best effort: a failing force-close must not block shutdown.
        logger.warn(
          `[gateway-bootstrap] closeAllConnections threw: ${e instanceof Error ? e.message : String(e)}`,
        );
      }
      settle();
    }, gracefulMs);
    // The watchdog alone must never keep the event loop alive.
    if (typeof timer.unref === 'function') timer.unref();

    try {
      server.close(() => settle());
    } catch (err) {
      // close() failed synchronously. Disarm the watchdog so it does not
      // later log a bogus timeout and force-close connections, then
      // surface the failure to the caller.
      settled = true;
      clearTimeout(timer);
      reject(err);
    }
  });
}

/**
 * Translate gateway backends into the WorkerDef shape so the existing
 * direct-probe (buildDirectProbe) can probe `/slots` and `/metrics`
 * verbatim. Gateway backends are always direct (proxy worker = LiteLLM,
 * which is the thing we're replacing).
*
 * The `proxy: false` flag is critical here: setting it true would
 * route through buildProxyProbe and hit `/health` (LiteLLM-style),
 * which is meaningless for a raw llama-server.
 *
 * @param backends - gateway backend entries; the array is not mutated.
 * @returns one WorkerDef per backend, in the same order, always with
 *   `proxy: false`, `enabled: true` and `roles: ['auto']`.
 */
export function buildWorkerDefsFromBackends(
  backends: readonly GatewayBackendConfig[],
): WorkerDef[] {
  return backends.map(backend => ({
    id: backend.id,
    endpoint: backend.endpoint,
    model: backend.model,
    // The backend's slot count is used as the worker's concurrency cap.
    maxConcurrency: backend.maxSlots,
    apiKey: backend.apiKey,
    proxy: false,
    enabled: true,
    roles: ['auto'],
  }));
}

/** Options accepted by start(); every field is optional. */
export interface StartGatewayOptions {
  /** Path to the config file. start() falls back to `config.yaml`. */
  configPath?: string;
  /** Optional fetch override for tests. */
  fetchImpl?: typeof fetch;
  /**
   * Test hook: skip `app.listen()` so unit tests can drive the Express
   * app via supertest. Bootstrap still runs end-to-end (registry,
   * shutdown wiring) so listen-related code paths are covered.
   */
  skipListen?: boolean;
  /**
   * Override the DB path. Default reads DB_PATH env or
   * `./data/maestro.db` to match worker mode so a single-host
   * deployment can run gateway + worker against one DB.
   */
  dbPath?: string;
  /**
   * Phase 2a escape hatch: skip Repository init + virtual-key auto
   * import. Used by hardened deployments that want a pure stateless
   * gateway and accept the Phase 1 config-only key path. The DB is also
   * skipped automatically when `process.env.AAO_GATEWAY_NO_DB === '1'`.
   */
  skipDb?: boolean;
}

/** Handles returned by start() for the running gateway. */
export interface StartedGateway {
  /** Backend status registry, already started. */
  registry: BackendStatusRegistry;
  /** Effective gateway config (after any GATEWAY_PORT override). */
  config: GatewayConfig;
  /** http.Server when started for real, null when skipListen=true. */
  server: import('http').Server | null;
  /** Open Repository instance when DB is used; null in stateless mode. */
  repo: Repository | null;
  /**
   * Phase 3b: Prometheus metrics registry + handle. Null when
   * gateway.metrics.enabled = false. Exposed so tests can scrape the
   * registry directly without going through HTTP.
   */
  metrics: GatewayMetrics | null;
  metricsRegistry: PromRegistry | null;
}

/**
 * Start gateway mode.
* Throws on validation failure so the process exits
 * with a non-zero status (and a clear log message) rather than silently
 * coming up half-configured.
 *
 * Startup order: load + validate the gateway config, build the metrics
 * registry, open the DB (unless skipped), start the backend status
 * registry and the shared dependency bundle, build the Express app,
 * listen (unless skipListen), then register shutdown hooks and signal
 * handlers.
 *
 * If DB initialisation or `listen()` fails, what was acquired so far
 * (DB handle, started registry, shared bundle) is released before the
 * error is rethrown.
 *
 * @param opts - see StartGatewayOptions; every field is optional.
 * @returns handles to the running gateway.
 * @throws Error when `gateway.enabled` is not true or config validation
 *   reports errors; rethrows DB-initialisation and `listen()` failures.
 */
export async function start(opts: StartGatewayOptions = {}): Promise<StartedGateway> {
  const configPath = opts.configPath ?? 'config.yaml';
  logger.info(`maestro starting (mode=gateway) configPath=${configPath}`);

  const configManager = new ConfigManager(configPath);
  const gatewayConfig = readGatewayConfig(configManager.getConfig());

  if (!gatewayConfig.enabled) {
    // We exit hard here. A common failure mode is "deployed AAO with
    // AAO_MODE=gateway but forgot to enable the block" — silently
    // running with zero backends would 404 every request and look
    // mysteriously broken.
    throw new Error(
      'gateway mode requested (AAO_MODE=gateway) but gateway.enabled is not true in config.yaml',
    );
  }

  // Apply the env override BEFORE validation so the effective port is
  // what gets validated.
  applyPortEnvOverride(gatewayConfig);

  const validationErrors = validateGatewayConfig(gatewayConfig);
  if (validationErrors.length > 0) {
    for (const e of validationErrors) {
      logger.error(`[gateway-bootstrap] config error: ${e}`);
    }
    throw new Error(`gateway config has ${validationErrors.length} error(s); refusing to start`);
  }

  // Non-fatal lint (role↔id/model collisions). Routes fine, just ambiguously.
  for (const w of lintGatewayConfig(gatewayConfig)) {
    logger.warn(`[gateway-bootstrap] config warning: ${w}`);
  }

  logger.info(
    `[gateway-bootstrap] enabled listen_port=${gatewayConfig.listenPort} backends=${gatewayConfig.backends.length} virtual_keys=${gatewayConfig.virtualKeys.length}`,
  );

  // Phase 3b: Prometheus metrics registry. Build once at startup so the
  // same registry is shared by every middleware + handler. Disabled by
  // config.metrics.enabled=false (default true). When disabled, no
  // /metrics endpoint is mounted and no counters fire (handles stay
  // null down-tree).
  const metricsConfig = gatewayConfig.metrics ?? { enabled: true, prefix: 'aao_gateway' };
  const metricsPrefix = metricsConfig.prefix ?? 'aao_gateway';
  let promRegistry: PromRegistry | null = null;
  if (metricsConfig.enabled !== false) {
    promRegistry = createGatewayRegistry(metricsPrefix);
    logger.info(`[gateway-bootstrap] metrics enabled prefix=${metricsPrefix}`);
  } else {
    logger.info('[gateway-bootstrap] metrics disabled (gateway.metrics.enabled=false)');
  }

  // Phase 2a: open the Repository so the gateway can authenticate against
  // DB-backed virtual keys. `skipDb` (or env AAO_GATEWAY_NO_DB=1) keeps
  // the Phase 1 stateless deploy reachable for hardened operators who
  // accept the config-only path's deprecation warning.
  const skipDb = opts.skipDb === true || process.env['AAO_GATEWAY_NO_DB'] === '1';
  const repo: Repository | null = skipDb
    ? null
    : openGatewayRepository(
        gatewayConfig,
        opts.dbPath ?? process.env['DB_PATH'] ?? './data/maestro.db',
      );
  if (repo === null) {
    logger.info('[gateway-bootstrap] skipDb=true — running stateless (Phase 1 config-only key path)');
  }
  const closeRepo = async (): Promise<void> => {
    if (repo !== null) await releaseQuietly('repo.close', () => repo.close());
  };

  // Stand up the registry. We pin the backend list at start; hot-reload
  // is Phase 1 Open Q#2 — falls out for free once we hook ConfigManager
  // events, but Phase 1 ships without it to keep blast radius small.
  const registry = createBackendStatusRegistry({
    getWorkers: () => buildWorkerDefsFromBackends(gatewayConfig.backends),
    probeDirect: buildDirectProbe(),
    // Proxy probe path is unreachable (we never mark gateway backends as
    // proxy=true), but the registry interface requires the callback.
    probeProxy: async () => [],
    pollIntervalMs: gatewayConfig.registry?.pollIntervalMs,
    idlePollIntervalMs: gatewayConfig.registry?.idlePollIntervalMs,
  });
  registry.start();

  // Phase 3c: assemble the shared dependency bundle. Same code path the
  // same-process bridge mount uses; only the registry / promRegistry
  // ownership differs (separate-deploy owns both; same-process borrows
  // the worker bridge's instances).
  const shared = createSharedGatewayDependencies({
    config: gatewayConfig,
    registry,
    repo,
    promRegistry,
    prefix: metricsPrefix,
  });
  shared.start();
  const metrics: GatewayMetrics | null = shared.metrics;

  const { app } = createGatewayApp({
    config: gatewayConfig,
    registry,
    fetchImpl: opts.fetchImpl,
    dbLookup: shared.dbLookup,
    touchLastUsed: shared.touchLastUsed,
    postAuthMiddleware: shared.postAuthMiddleware,
    usageRecorder: shared.usageRecorder ?? undefined,
    streamRegistry: shared.streamRegistry,
    inflight: shared.inflight,
    metrics: metrics ?? undefined,
    metricsRegistry: promRegistry ?? undefined,
  });

  let server: HttpServer | null = null;
  if (!opts.skipListen) {
    try {
      server = await listenOnPort(app, gatewayConfig.listenPort);
    } catch (err) {
      // listen() failed (e.g. port already in use). No shutdown hooks are
      // registered yet, so release what we started ourselves before
      // propagating the error.
      await releaseQuietly('registry.stop', () => registry.stop());
      await releaseQuietly('shared.stop', () => shared.stop());
      await closeRepo();
      throw err;
    }
    logger.info(`[gateway-bootstrap] listening on port ${gatewayConfig.listenPort}`);
  }

  // Shutdown wiring — reuse the shared registry from Phase B so a single
  // SIGTERM drains both subsystems. Hooks run via Promise.allSettled so
  // a slow registry stop doesn't block the http.close.
  registerShutdownHook('gateway-backend-status-registry', async () => {
    await registry.stop();
  });

  // One hook owns the ordered part of the teardown:
  //   shared.stop() → http close → DB close.
  // Because hooks are not run in sequence (see above), closing the DB
  // from a sibling hook could happen while requests are still draining,
  // so the DB close lives here, after the drain.
  // The hook keeps its historical name: 'gateway-http-server' when
  // listening, 'gateway-shared' on the skipListen (test) path.
  registerShutdownHook(server !== null ? 'gateway-http-server' : 'gateway-shared', async () => {
    // Phase 3c: shared.stop() handles signalShutdown + rate-limiter
    // flush + metrics teardown in one shot. We invoke it BEFORE
    // closing the http.Server so in-flight SSE clients see the
    // `gateway_shutdown` event while the connection is still alive
    // (force-close afterwards just yanks the socket). On the skipListen
    // path this still runs so the shared bundle is torn down on signal.
    await releaseQuietly('shared.stop', () => shared.stop());
    try {
      if (server !== null) {
        // server.close() waits for ALL in-flight connections to finish.
        // For a streaming gateway that means SIGTERM can hang for the
        // full requestTimeoutSec (default 600s) while a chat completion
        // streams — well past systemd's TimeoutStopSec, which then
        // SIGKILLs the process. Race the graceful close against the
        // configured budget (gateway.shutdown_graceful_sec, default 30s
        // — see DEFAULT_GATEWAY_SHUTDOWN_GRACEFUL_SEC) and force-
        // disconnect any survivors so we exit cleanly under k8s /
        // systemd.
        const gracefulMs = Math.max(1, Math.floor(gatewayConfig.shutdownGracefulSec * 1000));
        await closeServerWithTimeout(server, gracefulMs);
      }
    } finally {
      await closeRepo();
    }
  });

  installSignalHandlers();

  logger.info('maestro ready (mode=gateway)');
  return { registry, config: gatewayConfig, server, repo, metrics, metricsRegistry: promRegistry };
}

/** Local alias for the node http.Server type (type-only, no runtime import). */
type HttpServer = import('http').Server;

/** Run a best-effort cleanup step, logging (never rethrowing) any failure. */
async function releaseQuietly(label: string, release: () => unknown): Promise<void> {
  try {
    await release();
  } catch (e) {
    logger.warn(`[gateway-bootstrap] ${label} threw: ${e instanceof Error ? e.message : String(e)}`);
  }
}

/**
 * Let the GATEWAY_PORT env var override config's listen_port (mutates
 * `gatewayConfig.listenPort`). Intended for ops: assigning the port per
 * environment under containers / systemd. An out-of-range or
 * non-numeric value only produces a warning and the config value is
 * kept.
 */
function applyPortEnvOverride(gatewayConfig: GatewayConfig): void {
  const portEnv = process.env['GATEWAY_PORT'];
  if (portEnv === undefined || portEnv.length === 0) return;

  const parsed = Number(portEnv);
  if (!Number.isInteger(parsed) || parsed < 1 || parsed > 65535) {
    logger.warn(`[gateway-bootstrap] GATEWAY_PORT='${portEnv}' is not a valid port (1-65535); ignoring`);
    return;
  }
  if (parsed !== gatewayConfig.listenPort) {
    logger.info(`[gateway-bootstrap] GATEWAY_PORT env override: ${gatewayConfig.listenPort} → ${parsed}`);
    gatewayConfig.listenPort = parsed;
  }
}

/**
 * Open the Repository at `dbPath`, run migrations and import the
 * config-defined virtual keys. If any step after opening throws, the
 * handle is closed again before the error propagates.
 */
function openGatewayRepository(gatewayConfig: GatewayConfig, dbPath: string): Repository {
  const repo = new Repository(dbPath);
  try {
    runMigrations(repo.getDb());
    const result = importConfigKeysToDb(gatewayConfig, repo);
    logger.info(
      `[gateway-bootstrap] imported ${result.imported} virtual key(s) from config to DB ` +
        `(${result.skipped} already present, ${result.resynced} resynced from YAML drift)`,
    );
    // Phase 3a F1: surface keys that exist in DB but were dropped from
    // config.yaml. Those keys are still valid bearer tokens until an
    // admin explicitly revokes them — the warn line tells operators the
    // gap exists so they don't ship "fixed" config believing the key is
    // disabled.
    logOrphanedConfigImports(gatewayConfig, repo);
    return repo;
  } catch (err) {
    try {
      repo.close();
    } catch (closeErr) {
      logger.warn(
        `[gateway-bootstrap] repo.close threw: ${closeErr instanceof Error ? closeErr.message : String(closeErr)}`,
      );
    }
    throw err;
  }
}

/**
 * Create an http.Server for `handler` and resolve once it is listening
 * on `port`; rejects with the server's 'error' event if binding fails.
 * Whichever of 'listening' / 'error' fires first removes the other
 * one-shot listener.
 */
async function listenOnPort(
  handler: import('http').RequestListener,
  port: number,
): Promise<HttpServer> {
  const { createServer } = await import('http');
  const httpServer = createServer(handler);
  await new Promise<void>((resolve, reject) => {
    const onError = (err: Error): void => {
      httpServer.off('listening', onListening);
      reject(err);
    };
    const onListening = (): void => {
      httpServer.off('error', onError);
      resolve();
    };
    httpServer.once('error', onError);
    httpServer.once('listening', onListening);
    httpServer.listen(port);
  });
  return httpServer;
}