maestro/src/gateway/bootstrap.ts
oss-sync 8ac98d2630
Some checks failed
CI / build-and-test (push) Has been cancelled
sync: update from private repo (22cd034)
2026-06-09 06:02:32 +00:00

373 lines
15 KiB
TypeScript

/**
* Gateway-mode startup sequence. Invoked from src/main.ts when
* `AAO_MODE=gateway`.
*
* By default the gateway opens the DB (Phase 2a) so it can authenticate
* against DB-backed virtual keys. Passing `skipDb` or setting
* `AAO_GATEWAY_NO_DB=1` keeps the stateless Phase 1 behaviour for a
* minimal-footprint deploy (gateway-only nodes shouldn't need write
* access to ./data).
*
* Order of operations:
* 1. ConfigManager loads config.yaml (worker-mode path reused — same
* file, gateway picks its slice via readGatewayConfig).
* 2. Refuse to start if gateway.enabled !== true (typo guard — running
* gateway mode on a worker config would silently 404 every request),
* apply the optional GATEWAY_PORT override, then validate the gateway
* block and refuse to start if errors exist.
* 3. Create the Prometheus metrics registry unless
* gateway.metrics.enabled is false.
* 4. Unless the DB is skipped: open the Repository, run migrations and
* import the config virtual keys into the DB.
* 5. Stand up a BackendStatusRegistry over the gateway backends. We
* adapt each backend into a WorkerDef-shaped record so we can
* reuse buildDirectProbe() unchanged.
* 6. Assemble the shared gateway dependencies, build the Express app
* via createGatewayApp() and listen.
* 7. Wire SIGTERM / SIGINT through the shared shutdown registry so
* both the registry and the http.Server quiesce before exit.
*/
import { ConfigManager } from '../config-manager.js';
import { logger } from '../logger.js';
import {
createBackendStatusRegistry,
type BackendStatusRegistry,
} from '../engine/backend-status-registry.js';
import { buildDirectProbe } from '../engine/backend-probes.js';
import type { WorkerDef } from '../config.js';
import { Repository } from '../db/repository.js';
import { runMigrations } from '../db/migrate.js';
import {
readGatewayConfig,
validateGatewayConfig,
lintGatewayConfig,
type GatewayBackendConfig,
type GatewayConfig,
} from './config.js';
import { createGatewayApp } from './server.js';
import { importConfigKeysToDb, logOrphanedConfigImports } from './config-migration.js';
import {
registerShutdownHook,
installSignalHandlers,
} from '../bridge/shutdown.js';
import { createSharedGatewayDependencies } from './shared-dependencies.js';
import { createGatewayRegistry } from '../metrics/registry.js';
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
import type { Registry as PromRegistry } from 'prom-client';
/**
* Legacy graceful-shutdown budget, in milliseconds (5 s), that used to
* bound how long we waited before force-disconnecting outstanding
* connections.
*
* Kept only as an export for downstream callers that imported it
* pre-F8. start() in this file does not read it: the budget now comes
* from gateway.shutdown_graceful_sec in config (default 30s — see
* DEFAULT_GATEWAY_SHUTDOWN_GRACEFUL_SEC in src/gateway/config.ts).
* The bump from 5s to 30s gives SSE clients a real chance to receive
* the `gateway_shutdown` event and end cleanly before the force-close.
*
* @deprecated use gatewayConfig.shutdownGracefulSec instead.
*/
export const DEFAULT_SHUTDOWN_GRACEFUL_MS = 5_000;
/**
 * Close an http.Server, bounding the wait with a hard timeout.
 *
 * `server.close()` only calls back once it considers the server closed. If
 * that has not happened within `gracefulMs`, `server.closeAllConnections()`
 * (Node 18+, optional on the parameter type) is invoked to force-drop any
 * in-flight sockets and the returned promise resolves anyway.
 *
 * Exported for unit testing — the production wiring lives in start().
 *
 * @param server - Object exposing `close(cb)` and, optionally,
 *   `closeAllConnections()`.
 * @param gracefulMs - Budget in milliseconds before force-closing. Values
 *   above the largest delay a Node timer supports (2^31 - 1 ms) are clamped
 *   to that limit instead of overflowing.
 * @returns A promise that resolves once the server closed or the budget
 *   elapsed, whichever comes first.
 * @throws Rejects with whatever `server.close()` throws synchronously; the
 *   force-close timer is cancelled first.
 */
export async function closeServerWithTimeout(
  server: { close(cb: () => void): unknown; closeAllConnections?: () => void },
  gracefulMs: number,
): Promise<void> {
  // Node coerces a timer delay above 2^31 - 1 ms down to 1 ms, which would
  // turn a very generous budget into an immediate force-close. Clamp instead.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const delayMs = Math.min(gracefulMs, MAX_TIMER_DELAY_MS);

  let timer: ReturnType<typeof setTimeout> | undefined;
  await new Promise<void>(resolve => {
    let done = false;

    // Idempotent completion: whichever of close-callback / timeout wins,
    // the loser becomes a no-op.
    const finish = (): void => {
      if (done) return;
      done = true;
      if (timer) clearTimeout(timer);
      resolve();
    };

    timer = setTimeout(() => {
      if (done) return;
      logger.warn(
        `[gateway-bootstrap] server.close() exceeded ${delayMs}ms, force-closing connections`,
      );
      try {
        server.closeAllConnections?.();
      } catch (e) {
        logger.warn(`[gateway-bootstrap] closeAllConnections threw: ${e instanceof Error ? e.message : String(e)}`);
      }
      finish();
    }, delayMs);
    // Do not let the pending force-close timer keep the process alive.
    if (typeof timer.unref === 'function') timer.unref();

    try {
      server.close(() => finish());
    } catch (e) {
      // close() failed synchronously: make sure the timer cannot fire a
      // stray force-close later, then surface the error as a rejection.
      done = true;
      clearTimeout(timer);
      throw e;
    }
  });
}
/**
 * Adapt gateway backend entries to WorkerDef-shaped records so the
 * existing direct probe (buildDirectProbe) can be reused as-is.
 *
 * Every produced record is marked `proxy: false`: gateway backends are
 * always probed directly. Flipping the flag would send them down the
 * proxy-probe path (LiteLLM-style `/health`), which is meaningless for a
 * raw llama-server. Records are also always `enabled` and carry the single
 * role `'auto'`.
 *
 * @param backends - Backend entries from the gateway config.
 * @returns One WorkerDef per backend, in the same order.
 */
export function buildWorkerDefsFromBackends(backends: GatewayBackendConfig[]): WorkerDef[] {
  const workerDefs: WorkerDef[] = [];
  for (const backend of backends) {
    const { id, endpoint, model, maxSlots, apiKey } = backend;
    workerDefs.push({
      id,
      endpoint,
      model,
      // The backend's slot count doubles as the worker concurrency cap.
      maxConcurrency: maxSlots,
      apiKey,
      proxy: false,
      enabled: true,
      roles: ['auto'],
    });
  }
  return workerDefs;
}
/**
* Options accepted by start(). Every field is optional; calling start()
* with no argument uses the defaults described on each field.
*/
export interface StartGatewayOptions {
/** Path handed to ConfigManager. Defaults to 'config.yaml'. */
configPath?: string;
/** Optional fetch override for tests; forwarded to createGatewayApp(). */
fetchImpl?: typeof fetch;
/**
* Test hook: skip creating and listening on the http.Server so unit
* tests can drive the Express app via supertest. The rest of bootstrap
* still runs (registry, shared dependencies, shutdown wiring) and the
* returned `server` is null.
*/
skipListen?: boolean;
/**
* Override the DB path. Default reads DB_PATH env or
* `./data/maestro.db` to match worker mode so a single-host
* deployment can run gateway + worker against one DB.
*/
dbPath?: string;
/**
* Phase 2a escape hatch: skip Repository init + virtual-key auto
* import. Used by hardened deployments that want a pure stateless
* gateway and accept the Phase 1 config-only key path. The DB is also
* skipped automatically when `process.env.AAO_GATEWAY_NO_DB === '1'`.
*/
skipDb?: boolean;
}
/**
* Handles returned by start() so callers (and tests) can reach the
* pieces that bootstrap created.
*/
export interface StartedGateway {
/** Backend status registry; already started when start() resolves. */
registry: BackendStatusRegistry;
/** Gateway config in effect, including any GATEWAY_PORT env override. */
config: GatewayConfig;
/** http.Server when started for real, null when skipListen=true. */
server: ReturnType<typeof import('http').createServer> | null;
/** Open Repository instance when DB is used; null in stateless mode. */
repo: Repository | null;
/**
* Phase 3b: metrics handle taken from the shared dependency bundle.
* Documented as null when gateway.metrics.enabled = false.
*/
metrics: GatewayMetrics | null;
/**
* Phase 3b: Prometheus registry created at startup; null when
* gateway.metrics.enabled = false. Exposed so tests can scrape the
* registry directly without going through HTTP.
*/
metricsRegistry: PromRegistry | null;
}
/**
 * Start gateway mode.
 *
 * Loads and validates the gateway config, optionally opens the DB and
 * imports config virtual keys, starts the backend status registry and the
 * shared dependency bundle, builds the Express app, listens (unless
 * `skipListen`), and registers shutdown hooks + signal handlers.
 *
 * Throws on validation failure so the process exits with a non-zero status
 * (and a clear log message) rather than silently coming up half-configured.
 *
 * @param opts - Bootstrap options; see StartGatewayOptions.
 * @returns Handles to the registry, effective config, http.Server (or null),
 *   Repository (or null) and metrics objects.
 * @throws Error when `gateway.enabled` is not true, or when
 *   validateGatewayConfig reports errors. The returned promise also rejects
 *   with the server's 'error' event if listening fails.
 */
export async function start(opts: StartGatewayOptions = {}): Promise<StartedGateway> {
  const configPath = opts.configPath ?? 'config.yaml';
  logger.info(`maestro starting (mode=gateway) configPath=${configPath}`);
  const configManager = new ConfigManager(configPath);
  const appConfig = configManager.getConfig();
  const gatewayConfig = readGatewayConfig(appConfig);
  if (!gatewayConfig.enabled) {
    // We exit hard here. A common failure mode is "deployed AAO with
    // AAO_MODE=gateway but forgot to enable the block" — silently
    // running with zero backends would 404 every request and look
    // mysteriously broken.
    throw new Error(
      'gateway mode requested (AAO_MODE=gateway) but gateway.enabled is not true in config.yaml',
    );
  }

  // The GATEWAY_PORT env var can override the config's listen_port.
  // Intended for ops (assigning a port per environment under containers /
  // systemd). If the value is out of range or not a number we only warn
  // and keep the config value, which still goes through config validation
  // below.
  const portEnv = process.env['GATEWAY_PORT'];
  if (portEnv !== undefined && portEnv.length > 0) {
    const parsed = Number(portEnv);
    if (Number.isInteger(parsed) && parsed >= 1 && parsed <= 65535) {
      if (parsed !== gatewayConfig.listenPort) {
        // Separate old and new port so the line reads "8080 -> 9090"
        // rather than two numbers fused together.
        logger.info(`[gateway-bootstrap] GATEWAY_PORT env override: ${gatewayConfig.listenPort} -> ${parsed}`);
        gatewayConfig.listenPort = parsed;
      }
    } else {
      logger.warn(`[gateway-bootstrap] GATEWAY_PORT='${portEnv}' is not a valid port (1-65535); ignoring`);
    }
  }

  const validationErrors = validateGatewayConfig(gatewayConfig);
  if (validationErrors.length > 0) {
    for (const e of validationErrors) {
      logger.error(`[gateway-bootstrap] config error: ${e}`);
    }
    throw new Error(`gateway config has ${validationErrors.length} error(s); refusing to start`);
  }
  // Non-fatal lint (role↔id/model collisions). Routes fine, just ambiguously.
  for (const w of lintGatewayConfig(gatewayConfig)) {
    logger.warn(`[gateway-bootstrap] config warning: ${w}`);
  }
  logger.info(
    `[gateway-bootstrap] enabled listen_port=${gatewayConfig.listenPort} backends=${gatewayConfig.backends.length} virtual_keys=${gatewayConfig.virtualKeys.length}`,
  );

  // Phase 3b: Prometheus metrics registry. Build once at startup so the
  // same registry is shared by every middleware + handler. Disabled by
  // config.metrics.enabled=false (default true). When disabled, no
  // /metrics endpoint is mounted and no counters fire (handles stay
  // null down-tree).
  const metricsConfig = gatewayConfig.metrics ?? { enabled: true, prefix: 'aao_gateway' };
  let promRegistry: PromRegistry | null = null;
  const metricsPrefix = metricsConfig.prefix ?? 'aao_gateway';
  if (metricsConfig.enabled !== false) {
    promRegistry = createGatewayRegistry(metricsPrefix);
    logger.info(`[gateway-bootstrap] metrics enabled prefix=${metricsPrefix}`);
  } else {
    logger.info('[gateway-bootstrap] metrics disabled (gateway.metrics.enabled=false)');
  }

  // Phase 2a: open the Repository so the gateway can authenticate against
  // DB-backed virtual keys. `skipDb` (or env AAO_GATEWAY_NO_DB=1) keeps
  // the Phase 1 stateless deploy reachable for hardened operators who
  // accept the config-only path's deprecation warning.
  const skipDb =
    opts.skipDb === true || process.env['AAO_GATEWAY_NO_DB'] === '1';
  let repo: Repository | null = null;
  if (!skipDb) {
    const dbPath = opts.dbPath ?? process.env['DB_PATH'] ?? './data/maestro.db';
    repo = new Repository(dbPath);
    runMigrations(repo.getDb());
    const result = importConfigKeysToDb(gatewayConfig, repo);
    logger.info(
      `[gateway-bootstrap] imported ${result.imported} virtual key(s) from config to DB ` +
        `(${result.skipped} already present, ${result.resynced} resynced from YAML drift)`,
    );
    // Phase 3a F1: surface keys that exist in DB but were dropped from
    // config.yaml. Those keys are still valid bearer tokens until an
    // admin explicitly revokes them — the warn line tells operators the
    // gap exists so they don't ship "fixed" config believing the key is
    // disabled.
    logOrphanedConfigImports(gatewayConfig, repo);
  } else {
    logger.info('[gateway-bootstrap] skipDb=true — running stateless (Phase 1 config-only key path)');
  }

  // Stand up the registry. We pin the backend list at start; hot-reload
  // is Phase 1 Open Q#2 — falls out for free once we hook ConfigManager
  // events, but Phase 1 ships without it to keep blast radius small.
  const registry = createBackendStatusRegistry({
    getWorkers: () => buildWorkerDefsFromBackends(gatewayConfig.backends),
    probeDirect: buildDirectProbe(),
    // Proxy probe path is unreachable (we never mark gateway backends as
    // proxy=true), but the registry interface requires the callback.
    probeProxy: async () => [],
    pollIntervalMs: gatewayConfig.registry?.pollIntervalMs,
    idlePollIntervalMs: gatewayConfig.registry?.idlePollIntervalMs,
  });
  registry.start();

  // Phase 3c: assemble the shared dependency bundle. Same code path the
  // same-process bridge mount uses; only the registry / promRegistry
  // ownership differs (separate-deploy owns both; same-process borrows
  // the worker bridge's instances).
  const shared = createSharedGatewayDependencies({
    config: gatewayConfig,
    registry,
    repo,
    promRegistry,
    prefix: metricsPrefix,
  });
  shared.start();
  const metrics: GatewayMetrics | null = shared.metrics;

  const { app } = createGatewayApp({
    config: gatewayConfig,
    registry,
    fetchImpl: opts.fetchImpl,
    dbLookup: shared.dbLookup,
    touchLastUsed: shared.touchLastUsed,
    postAuthMiddleware: shared.postAuthMiddleware,
    usageRecorder: shared.usageRecorder ?? undefined,
    streamRegistry: shared.streamRegistry,
    inflight: shared.inflight,
    metrics: metrics ?? undefined,
    metricsRegistry: promRegistry ?? undefined,
  });

  let server: ReturnType<typeof import('http').createServer> | null = null;
  if (!opts.skipListen) {
    const { createServer } = await import('http');
    const httpServer = createServer(app);
    server = httpServer;
    // Resolve on 'listening', reject on 'error' (e.g. port already in
    // use); each handler removes the other so neither lingers.
    await new Promise<void>((resolve, reject) => {
      const onErr = (err: Error): void => {
        httpServer.off('listening', onListen);
        reject(err);
      };
      const onListen = (): void => {
        httpServer.off('error', onErr);
        resolve();
      };
      httpServer.once('error', onErr);
      httpServer.once('listening', onListen);
      httpServer.listen(gatewayConfig.listenPort);
    });
    logger.info(`[gateway-bootstrap] listening on port ${gatewayConfig.listenPort}`);
  }

  // Shutdown wiring — reuse the shared registry from Phase B so a single
  // SIGTERM drains both subsystems. Hooks run via Promise.allSettled so
  // a slow registry stop doesn't block the http.close.
  registerShutdownHook('gateway-backend-status-registry', async () => {
    await registry.stop();
  });
  if (server) {
    // Capture the narrowed, non-null server for use inside the hook.
    const httpServer = server;
    registerShutdownHook('gateway-http-server', async () => {
      // Phase 3c: shared.stop() handles signalShutdown + rate-limiter
      // flush + metrics teardown in one shot. We invoke it BEFORE
      // closing the http.Server so in-flight SSE clients see the
      // `gateway_shutdown` event while the connection is still alive
      // (force-close afterwards just yanks the socket).
      try {
        await shared.stop();
      } catch (e) {
        logger.warn(`[gateway-bootstrap] shared.stop threw: ${e instanceof Error ? e.message : String(e)}`);
      }
      // server.close() waits for ALL in-flight connections to finish.
      // For a streaming gateway that means SIGTERM can hang for the
      // full requestTimeoutSec (default 600s) while a chat completion
      // streams — well past systemd's TimeoutStopSec, which then
      // SIGKILLs the process. Race the graceful close against the
      // configured budget (gateway.shutdown_graceful_sec, default 30s
      // — see DEFAULT_GATEWAY_SHUTDOWN_GRACEFUL_SEC) and force-
      // disconnect any survivors so we exit cleanly under k8s /
      // systemd.
      const gracefulMs = Math.max(1, Math.floor(gatewayConfig.shutdownGracefulSec * 1000));
      await closeServerWithTimeout(httpServer, gracefulMs);
    });
  } else {
    // skipListen path (tests): still tear down the shared bundle on
    // signal so we don't leak the rate-limiter flush interval.
    registerShutdownHook('gateway-shared', async () => {
      try { await shared.stop(); } catch { /* noop */ }
    });
  }
  if (repo) {
    // Capture the narrowed, non-null repository for use inside the hook.
    const openRepo = repo;
    // NOTE(review): per the comment above, hooks are said to run
    // concurrently (Promise.allSettled). If shared.stop() writes to the DB
    // while flushing, this hook could close the Repository underneath it —
    // confirm the ordering guarantees of the shutdown registry.
    registerShutdownHook('gateway-db', async () => {
      try {
        openRepo.close();
      } catch (e) {
        logger.warn(`[gateway-bootstrap] repo.close threw: ${e instanceof Error ? e.message : String(e)}`);
      }
    });
  }
  installSignalHandlers();
  logger.info('maestro ready (mode=gateway)');
  return { registry, config: gatewayConfig, server, repo, metrics, metricsRegistry: promRegistry };
}