373 lines
15 KiB
TypeScript
373 lines
15 KiB
TypeScript
/**
 * Gateway-mode startup sequence. Invoked from src/main.ts when
 * `AAO_MODE=gateway`. Phase 1 was stateless and never opened the DB; since
 * Phase 2a the Repository is opened by default so DB-backed virtual keys
 * can be authenticated. Minimal-footprint deploys (gateway-only nodes that
 * shouldn't need write access to ./data) can keep the stateless behaviour
 * with `skipDb` or `AAO_GATEWAY_NO_DB=1`.
 *
 * Order of operations:
 *   1. ConfigManager loads config.yaml (worker-mode path reused — same
 *      file, gateway picks its slice via readGatewayConfig).
 *   2. Validate the gateway block; refuse to start if errors exist or
 *      gateway.enabled !== true (typo guard — running gateway mode on a
 *      worker config would silently 404 every request).
 *   3. Unless the DB is skipped, open the Repository, run migrations and
 *      import the config-defined virtual keys.
 *   4. Stand up a BackendStatusRegistry over the gateway backends. We
 *      adapt each backend into a WorkerDef-shaped record so we can
 *      reuse buildDirectProbe() unchanged.
 *   5. Build the Express app via createGatewayApp() and listen.
 *   6. Wire SIGTERM / SIGINT through the shared shutdown registry so
 *      both the registry and the http.Server quiesce before exit.
 */
|
|
import { ConfigManager } from '../config-manager.js';
|
|
import { logger } from '../logger.js';
|
|
import {
|
|
createBackendStatusRegistry,
|
|
type BackendStatusRegistry,
|
|
} from '../engine/backend-status-registry.js';
|
|
import { buildDirectProbe } from '../engine/backend-probes.js';
|
|
import type { WorkerDef } from '../config.js';
|
|
import { Repository } from '../db/repository.js';
|
|
import { runMigrations } from '../db/migrate.js';
|
|
import {
|
|
readGatewayConfig,
|
|
validateGatewayConfig,
|
|
lintGatewayConfig,
|
|
type GatewayBackendConfig,
|
|
type GatewayConfig,
|
|
} from './config.js';
|
|
import { createGatewayApp } from './server.js';
|
|
import { importConfigKeysToDb, logOrphanedConfigImports } from './config-migration.js';
|
|
import {
|
|
registerShutdownHook,
|
|
installSignalHandlers,
|
|
} from '../bridge/shutdown.js';
|
|
import { createSharedGatewayDependencies } from './shared-dependencies.js';
|
|
import { createGatewayRegistry } from '../metrics/registry.js';
|
|
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
|
|
import type { Registry as PromRegistry } from 'prom-client';
|
|
|
|
/**
 * Historical graceful-shutdown budget (5 seconds, in milliseconds) that
 * applied before outstanding connections were force-disconnected.
 *
 * The runtime no longer consults this value: it reads
 * gateway.shutdown_graceful_sec from config (default 30s — see
 * DEFAULT_GATEWAY_SHUTDOWN_GRACEFUL_SEC in src/gateway/config.ts). The
 * larger default gives SSE clients a real chance to receive the
 * `gateway_shutdown` event and end cleanly before the force-close.
 * The export is kept only so callers that imported it pre-F8 still compile.
 *
 * @deprecated use gatewayConfig.shutdownGracefulSec instead.
 */
export const DEFAULT_SHUTDOWN_GRACEFUL_MS = 5000;
|
|
|
|
/**
 * Close an http.Server, bounded by a hard deadline.
 *
 * `server.close()` stops accepting new connections and then waits for the
 * existing ones to end. Two things are done so that wait cannot run away:
 *
 *   - Right after `close()` we call `server.closeIdleConnections()` when the
 *     server provides it (Node 18.2+). On Node 18 `close()` leaves idle
 *     keep-alive sockets open, so without this a server with no in-flight
 *     request would still sit out the whole budget and then be force-closed.
 *     Connections that are mid-request (e.g. streaming responses) are not
 *     touched by this call.
 *   - If `close()` has not called back within `gracefulMs`, we call
 *     `server.closeAllConnections()` (Node 18.2+, optional) to force-drop
 *     whatever is left, and resolve.
 *
 * Exported for unit testing — the production wiring lives in start().
 *
 * @param server     http.Server-shaped object. Only `close` is required; the
 *                   two connection-dropping methods are used when present.
 * @param gracefulMs Milliseconds to wait for a clean close before forcing.
 * @returns A promise that resolves when the server closed or the deadline
 *          fired, whichever comes first. Errors thrown by the optional
 *          connection-dropping methods are logged, not propagated.
 */
export async function closeServerWithTimeout(
  server: {
    close(cb: () => void): unknown;
    closeIdleConnections?: () => void;
    closeAllConnections?: () => void;
  },
  gracefulMs: number,
): Promise<void> {
  await new Promise<void>(resolve => {
    let settled = false;
    let deadline: ReturnType<typeof setTimeout> | undefined;

    // Single exit point: whichever of "closed cleanly" / "deadline fired"
    // happens first wins, the other becomes a no-op.
    const settle = (): void => {
      if (settled) return;
      settled = true;
      if (deadline !== undefined) clearTimeout(deadline);
      resolve();
    };

    deadline = setTimeout(() => {
      if (settled) return;
      logger.warn(
        `[gateway-bootstrap] server.close() exceeded ${gracefulMs}ms, force-closing connections`,
      );
      try {
        server.closeAllConnections?.();
      } catch (e) {
        logger.warn(`[gateway-bootstrap] closeAllConnections threw: ${e instanceof Error ? e.message : String(e)}`);
      }
      settle();
    }, gracefulMs);
    // The deadline must never be the only thing keeping the process alive.
    if (typeof deadline.unref === 'function') deadline.unref();

    server.close(() => settle());

    // Drop keep-alive sockets that are idle right now so they cannot hold
    // the close callback hostage until the deadline.
    try {
      server.closeIdleConnections?.();
    } catch (e) {
      logger.warn(`[gateway-bootstrap] closeIdleConnections threw: ${e instanceof Error ? e.message : String(e)}`);
    }
  });
}
|
|
|
|
/**
 * Adapt gateway backend entries to WorkerDef-shaped records so the existing
 * direct probe (buildDirectProbe) can be pointed at them unchanged.
 *
 * Every produced record is marked `proxy: false`, `enabled: true` and given
 * the single role `'auto'`. `proxy: false` matters: gateway backends are
 * always direct (a proxy worker is LiteLLM, which the gateway replaces), and
 * flipping it would route probing through the proxy path and its
 * LiteLLM-style `/health` check, which is meaningless for a raw
 * llama-server.
 *
 * @param backends Backend entries from the gateway config.
 * @returns One WorkerDef per backend, in the same order; `maxSlots` is
 *          carried over as `maxConcurrency`.
 */
export function buildWorkerDefsFromBackends(backends: GatewayBackendConfig[]): WorkerDef[] {
  return backends.map(({ id, endpoint, model, maxSlots, apiKey }) => ({
    id,
    endpoint,
    model,
    maxConcurrency: maxSlots,
    apiKey,
    proxy: false,
    enabled: true,
    roles: ['auto'],
  }));
}
|
|
|
|
/** Knobs accepted by start(); every field is optional. */
export interface StartGatewayOptions {
  /** Config file to load. start() falls back to `config.yaml` when omitted. */
  configPath?: string;
  /** Test seam: replacement `fetch` that is handed to the gateway app. */
  fetchImpl?: typeof fetch;
  /**
   * Test seam: when true, no http.Server is created or bound, so unit
   * tests can drive the Express app via supertest. The rest of the
   * bootstrap (registry, shared bundle, shutdown wiring) still runs.
   */
  skipListen?: boolean;
  /**
   * Explicit DB location. When omitted, the `DB_PATH` env var is used and
   * then `./data/maestro.db` — the worker-mode default, so a single-host
   * deployment can run gateway + worker against one DB.
   */
  dbPath?: string;
  /**
   * Phase 2a escape hatch: do not open the Repository and do not import
   * virtual keys from config. Meant for hardened deployments that want a
   * purely stateless gateway and accept the Phase 1 config-only key path.
   * Setting `process.env.AAO_GATEWAY_NO_DB === '1'` has the same effect.
   */
  skipDb?: boolean;
}
|
|
|
|
/** Handles returned by start() once gateway mode is up. */
export interface StartedGateway {
  /** Backend status registry that start() created and started. */
  registry: BackendStatusRegistry;
  /** Gateway config in effect (after any GATEWAY_PORT override). */
  config: GatewayConfig;
  /** The listening http.Server, or null when started with skipListen=true. */
  server: ReturnType<typeof import('http').createServer> | null;
  /** Open Repository when the DB is in use; null in stateless mode. */
  repo: Repository | null;
  /**
   * Phase 3b: gateway metrics handle and the Prometheus registry behind
   * it. Both are null when gateway.metrics.enabled = false. They are
   * surfaced so tests can scrape the registry directly instead of going
   * through HTTP.
   */
  metrics: GatewayMetrics | null;
  metricsRegistry: PromRegistry | null;
}
|
|
|
|
/**
 * Run a teardown step and log — never propagate — whatever it throws, so
 * one failing step cannot prevent the steps after it from running.
 */
async function bestEffort(label: string, action: () => unknown): Promise<void> {
  try {
    await action();
  } catch (e) {
    logger.warn(`[gateway-bootstrap] ${label} threw: ${e instanceof Error ? e.message : String(e)}`);
  }
}

/**
 * Start gateway mode. Throws on validation failure so the process exits
 * with a non-zero status (and a clear log message) rather than silently
 * coming up half-configured.
 *
 * @param opts See StartGatewayOptions; all fields are optional.
 * @returns Handles to everything that was started (registry, config,
 *          http.Server or null, Repository or null, metrics handles).
 * @throws Error when gateway.enabled is not true, when the gateway config
 *         fails validation, when DB initialisation fails, or when the HTTP
 *         server cannot bind its port. In the last two cases the resources
 *         opened so far are released before the error is rethrown.
 */
export async function start(opts: StartGatewayOptions = {}): Promise<StartedGateway> {
  const configPath = opts.configPath ?? 'config.yaml';
  logger.info(`maestro starting (mode=gateway) configPath=${configPath}`);

  const configManager = new ConfigManager(configPath);
  const appConfig = configManager.getConfig();
  const gatewayConfig = readGatewayConfig(appConfig);

  if (!gatewayConfig.enabled) {
    // We exit hard here. A common failure mode is "deployed AAO with
    // AAO_MODE=gateway but forgot to enable the block" — silently
    // running with zero backends would 404 every request and look
    // mysteriously broken.
    throw new Error(
      'gateway mode requested (AAO_MODE=gateway) but gateway.enabled is not true in config.yaml',
    );
  }

  // The GATEWAY_PORT env var can override the config's listen_port.
  // Intended for ops (assigning ports per environment under containers /
  // systemd). If the value is out of range or not a number we only warn
  // and keep the config value, which config validation below still checks.
  const portEnv = process.env['GATEWAY_PORT'];
  if (portEnv !== undefined && portEnv.length > 0) {
    const parsed = Number(portEnv);
    if (Number.isInteger(parsed) && parsed >= 1 && parsed <= 65535) {
      if (parsed !== gatewayConfig.listenPort) {
        logger.info(`[gateway-bootstrap] GATEWAY_PORT env override: ${gatewayConfig.listenPort} → ${parsed}`);
        gatewayConfig.listenPort = parsed;
      }
    } else {
      logger.warn(`[gateway-bootstrap] GATEWAY_PORT='${portEnv}' is not a valid port (1-65535); ignoring`);
    }
  }

  const validationErrors = validateGatewayConfig(gatewayConfig);
  if (validationErrors.length > 0) {
    for (const e of validationErrors) {
      logger.error(`[gateway-bootstrap] config error: ${e}`);
    }
    throw new Error(`gateway config has ${validationErrors.length} error(s); refusing to start`);
  }

  // Non-fatal lint (role↔id/model collisions). Routes fine, just ambiguously.
  for (const w of lintGatewayConfig(gatewayConfig)) {
    logger.warn(`[gateway-bootstrap] config warning: ${w}`);
  }

  logger.info(
    `[gateway-bootstrap] enabled listen_port=${gatewayConfig.listenPort} backends=${gatewayConfig.backends.length} virtual_keys=${gatewayConfig.virtualKeys.length}`,
  );

  // Phase 3b: Prometheus metrics registry. Build once at startup so the
  // same registry is shared by every middleware + handler. Disabled by
  // config.metrics.enabled=false (default true). When disabled, no
  // /metrics endpoint is mounted and no counters fire (handles stay
  // null down-tree).
  const metricsConfig = gatewayConfig.metrics ?? { enabled: true, prefix: 'aao_gateway' };
  let promRegistry: PromRegistry | null = null;
  const metricsPrefix = metricsConfig.prefix ?? 'aao_gateway';
  if (metricsConfig.enabled !== false) {
    promRegistry = createGatewayRegistry(metricsPrefix);
    logger.info(`[gateway-bootstrap] metrics enabled prefix=${metricsPrefix}`);
  } else {
    logger.info('[gateway-bootstrap] metrics disabled (gateway.metrics.enabled=false)');
  }

  // Phase 2a: open the Repository so the gateway can authenticate against
  // DB-backed virtual keys. `skipDb` (or env AAO_GATEWAY_NO_DB=1) keeps
  // the Phase 1 stateless deploy reachable for hardened operators who
  // accept the config-only path's deprecation warning.
  const skipDb =
    opts.skipDb === true || process.env['AAO_GATEWAY_NO_DB'] === '1';
  let repo: Repository | null = null;
  if (!skipDb) {
    const dbPath = opts.dbPath ?? process.env['DB_PATH'] ?? './data/maestro.db';
    const opened = new Repository(dbPath);
    try {
      runMigrations(opened.getDb());
      const result = importConfigKeysToDb(gatewayConfig, opened);
      logger.info(
        `[gateway-bootstrap] imported ${result.imported} virtual key(s) from config to DB ` +
          `(${result.skipped} already present, ${result.resynced} resynced from YAML drift)`,
      );
      // Phase 3a F1: surface keys that exist in DB but were dropped from
      // config.yaml. Those keys are still valid bearer tokens until an
      // admin explicitly revokes them — the warn line tells operators the
      // gap exists so they don't ship "fixed" config believing the key is
      // disabled.
      logOrphanedConfigImports(gatewayConfig, opened);
    } catch (err) {
      // Migration / import failed: don't leave the DB handle open behind
      // the error we are about to surface.
      await bestEffort('repo.close', () => opened.close());
      throw err;
    }
    repo = opened;
  } else {
    logger.info('[gateway-bootstrap] skipDb=true — running stateless (Phase 1 config-only key path)');
  }

  // Stand up the registry. We pin the backend list at start; hot-reload
  // is Phase 1 Open Q#2 — falls out for free once we hook ConfigManager
  // events, but Phase 1 ships without it to keep blast radius small.
  const registry = createBackendStatusRegistry({
    getWorkers: () => buildWorkerDefsFromBackends(gatewayConfig.backends),
    probeDirect: buildDirectProbe(),
    // Proxy probe path is unreachable (we never mark gateway backends as
    // proxy=true), but the registry interface requires the callback.
    probeProxy: async () => [],
    pollIntervalMs: gatewayConfig.registry?.pollIntervalMs,
    idlePollIntervalMs: gatewayConfig.registry?.idlePollIntervalMs,
  });
  registry.start();

  // Phase 3c: assemble the shared dependency bundle. Same code path the
  // same-process bridge mount uses; only the registry / promRegistry
  // ownership differs (separate-deploy owns both; same-process borrows
  // the worker bridge's instances).
  const shared = createSharedGatewayDependencies({
    config: gatewayConfig,
    registry,
    repo,
    promRegistry,
    prefix: metricsPrefix,
  });
  shared.start();
  const metrics: GatewayMetrics | null = shared.metrics;

  const { app } = createGatewayApp({
    config: gatewayConfig,
    registry,
    fetchImpl: opts.fetchImpl,
    dbLookup: shared.dbLookup,
    touchLastUsed: shared.touchLastUsed,
    postAuthMiddleware: shared.postAuthMiddleware,
    usageRecorder: shared.usageRecorder ?? undefined,
    streamRegistry: shared.streamRegistry,
    inflight: shared.inflight,
    metrics: metrics ?? undefined,
    metricsRegistry: promRegistry ?? undefined,
  });

  let server: ReturnType<typeof import('http').createServer> | null = null;
  if (!opts.skipListen) {
    const { createServer } = await import('http');
    const httpServer = createServer(app);
    try {
      await new Promise<void>((resolve, reject) => {
        // Whichever of 'error' / 'listening' fires first removes the other
        // listener so neither lingers on the server.
        const onErr = (err: Error): void => {
          httpServer.off('listening', onListen);
          reject(err);
        };
        const onListen = (): void => {
          httpServer.off('error', onErr);
          resolve();
        };
        httpServer.once('error', onErr);
        httpServer.once('listening', onListen);
        httpServer.listen(gatewayConfig.listenPort);
      });
    } catch (err) {
      // Bind failed (e.g. port already in use). No shutdown hook has been
      // registered yet, so release what was started above before the
      // error propagates — otherwise the registry's polling, the shared
      // bundle and the DB handle outlive the failed start.
      await bestEffort('shared.stop', () => shared.stop());
      await bestEffort('registry.stop', () => registry.stop());
      await bestEffort('repo.close', () => repo?.close());
      throw err;
    }
    server = httpServer;
    logger.info(`[gateway-bootstrap] listening on port ${gatewayConfig.listenPort}`);
  }

  // Shutdown wiring — reuse the shared registry from Phase B so a single
  // SIGTERM drains both subsystems. Hooks run via Promise.allSettled so
  // a slow registry stop doesn't block the http.close.
  //
  // Because hooks run side by side, the DB must NOT be closed from a hook
  // of its own: it would close while shared.stop() is still flushing and
  // while in-flight requests are still draining. The DB is therefore
  // closed at the tail of whichever hook drains the gateway.
  registerShutdownHook('gateway-backend-status-registry', async () => {
    await registry.stop();
  });
  if (server) {
    const listening = server;
    registerShutdownHook('gateway-http-server', async () => {
      try {
        // Phase 3c: shared.stop() handles signalShutdown + rate-limiter
        // flush + metrics teardown in one shot. We invoke it BEFORE
        // closing the http.Server so in-flight SSE clients see the
        // `gateway_shutdown` event while the connection is still alive
        // (force-close afterwards just yanks the socket).
        await bestEffort('shared.stop', () => shared.stop());
        // server.close() waits for ALL in-flight connections to finish.
        // For a streaming gateway that means SIGTERM can hang for the
        // full requestTimeoutSec (default 600s) while a chat completion
        // streams — well past systemd's TimeoutStopSec, which then
        // SIGKILLs the process. Race the graceful close against the
        // configured budget (gateway.shutdown_graceful_sec, default 30s
        // — see DEFAULT_GATEWAY_SHUTDOWN_GRACEFUL_SEC) and force-
        // disconnect any survivors so we exit cleanly under k8s /
        // systemd.
        const gracefulMs = Math.max(1, Math.floor(gatewayConfig.shutdownGracefulSec * 1000));
        await closeServerWithTimeout(listening, gracefulMs);
      } finally {
        // Last step, even if the close above failed: nothing can touch
        // the DB any more once the server is down.
        await bestEffort('repo.close', () => repo?.close());
      }
    });
  } else {
    // skipListen path (tests): still tear down the shared bundle on
    // signal so we don't leak the rate-limiter flush interval.
    registerShutdownHook('gateway-shared', async () => {
      await bestEffort('shared.stop', () => shared.stop());
      await bestEffort('repo.close', () => repo?.close());
    });
  }
  installSignalHandlers();

  logger.info('maestro ready (mode=gateway)');
  return { registry, config: gatewayConfig, server, repo, metrics, metricsRegistry: promRegistry };
}
|