/**
 * AAO Gateway Phase 2a — admin REST API for virtual key management.
 *
 * Mounted on worker-mode server.ts (not on gateway-mode server.ts; the
 * gateway is intentionally read-only over auth state). Path prefix:
 *   /api/admin/gateway/keys
 *
 * Endpoint shape (see also docs/superpowers/specs/2026-05-18-aao-gateway-mode-design.md
 * § Phase 2a / Admin REST API):
 *   POST   /            — issue (raw key returned ONCE here)
 *   GET    /            — list (raw key never returned)
 *   GET    /:id         — single (raw key never returned)
 *   GET    /:id/usage   — current-period usage, budget headroom, history
 *   PATCH  /:id         — update tokensBudget / rateLimitRpm / allowedModels
 *   POST   /:id/revoke  — soft delete
 *   POST   /:id/rotate  — atomic re-issue (new key returned, old revoked)
 *   DELETE /:id         — hard delete (rejects source='config-import')
 *
 * Visibility: returned objects expose `keyPrefix` (sk-aao-XXXXXX) and
 * meta only. The raw bearer is surfaced exactly once, by issue/rotate;
 * lose it and you must rotate the key.
 */

import { Router, type RequestHandler, type Request } from 'express';
import type { Repository, GatewayVirtualKey } from '../db/repository.js';
import { generateVirtualKey } from '../gateway/key-format.js';
import { currentPeriodYearMonth } from '../gateway/period.js';
import type { KeyCache } from '../gateway/key-cache.js';
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
import { logger } from '../logger.js';

export interface AdminGatewayApiDeps {
  repo: Repository;
  /** Existing requireAdmin middleware (or a passthrough for auth-disabled deploys). */
  requireAdmin: RequestHandler;
  /**
   * Extract the acting admin's user id from the request (for
   * createdBy / revokedBy). Returns null when auth is disabled; the
   * router records 'admin' in that case so audit history isn't blank.
   */
  getUserId: (req: Request) => string | null;
  /**
   * Phase 3a F4: optional shared cache between auth + budget + rate
   * middlewares. When wired by the same-process deployment, admin
   * mutations (PATCH / revoke / rotate / delete) call cache.invalidate
   * so the next request sees fresh state immediately. Cross-process
   * setups fall back to the cache's 5s TTL.
   */
  keyCache?: KeyCache;
  /**
   * Phase 3b post-review: optional gateway metrics handle. When wired,
   * the revoke / rotate / delete handlers remove the
   * `budgetUsedRatio{team, key_prefix}` series for the gone key — left
   * in place those labels would grow unbounded over the key lifecycle.
   */
  gatewayMetrics?: GatewayMetrics;
}

/** Wire-format DTO (camelCase JSON). Raw key is opt-in for issue/rotate. */
interface GatewayKeyDto {
  id: string;
  object: 'gateway.key';
  keyPrefix: string;
  team: string;
  allowedModels: string[] | null;
  source: GatewayVirtualKey['source'];
  createdAt: string;
  createdBy: string | null;
  revokedAt: string | null;
  revokedBy: string | null;
  lastUsedAt: string | null;
  /** Phase 2b: monthly tokens budget (null = unlimited). */
  tokensBudget: number | null;
  /** Phase 2b: per-minute requests cap (null = unlimited). */
  rateLimitRpm: number | null;
  /** Only present on POST / rotate responses. NEVER on list / get. */
  key?: string;
}

/**
 * Map a repository row onto the wire DTO.
 *
 * @param row Stored key row; only metadata fields are copied (never the hash).
 * @param raw Raw bearer value. Pass it ONLY from the issue / rotate
 *   handlers; when omitted the `key` property is left off entirely.
 */
function toDto(row: GatewayVirtualKey, raw?: string): GatewayKeyDto {
  const dto: GatewayKeyDto = {
    id: row.id,
    object: 'gateway.key',
    keyPrefix: row.keyPrefix,
    team: row.team,
    allowedModels: row.allowedModels,
    source: row.source,
    createdAt: row.createdAt,
    createdBy: row.createdBy,
    revokedAt: row.revokedAt,
    revokedBy: row.revokedBy,
    lastUsedAt: row.lastUsedAt,
    tokensBudget: row.tokensBudget,
    rateLimitRpm: row.rateLimitRpm,
  };
  if (raw !== undefined) dto.key = raw;
  return dto;
}

/**
 * Hard caps to keep policy values comfortably inside the range where
 * integers are represented exactly. The 2^53 boundary is a property of
 * IEEE-754 doubles (JavaScript numbers, SQLite REAL) — SQLite's INTEGER
 * storage class itself is 64-bit signed. Past that boundary low bits
 * are dropped, so a stated budget like 1e20 would silently round and
 * the running total could never reach it. Either limit is well beyond
 * any sane real-world usage:
 *
 * - tokensBudget: 1 trillion tokens / month. At GPT-4-class pricing
 *   that's well over US$10M; if you legitimately need more, split keys.
 * - rateLimitRpm: 1,000,000 requests / minute. The in-memory window
 *   array is bounded at 2× this, so the cap also keeps memory predictable.
 */
const MAX_TOKENS_BUDGET = 1_000_000_000_000;
const MAX_RATE_LIMIT_RPM = 1_000_000;

/** Own-property check that is safe for objects created from parsed JSON. */
const hasOwn = (obj: object, key: string): boolean =>
  Object.prototype.hasOwnProperty.call(obj, key);

/**
 * Validate one limit value from a request body.
 *
 * Accepts `null` (explicit unlimited) or a finite number whose floor is
 * at least 1 and which does not exceed `max`. Anything else is an error.
 * The floor is validated (not the raw value) so that inputs such as 0.5
 * are rejected instead of being stored as a budget of 0.
 */
function parseLimitValue(
  field: 'tokensBudget' | 'rateLimitRpm',
  value: unknown,
  max: number,
): { ok: true; value: number | null } | { ok: false; error: string } {
  if (value === null) return { ok: true, value: null };
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return { ok: false, error: `${field} must be a positive integer or null` };
  }
  const floored = Math.floor(value);
  if (floored < 1) {
    return { ok: false, error: `${field} must be a positive integer or null` };
  }
  if (value > max) {
    return { ok: false, error: `${field} exceeds maximum (${max})` };
  }
  return { ok: true, value: floored };
}

/**
 * Parse a {tokensBudget?, rateLimitRpm?} pair from a request body.
 * Returns the parsed values as positive integers, null (explicit
 * unlimited), or leaves the property absent (don't touch).
 *
 * Strict: a value that's NaN / Infinity / non-numeric / below 1 is a
 * 400 — we don't silently coerce because operators expect their stated
 * limit to be applied. Floats ≥ 1 are floored (sub-integer budgets are
 * meaningless). Values above the hard cap are rejected — see the MAX_*
 * constants above for the rationale.
 */
function parseLimitsPatch(body: { tokensBudget?: unknown; rateLimitRpm?: unknown } | undefined):
  | { ok: true; tokensBudget?: number | null; rateLimitRpm?: number | null }
  | { ok: false; error: string } {
  const out: { tokensBudget?: number | null; rateLimitRpm?: number | null } = {};
  if (body && hasOwn(body, 'tokensBudget')) {
    const parsed = parseLimitValue('tokensBudget', body.tokensBudget, MAX_TOKENS_BUDGET);
    if (!parsed.ok) return parsed;
    out.tokensBudget = parsed.value;
  }
  if (body && hasOwn(body, 'rateLimitRpm')) {
    const parsed = parseLimitValue('rateLimitRpm', body.rateLimitRpm, MAX_RATE_LIMIT_RPM);
    if (!parsed.ok) return parsed;
    out.rateLimitRpm = parsed.value;
  }
  return { ok: true, ...out };
}

const TEAM_REGEX = /^[a-zA-Z0-9._-]{1,64}$/;
const MAX_ALLOWED_MODELS = 64;
const MAX_MODEL_NAME_LEN = 128;

/**
 * Validate the `allowedModels` body field.
 *
 * `undefined` and `null` both mean "no restriction" and yield `null`.
 * Otherwise the value must be an array of at most MAX_ALLOWED_MODELS
 * non-empty strings, each at most MAX_MODEL_NAME_LEN characters.
 */
function parseAllowedModels(value: unknown): { ok: true; value: string[] | null } | { ok: false; error: string } {
  if (value === undefined || value === null) return { ok: true, value: null };
  if (!Array.isArray(value)) return { ok: false, error: 'allowedModels must be an array of strings' };
  if (value.length > MAX_ALLOWED_MODELS) {
    return { ok: false, error: `allowedModels supports at most ${MAX_ALLOWED_MODELS} entries` };
  }
  const out: string[] = [];
  for (const m of value) {
    if (typeof m !== 'string' || m.length === 0 || m.length > MAX_MODEL_NAME_LEN) {
      return { ok: false, error: `allowedModels entries must be non-empty strings ≤ ${MAX_MODEL_NAME_LEN} chars` };
    }
    out.push(m);
  }
  return { ok: true, value: out };
}

type KeyStateReason = 'not-found' | 'revoked' | 'config-import';

/**
 * Thrown from inside a repository transaction to abort it (the throw
 * triggers the rollback) and tell the handler which precondition failed,
 * so it can be translated into the matching HTTP status.
 */
class KeyStateError extends Error {
  readonly reason: KeyStateReason;
  readonly revokedAt: string | null;

  constructor(reason: KeyStateReason, revokedAt: string | null = null) {
    super(`gateway key precondition failed: ${reason}`);
    this.name = 'KeyStateError';
    this.reason = reason;
    this.revokedAt = revokedAt;
  }
}

/**
 * Build the admin router for gateway virtual keys.
 *
 * Every route is guarded by `deps.requireAdmin`. Mutating routes
 * invalidate `deps.keyCache` (when provided) after the DB write, and
 * revoke / rotate / delete additionally drop the key's metric labels
 * from `deps.gatewayMetrics` (when provided). Both side effects are
 * best-effort and never fail the request.
 */
export function createAdminGatewayApi(deps: AdminGatewayApiDeps): Router {
  const router = Router();
  const { repo, requireAdmin, getUserId, keyCache, gatewayMetrics } = deps;

  const actor = (req: Request): string => getUserId(req) ?? 'admin';

  // Centralize cache invalidation so every mutation handler follows the
  // same pattern: mutate first, invalidate second. Calling on a no-op
  // cache (undefined) is a safe noop.
  const invalidate = (id: string): void => {
    try {
      keyCache?.invalidate(id);
    } catch (e) {
      // Cache invalidation is best-effort; a thrown invalidate would
      // most likely mean a bug in the cache, but we still don't want
      // it to roll back the user-visible mutation.
      logger.warn(`[admin-gateway] keyCache.invalidate threw for id=${id}: ${e instanceof Error ? e.message : String(e)}`);
    }
  };

  // Phase 3b post-review: drop the per-key budget_used_ratio gauge label
  // when the key goes away. Without this, every revoked key leaves a
  // permanent {team, key_prefix} series in the registry — over enough
  // rotations the label space grows without bound. Best-effort: a
  // missing metrics handle (Phase 3b disabled), a label that was never
  // set (key revoked before its first usage write), or a prom-client
  // throw are all swallowed so admin mutation success isn't gated on
  // metric bookkeeping.
  //
  // NOTE(review): the label value is the first 8 chars of the row id,
  // not `row.keyPrefix`. This must match whatever the gauge writer
  // uses — confirm against the code that sets budgetUsedRatio.
  const dropKeyMetricLabels = (row: { id: string; team: string }): void => {
    if (!gatewayMetrics) return;
    try {
      const prefix = row.id.slice(0, 8);
      gatewayMetrics.budgetUsedRatio.remove({ team: row.team, key_prefix: prefix });
    } catch (e) {
      logger.warn(
        `[admin-gateway] metric label remove failed for id=${row.id}: ${e instanceof Error ? e.message : String(e)}`,
      );
    }
  };

  // POST / — issue a fresh sk-aao-* key. The raw value is returned in
  // the response body once and never again.
  router.post('/', requireAdmin, (req, res) => {
    const body = req.body as {
      team?: unknown;
      allowedModels?: unknown;
      tokensBudget?: unknown;
      rateLimitRpm?: unknown;
    } | undefined;
    const team = typeof body?.team === 'string' ? body.team.trim() : '';
    if (!team || !TEAM_REGEX.test(team)) {
      res.status(400).json({ error: 'team must match /^[a-zA-Z0-9._-]{1,64}$/' });
      return;
    }
    const allowed = parseAllowedModels(body?.allowedModels);
    if (!allowed.ok) {
      res.status(400).json({ error: allowed.error });
      return;
    }
    const limits = parseLimitsPatch(body);
    if (!limits.ok) {
      res.status(400).json({ error: limits.error });
      return;
    }

    const generated = generateVirtualKey();
    let created: GatewayVirtualKey;
    try {
      created = repo.createGatewayVirtualKey({
        keyHash: generated.hash,
        keyPrefix: generated.prefix,
        team,
        allowedModels: allowed.value,
        source: 'admin',
        createdBy: actor(req),
        // Phase 2b: optional budget / rate. Repository normalizer
        // accepts both null and undefined as unlimited.
        tokensBudget: limits.tokensBudget ?? null,
        rateLimitRpm: limits.rateLimitRpm ?? null,
      });
    } catch (e) {
      // randomBytes collision is mathematically negligible; any throw
      // here is more likely a transient SQLite locking issue.
      logger.warn(`[admin-gateway] create failed: ${e instanceof Error ? e.message : String(e)}`);
      res.status(500).json({ error: 'failed to create key' });
      return;
    }
    res.status(201).json(toDto(created, generated.raw));
  });

  // PATCH /:id — update policy fields (budget, rate limit, allowedModels).
  // Bearer / team / source / created_by are immutable here. Refuses to
  // touch config-import rows because those are managed via config.yaml
  // (consistent with the DELETE rule).
  router.patch('/:id', requireAdmin, (req, res) => {
    const id = req.params['id']!;

    // Parse + validate the body OUTSIDE the transaction so we don't pay
    // the SQLite serialization cost on bad input.
    const body = req.body as {
      tokensBudget?: unknown;
      rateLimitRpm?: unknown;
      allowedModels?: unknown;
    } | undefined;
    const limits = parseLimitsPatch(body);
    if (!limits.ok) {
      res.status(400).json({ error: limits.error });
      return;
    }
    const patch: {
      tokensBudget?: number | null;
      rateLimitRpm?: number | null;
      allowedModels?: string[] | null;
    } = {};
    // parseLimitsPatch only ever sets these to a number or null, so
    // "!== undefined" is equivalent to "was present in the body".
    if (limits.tokensBudget !== undefined) patch.tokensBudget = limits.tokensBudget;
    if (limits.rateLimitRpm !== undefined) patch.rateLimitRpm = limits.rateLimitRpm;
    if (body && hasOwn(body, 'allowedModels')) {
      // parseAllowedModels maps null to "no restriction" (null) itself.
      const parsed = parseAllowedModels(body.allowedModels);
      if (!parsed.ok) {
        res.status(400).json({ error: parsed.error });
        return;
      }
      patch.allowedModels = parsed.value;
    }
    if (Object.keys(patch).length === 0) {
      res.status(400).json({ error: 'patch body must include at least one of tokensBudget, rateLimitRpm, allowedModels' });
      return;
    }

    // Phase 3a follow-up: close the TOCTOU race between the "is the row
    // revoked / config-import?" check and the UPDATE statement. The read
    // and the conditional update run inside one transaction so they are
    // atomic; disallowed states abort the transaction by throwing a
    // KeyStateError, which the catch below maps to an HTTP status.
    let updated: GatewayVirtualKey;
    try {
      updated = repo.getDb().transaction(() => {
        const fresh = repo.findGatewayVirtualKeyById(id);
        if (!fresh) throw new KeyStateError('not-found');
        // Mirrors the rotate handler which also returns 409 for revoked.
        // Updating budget / rate / allowedModels on a row that can no
        // longer authenticate would never apply to a real request and
        // would mask audit history.
        if (fresh.revokedAt !== null) throw new KeyStateError('revoked', fresh.revokedAt);
        if (fresh.source === 'config-import') throw new KeyStateError('config-import');
        return repo.updateGatewayVirtualKey(id, patch);
      })();
    } catch (e) {
      if (e instanceof KeyStateError) {
        switch (e.reason) {
          case 'not-found':
            res.status(404).json({ error: 'key not found' });
            return;
          case 'revoked':
            res.status(409).json({ error: 'cannot modify a revoked key', revokedAt: e.revokedAt });
            return;
          case 'config-import':
            res.status(400).json({
              error: "cannot PATCH a config-import key (manage tokens_budget / rate_limit_rpm / allowed_models via config.yaml's gateway.virtual_keys instead)",
            });
            return;
        }
      }
      const msg = e instanceof Error ? e.message : String(e);
      logger.warn(`[admin-gateway] patch failed for id=${id}: ${msg}`);
      res.status(500).json({ error: 'patch failed' });
      return;
    }

    // F4: drop the stale cache entry so the next auth/budget/rate
    // middleware reads the fresh row from DB. The cache will repopulate
    // on the next lookup (and stay coherent for 5s after that).
    invalidate(id);
    res.json(toDto(updated));
  });

  // GET /:id/usage — current month usage + budget headroom + last 12
  // months of history. Single endpoint so the UI can render a key's
  // detail panel in one round-trip.
  router.get('/:id/usage', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    const period = currentPeriodYearMonth();
    const current = repo.getGatewayKeyUsage(id, period);
    const tokensIn = current?.tokensIn ?? 0;
    const tokensOut = current?.tokensOut ?? 0;
    const tokensTotal = tokensIn + tokensOut;
    // null budget = unlimited, so "remaining" is not meaningful; clamp
    // at 0 so an overshoot never reports negative headroom.
    const remaining = row.tokensBudget !== null
      ? Math.max(0, row.tokensBudget - tokensTotal)
      : null;
    // History excludes the current period (UI shows it separately).
    // Fetch 13 so that 12 remain after the current period is filtered out.
    const allHistory = repo.listGatewayKeyUsagesByKey(id, { limit: 13 });
    const history = allHistory
      .filter(u => u.periodStart !== period)
      .slice(0, 12)
      .map(u => ({
        period: u.periodStart,
        tokensIn: u.tokensIn,
        tokensOut: u.tokensOut,
        requests: u.requests,
      }));
    // Phase 3a F9: the previous `rateRecentRequests: null` field was
    // dead — the admin process and the gateway process are normally
    // separate, so the live RateLimiter handle was unreachable, and
    // the UI never displayed the value. Drop the field to keep the
    // wire schema lean. Phase 3b/3c can re-introduce it once gateway
    // IPC is in place.
    res.json({
      keyId: id,
      currentPeriod: period,
      tokensIn,
      tokensOut,
      tokensTotal,
      tokensBudget: row.tokensBudget,
      remaining,
      requestsThisMonth: current?.requests ?? 0,
      rateLimitRpm: row.rateLimitRpm,
      history,
    });
  });

  // GET / — list. Supports ?team= and ?activeOnly=true.
  router.get('/', requireAdmin, (req, res) => {
    const team = typeof req.query['team'] === 'string' ? req.query['team'] : undefined;
    const activeOnly = req.query['activeOnly'] === 'true';
    const rows = repo.listGatewayVirtualKeys({ team, activeOnly });
    res.json({ keys: rows.map(r => toDto(r)) });
  });

  // GET /:id — single. Visible even when revoked so audit views work.
  router.get('/:id', requireAdmin, (req, res) => {
    const row = repo.findGatewayVirtualKeyById(req.params['id']!);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    res.json(toDto(row));
  });

  // POST /:id/revoke — soft delete. A repeat revoke is answered with
  // 409 (not 200) so callers can distinguish "already revoked" from
  // "didn't exist" (404).
  router.post('/:id/revoke', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (row.revokedAt !== null) {
      res.status(409).json({ error: 'key already revoked', revokedAt: row.revokedAt });
      return;
    }
    const ok = repo.revokeGatewayVirtualKey(id, actor(req));
    if (!ok) {
      // Lost a race with another revoke; refetch and return 409 for consistency.
      const refreshed = repo.findGatewayVirtualKeyById(id);
      // Invalidate even on the lost-race path: the cache might still
      // hold the pre-revoke row from a hot lookup just before the race.
      invalidate(id);
      res.status(409).json({ error: 'key already revoked', revokedAt: refreshed?.revokedAt ?? null });
      return;
    }
    // F4: a revoked key MUST NOT keep authenticating from the cache.
    // The dbLookup wrapper additionally rejects cached rows with
    // revokedAt !== null as defense-in-depth.
    invalidate(id);
    // Phase 3b post-review: also drop the per-key budgetUsedRatio
    // gauge label so the prom-client registry doesn't grow unbounded
    // over the key lifecycle.
    dropKeyMetricLabels(row);
    const refreshed = repo.findGatewayVirtualKeyById(id)!;
    res.json({ ok: true, revokedAt: refreshed.revokedAt });
  });

  // POST /:id/rotate — atomic: issue a new key that inherits the old
  // key's policy (team, allowedModels, tokensBudget, rateLimitRpm), then
  // revoke the old one. The lookup, the precondition checks, the create
  // and the revoke all run inside one transaction, so:
  //   - a crash mid-flight can't leave both keys active, and
  //   - a revoke that lands concurrently can't result in a fresh key
  //     being minted from an already-revoked one (the transaction is
  //     rolled back if the revoke reports that nothing changed).
  router.post('/:id/rotate', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const generated = generateVirtualKey();
    const by = actor(req);

    let previous: GatewayVirtualKey;
    let created: GatewayVirtualKey;
    try {
      const result = repo.getDb().transaction(() => {
        const current = repo.findGatewayVirtualKeyById(id);
        if (!current) throw new KeyStateError('not-found');
        if (current.revokedAt !== null) throw new KeyStateError('revoked', current.revokedAt);
        const replacement = repo.createGatewayVirtualKey({
          keyHash: generated.hash,
          keyPrefix: generated.prefix,
          team: current.team,
          allowedModels: current.allowedModels,
          source: 'admin',
          createdBy: by,
          // Carry the limits over: omitting them would make the rotated
          // key unlimited (null / undefined both mean "unlimited").
          tokensBudget: current.tokensBudget,
          rateLimitRpm: current.rateLimitRpm,
        });
        // A falsy result means the old row was not revoked by this call
        // (the revoke handler treats that as "already revoked"). Throw
        // so the just-created replacement is rolled back.
        if (!repo.revokeGatewayVirtualKey(current.id, by)) {
          throw new KeyStateError('revoked');
        }
        return { current, replacement };
      })();
      previous = result.current;
      created = result.replacement;
    } catch (e) {
      if (e instanceof KeyStateError && e.reason === 'not-found') {
        res.status(404).json({ error: 'key not found' });
        return;
      }
      if (e instanceof KeyStateError && e.reason === 'revoked') {
        res.status(409).json({ error: 'cannot rotate a revoked key' });
        return;
      }
      logger.warn(`[admin-gateway] rotate failed: ${e instanceof Error ? e.message : String(e)}`);
      res.status(500).json({ error: 'rotate failed' });
      return;
    }

    // F4: drop the OLD key from the cache so the prior bearer can't
    // re-auth. The newly-created row will be cache-warmed on its first
    // hit; no need to pre-populate.
    invalidate(previous.id);
    // Phase 3b post-review: the old key prefix is gone — drop its
    // gauge label too. The new key will create its own label on first
    // usage write.
    dropKeyMetricLabels(previous);
    res.status(201).json(toDto(created, generated.raw));
  });

  // DELETE /:id — hard delete. config-import rows are protected: an
  // operator should remove the entry from config.yaml instead so it
  // doesn't get re-imported on the next boot. The Repository enforces
  // the same rule (defense-in-depth) by throwing; we translate that to
  // a 400 with a human-readable message instead of leaking a 500.
  router.delete('/:id', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (row.source === 'config-import') {
      res.status(400).json({
        error: "cannot delete a config-import key (remove the entry from config.yaml's gateway.virtual_keys, then restart, or POST /revoke instead)",
      });
      return;
    }
    try {
      repo.deleteGatewayVirtualKey(id);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      // Repository's defense-in-depth guard catches the case where the
      // row.source changes between our pre-check and the delete (race
      // with another writer flipping source via some future code path).
      if (/config-import/i.test(msg)) {
        res.status(400).json({ error: msg });
        return;
      }
      logger.warn(`[admin-gateway] delete failed: ${msg}`);
      res.status(500).json({ error: 'delete failed' });
      return;
    }
    // F4: hard delete must also wipe the cache — the bearer should
    // fail-auth on the next request, not after the TTL.
    invalidate(id);
    // Phase 3b post-review: drop metric label too. Same rationale as
    // revoke/rotate — keep registry bounded.
    dropKeyMetricLabels(row);
    res.status(204).end();
  });

  return router;
}