maestro/src/bridge/admin-gateway-api.ts
2026-06-03 05:08:00 +00:00

555 lines
22 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* AAO Gateway Phase 2a — admin REST API for virtual key management.
*
* Mounted on worker-mode server.ts (not on gateway-mode server.ts; the
* gateway is intentionally read-only over auth state). Path prefix:
* /api/admin/gateway/keys
*
* Endpoint shape (see also docs/superpowers/specs/2026-05-18-aao-gateway-mode-design.md
* § Phase 2a / Admin REST API):
* POST / — issue (raw key returned ONCE here)
* GET / — list (raw key never returned; supports ?team= and ?activeOnly=true)
* GET /:id — single (raw key never returned)
* GET /:id/usage — current-period usage, budget headroom, and up to 12 prior periods
* PATCH /:id — update tokensBudget / rateLimitRpm / allowedModels
*              (rejects revoked rows and source='config-import')
* POST /:id/revoke — soft delete
* POST /:id/rotate — atomic re-issue (new key returned, old revoked)
* DELETE /:id — hard delete (rejects source='config-import')
*
* Visibility: returned objects expose `keyPrefix` (sk-aao-XXXXXX) and
* metadata only. The raw bearer is surfaced only in the issue and rotate
* responses; lose it and you must rotate the key.
*/
import { Router, type RequestHandler, type Request } from 'express';
import type { Repository, GatewayVirtualKey } from '../db/repository.js';
import { generateVirtualKey } from '../gateway/key-format.js';
import { currentPeriodYearMonth } from '../gateway/period.js';
import type { KeyCache } from '../gateway/key-cache.js';
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
import { logger } from '../logger.js';
/**
* Dependencies injected into `createAdminGatewayApi`.
*
* `repo`, `requireAdmin` and `getUserId` are required. `keyCache` and
* `gatewayMetrics` are optional and used best-effort: when present, an
* exception thrown by either is logged and swallowed by the router so
* it never fails the admin mutation that triggered it.
*/
export interface AdminGatewayApiDeps {
/**
* Persistence layer. The router uses it for virtual-key lookups and
* mutations, usage reads, and `getDb().transaction(...)` to group
* statements atomically.
*/
repo: Repository;
/** Existing requireAdmin middleware (or a passthrough for auth-disabled deploys). */
requireAdmin: RequestHandler;
/**
* Extract the acting admin's user id from the request (for
* createdBy / revokedBy). Returns null when auth is disabled; the
* router records 'admin' in that case so audit history isn't blank.
*/
getUserId: (req: Request) => string | null;
/**
* Phase 3a F4: optional shared cache between auth + budget + rate
* middlewares. When wired by the same-process deployment, admin
* mutations (PATCH / revoke / rotate / delete) call cache.invalidate
* so the next request sees fresh state immediately. Cross-process
* setups fall back to the cache's 5s TTL.
*/
keyCache?: KeyCache;
/**
* Phase 3b post-review: optional gateway metrics handle. When wired,
* the revoke / rotate / delete handlers remove the
* `budgetUsedRatio{team, key_prefix}` series for the gone key — left
* in place those labels would grow unbounded over the key lifecycle.
*/
gatewayMetrics?: GatewayMetrics;
}
/**
* Wire-format DTO (camelCase JSON) returned by the admin key endpoints.
*
* Produced by `toDto` from a `GatewayVirtualKey` row. The raw bearer
* (`key`) is opt-in: it is only set when `toDto` is given the raw value,
* which the router does for the issue and rotate responses.
*/
interface GatewayKeyDto {
id: string;
/** Constant resource-type tag; always the literal 'gateway.key'. */
object: 'gateway.key';
/** Display prefix of the key (described in the file header as `sk-aao-XXXXXX`). */
keyPrefix: string;
team: string;
/**
* Model allowlist copied from the row, or null when none is set.
* NOTE(review): null presumably means "no restriction" — confirm
* against the gateway auth middleware.
*/
allowedModels: string[] | null;
/** Origin of the row; the router treats 'config-import' rows as read-only for PATCH and DELETE. */
source: GatewayVirtualKey['source'];
createdAt: string;
createdBy: string | null;
/** Null while the key is active; non-null once the key has been revoked. */
revokedAt: string | null;
revokedBy: string | null;
lastUsedAt: string | null;
/** Phase 2b: monthly tokens budget (null = unlimited). */
tokensBudget: number | null;
/** Phase 2b: per-minute requests cap (null = unlimited). */
rateLimitRpm: number | null;
/** Only present on POST / rotate responses. NEVER on list / get. */
key?: string;
}
/**
 * Project a stored virtual-key row onto the public wire DTO.
 *
 * Only the whitelisted fields below are copied, so anything else on the
 * row never reaches the response body.
 *
 * @param row - Row to serialise.
 * @param raw - Raw bearer value. When provided (anything other than
 *   `undefined`) it is attached as `key`; otherwise the DTO has no
 *   `key` property at all.
 * @returns The DTO, with `key` as the last property when present.
 */
function toDto(row: GatewayVirtualKey, raw?: string): GatewayKeyDto {
  const {
    id,
    keyPrefix,
    team,
    allowedModels,
    source,
    createdAt,
    createdBy,
    revokedAt,
    revokedBy,
    lastUsedAt,
    tokensBudget,
    rateLimitRpm,
  } = row;
  const base: GatewayKeyDto = {
    id,
    object: 'gateway.key',
    keyPrefix,
    team,
    allowedModels,
    source,
    createdAt,
    createdBy,
    revokedAt,
    revokedBy,
    lastUsedAt,
    tokensBudget,
    rateLimitRpm,
  };
  // Attach the secret only on explicit request so list/get can never leak it.
  return raw === undefined ? base : { ...base, key: raw };
}
/**
 * Hard caps that keep policy values far below 2^53
 * (Number.MAX_SAFE_INTEGER), the range in which a JavaScript number /
 * IEEE-754 double represents every integer exactly. SQLite's INTEGER
 * storage class is itself a 64-bit signed integer; the precision risk is
 * at the double boundary, where a stated budget like 1e20 would silently
 * round and the running total could never reach it. Either limit is well
 * beyond any sane real-world usage:
 *
 * - tokensBudget: 1 trillion tokens / month. At GPT-4-class pricing
 *   that's well over US$10M; if you legitimately need more, split keys.
 * - rateLimitRpm: 1,000,000 requests / minute. Per the original design
 *   note, the in-memory window array is bounded at 2× this, so the cap
 *   also keeps memory predictable.
 */
const MAX_TOKENS_BUDGET = 1_000_000_000_000;
const MAX_RATE_LIMIT_RPM = 1_000_000;

/**
 * Validate a single limit value.
 *
 * @param name - Field name, used verbatim in error messages.
 * @param value - Untrusted value taken from the request body.
 * @param max - Inclusive upper bound for the (un-floored) value.
 * @returns `null` for an explicit null (unlimited), the floored integer
 *   for a finite number in [1, max], or an error for anything else.
 */
function parseLimitField(
  name: string,
  value: unknown,
  max: number,
): { ok: true; value: number | null } | { ok: false; error: string } {
  if (value === null) return { ok: true, value: null };
  // `>= 1` (not `> 0`): values in (0, 1) would floor to 0, which is not a
  // positive integer and would be stored as a zero limit.
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 1) {
    return { ok: false, error: `${name} must be a positive integer or null` };
  }
  if (value > max) {
    return { ok: false, error: `${name} exceeds maximum (${max})` };
  }
  return { ok: true, value: Math.floor(value) };
}

/**
 * Parse a {tokensBudget?, rateLimitRpm?} pair from a request body.
 *
 * For each field the result carries a positive integer, null (explicit
 * unlimited), or no property at all (field absent from the body — don't
 * touch). Callers rely on the property being absent, not `undefined`,
 * to tell "not supplied" apart from a value.
 *
 * Strict: NaN / Infinity / negative / zero / non-number input is an
 * error — we don't silently coerce because operators expect their stated
 * limit to be applied. Floats >= 1 are floored (sub-integer budgets are
 * meaningless); floats below 1 are rejected rather than floored to 0.
 * Values above the hard cap are rejected — see the MAX_* constants.
 *
 * @param body - Request body (may be undefined when no JSON was sent).
 * @returns `{ ok: true, ...parsedFields }` or `{ ok: false, error }` with
 *   a message suitable for a 400 response.
 */
function parseLimitsPatch(body: { tokensBudget?: unknown; rateLimitRpm?: unknown } | undefined):
  | { ok: true; tokensBudget?: number | null; rateLimitRpm?: number | null }
  | { ok: false; error: string } {
  const out: { tokensBudget?: number | null; rateLimitRpm?: number | null } = {};
  if (body && Object.prototype.hasOwnProperty.call(body, 'tokensBudget')) {
    const parsed = parseLimitField('tokensBudget', body.tokensBudget, MAX_TOKENS_BUDGET);
    if (parsed.ok === false) return parsed;
    out.tokensBudget = parsed.value;
  }
  if (body && Object.prototype.hasOwnProperty.call(body, 'rateLimitRpm')) {
    const parsed = parseLimitField('rateLimitRpm', body.rateLimitRpm, MAX_RATE_LIMIT_RPM);
    if (parsed.ok === false) return parsed;
    out.rateLimitRpm = parsed.value;
  }
  return { ok: true, ...out };
}
/** Accepted shape for a team name: 1–64 chars of letters, digits, dot, underscore, hyphen. */
const TEAM_REGEX = /^[a-zA-Z0-9._-]{1,64}$/;
/** Upper bound on the number of entries in an allowedModels list. */
const MAX_ALLOWED_MODELS = 64;
/** Upper bound on the length of a single model name. */
const MAX_MODEL_NAME_LEN = 128;

/**
 * Validate an untrusted `allowedModels` value from a request body.
 *
 * @param value - `undefined` / `null` (no list), or an array of model names.
 * @returns `{ ok: true, value: null }` when no list was supplied,
 *   `{ ok: true, value }` with a fresh copy of the names when every entry
 *   is a non-empty string within the length cap, or `{ ok: false, error }`
 *   when the value is not an array, has too many entries, or contains an
 *   invalid entry.
 */
function parseAllowedModels(value: unknown): { ok: true; value: string[] | null } | { ok: false; error: string } {
  if (value == null) {
    return { ok: true, value: null };
  }
  if (!Array.isArray(value)) {
    return { ok: false, error: 'allowedModels must be an array of strings' };
  }
  const entries: unknown[] = value;
  if (entries.length > MAX_ALLOWED_MODELS) {
    return { ok: false, error: `allowedModels supports at most ${MAX_ALLOWED_MODELS} entries` };
  }
  const models: string[] = [];
  for (let i = 0; i < entries.length; i += 1) {
    const entry = entries[i];
    if (typeof entry === 'string' && entry.length >= 1 && entry.length <= MAX_MODEL_NAME_LEN) {
      models.push(entry);
      continue;
    }
    // First bad entry aborts the whole parse; nothing is partially accepted.
    return { ok: false, error: 'allowedModels entries must be non-empty strings ≤ 128 chars' };
  }
  return { ok: true, value: models };
}
/**
 * Build the Express router for admin management of gateway virtual keys.
 *
 * Every route is guarded by `deps.requireAdmin`. Routes registered, in
 * order: POST /, PATCH /:id, GET /:id/usage, GET /, GET /:id,
 * POST /:id/revoke, POST /:id/rotate, DELETE /:id.
 *
 * Mutating handlers follow one pattern: mutate the repository first, then
 * (best-effort) invalidate the shared key cache and, when the key goes
 * away, drop its metric series. Failures of those two follow-ups are
 * logged and never fail the request.
 *
 * @param deps - Repository, auth middleware, actor resolver, and the
 *   optional cache / metrics handles.
 * @returns The configured router; the caller is responsible for mounting it.
 */
export function createAdminGatewayApi(deps: AdminGatewayApiDeps): Router {
  const router = Router();
  const { repo, requireAdmin, getUserId, keyCache, gatewayMetrics } = deps;
  // Falls back to the literal 'admin' when getUserId yields null, so
  // createdBy / revokedBy are never blank.
  const actor = (req: Request): string => getUserId(req) ?? 'admin';

  // Centralize cache invalidation so every mutation handler follows the
  // same pattern: mutate first, invalidate second. With no cache wired
  // (undefined) this is a no-op.
  const invalidate = (id: string): void => {
    try {
      keyCache?.invalidate(id);
    } catch (e) {
      // Cache invalidation is best-effort; a thrown invalidate would
      // most likely mean a bug in the cache, but we still don't want
      // it to roll back the user-visible mutation.
      logger.warn(`[admin-gateway] keyCache.invalidate threw for id=${id}: ${e instanceof Error ? e.message : String(e)}`);
    }
  };

  // Phase 3b post-review: drop the per-key budget_used_ratio gauge label
  // when the key goes away. Without this, every revoked key leaves a
  // permanent {team, key_prefix} series in the registry — over enough
  // rotations the label space grows without bound. Best-effort: a
  // missing metrics handle (Phase 3b disabled), a label that was never
  // set (key revoked before its first usage write), or a prom-client
  // throw are all swallowed so admin mutation success isn't gated on
  // metric bookkeeping.
  const dropKeyMetricLabels = (row: { id: string; team: string }): void => {
    if (!gatewayMetrics) return;
    try {
      // NOTE(review): the label value is the first 8 chars of row.id, not
      // row.keyPrefix — confirm this matches whatever code sets the gauge.
      const prefix = row.id.slice(0, 8);
      gatewayMetrics.budgetUsedRatio.remove({ team: row.team, key_prefix: prefix });
    } catch (e) {
      logger.warn(
        `[admin-gateway] metric label remove failed for id=${row.id}: ${e instanceof Error ? e.message : String(e)}`,
      );
    }
  };

  // POST / — issue a fresh sk-aao-* key. The raw value is returned in
  // the response body once and never again.
  router.post('/', requireAdmin, (req, res) => {
    const body = req.body as {
      team?: unknown;
      allowedModels?: unknown;
      tokensBudget?: unknown;
      rateLimitRpm?: unknown;
    } | undefined;
    const team = typeof body?.team === 'string' ? body.team.trim() : '';
    if (!team || !TEAM_REGEX.test(team)) {
      res.status(400).json({ error: 'team must match /^[a-zA-Z0-9._-]{1,64}$/' });
      return;
    }
    const allowed = parseAllowedModels(body?.allowedModels);
    if (!allowed.ok) {
      res.status(400).json({ error: allowed.error });
      return;
    }
    const limits = parseLimitsPatch(body);
    if (!limits.ok) {
      res.status(400).json({ error: limits.error });
      return;
    }
    const generated = generateVirtualKey();
    let created: GatewayVirtualKey;
    try {
      created = repo.createGatewayVirtualKey({
        keyHash: generated.hash,
        keyPrefix: generated.prefix,
        team,
        allowedModels: allowed.value,
        source: 'admin',
        createdBy: actor(req),
        // Phase 2b: optional budget / rate. Repository normalizer
        // accepts both null and undefined as unlimited.
        tokensBudget: limits.tokensBudget ?? null,
        rateLimitRpm: limits.rateLimitRpm ?? null,
      });
    } catch (e) {
      // randomBytes collision is mathematically negligible; any throw
      // here is more likely a transient SQLite locking issue.
      logger.warn(`[admin-gateway] create failed: ${e instanceof Error ? e.message : String(e)}`);
      res.status(500).json({ error: 'failed to create key' });
      return;
    }
    res.status(201).json(toDto(created, generated.raw));
  });

  // PATCH /:id — update policy fields (budget, rate limit, allowedModels).
  // Bearer / team / source / created_by are immutable here. Refuses to
  // touch config-import rows because those are managed via config.yaml
  // (consistent with the DELETE rule).
  router.patch('/:id', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    // Parse + validate the body OUTSIDE the transaction so we don't pay
    // the SQLite serialization cost on bad input.
    const body = req.body as {
      tokensBudget?: unknown;
      rateLimitRpm?: unknown;
      allowedModels?: unknown;
    } | undefined;
    const limits = parseLimitsPatch(body);
    if (!limits.ok) {
      res.status(400).json({ error: limits.error });
      return;
    }
    const patch: {
      tokensBudget?: number | null;
      rateLimitRpm?: number | null;
      allowedModels?: string[] | null;
    } = {};
    // Presence (own property), not value, decides whether a field is
    // patched: an explicit null means "set to unlimited".
    if (Object.prototype.hasOwnProperty.call(limits, 'tokensBudget')) patch.tokensBudget = limits.tokensBudget!;
    if (Object.prototype.hasOwnProperty.call(limits, 'rateLimitRpm')) patch.rateLimitRpm = limits.rateLimitRpm!;
    if (body && Object.prototype.hasOwnProperty.call(body, 'allowedModels')) {
      if (body.allowedModels === null) {
        patch.allowedModels = null;
      } else {
        const parsed = parseAllowedModels(body.allowedModels);
        if (!parsed.ok) {
          res.status(400).json({ error: parsed.error });
          return;
        }
        patch.allowedModels = parsed.value;
      }
    }
    if (Object.keys(patch).length === 0) {
      res.status(400).json({ error: 'patch body must include at least one of tokensBudget, rateLimitRpm, allowedModels' });
      return;
    }
    // Phase 3a follow-up: close the TOCTOU race between the "is the row
    // revoked / config-import?" check and the UPDATE statement. Pre-fix
    // the read + update lived outside any transaction; a concurrent
    // revoke landing between the two would let PATCH overwrite a revoked
    // row (silent ghost mutation in the audit log). Both now run inside a
    // single better-sqlite3 transaction.
    //
    // Disallowed conditions are signalled by throwing sentinel errors
    // (matched by message below), which also aborts the transaction; the
    // catch block maps each sentinel to its HTTP status.
    let updated: GatewayVirtualKey;
    try {
      updated = repo.getDb().transaction(() => {
        const fresh = repo.findGatewayVirtualKeyById(id);
        if (!fresh) throw new Error('PATCH_NOT_FOUND');
        // Mirrors the rotate handler which also returns 409 for revoked.
        // Pre-fix PATCH would silently update budget / rate / allowedModels
        // on a row that can no longer authenticate — the new values
        // would never apply to a real request and would mask audit
        // history.
        if (fresh.revokedAt !== null) {
          const err = new Error('PATCH_REVOKED');
          (err as Error & { revokedAt?: string }).revokedAt = fresh.revokedAt;
          throw err;
        }
        if (fresh.source === 'config-import') throw new Error('PATCH_CONFIG_IMPORT');
        return repo.updateGatewayVirtualKey(id, patch);
      })();
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      if (msg === 'PATCH_NOT_FOUND') {
        res.status(404).json({ error: 'key not found' });
        return;
      }
      if (msg === 'PATCH_REVOKED') {
        const revokedAt = (e as Error & { revokedAt?: string }).revokedAt ?? null;
        res.status(409).json({ error: 'cannot modify a revoked key', revokedAt });
        return;
      }
      if (msg === 'PATCH_CONFIG_IMPORT') {
        res.status(400).json({
          error:
            "cannot PATCH a config-import key (manage tokens_budget / rate_limit_rpm / allowed_models via config.yaml's gateway.virtual_keys instead)",
        });
        return;
      }
      logger.warn(`[admin-gateway] patch failed for id=${id}: ${msg}`);
      res.status(500).json({ error: 'patch failed' });
      return;
    }
    // F4: drop the stale cache entry so the next auth/budget/rate
    // middleware reads the fresh row from DB. The cache will repopulate
    // on the next lookup (and stay coherent for 5s after that).
    invalidate(id);
    res.json(toDto(updated));
  });

  // GET /:id/usage — current-period usage + budget headroom + up to 12
  // prior periods of history. Single endpoint so the UI can render a
  // key's detail panel in one round-trip.
  router.get('/:id/usage', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    const period = currentPeriodYearMonth();
    const current = repo.getGatewayKeyUsage(id, period);
    // No usage row yet for this period is reported as zeros.
    const tokensIn = current?.tokensIn ?? 0;
    const tokensOut = current?.tokensOut ?? 0;
    const tokensTotal = tokensIn + tokensOut;
    // Clamped at 0 so an overspent key never reports negative headroom;
    // null when the key has no budget.
    const remaining = row.tokensBudget !== null ? Math.max(0, row.tokensBudget - tokensTotal) : null;
    // History excludes the current period (UI shows it separately). Fetch
    // 13 so that 12 remain after the current period is filtered out.
    const allHistory = repo.listGatewayKeyUsagesByKey(id, { limit: 13 });
    const history = allHistory
      .filter(u => u.periodStart !== period)
      .slice(0, 12)
      .map(u => ({
        period: u.periodStart,
        tokensIn: u.tokensIn,
        tokensOut: u.tokensOut,
        requests: u.requests,
      }));
    // Phase 3a F9: the previous `rateRecentRequests: null` field was
    // dead — the admin process and the gateway process are normally
    // separate, so the live RateLimiter handle was unreachable, and
    // the UI never displayed the value. Drop the field to keep the
    // wire schema lean. Phase 3b/3c can re-introduce it once gateway
    // IPC is in place.
    res.json({
      keyId: id,
      currentPeriod: period,
      tokensIn,
      tokensOut,
      tokensTotal,
      tokensBudget: row.tokensBudget,
      remaining,
      requestsThisMonth: current?.requests ?? 0,
      rateLimitRpm: row.rateLimitRpm,
      history,
    });
  });

  // GET / — list. Supports ?team= and ?activeOnly=true.
  router.get('/', requireAdmin, (req, res) => {
    // Non-string ?team values (e.g. repeated params parsed as arrays) are ignored.
    const team = typeof req.query['team'] === 'string' ? req.query['team'] : undefined;
    const activeOnly = req.query['activeOnly'] === 'true';
    const rows = repo.listGatewayVirtualKeys({ team, activeOnly });
    res.json({ keys: rows.map(r => toDto(r)) });
  });

  // GET /:id — single. Visible even when revoked so audit views work.
  router.get('/:id', requireAdmin, (req, res) => {
    const row = repo.findGatewayVirtualKeyById(req.params['id']!);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    res.json(toDto(row));
  });

  // POST /:id/revoke — soft delete. A repeat revoke is answered with 409
  // (not 200) so callers can distinguish "already revoked" from "didn't
  // exist" (404).
  router.post('/:id/revoke', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (row.revokedAt !== null) {
      res.status(409).json({ error: 'key already revoked', revokedAt: row.revokedAt });
      return;
    }
    const ok = repo.revokeGatewayVirtualKey(id, actor(req));
    if (!ok) {
      // Lost a race with another revoke; refetch and return 409 for consistency.
      const refreshed = repo.findGatewayVirtualKeyById(id);
      // Invalidate even on the lost-race path: the cache might still
      // hold the pre-revoke row from a hot lookup just before the race.
      invalidate(id);
      res.status(409).json({ error: 'key already revoked', revokedAt: refreshed?.revokedAt ?? null });
      return;
    }
    // F4: a revoked key MUST NOT keep authenticating from the cache.
    // The dbLookup wrapper additionally rejects cached rows with
    // revokedAt !== null as defense-in-depth.
    invalidate(id);
    // Phase 3b post-review: also drop the per-key budgetUsedRatio
    // gauge label so the prom-client registry doesn't grow unbounded
    // over the key lifecycle.
    dropKeyMetricLabels(row);
    // Re-read to report the stored timestamp. Null-safe (same form as the
    // lost-race branch) so a row that vanished in between cannot turn an
    // already-committed revoke into an unhandled exception.
    const refreshed = repo.findGatewayVirtualKeyById(id);
    res.json({ ok: true, revokedAt: refreshed?.revokedAt ?? null });
  });

  // POST /:id/rotate — atomic: issue a new key that inherits the old
  // row's team, allowedModels, tokensBudget and rateLimitRpm, then revoke
  // the old. Performed under a better-sqlite3 transaction so a crash
  // mid-flight can't leave both active.
  router.post('/:id/rotate', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const old = repo.findGatewayVirtualKeyById(id);
    if (!old) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (old.revokedAt !== null) {
      res.status(409).json({ error: 'cannot rotate a revoked key' });
      return;
    }
    const generated = generateVirtualKey();
    const by = actor(req);
    let created: GatewayVirtualKey;
    try {
      const tx = repo.getDb().transaction(() => {
        const c = repo.createGatewayVirtualKey({
          keyHash: generated.hash,
          keyPrefix: generated.prefix,
          team: old.team,
          allowedModels: old.allowedModels,
          source: 'admin',
          createdBy: by,
          // Carry the policy limits over. Omitting them means "unlimited"
          // (see the create handler), so a rotation would otherwise
          // silently strip the key's budget and rate cap.
          // NOTE(review): usage is read per key id (getGatewayKeyUsage), so
          // the new key presumably starts the period with no recorded
          // usage — confirm that is the intended rotation semantics.
          tokensBudget: old.tokensBudget,
          rateLimitRpm: old.rateLimitRpm,
        });
        // A falsy result means the revoke did not apply (the revoke
        // handler treats that as a lost race). Throw so the transaction
        // rolls back instead of committing a replacement for a key that
        // someone else already revoked.
        if (!repo.revokeGatewayVirtualKey(old.id, by)) {
          throw new Error('ROTATE_REVOKE_LOST_RACE');
        }
        return c;
      });
      created = tx();
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      if (msg === 'ROTATE_REVOKE_LOST_RACE') {
        // Same reasoning as the revoke lost-race path: the cache may
        // still hold the pre-revoke row.
        invalidate(old.id);
        res.status(409).json({ error: 'cannot rotate a revoked key' });
        return;
      }
      logger.warn(`[admin-gateway] rotate failed: ${msg}`);
      res.status(500).json({ error: 'rotate failed' });
      return;
    }
    // F4: drop the OLD key from the cache so the prior bearer can't
    // re-auth. The newly-created row will be cache-warmed on its first
    // hit; no need to pre-populate.
    invalidate(old.id);
    // Phase 3b post-review: the old key prefix is gone — drop its
    // gauge label too. The new key will create its own label on first
    // usage write.
    dropKeyMetricLabels(old);
    res.status(201).json(toDto(created, generated.raw));
  });

  // DELETE /:id — hard delete. config-import rows are protected: an
  // operator should remove the entry from config.yaml instead so it
  // doesn't get re-imported on the next boot. The Repository enforces
  // the same rule (defense-in-depth) by throwing; we translate that to
  // a 400 with a human-readable message instead of leaking a 500.
  router.delete('/:id', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (row.source === 'config-import') {
      res.status(400).json({
        error: "cannot delete a config-import key (remove the entry from config.yaml's gateway.virtual_keys, then restart, or POST /revoke instead)",
      });
      return;
    }
    try {
      repo.deleteGatewayVirtualKey(id);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      // Repository's defense-in-depth guard catches the case where the
      // row.source changes between our pre-check and the delete (race
      // with another writer flipping source via some future code path).
      if (/config-import/i.test(msg)) {
        res.status(400).json({ error: msg });
        return;
      }
      logger.warn(`[admin-gateway] delete failed: ${msg}`);
      res.status(500).json({ error: 'delete failed' });
      return;
    }
    // F4: hard delete must also wipe the cache — the bearer should
    // fail-auth on the next request, not after the TTL.
    invalidate(id);
    // Phase 3b post-review: drop metric label too. Same rationale as
    // revoke/rotate — keep registry bounded.
    dropKeyMetricLabels(row);
    res.status(204).end();
  });

  return router;
}