maestro/src/bridge/admin-gateway-api.ts

/**
 * AAO Gateway Phase 2a — admin REST API for virtual key management.
 *
 * Mounted on worker-mode server.ts (not on gateway-mode server.ts; the
 * gateway is intentionally read-only over auth state). Path prefix:
 *   /api/admin/gateway/keys
 *
 * Endpoint shape (see also docs/superpowers/specs/2026-05-18-aao-gateway-mode-design.md
 * § Phase 2a / Admin REST API):
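 *   POST   /                — issue (raw key returned ONCE here)
 *   GET    /                — list (raw key never returned; ?team= / ?activeOnly=true)
 *   GET    /:id             — single (raw key never returned)
 *   GET    /:id/usage       — current-period usage, budget headroom, usage history
 *   PATCH  /:id             — update policy (tokensBudget / rateLimitRpm / allowedModels)
 *   POST   /:id/revoke      — soft delete
 *   POST   /:id/rotate      — atomic re-issue (new key returned, old revoked)
 *   DELETE /:id             — hard delete (rejects source='config-import')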
 *
 * Visibility: returned objects expose `keyPrefix` (sk-aao-XXXXXX) and
 * metadata only. The raw bearer is surfaced exactly once, in the
 * issue/rotate response; if it is lost, the key must be rotated.
 */
import { Router, type RequestHandler, type Request } from 'express';
import type { Repository, GatewayVirtualKey } from '../db/repository.js';
import { generateVirtualKey } from '../gateway/key-format.js';
import { currentPeriodYearMonth } from '../gateway/period.js';
import type { KeyCache } from '../gateway/key-cache.js';
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
import { logger } from '../logger.js';

/**
 * Dependencies injected into {@link createAdminGatewayApi}.
 *
 * `repo`, `requireAdmin` and `getUserId` are required; `keyCache` and
 * `gatewayMetrics` are optional and every use of them inside the router
 * is best-effort (failures are logged, never surfaced to the caller).
 */
export interface AdminGatewayApiDeps {
  /** Persistence layer used for all virtual-key and usage reads/writes. */
  repo: Repository;
  /** Existing requireAdmin middleware (or a passthrough for auth-disabled deploys). */
  requireAdmin: RequestHandler;
  /**
   * Extract the acting admin's user id from the request (for
   * createdBy / revokedBy). Returns null when auth is disabled; the
   * router records 'admin' in that case so audit history isn't blank.
   */
  getUserId: (req: Request) => string | null;
  /**
   * Phase 3a F4: optional shared cache between auth + budget + rate
   * middlewares. When wired by the same-process deployment, admin
   * mutations (PATCH / revoke / rotate / delete) call cache.invalidate
   * so the next request sees fresh state immediately. Cross-process
   * setups fall back to the cache's 5s TTL.
   */
  keyCache?: KeyCache;
  /**
   * Phase 3b post-review: optional gateway metrics handle. When wired,
   * the revoke / rotate / delete handlers remove the
   * `budgetUsedRatio{team, key_prefix}` series for the gone key — left
   * in place those labels would grow unbounded over the key lifecycle.
   */
  gatewayMetrics?: GatewayMetrics;
}

/**
 * Wire-format DTO (camelCase JSON) returned by every key endpoint.
 * Raw key is opt-in for issue/rotate; all other fields are copied
 * one-to-one from the stored row.
 */
interface GatewayKeyDto {
  /** Row id; this is the `:id` path parameter of the per-key routes. */
  id: string;
  /** Constant type tag so clients can recognise the payload shape. */
  object: 'gateway.key';
  /** Non-secret display prefix of the bearer (sk-aao-XXXXXX). */
  keyPrefix: string;
  /** Owning team label supplied at issue time (inherited on rotate). */
  team: string;
  /** Model allow-list; null when no list was supplied. */
  allowedModels: string[] | null;
  /** Origin of the row ('admin' for keys issued here, or 'config-import'). */
  source: GatewayVirtualKey['source'];
  /** Creation timestamp as stored on the row. */
  createdAt: string;
  /** Actor recorded at creation; 'admin' when auth is disabled. */
  createdBy: string | null;
  /** Revocation timestamp; null while the key is still active. */
  revokedAt: string | null;
  /** Actor recorded at revocation; null while the key is still active. */
  revokedBy: string | null;
  /** Last-use timestamp as stored on the row (may be null). */
  lastUsedAt: string | null;
  /** Phase 2b: monthly tokens budget (null = unlimited). */
  tokensBudget: number | null;
  /** Phase 2b: per-minute requests cap (null = unlimited). */
  rateLimitRpm: number | null;
  /** Only present on POST / rotate responses. NEVER on list / get. */
  key?: string;
}

/**
 * Project a stored virtual-key row onto its wire representation.
 *
 * @param row Stored key row.
 * @param raw Raw bearer value; pass it only from the issue / rotate
 *   handlers. When omitted the resulting DTO has no `key` property at all.
 * @returns The DTO, with `key` appended last when `raw` was supplied.
 */
function toDto(row: GatewayVirtualKey, raw?: string): GatewayKeyDto {
  const {
    id,
    keyPrefix,
    team,
    allowedModels,
    source,
    createdAt,
    createdBy,
    revokedAt,
    revokedBy,
    lastUsedAt,
    tokensBudget,
    rateLimitRpm,
  } = row;
  // Key order below is the order the fields appear in the JSON response.
  const base: GatewayKeyDto = {
    id,
    object: 'gateway.key',
    keyPrefix,
    team,
    allowedModels,
    source,
    createdAt,
    createdBy,
    revokedAt,
    revokedBy,
    lastUsedAt,
    tokensBudget,
    rateLimitRpm,
  };
  return raw === undefined ? base : { ...base, key: raw };
}

/**
 * Hard caps to keep policy values comfortably inside the range where
 * integers stay exact. SQLite's INTEGER storage class is a 64-bit signed
 * value, but once a number passes 2^53 (the exact-integer limit of an
 * IEEE-754 double, i.e. of a JS number and of SQLite REAL) arithmetic
 * that falls back to REAL starts dropping low bits — so a stated budget
 * like 1e20 would silently round and the running total could never
 * reach it. Either limit is well beyond any sane real-world usage:
 *
 * - tokensBudget: 1 trillion tokens / month. At GPT-4-class pricing
 *   that's well over US$10M; if you legitimately need more, split keys.
 * - rateLimitRpm: 1,000,000 requests / minute. The in-memory window
 *   array is bounded at 2× this, so the cap also keeps memory predictable.
 */
const MAX_TOKENS_BUDGET = 1_000_000_000_000;
const MAX_RATE_LIMIT_RPM = 1_000_000;

/**
 * Validate one limit field: `null` means "unlimited"; otherwise the value
 * must be a finite number no greater than `max` whose floor is at least 1.
 */
function parsePositiveIntLimit(
  field: string,
  raw: unknown,
  max: number,
): { value: number | null } | { error: string } {
  if (raw === null) return { value: null };
  if (typeof raw !== 'number' || !Number.isFinite(raw)) {
    return { error: `${field} must be a positive integer or null` };
  }
  if (raw > max) {
    return { error: `${field} exceeds maximum (${max})` };
  }
  // Floor BEFORE the positivity check: a fraction such as 0.5 is > 0 but
  // floors to 0, which must not be accepted as a "positive" limit.
  const floored = Math.floor(raw);
  if (floored < 1) {
    return { error: `${field} must be a positive integer or null` };
  }
  return { value: floored };
}

/**
 * Parse a {tokensBudget?, rateLimitRpm?} pair from a request body.
 *
 * For each field the result carries a positive integer, null (explicit
 * unlimited), or no property at all (field absent from the body — "don't
 * touch"). Callers rely on that presence/absence distinction, so a key is
 * only set on the result when the body had it as an own property.
 *
 * Strict: a non-number, NaN / Infinity, zero, negative value, or a
 * fraction below 1 is a 400 — we don't silently coerce because operators
 * expect their stated limit to be applied. Other floats are floored
 * (sub-integer budgets are meaningless). Values above the hard cap are
 * rejected — see the MAX_* constants above for the rationale.
 *
 * @param body Parsed request body (may be undefined when no body was sent).
 * @returns `{ ok: true, ... }` with the parsed fields, or
 *   `{ ok: false, error }` with a message suitable for a 400 response.
 */
function parseLimitsPatch(body: { tokensBudget?: unknown; rateLimitRpm?: unknown } | undefined):
  | { ok: true; tokensBudget?: number | null; rateLimitRpm?: number | null }
  | { ok: false; error: string } {
  const out: { tokensBudget?: number | null; rateLimitRpm?: number | null } = {};
  if (body && Object.prototype.hasOwnProperty.call(body, 'tokensBudget')) {
    const parsed = parsePositiveIntLimit('tokensBudget', body.tokensBudget, MAX_TOKENS_BUDGET);
    if ('error' in parsed) return { ok: false, error: parsed.error };
    out.tokensBudget = parsed.value;
  }
  if (body && Object.prototype.hasOwnProperty.call(body, 'rateLimitRpm')) {
    const parsed = parsePositiveIntLimit('rateLimitRpm', body.rateLimitRpm, MAX_RATE_LIMIT_RPM);
    if ('error' in parsed) return { ok: false, error: parsed.error };
    out.rateLimitRpm = parsed.value;
  }
  return { ok: true, ...out };
}

/** Accepted shape of a team label: 1–64 chars from [a-zA-Z0-9._-]. */
const TEAM_REGEX = /^[a-zA-Z0-9._-]{1,64}$/;
/** Upper bound on the number of entries in an allowedModels list. */
const MAX_ALLOWED_MODELS = 64;
/** Upper bound on the length of a single model name. */
const MAX_MODEL_NAME_LEN = 128;

/**
 * Validate an `allowedModels` value taken from a request body.
 *
 * - `undefined` / `null` → accepted, yields `null`.
 * - anything that is not an array → rejected.
 * - arrays longer than MAX_ALLOWED_MODELS → rejected.
 * - any entry that is not a string of 1..MAX_MODEL_NAME_LEN chars → rejected.
 *
 * @returns `{ ok: true, value }` where `value` is a fresh array (never the
 *   caller's own instance) or null, or `{ ok: false, error }`.
 */
function parseAllowedModels(value: unknown): { ok: true; value: string[] | null } | { ok: false; error: string } {
  if (value == null) {
    return { ok: true, value: null };
  }
  if (!Array.isArray(value)) {
    return { ok: false, error: 'allowedModels must be an array of strings' };
  }
  if (value.length > MAX_ALLOWED_MODELS) {
    return { ok: false, error: `allowedModels supports at most ${MAX_ALLOWED_MODELS} entries` };
  }

  const isModelName = (entry: unknown): entry is string =>
    typeof entry === 'string' && entry.length >= 1 && entry.length <= MAX_MODEL_NAME_LEN;

  // Array.from yields a dense copy (holes become undefined), so the
  // length comparison below catches every invalid slot.
  const entries: unknown[] = Array.from(value);
  const names = entries.filter(isModelName);
  if (names.length !== entries.length) {
    return { ok: false, error: 'allowedModels entries must be non-empty strings ≤ 128 chars' };
  }
  return { ok: true, value: names };
}

/**
 * Build the Express router that serves the admin virtual-key API.
 *
 * Every route is guarded by `deps.requireAdmin`. Routes, relative to the
 * mount point:
 *   POST   /            issue a key (raw bearer returned once)
 *   PATCH  /:id         update tokensBudget / rateLimitRpm / allowedModels
 *   GET    /:id/usage   current-period usage, headroom and history
 *   GET    /            list (?team=, ?activeOnly=true)
 *   GET    /:id         single key
 *   POST   /:id/revoke  soft delete
 *   POST   /:id/rotate  re-issue + revoke old in one transaction
 *   DELETE /:id         hard delete (config-import rows rejected)
 *
 * Successful mutations of an existing key invalidate `deps.keyCache` (when
 * provided); revoke / rotate / delete also drop the key's
 * `budgetUsedRatio` metric series (when `deps.gatewayMetrics` is
 * provided). Both side effects are best-effort and never fail the request.
 *
 * @param deps Repository, auth middleware, actor resolver and optional
 *   cache / metrics handles.
 * @returns A router the caller mounts under its chosen path prefix.
 */
export function createAdminGatewayApi(deps: AdminGatewayApiDeps): Router {
  const router = Router();
  const { repo, requireAdmin, getUserId, keyCache, gatewayMetrics } = deps;
  // Audit actor: fall back to the literal 'admin' when auth is disabled
  // (getUserId returns null) so createdBy / revokedBy are never blank.
  const actor = (req: Request): string => getUserId(req) ?? 'admin';
  // Centralize cache invalidation so every mutation handler follows the
  // same pattern: mutate first, invalidate second. Calling on a no-op
  // cache (undefined) is a safe noop.
  const invalidate = (id: string): void => {
    try {
      keyCache?.invalidate(id);
    } catch (e) {
      // Cache invalidation is best-effort; a thrown invalidate would
      // most likely mean a bug in the cache, but we still don't want
      // it to roll back the user-visible mutation.
      logger.warn(`[admin-gateway] keyCache.invalidate threw for id=${id}: ${e instanceof Error ? e.message : String(e)}`);
    }
  };

  // Phase 3b post-review: drop the per-key budget_used_ratio gauge label
  // when the key goes away. Without this, every revoked key leaves a
  // permanent {team, key_prefix} series in the registry — over enough
  // rotations the label space grows without bound. Best-effort: a
  // missing metrics handle (Phase 3b disabled), a label that was never
  // set (key revoked before its first usage write), or a prom-client
  // throw are all swallowed so admin mutation success isn't gated on
  // metric bookkeeping.
  const dropKeyMetricLabels = (row: { id: string; team: string }): void => {
    if (!gatewayMetrics) return;
    try {
      // NOTE(review): the label value is the first 8 chars of the row id,
      // not row.keyPrefix — confirm this matches how the writer of the
      // budgetUsedRatio gauge derives its key_prefix label.
      const prefix = row.id.slice(0, 8);
      gatewayMetrics.budgetUsedRatio.remove({ team: row.team, key_prefix: prefix });
    } catch (e) {
      logger.warn(
        `[admin-gateway] metric label remove failed for id=${row.id}: ${e instanceof Error ? e.message : String(e)}`,
      );
    }
  };

  // POST / — issue a fresh sk-aao-* key. The raw value is returned in
  // the response body once and never again.
  router.post('/', requireAdmin, (req, res) => {
    const body = req.body as {
      team?: unknown;
      allowedModels?: unknown;
      tokensBudget?: unknown;
      rateLimitRpm?: unknown;
    } | undefined;
    const team = typeof body?.team === 'string' ? body.team.trim() : '';
    if (!team || !TEAM_REGEX.test(team)) {
      res.status(400).json({ error: 'team must match /^[a-zA-Z0-9._-]{1,64}$/' });
      return;
    }
    const allowed = parseAllowedModels(body?.allowedModels);
    if (!allowed.ok) {
      res.status(400).json({ error: allowed.error });
      return;
    }
    const limits = parseLimitsPatch(body);
    if (!limits.ok) {
      res.status(400).json({ error: limits.error });
      return;
    }
    const generated = generateVirtualKey();
    let created: GatewayVirtualKey;
    try {
      created = repo.createGatewayVirtualKey({
        keyHash: generated.hash,
        keyPrefix: generated.prefix,
        team,
        allowedModels: allowed.value,
        source: 'admin',
        createdBy: actor(req),
        // Phase 2b: optional budget / rate. Repository normalizer
        // accepts both null and undefined as unlimited.
        tokensBudget: limits.tokensBudget ?? null,
        rateLimitRpm: limits.rateLimitRpm ?? null,
      });
    } catch (e) {
      // randomBytes collision is mathematically negligible; any throw
      // here is more likely a transient SQLite locking issue.
      logger.warn(`[admin-gateway] create failed: ${e instanceof Error ? e.message : String(e)}`);
      res.status(500).json({ error: 'failed to create key' });
      return;
    }
    res.status(201).json(toDto(created, generated.raw));
  });

  // PATCH /:id — update policy fields (budget, rate limit, allowedModels).
  // Bearer / team / source / created_by are immutable here. Refuses to
  // touch config-import rows because those are managed via config.yaml
  // (consistent with the DELETE rule).
  router.patch('/:id', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    // Parse + validate the body OUTSIDE the transaction so we don't pay
    // the SQLite serialization cost on bad input.
    const body = req.body as {
      tokensBudget?: unknown;
      rateLimitRpm?: unknown;
      allowedModels?: unknown;
    } | undefined;
    const limits = parseLimitsPatch(body);
    if (!limits.ok) {
      res.status(400).json({ error: limits.error });
      return;
    }
    const patch: {
      tokensBudget?: number | null;
      rateLimitRpm?: number | null;
      allowedModels?: string[] | null;
    } = {};
    // parseLimitsPatch only sets a key when the body carried it, so key
    // presence (not value) decides whether the field is part of the patch.
    // A present key holds a number or an explicit null ("unlimited").
    if (Object.prototype.hasOwnProperty.call(limits, 'tokensBudget')) patch.tokensBudget = limits.tokensBudget ?? null;
    if (Object.prototype.hasOwnProperty.call(limits, 'rateLimitRpm')) patch.rateLimitRpm = limits.rateLimitRpm ?? null;
    if (body && Object.prototype.hasOwnProperty.call(body, 'allowedModels')) {
      if (body.allowedModels === null) {
        patch.allowedModels = null;
      } else {
        const parsed = parseAllowedModels(body.allowedModels);
        if (!parsed.ok) {
          res.status(400).json({ error: parsed.error });
          return;
        }
        patch.allowedModels = parsed.value;
      }
    }
    if (Object.keys(patch).length === 0) {
      res.status(400).json({ error: 'patch body must include at least one of tokensBudget, rateLimitRpm, allowedModels' });
      return;
    }
    // Phase 3a follow-up: close the TOCTOU race between the "is the row
    // revoked / config-import?" check and the UPDATE statement. Pre-fix
    // the read + update lived outside any transaction; a concurrent
    // revoke landing between the two would let PATCH overwrite a revoked
    // row (silent ghost mutation in the audit log). Wrap both in a
    // single better-sqlite3 transaction so the read and the conditional
    // update are atomic, and signal the disallowed conditions back to
    // the caller via sentinel errors.
    //
    // Sentinel error pattern (vs. structured return value): we use the
    // error channel to abort the transaction — caller-side `instanceof`
    // would be cleaner but string sentinels keep this contained to a
    // single handler.
    let updated: GatewayVirtualKey;
    try {
      updated = repo.getDb().transaction(() => {
        const fresh = repo.findGatewayVirtualKeyById(id);
        if (!fresh) throw new Error('PATCH_NOT_FOUND');
        // Mirrors the rotate handler which also returns 409 for revoked.
        // Pre-fix PATCH would silently update budget / rate / allowedModels
        // on a row that can no longer authenticate — the new values
        // would never apply to a real request and would mask audit
        // history.
        if (fresh.revokedAt !== null) {
          const err = new Error('PATCH_REVOKED');
          (err as Error & { revokedAt?: string }).revokedAt = fresh.revokedAt;
          throw err;
        }
        if (fresh.source === 'config-import') throw new Error('PATCH_CONFIG_IMPORT');
        return repo.updateGatewayVirtualKey(id, patch);
      })();
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      if (msg === 'PATCH_NOT_FOUND') {
        res.status(404).json({ error: 'key not found' });
        return;
      }
      if (msg === 'PATCH_REVOKED') {
        const revokedAt = (e as Error & { revokedAt?: string }).revokedAt ?? null;
        res.status(409).json({ error: 'cannot modify a revoked key', revokedAt });
        return;
      }
      if (msg === 'PATCH_CONFIG_IMPORT') {
        res.status(400).json({
          error:
            "cannot PATCH a config-import key (manage tokens_budget / rate_limit_rpm / allowed_models via config.yaml's gateway.virtual_keys instead)",
        });
        return;
      }
      logger.warn(`[admin-gateway] patch failed for id=${id}: ${msg}`);
      res.status(500).json({ error: 'patch failed' });
      return;
    }
    // F4: drop the stale cache entry so the next auth/budget/rate
    // middleware reads the fresh row from DB. The cache will repopulate
    // on the next lookup (and stay coherent for 5s after that).
    invalidate(id);
    res.json(toDto(updated));
  });

  // GET /:id/usage — current month usage + budget headroom + last 12
  // months of history. Single endpoint so the UI can render a key's
  // detail panel in one round-trip.
  router.get('/:id/usage', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    const period = currentPeriodYearMonth();
    const current = repo.getGatewayKeyUsage(id, period);
    const tokensIn = current?.tokensIn ?? 0;
    const tokensOut = current?.tokensOut ?? 0;
    const tokensTotal = tokensIn + tokensOut;
    // Headroom is clamped at 0 so an over-budget key never reports a
    // negative remainder; null budget (unlimited) yields null headroom.
    const remaining = row.tokensBudget !== null ? Math.max(0, row.tokensBudget - tokensTotal) : null;
    // History excludes the current period (UI shows it separately).
    // Fetch 13 rows so 12 remain after the current period is filtered out.
    const allHistory = repo.listGatewayKeyUsagesByKey(id, { limit: 13 });
    const history = allHistory
      .filter(u => u.periodStart !== period)
      .slice(0, 12)
      .map(u => ({
        period: u.periodStart,
        tokensIn: u.tokensIn,
        tokensOut: u.tokensOut,
        requests: u.requests,
      }));
    // Phase 3a F9: the previous `rateRecentRequests: null` field was
    // dead — the admin process and the gateway process are normally
    // separate, so the live RateLimiter handle was unreachable, and
    // the UI never displayed the value. Drop the field to keep the
    // wire schema lean. Phase 3b/3c can re-introduce it once gateway
    // IPC is in place.
    res.json({
      keyId: id,
      currentPeriod: period,
      tokensIn,
      tokensOut,
      tokensTotal,
      tokensBudget: row.tokensBudget,
      remaining,
      requestsThisMonth: current?.requests ?? 0,
      rateLimitRpm: row.rateLimitRpm,
      history,
    });
  });

  // GET / — list. Supports ?team= and ?activeOnly=true.
  router.get('/', requireAdmin, (req, res) => {
    const team = typeof req.query['team'] === 'string' ? req.query['team'] : undefined;
    const activeOnly = req.query['activeOnly'] === 'true';
    const rows = repo.listGatewayVirtualKeys({ team, activeOnly });
    res.json({ keys: rows.map(r => toDto(r)) });
  });

  // GET /:id — single. Visible even when revoked so audit views work.
  router.get('/:id', requireAdmin, (req, res) => {
    const row = repo.findGatewayVirtualKeyById(req.params['id']!);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    res.json(toDto(row));
  });

  // POST /:id/revoke — soft delete. Re-revoking is reported as a 409
  // (rather than a silent 200) so callers can distinguish "already
  // revoked" from "didn't exist" (404).
  router.post('/:id/revoke', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (row.revokedAt !== null) {
      res.status(409).json({ error: 'key already revoked', revokedAt: row.revokedAt });
      return;
    }
    const ok = repo.revokeGatewayVirtualKey(id, actor(req));
    if (!ok) {
      // Lost a race with another revoke; refetch and return 409 for consistency.
      const refreshed = repo.findGatewayVirtualKeyById(id);
      // Invalidate even on the lost-race path: the cache might still
      // hold the pre-revoke row from a hot lookup just before the race.
      invalidate(id);
      res.status(409).json({ error: 'key already revoked', revokedAt: refreshed?.revokedAt ?? null });
      return;
    }
    // F4: a revoked key MUST NOT keep authenticating from the cache.
    // The dbLookup wrapper additionally rejects cached rows with
    // revokedAt !== null as defense-in-depth.
    invalidate(id);
    // Phase 3b post-review: also drop the per-key budgetUsedRatio
    // gauge label so the prom-client registry doesn't grow unbounded
    // over the key lifecycle.
    dropKeyMetricLabels(row);
    // Re-read for the stored revokedAt. The revoke itself already
    // succeeded, so tolerate the row having vanished in the meantime
    // (e.g. a hard delete from another writer) instead of throwing.
    const refreshed = repo.findGatewayVirtualKeyById(id);
    res.json({ ok: true, revokedAt: refreshed?.revokedAt ?? null });
  });

  // POST /:id/rotate — atomic: issue a new key (inherits team,
  // allowedModels, tokensBudget and rateLimitRpm), then revoke the old.
  // Performed under a better-sqlite3 transaction so a crash mid-flight
  // can't leave both active.
  router.post('/:id/rotate', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const old = repo.findGatewayVirtualKeyById(id);
    if (!old) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (old.revokedAt !== null) {
      res.status(409).json({ error: 'cannot rotate a revoked key' });
      return;
    }
    const generated = generateVirtualKey();
    const by = actor(req);
    let created: GatewayVirtualKey;
    try {
      const tx = repo.getDb().transaction(() => {
        const c = repo.createGatewayVirtualKey({
          keyHash: generated.hash,
          keyPrefix: generated.prefix,
          team: old.team,
          allowedModels: old.allowedModels,
          source: 'admin',
          createdBy: by,
          // Carry the policy over: without these the replacement key
          // would be created unlimited, silently dropping the budget and
          // rate cap the operator configured on the key being rotated.
          tokensBudget: old.tokensBudget,
          rateLimitRpm: old.rateLimitRpm,
        });
        // revoke reports false when it did not revoke anything (the
        // revoke handler treats that as a lost race). Throw so the
        // transaction rolls back and no replacement key is left behind
        // for a key that was already revoked or removed.
        if (!repo.revokeGatewayVirtualKey(old.id, by)) {
          throw new Error('ROTATE_LOST_RACE');
        }
        return c;
      });
      created = tx();
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      if (msg === 'ROTATE_LOST_RACE') {
        // The cache may still hold the pre-race row; drop it.
        invalidate(old.id);
        const current = repo.findGatewayVirtualKeyById(old.id);
        if (!current) {
          res.status(404).json({ error: 'key not found' });
          return;
        }
        res.status(409).json({ error: 'cannot rotate a revoked key' });
        return;
      }
      logger.warn(`[admin-gateway] rotate failed: ${msg}`);
      res.status(500).json({ error: 'rotate failed' });
      return;
    }
    // F4: drop the OLD key from the cache so the prior bearer can't
    // re-auth. The newly-created row will be cache-warmed on its first
    // hit; no need to pre-populate.
    invalidate(old.id);
    // Phase 3b post-review: the old key prefix is gone — drop its
    // gauge label too. The new key will create its own label on first
    // usage write.
    dropKeyMetricLabels(old);
    res.status(201).json(toDto(created, generated.raw));
  });

  // DELETE /:id — hard delete. config-import rows are protected: an
  // operator should remove the entry from config.yaml instead so it
  // doesn't get re-imported on the next boot. The Repository enforces
  // the same rule (defense-in-depth) by throwing; we translate that to
  // a 400 with a human-readable message instead of leaking a 500.
  router.delete('/:id', requireAdmin, (req, res) => {
    const id = req.params['id']!;
    const row = repo.findGatewayVirtualKeyById(id);
    if (!row) {
      res.status(404).json({ error: 'key not found' });
      return;
    }
    if (row.source === 'config-import') {
      res.status(400).json({
        error: "cannot delete a config-import key (remove the entry from config.yaml's gateway.virtual_keys, then restart, or POST /revoke instead)",
      });
      return;
    }
    try {
      repo.deleteGatewayVirtualKey(id);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      // Repository's defense-in-depth guard catches the case where the
      // row.source changes between our pre-check and the delete (race
      // with another writer flipping source via some future code path).
      if (/config-import/i.test(msg)) {
        res.status(400).json({ error: msg });
        return;
      }
      logger.warn(`[admin-gateway] delete failed: ${msg}`);
      res.status(500).json({ error: 'delete failed' });
      return;
    }
    // F4: hard delete must also wipe the cache — the bearer should
    // fail-auth on the next request, not after the TTL.
    invalidate(id);
    // Phase 3b post-review: drop metric label too. Same rationale as
    // revoke/rotate — keep registry bounded.
    dropKeyMetricLabels(row);
    res.status(204).end();
  });

  return router;
}