diff --git a/src/gateway/auth.allowed-models.test.ts b/src/gateway/auth.allowed-models.test.ts
new file mode 100644
index 0000000..8225d0c
--- /dev/null
+++ b/src/gateway/auth.allowed-models.test.ts
@@ -0,0 +1,93 @@
+/**
+ * `isModelAllowed` — virtual-key model allowlist, made role-aware for
+ * role-based routing (2026-06-09).
+ *
+ * The allowlist middleware runs BEFORE routing on the raw `request.model`.
+ * With role-based routing a worker sends a tier (auto/fast/quality/...) as
+ * `request.model`, so the check must understand roles or it 403s every
+ * role request from a key that has a model-name allowlist.
+ *
+ * Soundness rule for a role request: the gateway will route to SOME backend
+ * serving that role (least-busy, chosen post-check). Allow only when EVERY
+ * backend serving the role is within the allowlist (by id or model), so
+ * whichever the router picks is permitted.
+ */
+import { describe, it, expect } from 'vitest';
+import { isModelAllowed } from './auth.js';
+import type { GatewayBackendConfig } from './config.js';
+
+function bk(id: string, model: string, roles?: string[]): GatewayBackendConfig {
+  return { id, endpoint: `http://${id}/v1`, model, maxSlots: 4, ...(roles ? { roles } : {}) };
+}
+
+describe('isModelAllowed (no allowlist / literal-model behaviour)', () => {
+  it('allows anything when allowlist is undefined', () => {
+    expect(isModelAllowed('qwen3:32b', undefined)).toBe(true);
+    expect(isModelAllowed('quality', undefined)).toBe(true);
+  });
+
+  it('denies an empty-string / missing requested model when an allowlist exists', () => {
+    expect(isModelAllowed(undefined, ['qwen3:32b'])).toBe(false);
+    expect(isModelAllowed('', ['qwen3:32b'])).toBe(false);
+  });
+
+  it('allows a literal model that is in the allowlist', () => {
+    expect(isModelAllowed('qwen3:32b', ['qwen3:32b'])).toBe(true);
+  });
+
+  it('denies a literal model not in the allowlist', () => {
+    expect(isModelAllowed('qwen3:70b', ['qwen3:32b'])).toBe(false);
+  });
+
+  it('empty allowlist array denies everything (intentional lockdown)', () => {
+    expect(isModelAllowed('qwen3:32b', [])).toBe(false);
+    expect(isModelAllowed('quality', [])).toBe(false);
+  });
+});
+
+describe('isModelAllowed (role-aware)', () => {
+  it('allows a role that is itself listed (role-name allowlist)', () => {
+    expect(isModelAllowed('quality', ['quality', 'fast'])).toBe(true);
+  });
+
+  it('allows a role request when EVERY backend serving the role is in the model allowlist', () => {
+    const backends = [bk('gpu-a', 'qwen3:32b', ['quality'])];
+    // Legacy model-name allowlist keeps working: the only quality backend
+    // serves an allowed model.
+    expect(isModelAllowed('quality', ['qwen3:32b'], backends)).toBe(true);
+  });
+
+  it('denies a role request when SOME backend serving the role is not allowed (sound)', () => {
+    const backends = [
+      bk('gpu-a', 'qwen3:32b', ['quality']),
+      bk('gpu-b', 'qwen3:70b', ['quality']), // not in allowlist — router could pick it
+    ];
+    expect(isModelAllowed('quality', ['qwen3:32b'], backends)).toBe(false);
+  });
+
+  it('matches a serving backend by id as well as model', () => {
+    const backends = [bk('gpu-a', 'qwen3:32b', ['fast'])];
+    expect(isModelAllowed('fast', ['gpu-a'], backends)).toBe(true);
+  });
+
+  it('denies a role with no serving backend that is not literally allowlisted', () => {
+    const backends = [bk('gpu-a', 'qwen3:32b', ['quality'])];
+    expect(isModelAllowed('reflection', ['qwen3:32b'], backends)).toBe(false);
+  });
+
+  it('treats an unspecified-roles backend as serving the role (must be allowlisted too)', () => {
+    const backends = [bk('generalist', 'qwen3:8b')]; // serves all roles
+    expect(isModelAllowed('quality', ['qwen3:8b'], backends)).toBe(true);
+    expect(isModelAllowed('quality', ['other'], backends)).toBe(false);
+  });
+
+  it('denies a literal backend model or id that is not allowlisted, even if a generalist backend is', () => {
+    const backends = [
+      bk('generalist', 'qwen3:8b'), // serves all roles
+      bk('gpu-b', 'qwen3:70b', ['quality']),
+    ];
+    // Literal names must not be admitted through the role check.
+    expect(isModelAllowed('qwen3:70b', ['qwen3:8b'], backends)).toBe(false);
+    expect(isModelAllowed('gpu-b', ['qwen3:8b'], backends)).toBe(false);
+  });
+});
diff --git a/src/gateway/auth.ts b/src/gateway/auth.ts
index 89f0f5c..53093cf 100644
--- a/src/gateway/auth.ts
+++ b/src/gateway/auth.ts
@@ -21,7 +21,7 @@
  */
 import { createHash, timingSafeEqual } from 'crypto';
 import type { Request, Response, NextFunction, RequestHandler } from 'express';
-import type { GatewayVirtualKey } from './config.js';
+import { backendServesRole, type GatewayVirtualKey, type GatewayBackendConfig } from './config.js';
 import { hashKey } from './key-format.js';
 import { logger } from '../logger.js';
 import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
@@ -132,10 +132,33 @@ export function extractBearer(headerVal: string | undefined): string | null {
 export function isModelAllowed(
   requestedModel: string | undefined,
   allowedModels: string[] | undefined,
+  backends?: readonly GatewayBackendConfig[],
 ): boolean {
   if (allowedModels === undefined) return true;
   if (typeof requestedModel !== 'string' || requestedModel.length === 0) return false;
-  return allowedModels.includes(requestedModel);
+  // Direct hit: a literal model name OR a role-name allowlist entry.
+  if (allowedModels.includes(requestedModel)) return true;
+  // Without a backend list there is nothing to resolve a role against.
+  if (!backends || backends.length === 0) return false;
+  // A name that matches a configured backend (by id or model) is a literal
+  // request, not a tier. It already missed the allowlist above, so deny it
+  // rather than let it reach the role check below: a backend with no `roles`
+  // is treated as serving every role, which presumably (confirm against
+  // backendServesRole in ./config.js) includes arbitrary names, so a
+  // literal model outside the allowlist could otherwise be admitted.
+  if (backends.some(b => b.id === requestedModel || b.model === requestedModel)) return false;
+  // Role-based routing (2026-06-09): `requestedModel` may be a tier
+  // (auto/fast/quality/reflection) rather than a model name. The gateway
+  // routes it to SOME backend serving that role, chosen by least-busy AFTER
+  // this check — so we can't know which model the request lands on. Allow
+  // only when EVERY backend serving the role is within the allowlist (by id
+  // or model); then whichever the router picks is permitted. Conservative
+  // but sound, and keeps a legacy model-name allowlist working whenever the
+  // tier's backends are uniformly allowed.
+  const serving = backends.filter(b => backendServesRole(b, requestedModel));
+  return (
+    serving.length > 0 &&
+    serving.every(b => allowedModels.includes(b.id) || allowedModels.includes(b.model))
+  );
 }
 
 interface AuthMiddlewareOptions {
@@ -376,7 +399,11 @@ export function buildAuthMiddleware(opts: AuthMiddlewareOptions): RequestHandler
  * The 403 message intentionally surfaces the requested model — that's
  * the same model the caller sent, so it's not new information to them.
  */
-export function buildAllowedModelsMiddleware(): RequestHandler {
+export function buildAllowedModelsMiddleware(opts?: {
+  /** Live backend list, so the allowlist can resolve a role request to the
+   * models it could reach (role-based routing). Omit for legacy/unit use. */
+  getBackends?: () => GatewayBackendConfig[];
+}): RequestHandler {
   return function allowedModelsCheck(req: Request, res: Response, next: NextFunction): void {
     const auth = (req as AuthenticatedRequest).gatewayAuth;
     if (!auth) {
@@ -388,7 +415,8 @@ export function buildAllowedModelsMiddleware(): RequestHandler {
     }
     const body = req.body as { model?: unknown } | undefined;
     const requested = typeof body?.model === 'string' ? body.model : undefined;
-    if (!isModelAllowed(requested, auth.allowedModels)) {
+    const backends = opts?.getBackends?.();
+    if (!isModelAllowed(requested, auth.allowedModels, backends)) {
       res.status(403).json({ error: `model '${requested ?? ''}' not allowed for this key` });
       return;
     }
diff --git a/src/gateway/server.ts b/src/gateway/server.ts
index fec77ad..8a079d4 100644
--- a/src/gateway/server.ts
+++ b/src/gateway/server.ts
@@ -121,7 +121,7 @@ export function createGatewayApp(deps: GatewayServerDeps): GatewayServer {
     metrics: deps.metrics,
   });
   const jsonMw = express.json({ limit: `${CHAT_COMPLETIONS_BODY_LIMIT_MB}mb` });
-  const modelsMw = buildAllowedModelsMiddleware();
+  const modelsMw = buildAllowedModelsMiddleware({ getBackends: () => config.backends });
   const chatHandler = buildChatCompletionsHandler({
     router,