sync: update from private repo (5428cd5)
Some checks failed
CI / build-and-test (push) Has been cancelled
Some checks failed
CI / build-and-test (push) Has been cancelled
This commit is contained in:
parent
8ac98d2630
commit
44df3a7da1
83
src/gateway/auth.allowed-models.test.ts
Normal file
83
src/gateway/auth.allowed-models.test.ts
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
/**
 * `isModelAllowed` — virtual-key model allowlist, made role-aware for
 * role-based routing (2026-06-09).
 *
 * The allowlist middleware runs BEFORE routing on the raw `request.model`.
 * With role-based routing a worker sends a tier (auto/fast/quality/...) as
 * `request.model`, so the check must understand roles or it 403s every
 * role request from a key that has a model-name allowlist.
 *
 * Soundness rule for a role request: the gateway will route to SOME backend
 * serving that role (least-busy, chosen post-check). Allow only when EVERY
 * backend serving the role is within the allowlist (by id or model), so
 * whichever the router picks is permitted.
 */
|
||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { isModelAllowed } from './auth.js';
|
||||||
|
import type { GatewayBackendConfig } from './config.js';
|
||||||
|
|
||||||
|
/**
 * Test helper: build a minimal backend config for the allowlist tests.
 *
 * @param id - Backend id; also used as the host in the synthetic endpoint URL.
 * @param model - Model name the backend is configured with.
 * @param roles - Optional roles. When omitted (or otherwise falsy) the
 *   returned object carries no `roles` key at all, rather than
 *   `roles: undefined`.
 * @returns A backend config with `maxSlots` fixed at 4.
 */
function bk(id: string, model: string, roles?: string[]): GatewayBackendConfig {
  return { id, endpoint: `http://${id}/v1`, model, maxSlots: 4, ...(roles ? { roles } : {}) };
}
|
||||||
|
|
||||||
|
// Baseline behaviour with no `backends` argument: the check is a plain
// membership test of the requested model against the key's allowlist.
describe('isModelAllowed (no allowlist / literal-model behaviour)', () => {
  it('allows anything when allowlist is undefined', () => {
    expect(isModelAllowed('qwen3:32b', undefined)).toBe(true);
    expect(isModelAllowed('quality', undefined)).toBe(true);
  });

  it('denies an empty-string / missing requested model when an allowlist exists', () => {
    expect(isModelAllowed(undefined, ['qwen3:32b'])).toBe(false);
    expect(isModelAllowed('', ['qwen3:32b'])).toBe(false);
  });

  it('allows a literal model that is in the allowlist', () => {
    expect(isModelAllowed('qwen3:32b', ['qwen3:32b'])).toBe(true);
  });

  it('denies a literal model not in the allowlist', () => {
    expect(isModelAllowed('qwen3:70b', ['qwen3:32b'])).toBe(false);
  });

  it('empty allowlist array denies everything (intentional lockdown)', () => {
    expect(isModelAllowed('qwen3:32b', [])).toBe(false);
    expect(isModelAllowed('quality', [])).toBe(false);
  });
});
|
||||||
|
|
||||||
|
// Role-aware behaviour: the requested "model" is a routing tier, and the
// check is resolved against the backends passed as the third argument.
describe('isModelAllowed (role-aware)', () => {
  it('allows a role that is itself listed (role-name allowlist)', () => {
    expect(isModelAllowed('quality', ['quality', 'fast'])).toBe(true);
  });

  it('allows a role request when EVERY backend serving the role is in the model allowlist', () => {
    const backends = [bk('gpu-a', 'qwen3:32b', ['quality'])];
    // Legacy model-name allowlist keeps working: the only quality backend
    // serves an allowed model.
    expect(isModelAllowed('quality', ['qwen3:32b'], backends)).toBe(true);
  });

  it('denies a role request when SOME backend serving the role is not allowed (sound)', () => {
    const backends = [
      bk('gpu-a', 'qwen3:32b', ['quality']),
      bk('gpu-b', 'qwen3:70b', ['quality']), // not in allowlist — router could pick it
    ];
    expect(isModelAllowed('quality', ['qwen3:32b'], backends)).toBe(false);
  });

  it('matches a serving backend by id as well as model', () => {
    const backends = [bk('gpu-a', 'qwen3:32b', ['fast'])];
    expect(isModelAllowed('fast', ['gpu-a'], backends)).toBe(true);
  });

  it('denies a role with no serving backend that is not literally allowlisted', () => {
    const backends = [bk('gpu-a', 'qwen3:32b', ['quality'])];
    expect(isModelAllowed('reflection', ['qwen3:32b'], backends)).toBe(false);
  });

  it('treats an unspecified-roles backend as serving the role (must be allowlisted too)', () => {
    const backends = [bk('generalist', 'qwen3:8b')]; // serves all roles
    expect(isModelAllowed('quality', ['qwen3:8b'], backends)).toBe(true);
    expect(isModelAllowed('quality', ['other'], backends)).toBe(false);
  });
});
|
||||||
@ -21,7 +21,7 @@
|
|||||||
*/
|
*/
|
||||||
import { createHash, timingSafeEqual } from 'crypto';
|
import { createHash, timingSafeEqual } from 'crypto';
|
||||||
import type { Request, Response, NextFunction, RequestHandler } from 'express';
|
import type { Request, Response, NextFunction, RequestHandler } from 'express';
|
||||||
import type { GatewayVirtualKey } from './config.js';
|
import { backendServesRole, type GatewayVirtualKey, type GatewayBackendConfig } from './config.js';
|
||||||
import { hashKey } from './key-format.js';
|
import { hashKey } from './key-format.js';
|
||||||
import { logger } from '../logger.js';
|
import { logger } from '../logger.js';
|
||||||
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
|
import type { GatewayMetrics } from '../metrics/gateway-metrics.js';
|
||||||
@ -132,10 +132,30 @@ export function extractBearer(headerVal: string | undefined): string | null {
|
|||||||
/**
 * Decide whether a virtual key's allowlist permits the requested model.
 *
 * @param requestedModel - Raw `model` value from the request. With role-based
 *   routing this may be a tier (auto/fast/quality/reflection) rather than a
 *   model name.
 * @param allowedModels - The key's allowlist. `undefined` means "no
 *   restriction"; an empty array denies everything.
 * @param backends - Optional backend list used to resolve a role request to
 *   the backends that could serve it. When omitted or empty, only the direct
 *   allowlist match applies.
 * @returns `true` when the request is permitted, `false` otherwise.
 */
export function isModelAllowed(
  requestedModel: string | undefined,
  allowedModels: string[] | undefined,
  backends?: GatewayBackendConfig[],
): boolean {
  if (allowedModels === undefined) return true;
  if (typeof requestedModel !== 'string' || requestedModel.length === 0) return false;

  // Direct hit: a literal model name OR a role-name allowlist entry.
  if (allowedModels.includes(requestedModel)) return true;

  // Role-based routing (2026-06-09): `requestedModel` may be a tier
  // (auto/fast/quality/reflection) rather than a model name. The gateway
  // routes it to SOME backend serving that role, chosen by least-busy AFTER
  // this check — so we can't know which model the request lands on. Allow
  // only when EVERY backend serving the role is within the allowlist (by id
  // or model); then whichever the router picks is permitted. Conservative
  // but sound, and keeps a legacy model-name allowlist working whenever the
  // tier's backends are uniformly allowed.
  if (!backends || backends.length === 0) return false;

  // Which backends count as "serving" the role is decided by backendServesRole.
  const serving = backends.filter(b => backendServesRole(b, requestedModel));

  // The explicit length check matters: `every` is vacuously true on an empty
  // array, and a role nobody serves must be denied, not allowed.
  return (
    serving.length > 0 &&
    serving.every(b => allowedModels.includes(b.id) || allowedModels.includes(b.model))
  );
}
|
||||||
|
|
||||||
interface AuthMiddlewareOptions {
|
interface AuthMiddlewareOptions {
|
||||||
@ -376,7 +396,11 @@ export function buildAuthMiddleware(opts: AuthMiddlewareOptions): RequestHandler
|
|||||||
* The 403 message intentionally surfaces the requested model — that's
|
* The 403 message intentionally surfaces the requested model — that's
|
||||||
* the same model the caller sent, so it's not new information to them.
|
* the same model the caller sent, so it's not new information to them.
|
||||||
*/
|
*/
|
||||||
export function buildAllowedModelsMiddleware(): RequestHandler {
|
export function buildAllowedModelsMiddleware(opts?: {
|
||||||
|
/** Live backend list, so the allowlist can resolve a role request to the
|
||||||
|
* models it could reach (role-based routing). Omit for legacy/unit use. */
|
||||||
|
getBackends?: () => GatewayBackendConfig[];
|
||||||
|
}): RequestHandler {
|
||||||
return function allowedModelsCheck(req: Request, res: Response, next: NextFunction): void {
|
return function allowedModelsCheck(req: Request, res: Response, next: NextFunction): void {
|
||||||
const auth = (req as AuthenticatedRequest).gatewayAuth;
|
const auth = (req as AuthenticatedRequest).gatewayAuth;
|
||||||
if (!auth) {
|
if (!auth) {
|
||||||
@ -388,7 +412,8 @@ export function buildAllowedModelsMiddleware(): RequestHandler {
|
|||||||
}
|
}
|
||||||
const body = req.body as { model?: unknown } | undefined;
|
const body = req.body as { model?: unknown } | undefined;
|
||||||
const requested = typeof body?.model === 'string' ? body.model : undefined;
|
const requested = typeof body?.model === 'string' ? body.model : undefined;
|
||||||
if (!isModelAllowed(requested, auth.allowedModels)) {
|
const backends = opts?.getBackends?.();
|
||||||
|
if (!isModelAllowed(requested, auth.allowedModels, backends)) {
|
||||||
res.status(403).json({ error: `model '${requested ?? ''}' not allowed for this key` });
|
res.status(403).json({ error: `model '${requested ?? ''}' not allowed for this key` });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -121,7 +121,7 @@ export function createGatewayApp(deps: GatewayServerDeps): GatewayServer {
|
|||||||
metrics: deps.metrics,
|
metrics: deps.metrics,
|
||||||
});
|
});
|
||||||
const jsonMw = express.json({ limit: `${CHAT_COMPLETIONS_BODY_LIMIT_MB}mb` });
|
const jsonMw = express.json({ limit: `${CHAT_COMPLETIONS_BODY_LIMIT_MB}mb` });
|
||||||
const modelsMw = buildAllowedModelsMiddleware();
|
const modelsMw = buildAllowedModelsMiddleware({ getBackends: () => config.backends });
|
||||||
|
|
||||||
const chatHandler = buildChatCompletionsHandler({
|
const chatHandler = buildChatCompletionsHandler({
|
||||||
router,
|
router,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user