import { useQuery } from '@tanstack/react-query';
import { useMemo } from 'react';
import { HelpText } from './HelpText';
import { FieldLabel, FieldInput } from './formUtils';
import type { SectionFormProps } from './types';
import { getGatewayServerStatus, type GatewayServerStatus } from '../../api';
import { GatewayKeysSection } from './GatewayKeysSection';

/**
 * Settings → LLM → Gateway Server.
 *
 * Sections (top → bottom):
 *   - Enable toggle + live status badge
 *   - Listen port
 *   - Backends list (config-driven, draft/Save&Apply)
 *   - Virtual Keys (key management — admin REST API, applied immediately)
 *   - Advanced timeouts (request / upstream / shutdown)
 *
 * Step 8 of the 2026-05-21 settings restructure folded the standalone
 * Gateway Keys sidebar entry into this form as the "Virtual Keys"
 * section, so key issuance / rotation / revocation lives next to the
 * Gateway it configures. The keys section uses its own admin REST API
 * and therefore bypasses the surrounding Save & Apply bar — that's why
 * it's allowed to share this form even though it doesn't touch
 * `config.gateway.*`.
 *
 * Status badge polls /api/admin/gateway/status every 3s so an enable
 * flip is reflected near-instantly without a page reload.
 *
 * Field names are camelCase to match the in-memory AppConfig shape
 * (src/config.ts:transformKeys converts YAML snake_case → camelCase on
 * load, and toSnakeKeys reverses on save). The displayed labels keep the
 * YAML names (max_slots, api_key, ...) so operators can map back to
 * config.yaml.example without translation.
 */

/**
 * One routing target as edited in the Backends list. Every field is
 * optional because a row may be partially filled in while the operator is
 * still typing; `validateBackends` reports which fields are missing.
 */
interface GatewayBackend {
  /** Row identifier; validation rejects blank and repeated values. */
  id?: string;
  /** Backend URL; validation requires a parseable http(s) URL. */
  endpoint?: string;
  /** Model name; validation requires a non-blank value. */
  model?: string;
  /** Shown as `max_slots`; validation requires a positive integer. */
  maxSlots?: number;
  /** Shown as `api_key`; cleared to `undefined` when the field is emptied. */
  apiKey?: string;
}

/**
 * The slice of the app config (`config.gateway`) that this form reads.
 * All fields are optional so that a missing gateway section can be treated
 * as an empty object.
 */
interface GatewayConfigShape {
  enabled?: boolean;
  listenPort?: number;
  requestTimeoutSec?: number;
  upstreamTimeoutSec?: number;
  shutdownGracefulSec?: number;
  backends?: GatewayBackend[];
  /** Declared for shape completeness; not read by the code in this file's form logic. */
  virtualKeys?: unknown[];
}

/**
 * Render value for a numeric form input (the element name was lost from
 * the original comment — presumably `<input type="number">`; confirm).
 * Returns `n` when it is a finite number, otherwise `fallback`. Note that
 * only finiteness is checked here, not integrality.
 *
 * Without this, `value={NaN ?? 1}` resolves to `NaN` (nullish-coalesce only
 * traps null/undefined), and React renders the literal string "NaN"
 * into the input — see https://gitea.example.com/.../issues for the
 * Phase 3c regression that motivated this helper.
 *
 * @param n - Candidate value of any type, typically read from config.
 * @param fallback - Value to render when `n` is not a finite number; the
 *   empty string renders an empty field.
 * @returns `n` itself or `fallback`.
 */
function numberValue(n: unknown, fallback: number | ''): number | '' {
  return typeof n === 'number' && Number.isFinite(n) ? n : fallback;
}

/**
 * Parse the string emitted by a number input into either a
 * finite number, or `undefined` for empty / unparseable input. Storing
 * `undefined` (rather than NaN) keeps the next render's value clean.
 *
 * NOTE(review): only the exact empty string short-circuits. `Number()`
 * converts whitespace-only strings to 0 and also accepts forms such as
 * "0x10" and "1e3" — confirm the input component never emits those.
 *
 * @param v - Raw string value from the input's change handler.
 * @returns The parsed finite number, or `undefined`.
 */
function parseNumberInput(v: string): number | undefined {
  if (v === '') return undefined;
  const n = Number(v);
  // Infinity and NaN are both rejected so they can never reach the config.
  return Number.isFinite(n) ? n : undefined;
}

/**
 * Status indicator for the gateway server, chosen from `status.state`.
 *
 * NOTE(review): the markup of every return expression is missing in this
 * view of the file (only the text content survives), so the styling of
 * each variant cannot be described here.
 *
 * Branch order:
 *   - no status object yet
 *   - 'unavailable'
 *   - 'running' — also shows the /v1 mount point and `status.sharedPort`
 *   - 'misconfigured' — shows the error count with singular/plural wording
 *   - 'starting' / 'stopping' — shows the state name followed by an ellipsis
 *   - anything else — shown as "disabled"
 *
 * @param status - Latest gateway status, or `undefined` when none is available.
 */
function StatusBadge({ status }: { status: GatewayServerStatus | undefined }) {
  // Nothing fetched yet.
  if (!status) { return ; }
  if (status.state === 'unavailable') { return ( unavailable ); }
  if (status.state === 'running') { return ( running (mounted at /v1, port {status.sharedPort}) ); }
  // Pluralise "error" unless there is exactly one.
  if (status.state === 'misconfigured') { return ( misconfigured ({status.errors.length} error{status.errors.length === 1 ? '' : 's'}) ); }
  // Transitional states share one presentation.
  if (status.state === 'starting' || status.state === 'stopping') { return ( {status.state}… ); }
  // Fallback for every remaining state.
  return ( disabled );
}

/**
 * Validate backend rows in-form so the operator sees red-bordered fields
 * before they hit Save. Returns a per-row error map keyed by row index.
*/
// Rows without problems have no entry in the returned map, so callers
// should treat a missing key as "no errors".
// NOTE(review): the return type and the `Map` / `Set` constructors carry no
// type arguments in this view of the file; they were probably lost together
// with the markup — confirm against the repository copy.
function validateBackends(backends: GatewayBackend[]): Map {
  const errors = new Map();
  // Ids encountered in earlier rows, used for duplicate detection.
  const seenIds = new Set();
  backends.forEach((b, i) => {
    const rowErrs: string[] = [];
    // id: must be non-blank and must not repeat an earlier row's id.
    if (!b.id || b.id.trim() === '') rowErrs.push('id required');
    else if (seenIds.has(b.id)) rowErrs.push('duplicate id');
    // NOTE(review): the raw, untrimmed id is recorded, so "a" and "a " are
    // treated as different ids, and whitespace-only ids are recorded too.
    if (b.id) seenIds.add(b.id);
    // endpoint: must be non-blank and parse as an http:// or https:// URL.
    if (!b.endpoint || b.endpoint.trim() === '') rowErrs.push('endpoint required');
    else {
      try {
        const u = new URL(b.endpoint);
        if (u.protocol !== 'http:' && u.protocol !== 'https:') {
          rowErrs.push('endpoint must be http(s)');
        }
      } catch {
        // The URL constructor throws on input it cannot parse.
        rowErrs.push('endpoint invalid URL');
      }
    }
    // model: must be non-blank.
    if (!b.model || b.model.trim() === '') rowErrs.push('model required');
    // maxSlots: must be a finite, strictly positive integer.
    if (
      typeof b.maxSlots !== 'number' ||
      !Number.isFinite(b.maxSlots) ||
      b.maxSlots <= 0 ||
      !Number.isInteger(b.maxSlots)
    ) {
      rowErrs.push('max_slots must be positive integer');
    }
    if (rowErrs.length > 0) errors.set(i, rowErrs);
  });
  return errors;
}

/**
 * Settings form for the gateway server section.
 *
 * Reads the `gateway` slice of `config` and reports every edit through
 * `onChange(path, value)` using dotted paths such as 'gateway.enabled' or
 * 'gateway.backends'. Backend edits always pass a new array instead of
 * mutating the existing one.
 *
 * NOTE(review): the markup of the returned element is missing in this view
 * of the file; only text content and embedded expressions survive, so the
 * wiring of `setEnabled`, `addBackend` and `removeBackend` to controls is
 * not visible here.
 *
 * @param config - Application config; only `config.gateway` is read.
 * @param onChange - Callback invoked with a dotted config path and the new value.
 */
export function GatewayServerForm({ config, onChange }: SectionFormProps) {
  // A missing gateway section is treated as an empty object.
  const gw: GatewayConfigShape = config.gateway ?? {};
  // NOTE(review): when `gw.backends` is not an array this creates a fresh `[]`
  // on every render, which changes the useMemo dependency below each time.
  const backends: GatewayBackend[] = Array.isArray(gw.backends) ? gw.backends : [];
  // Poll the status endpoint every 3000 ms; results count as fresh for 1000 ms.
  const statusQuery = useQuery({ queryKey: ['gateway-server-status'], queryFn: getGatewayServerStatus, refetchInterval: 3000, staleTime: 1000, });
  // Per-row validation errors, recomputed when the backends array identity changes.
  const backendErrors = useMemo(() => validateBackends(backends), [backends]);
  // Scalar setters: each forwards one value to its dotted config path.
  const setEnabled = (v: boolean) => onChange('gateway.enabled', v);
  const setListenPort = (v: number | undefined) => onChange('gateway.listenPort', v);
  const setRequestTimeout = (v: number | undefined) => onChange('gateway.requestTimeoutSec', v);
  const setUpstreamTimeout = (v: number | undefined) => onChange('gateway.upstreamTimeoutSec', v);
  const setShutdownGraceful = (v: number | undefined) => onChange('gateway.shutdownGracefulSec', v);
  // Replace one field of row `i`, leaving all other rows untouched.
  const updateBackend = (i: number, field: keyof GatewayBackend, value: unknown) => {
    const next = backends.map((b, idx) => (idx === i ? { ...b, [field]: value } : b));
    onChange('gateway.backends', next);
  };
  // Append a row with a generated id, empty endpoint/model and one slot.
  // NOTE(review): the id is derived from the current row count, so it can
  // repeat an existing id after a removal; validateBackends would then flag
  // the new row as 'duplicate id'.
  const addBackend = () => {
    const next: GatewayBackend = { id: `backend-${backends.length + 1}`, endpoint: '', model: '', maxSlots: 1, };
    onChange('gateway.backends', [...backends, next]);
  };
  // Drop the row at index `i`.
  const removeBackend = (i: number) => {
    onChange('gateway.backends', backends.filter((_, idx) => idx !== i));
  };
  return (

Gateway Server

AAO 自身を LLM Gateway として動かす。有効にすると /v1/chat/completions などのエンドポイントが、worker UI と 同じポートで待ち受けます (別 process 起動は不要)。他 AAO の provider.workers[].endpoint にこの URL を指定して GPU プールを共有できます。

{statusQuery.data?.errors && statusQuery.data.errors.length > 0 && (
    {statusQuery.data.errors.map((e, i) => (
  • • {e}
  • ))}
)}
Listen port
setListenPort(parseNumberInput(v))} /> 同 process 時はこの値は使われません: worker UI と同じポート ( {statusQuery.data?.sharedPort ?? '9876'}) を共有します。AAO_MODE=gateway で別 process 起動した場合のみ有効。
別 process deploy:{' '} AAO_MODE=gateway scripts/gateway.sh start

Backends

ルーティング先の llama-server / Ollama / vLLM など。Gateway は request.model に一致する model を持つ最も busy ではない backend に割り振ります。
api_key の保存形式: フォームで入力した値は config.yaml に平文で保存されます。${'${VAR}'} 形式の env var 参照はフォーム保存時に literal 文字列として保存されるため、env 経由で渡したい場合は config.yaml を直接編集してください。
{backends.length === 0 ? (
backend が未登録です。最低 1 つ追加してください。
) : (
{backends.map((b, i) => { const errs = backendErrors.get(i) ?? []; return (
0 ? 'border-red-200 bg-red-50/30' : 'border-slate-200'}`} >
id updateBackend(i, 'id', v)} placeholder="gpu-rtx-a" />
model updateBackend(i, 'model', v)} placeholder="qwen3:8b" />
endpoint updateBackend(i, 'endpoint', v)} placeholder="http://gpu-host:8080/v1" />
max_slots updateBackend(i, 'maxSlots', parseNumberInput(v))} placeholder="1" />
api_key (任意) updateBackend(i, 'apiKey', v || undefined)} placeholder="sk-... or ${ENV_VAR}" /> {/* G2: warn when the operator saves a literal ${VAR} reference. The config writer stores fields verbatim — env substitution happens at load time, so saving the form turns the reference into a literal "${VAR}" string and the env var indirection is lost. */} {typeof b.apiKey === 'string' && b.apiKey.trimStart().startsWith('${') && (

env var reference detected: 保存すると {b.apiKey} がそのまま config.yaml に書き込まれ、起動時の env 置換は効かなくなります。env 経由で渡すなら config.yaml を直接編集してください。

)}
{errs.length > 0 && (
    {errs.map((e, ei) =>
  • {e}
  • )}
)}
); })}
)}

Virtual Keys

この Gateway を経由してアクセスするための sk-aao-* bearer key を発行・rotate・revoke します。
注意: ここでの操作は Gateway Server の Save & Apply とは独立した admin API で即時反映されます (Save ボタンを押す必要はありません)。
Advanced
request_timeout_sec setRequestTimeout(parseNumberInput(v))} /> chat 全体の budget (streaming 含む)
upstream_timeout_sec setUpstreamTimeout(parseNumberInput(v))} /> 1 chunk あたりの idle 上限
shutdown_graceful_sec setShutdownGraceful(parseNumberInput(v))} /> SIGTERM 後の drain 上限

Hot reload: ここでの変更は Save 直後に同 process gateway に反映されます (backend / virtual_key 変更は bounce が発生し、in-flight ストリームは graceful drain されます)。

); }