382 lines
16 KiB
TypeScript
382 lines
16 KiB
TypeScript
import { useQuery } from '@tanstack/react-query';
|
|
import { useMemo } from 'react';
|
|
import { HelpText } from './HelpText';
|
|
import { FieldLabel, FieldInput } from './formUtils';
|
|
import type { SectionFormProps } from './types';
|
|
import { getGatewayServerStatus, type GatewayServerStatus } from '../../api';
|
|
import { GatewayKeysSection } from './GatewayKeysSection';
|
|
|
|
/**
 * Settings → LLM → Gateway Server.
 *
 * Sections (top → bottom):
 *   - Enable toggle + live status badge
 *   - Listen port
 *   - Backends list (config-driven, draft/Save&Apply)
 *   - Virtual Keys (key management — admin REST API, applied immediately)
 *   - Advanced timeouts (request / upstream / shutdown)
 *
 * Step 8 of the 2026-05-21 settings restructure folded the standalone
 * Gateway Keys sidebar entry into this form as the "Virtual Keys"
 * section, so key issuance / rotation / revocation lives next to the
 * Gateway it configures. The keys section uses its own admin REST API
 * and therefore bypasses the surrounding Save & Apply bar — that's why
 * it's allowed to share this form even though it doesn't touch
 * `config.gateway.*`.
 *
 * Status badge polls /api/admin/gateway/status every 3s so an enable
 * flip is reflected near-instantly without a page reload.
 *
 * Field names are camelCase to match the in-memory AppConfig shape
 * (src/config.ts:transformKeys converts YAML snake_case → camelCase on
 * load, and toSnakeKeys reverses on save). The displayed labels keep the
 * YAML names (max_slots, api_key, ...) so operators can map back to
 * config.yaml.example without translation.
 */
|
|
/**
 * One row of the "Backends" list as held in the in-memory (camelCase)
 * config. Every field is optional because a row may be partially filled
 * while the operator is still editing it; `validateBackends` reports
 * what is missing.
 */
interface GatewayBackend {
  /** Row identifier; must be non-blank and unique across rows to pass validation. */
  id?: string;
  /** Upstream base URL; validation requires an http(s) URL. */
  endpoint?: string;
  /** Model name served by this backend; must be non-blank to pass validation. */
  model?: string;
  /** Shown as `max_slots`; validation requires a positive integer. */
  maxSlots?: number;
  /** Shown as `api_key`; optional, the form stores `undefined` when left empty. */
  apiKey?: string;
}
|
|
|
|
/**
 * The slice of the app config this form reads (`config.gateway`), in its
 * in-memory camelCase form. All fields are optional: a missing value
 * makes the form display its built-in fallback for that input.
 */
interface GatewayConfigShape {
  /** Bound to the "Enable Gateway" checkbox; only a strict `true` ticks it. */
  enabled?: boolean;
  /** Bound to the "Listen port" input. */
  listenPort?: number;
  /** Bound to the `request_timeout_sec` input in the Advanced section. */
  requestTimeoutSec?: number;
  /** Bound to the `upstream_timeout_sec` input in the Advanced section. */
  upstreamTimeoutSec?: number;
  /** Bound to the `shutdown_graceful_sec` input in the Advanced section. */
  shutdownGracefulSec?: number;
  /** Rows of the Backends list. */
  backends?: GatewayBackend[];
  /** Declared for completeness; not read anywhere in this file. */
  virtualKeys?: unknown[];
}
|
|
|
|
/**
 * Pick the value to render in a `<FieldInput type="number">`.
 *
 * `value={n ?? fallback}` is not enough here: `??` only traps
 * null/undefined, so a stored `NaN` would be rendered as the literal
 * string "NaN".
 *
 * @param n - Raw config value of unknown type.
 * @param fallback - Rendered when `n` is not a finite number.
 * @returns `n` when it is a finite number (integer or not), else `fallback`.
 */
function numberValue(n: unknown, fallback: number | ''): number | '' {
  if (typeof n !== 'number') return fallback;
  if (!Number.isFinite(n)) return fallback;
  return n;
}
|
|
|
|
/**
 * Parse the string emitted by a number `<FieldInput>`.
 *
 * Storing `undefined` (rather than NaN) for bad input keeps the next
 * render's value clean.
 *
 * @param v - Raw text from the input.
 * @returns The finite number `v` represents, or `undefined` when `v` is
 *   empty, whitespace-only, or not parseable as a finite number.
 */
function parseNumberInput(v: string): number | undefined {
  // Number('   ') is 0, so blank input must be rejected before conversion
  // or it would be stored as a real 0.
  const text = v.trim();
  if (text === '') return undefined;
  const n = Number(text);
  return Number.isFinite(n) ? n : undefined;
}
|
|
|
|
/**
 * Small pill showing the gateway's live state.
 *
 * Renders a placeholder "…" while no status has been loaded, a dedicated
 * badge for the `unavailable`, `running`, `misconfigured`, `starting` and
 * `stopping` states, and a grey "disabled" badge for anything else.
 */
function StatusBadge({ status }: { status: GatewayServerStatus | undefined }) {
  if (!status) {
    return <span className="text-2xs text-slate-400">…</span>;
  }

  switch (status.state) {
    case 'unavailable':
      // The message is only exposed as a hover tooltip.
      return (
        <span title={status.message} className="text-xs px-2 py-0.5 rounded bg-slate-100 text-slate-600">
          unavailable
        </span>
      );

    case 'running':
      return (
        <span className="text-xs px-2 py-0.5 rounded bg-emerald-50 text-emerald-700 border border-emerald-200">
          running (mounted at /v1, port {status.sharedPort})
        </span>
      );

    case 'misconfigured':
      return (
        <span className="text-xs px-2 py-0.5 rounded bg-red-50 text-red-700 border border-red-200">
          misconfigured ({status.errors.length} error{status.errors.length === 1 ? '' : 's'})
        </span>
      );

    case 'starting':
    case 'stopping':
      // Transitional states share one amber badge labelled with the state name.
      return (
        <span className="text-xs px-2 py-0.5 rounded bg-amber-50 text-amber-700 border border-amber-200">
          {status.state}…
        </span>
      );

    default:
      return (
        <span className="text-xs px-2 py-0.5 rounded bg-slate-100 text-slate-600">
          disabled
        </span>
      );
  }
}
|
|
|
|
/**
 * Validate backend rows in-form so the operator sees red-bordered fields
 * before they hit Save.
 *
 * Per row, in this order: `id` (required, unique), `endpoint` (required,
 * parseable http(s) URL), `model` (required), `maxSlots` (positive
 * integer). Values come from loaded config and are not guaranteed to
 * match the declared types at runtime, so non-string text fields are
 * reported as errors instead of being dereferenced.
 *
 * @param backends - Rows to check, in display order.
 * @returns Error messages keyed by row index; rows without errors are absent.
 */
function validateBackends(backends: GatewayBackend[]): Map<number, string[]> {
  // Classify a text field without assuming it really is a string: a
  // hand-edited YAML `id: 1` arrives here as a number and `.trim()` on it
  // would throw and take the whole form down.
  const textError = (value: unknown, label: string): string | undefined => {
    if (!value) return `${label} required`;
    if (typeof value !== 'string') return `${label} must be a string`;
    return value.trim() === '' ? `${label} required` : undefined;
  };

  const errors = new Map<number, string[]>();
  const seenIds = new Set<string>();

  backends.forEach((b, i) => {
    const rowErrs: string[] = [];

    const idErr = textError(b.id, 'id');
    if (idErr !== undefined) rowErrs.push(idErr);
    else if (typeof b.id === 'string' && seenIds.has(b.id)) rowErrs.push('duplicate id');
    // Record every non-empty string id (even an invalid one) so later rows
    // are compared against it.
    if (typeof b.id === 'string' && b.id !== '') seenIds.add(b.id);

    const endpointErr = textError(b.endpoint, 'endpoint');
    if (endpointErr !== undefined) {
      rowErrs.push(endpointErr);
    } else if (typeof b.endpoint === 'string') {
      try {
        const u = new URL(b.endpoint);
        if (u.protocol !== 'http:' && u.protocol !== 'https:') {
          rowErrs.push('endpoint must be http(s)');
        }
      } catch {
        // `new URL` throws on anything it cannot parse as an absolute URL.
        rowErrs.push('endpoint invalid URL');
      }
    }

    const modelErr = textError(b.model, 'model');
    if (modelErr !== undefined) rowErrs.push(modelErr);

    if (
      typeof b.maxSlots !== 'number'
      || !Number.isFinite(b.maxSlots)
      || b.maxSlots <= 0
      || !Number.isInteger(b.maxSlots)
    ) {
      rowErrs.push('max_slots must be positive integer');
    }

    if (rowErrs.length > 0) errors.set(i, rowErrs);
  });

  return errors;
}
|
|
|
|
/**
 * Suggest an id for a newly added backend row.
 *
 * Starts at `backend-<rowCount + 1>` and counts upward until the id is
 * not used by any existing row, so adding a row after removing one from
 * the middle never produces an immediate "duplicate id" error.
 */
function nextBackendId(backends: GatewayBackend[]): string {
  const used = new Set(backends.map(b => b.id));
  let n = backends.length + 1;
  while (used.has(`backend-${n}`)) n += 1;
  return `backend-${n}`;
}

/**
 * Settings form for the built-in Gateway Server.
 *
 * Reads `config.gateway` and reports every edit through `onChange` using
 * dotted paths (`gateway.enabled`, `gateway.listenPort`,
 * `gateway.backends`, ...). Backend edits always send a fresh array; the
 * current one is never mutated. The live status badge comes from a
 * polling query, and the Virtual Keys section is rendered by
 * `GatewayKeysSection` without going through `onChange`.
 *
 * @param config - Current draft config; only `config.gateway` is read.
 * @param onChange - Called with `(path, value)` for each field edit.
 */
export function GatewayServerForm({ config, onChange }: SectionFormProps) {
  const gw: GatewayConfigShape = config.gateway ?? {};
  const backends: GatewayBackend[] = Array.isArray(gw.backends) ? gw.backends : [];

  // Refetch every 3s so an enable flip shows up without a page reload.
  const statusQuery = useQuery({
    queryKey: ['gateway-server-status'],
    queryFn: getGatewayServerStatus,
    refetchInterval: 3000,
    staleTime: 1000,
  });

  const backendErrors = useMemo(() => validateBackends(backends), [backends]);

  const setEnabled = (v: boolean) => onChange('gateway.enabled', v);
  const setListenPort = (v: number | undefined) => onChange('gateway.listenPort', v);
  const setRequestTimeout = (v: number | undefined) => onChange('gateway.requestTimeoutSec', v);
  const setUpstreamTimeout = (v: number | undefined) => onChange('gateway.upstreamTimeoutSec', v);
  const setShutdownGraceful = (v: number | undefined) => onChange('gateway.shutdownGracefulSec', v);

  const updateBackend = (i: number, field: keyof GatewayBackend, value: unknown) => {
    const next = backends.map((b, idx) => (idx === i ? { ...b, [field]: value } : b));
    onChange('gateway.backends', next);
  };
  const addBackend = () => {
    const next: GatewayBackend = {
      id: nextBackendId(backends),
      endpoint: '',
      model: '',
      maxSlots: 1,
    };
    onChange('gateway.backends', [...backends, next]);
  };
  const removeBackend = (i: number) => {
    onChange('gateway.backends', backends.filter((_, idx) => idx !== i));
  };

  return (
    <div className="space-y-4">
      <div>
        <h2 className="text-base font-semibold text-slate-800 mb-1">Gateway Server</h2>
        <p className="text-xs text-slate-500">
          AAO 自身を LLM Gateway として動かす。有効にすると <code>/v1/chat/completions</code> などのエンドポイントが、worker UI と <strong>同じポート</strong>で待ち受けます (別 process 起動は不要)。他 AAO の <code>provider.workers[].endpoint</code> にこの URL を指定して GPU プールを共有できます。
        </p>
        <div className="flex items-center gap-3 mt-2 flex-wrap">
          <label className="flex items-center gap-2 text-sm cursor-pointer">
            <input
              type="checkbox"
              checked={gw.enabled === true}
              onChange={e => setEnabled(e.target.checked)}
              className="rounded"
            />
            <span className="font-medium text-slate-700">Enable Gateway</span>
          </label>
          <StatusBadge status={statusQuery.data} />
        </div>
        {statusQuery.data?.errors && statusQuery.data.errors.length > 0 && (
          <ul className="mt-2 text-xs text-red-700 bg-red-50 border border-red-200 rounded p-2 space-y-0.5">
            {statusQuery.data.errors.map((e, i) => (
              <li key={i}>• {e}</li>
            ))}
          </ul>
        )}
      </div>

      <div className="border-t border-hairline pt-3">
        <FieldLabel>Listen port</FieldLabel>
        <div className="grid grid-cols-2 gap-3">
          <div>
            <FieldInput
              type="number"
              value={numberValue(gw.listenPort, 4000)}
              onChange={v => setListenPort(parseNumberInput(v))}
            />
            <HelpText>
              <strong>同 process 時はこの値は使われません</strong>: worker UI と同じポート (
              {statusQuery.data?.sharedPort ?? '9876'}) を共有します。<code>AAO_MODE=gateway</code> で別 process 起動した場合のみ有効。
            </HelpText>
          </div>
          <div className="text-xs text-slate-500 pt-1.5">
            別 process deploy:{' '}
            <code className="text-2xs">AAO_MODE=gateway scripts/gateway.sh start</code>
          </div>
        </div>
      </div>

      <div className="border-t border-hairline pt-3">
        <div className="flex items-center justify-between mb-1.5">
          <h3 className="text-sm font-medium text-slate-700">Backends</h3>
          {/* type="button": a bare <button> defaults to submit and would
              submit any enclosing <form>. */}
          <button
            type="button"
            onClick={addBackend}
            className="px-2.5 h-7 text-xs text-accent border border-accent rounded-md hover:bg-accent-soft"
          >
            + Add backend
          </button>
        </div>
        <HelpText>
          ルーティング先の llama-server / Ollama / vLLM など。Gateway は <code>request.model</code> に一致する <code>model</code> を持つ最も busy ではない backend に割り振ります。<br/>
          <strong>api_key の保存形式</strong>: フォームで入力した値は <code>config.yaml</code> に平文で保存されます。<code>{'${VAR}'}</code> 形式の env var 参照はフォーム保存時に literal 文字列として保存されるため、env 経由で渡したい場合は <code>config.yaml</code> を直接編集してください。
        </HelpText>
        {backends.length === 0 ? (
          <div className="text-xs text-slate-400 border border-dashed border-slate-200 rounded p-4 mt-2 text-center">
            backend が未登録です。最低 1 つ追加してください。
          </div>
        ) : (
          <div className="space-y-2 mt-2">
            {backends.map((b, i) => {
              const errs = backendErrors.get(i) ?? [];
              return (
                <div
                  key={i}
                  className={`border rounded-md p-3 space-y-2 relative ${errs.length > 0 ? 'border-red-200 bg-red-50/30' : 'border-slate-200'}`}
                >
                  <button
                    type="button"
                    onClick={() => removeBackend(i)}
                    className="absolute top-1.5 right-2 text-slate-400 hover:text-red-500 text-lg leading-none"
                    title="この backend を削除"
                    aria-label="この backend を削除"
                  >
                    ×
                  </button>
                  <div className="grid grid-cols-2 gap-2.5">
                    <div>
                      <FieldLabel>id</FieldLabel>
                      <FieldInput value={b.id ?? ''} onChange={v => updateBackend(i, 'id', v)} placeholder="gpu-rtx-a" />
                    </div>
                    <div>
                      <FieldLabel>model</FieldLabel>
                      <FieldInput value={b.model ?? ''} onChange={v => updateBackend(i, 'model', v)} placeholder="qwen3:8b" />
                    </div>
                    <div className="col-span-2">
                      <FieldLabel>endpoint</FieldLabel>
                      <FieldInput value={b.endpoint ?? ''} onChange={v => updateBackend(i, 'endpoint', v)} placeholder="http://gpu-host:8080/v1" />
                    </div>
                    <div>
                      <FieldLabel>max_slots</FieldLabel>
                      {/* Fall back to '' (not 1): a cleared field is stored as
                          undefined and flagged by validation, so the input
                          must look empty rather than show a value that is
                          not actually set. The placeholder hints the usual
                          value. */}
                      <FieldInput
                        type="number"
                        value={numberValue(b.maxSlots, '')}
                        onChange={v => updateBackend(i, 'maxSlots', parseNumberInput(v))}
                        placeholder="1"
                      />
                    </div>
                    <div>
                      <FieldLabel>api_key (任意)</FieldLabel>
                      <FieldInput
                        type="password"
                        value={b.apiKey ?? ''}
                        onChange={v => updateBackend(i, 'apiKey', v || undefined)}
                        placeholder="sk-... or ${ENV_VAR}"
                      />
                      {/* G2: warn when the operator saves a literal
                          ${VAR} reference. The config writer stores
                          fields verbatim — env substitution happens at
                          load time, so saving the form turns the
                          reference into a literal "${VAR}" string and
                          the env var indirection is lost. */}
                      {typeof b.apiKey === 'string' && b.apiKey.trimStart().startsWith('${') && (
                        <p className="text-2xs text-amber-700 bg-amber-50 border border-amber-200 rounded px-2 py-1 mt-1">
                          env var reference detected: 保存すると <code>{b.apiKey}</code> がそのまま config.yaml に書き込まれ、起動時の env 置換は効かなくなります。env 経由で渡すなら config.yaml を直接編集してください。
                        </p>
                      )}
                    </div>
                  </div>
                  {errs.length > 0 && (
                    <ul className="text-2xs text-red-600 list-disc pl-4 space-y-0.5">
                      {errs.map((e, ei) => <li key={ei}>{e}</li>)}
                    </ul>
                  )}
                </div>
              );
            })}
          </div>
        )}
      </div>

      <div className="border-t border-hairline pt-3">
        <div className="mb-1.5">
          <h3 className="text-sm font-medium text-slate-700">Virtual Keys</h3>
        </div>
        <HelpText>
          この Gateway を経由してアクセスするための <code>sk-aao-*</code> bearer key を発行・rotate・revoke します。<br/>
          <strong>注意</strong>: ここでの操作は Gateway Server の Save & Apply とは独立した admin API で即時反映されます (Save ボタンを押す必要はありません)。
        </HelpText>
        <div className="mt-2">
          <GatewayKeysSection />
        </div>
      </div>

      <details className="border-t border-hairline pt-3 group">
        <summary className="text-sm font-medium text-slate-700 cursor-pointer">
          Advanced
        </summary>
        <div className="grid grid-cols-3 gap-3 mt-2">
          <div>
            <FieldLabel>request_timeout_sec</FieldLabel>
            <FieldInput
              type="number"
              value={numberValue(gw.requestTimeoutSec, 600)}
              onChange={v => setRequestTimeout(parseNumberInput(v))}
            />
            <HelpText>chat 全体の budget (streaming 含む)</HelpText>
          </div>
          <div>
            <FieldLabel>upstream_timeout_sec</FieldLabel>
            <FieldInput
              type="number"
              value={numberValue(gw.upstreamTimeoutSec, 30)}
              onChange={v => setUpstreamTimeout(parseNumberInput(v))}
            />
            <HelpText>1 chunk あたりの idle 上限</HelpText>
          </div>
          <div>
            <FieldLabel>shutdown_graceful_sec</FieldLabel>
            <FieldInput
              type="number"
              value={numberValue(gw.shutdownGracefulSec, 30)}
              onChange={v => setShutdownGraceful(parseNumberInput(v))}
            />
            <HelpText>SIGTERM 後の drain 上限</HelpText>
          </div>
        </div>
        <div className="mt-3 text-xs text-slate-500">
          <p>
            <strong>Hot reload:</strong> ここでの変更は Save 直後に同 process gateway に反映されます (backend / virtual_key 変更は bounce が発生し、in-flight ストリームは graceful drain されます)。
          </p>
        </div>
      </details>
    </div>
  );
}
|