sync: update from private repo (c8b6d29)
Some checks failed
CI / build-and-test (push) Has been cancelled

This commit is contained in:
oss-sync 2026-06-10 09:09:39 +00:00
parent 5502478636
commit 25c087067a
12 changed files with 400 additions and 121 deletions

View File

@ -199,6 +199,34 @@
line-height: 1.5; line-height: 1.5;
} }
/* AUTH ERROR BANNER: styles the #auth-error box, which starts hidden and is
   revealed by the page script when the URL carries a known ?error= code. */
.auth-error {
background: #fef2f2;
border: 1px solid #fecaca;
color: #b91c1c;
border-radius: 8px;
padding: 10px 14px;
font-size: 0.8125rem;
line-height: 1.5;
margin-bottom: 16px;
}
/* VIEW TOGGLE: the small centered line holding the link that switches
   between the login view and the signup view. */
.view-toggle {
margin-top: 16px;
font-size: 0.8125rem;
color: #64748b;
text-align: center;
}
.view-toggle a {
color: #4f46e5;
font-weight: 600;
text-decoration: none;
}
/* Underline only on hover so the link reads as interactive without
   cluttering the resting state. */
.view-toggle a:hover {
text-decoration: underline;
}
/* RESPONSIVE */ /* RESPONSIVE */
@media (max-width: 768px) { @media (max-width: 768px) {
.container { .container {
@ -261,6 +289,20 @@
.footer-note { .footer-note {
color: #475569; color: #475569;
} }
.auth-error {
background: rgba(239, 68, 68, 0.12);
border-color: rgba(239, 68, 68, 0.35);
color: #fca5a5;
}
.view-toggle {
color: #94a3b8;
}
.view-toggle a {
color: #818cf8;
}
} }
</style> </style>
</head> </head>
@ -291,9 +333,12 @@
<!-- Right Panel --> <!-- Right Panel -->
<div class="right-panel"> <div class="right-panel">
<div class="login-box"> <div class="login-box">
<h2>ログイン</h2> <h2 id="view-title">ログイン</h2>
<p class="subtitle">アカウントでサインインしてください</p> <p class="subtitle" id="view-subtitle">アカウントでサインインしてください</p>
<div id="auth-error" class="auth-error" style="display:none;"></div>
<div id="view-login">
<!-- GOOGLE_BUTTON_START --> <!-- GOOGLE_BUTTON_START -->
<!-- Google OAuth Button --> <!-- Google OAuth Button -->
<a href="/auth/google" class="oauth-button oauth-button-google"> <a href="/auth/google" class="oauth-button oauth-button-google">
@ -339,16 +384,29 @@
</button> </button>
</form> </form>
<!-- LOCAL_SIGNUP_START --> <!-- LOCAL_SIGNUP_START -->
<div class="divider">アカウントが無い場合</div> <p class="view-toggle">
アカウントが無い場合は <a href="#signup" data-show-view="signup">新規登録</a>
</p>
<!-- LOCAL_SIGNUP_END -->
<!-- LOCAL_FORM_END -->
</div><!-- /view-login -->
<!-- LOCAL_FORM_START -->
<!-- LOCAL_SIGNUP_START -->
<div id="view-signup" style="display:none;">
<form method="post" action="/auth/local/signup" style="display:flex;flex-direction:column;gap:10px;"> <form method="post" action="/auth/local/signup" style="display:flex;flex-direction:column;gap:10px;">
<input type="text" name="email" placeholder="ログインID" required autocomplete="username" <input type="text" name="email" placeholder="ログインID" required autocomplete="username"
style="padding:11px 12px;border:1px solid #d0d5dd;border-radius:8px;font-size:14px;width:100%;box-sizing:border-box;"> style="padding:11px 12px;border:1px solid #d0d5dd;border-radius:8px;font-size:14px;width:100%;box-sizing:border-box;">
<input type="password" name="password" placeholder="パスワード8文字以上" minlength="8" required autocomplete="new-password" <input type="password" name="password" placeholder="パスワード8文字以上" minlength="8" required autocomplete="new-password"
style="padding:11px 12px;border:1px solid #d0d5dd;border-radius:8px;font-size:14px;width:100%;box-sizing:border-box;"> style="padding:11px 12px;border:1px solid #d0d5dd;border-radius:8px;font-size:14px;width:100%;box-sizing:border-box;">
<button type="submit" class="oauth-button" style="justify-content:center;background:#fff;color:#111827;border:1px solid #d0d5dd;cursor:pointer;"> <button type="submit" class="oauth-button" style="justify-content:center;background:#111827;color:#fff;border:none;cursor:pointer;">
新規登録(管理者の承認後に利用可) 新規登録する
</button> </button>
</form> </form>
<p class="view-toggle">
<a href="#" data-show-view="login">← ログインに戻る</a>
</p>
</div><!-- /view-signup -->
<!-- LOCAL_SIGNUP_END --> <!-- LOCAL_SIGNUP_END -->
<!-- LOCAL_FORM_END --> <!-- LOCAL_FORM_END -->
@ -358,5 +416,58 @@
</div> </div>
</div> </div>
</div> </div>
<script>
(function () {
  // Login / signup view switcher and auth-error banner for the login page.
  // Every element lookup below is null-checked, so the script tolerates a
  // page on which some of these elements are absent.
  var loginView = document.getElementById('view-login');
  var signupView = document.getElementById('view-signup');
  var title = document.getElementById('view-title');
  var subtitle = document.getElementById('view-subtitle');

  // Maps the `error` query-parameter code to the message shown in #auth-error.
  var ERRORS = {
    invalid: '入力内容が正しくありません。',
    credentials: 'ログインID またはパスワードが違います。',
    disabled: 'このアカウントは無効化されています。管理者にお問い合わせください。',
    signup: '登録に失敗しました。このログインID は既に使われている可能性があります。',
    weak: 'パスワードは8文字以上で設定してください。'
  };

  /**
   * Show the requested view and keep the heading, subtitle and URL hash in
   * sync with it.
   *
   * @param {string} view 'signup' selects the signup view; any other value
   *   (or a page without a #view-signup element) selects the login view.
   */
  function show(view) {
    var isSignup = view === 'signup' && !!signupView;
    if (loginView) loginView.style.display = isSignup ? 'none' : '';
    if (signupView) signupView.style.display = isSignup ? '' : 'none';
    if (title) title.textContent = isSignup ? '新規登録' : 'ログイン';
    if (subtitle) {
      subtitle.textContent = isSignup
        ? '登録後、管理者の承認を経て利用できます'
        : 'アカウントでサインインしてください';
    }
    // Reflect the active view in the hash without adding a history entry;
    // the path and query string are preserved as-is.
    if (history.replaceState) {
      history.replaceState(null, '',
        location.pathname + location.search + (isSignup ? '#signup' : ''));
    }
  }

  // Wire every [data-show-view] link to switch views in place instead of
  // following its href.
  var toggles = document.querySelectorAll('[data-show-view]');
  for (var i = 0; i < toggles.length; i++) {
    (function (el) {
      el.addEventListener('click', function (e) {
        e.preventDefault();
        show(el.getAttribute('data-show-view'));
      });
    })(toggles[i]);
  }

  var err = new URLSearchParams(location.search).get('error');
  // Own-property check: `err` comes straight from the URL, and a plain
  // `ERRORS[err]` lookup is truthy for inherited keys such as "constructor",
  // "toString" or "__proto__", which would put a function's source text or
  // "[object Object]" into the banner.
  var message = err && Object.prototype.hasOwnProperty.call(ERRORS, err)
    ? ERRORS[err]
    : null;
  if (message) {
    var box = document.getElementById('auth-error');
    if (box) {
      box.textContent = message;
      box.style.display = '';
    }
  }

  // Open the signup view for signup-originated errors or a direct #signup link.
  if (location.hash === '#signup' || err === 'signup' || err === 'weak') show('signup');
})();
</script>
</body> </body>
</html> </html>

View File

@ -104,6 +104,67 @@ describe('createRouter.pick', () => {
expect(counts).toEqual({ a: 2, b: 2, c: 2 }); expect(counts).toEqual({ a: 2, b: 2, c: 2 });
}); });
// ── Sticky routing (x-aao-preferred-backend) ────────────────────────────
it('honors the preferred backend even when another is idler', () => {
  // 'a' is the busier backend but is the sticky preference (the KV cache
  // lives there); 'b' is the idler one.
  const registry = makeRegistry([
    status('a', true, 3, 4),
    status('b', true, 0, 4),
  ]);
  const router = createRouter({
    getBackends: () => [bk('a', 'qwen3:8b'), bk('b', 'qwen3:8b')],
    registry,
  });

  const picked = router.pick('qwen3:8b', 'a');

  expect(picked?.id).toBe('a');
});
it('falls back to normal ranking when the preferred backend is saturated', () => {
  // 'a' is preferred but full, so the pick must land on 'b'.
  const registry = makeRegistry([
    status('a', true, 4, 4),
    status('b', true, 1, 4),
  ]);
  const router = createRouter({
    getBackends: () => [bk('a', 'qwen3:8b'), bk('b', 'qwen3:8b')],
    registry,
  });

  const picked = router.pick('qwen3:8b', 'a');

  expect(picked?.id).toBe('b');
});
it('falls back when the preferred backend is offline', () => {
  // 'a' is the preferred backend but is reported offline; 'b' is online.
  const registry = makeRegistry([
    status('a', false, 0, 4),
    status('b', true, 1, 4),
  ]);
  const router = createRouter({
    getBackends: () => [bk('a', 'qwen3:8b'), bk('b', 'qwen3:8b')],
    registry,
  });

  const picked = router.pick('qwen3:8b', 'a');

  expect(picked?.id).toBe('b');
});
it('ignores a preferred backend that does not serve the requested model/role', () => {
  // 'x' is idle but serves a different model, so the hint must not leak
  // the request across pools.
  const registry = makeRegistry([
    status('a', true, 2, 4),
    status('x', true, 0, 4),
  ]);
  const router = createRouter({
    getBackends: () => [bk('a', 'qwen3:8b'), bk('x', 'llama:70b')],
    registry,
  });

  const picked = router.pick('qwen3:8b', 'x');

  expect(picked?.id).toBe('a');
});
it('preferred pick still reserves an inflight slot', () => {
  const inflight = createBackendInflightCounter();
  const registry = makeRegistry([status('a', true, 0, 2)]);
  const router = createRouter({
    getBackends: () => [bk('a', 'qwen3:8b', 2)],
    registry,
    inflight,
  });

  // The first two preferred picks succeed and each bumps the counter by one.
  for (const expectedInflight of [1, 2]) {
    expect(router.pick('qwen3:8b', 'a')?.id).toBe('a');
    expect(inflight.get('a')).toBe(expectedInflight);
  }

  // Saturated by reservations → preferred no longer admitted → null
  expect(router.pick('qwen3:8b', 'a')).toBeNull();
});
it('treats registry-cold backends as idle (avoids startup outage)', () => { it('treats registry-cold backends as idle (avoids startup outage)', () => {
// Registry has nothing yet — first request still routes. // Registry has nothing yet — first request still routes.
const r = createRouter({ const r = createRouter({

View File

@ -44,7 +44,13 @@ export interface Router {
* backend matches the model OR all matching backends are * backend matches the model OR all matching backends are
* offline/saturated. * offline/saturated.
*/ */
pick(model: string): GatewayBackendConfig | null; /**
* Pick a backend for the routing key. `preferredBackendId` is the sticky
* routing hint (x-aao-preferred-backend): honored when that backend is an
* admitted candidate (right role/model, online, free capacity), otherwise
* the normal least-busy ranking applies.
*/
pick(model: string, preferredBackendId?: string | null): GatewayBackendConfig | null;
/** /**
* Inspection helper returns the registry-augmented view of every * Inspection helper returns the registry-augmented view of every
* configured backend (regardless of model). Useful for /v1/models * configured backend (regardless of model). Useful for /v1/models
@ -118,7 +124,7 @@ export function createRouter(deps: CreateRouterDeps): Router {
} }
return { return {
pick(model: string): GatewayBackendConfig | null { pick(model: string, preferredBackendId?: string | null): GatewayBackendConfig | null {
// Note registry activity so the polling cadence tightens while // Note registry activity so the polling cadence tightens while
// we're actively routing. Without this, a single-page burst of // we're actively routing. Without this, a single-page burst of
// requests would still see the idle 30s cadence. // requests would still see the idle 30s cadence.
@ -190,6 +196,21 @@ export function createRouter(deps: CreateRouterDeps): Router {
const candidates = warm.length > 0 ? warm : cold; const candidates = warm.length > 0 ? warm : cold;
if (candidates.length === 0) return null; if (candidates.length === 0) return null;
// Sticky routing (x-aao-preferred-backend): if the client asks for a
// specific backend and that backend is an ADMITTED candidate (right
// role/model, online, has free capacity), serve it without ranking —
// the job's KV cache lives there, so a least-busy re-pick would trade
// a large prefill cost for a small load-balance win. When the
// preferred backend is saturated or offline it simply isn't in the
// candidate list and we fall through to the normal ranking.
if (preferredBackendId) {
const preferred = candidates.find(c => c.backend.id === preferredBackendId);
if (preferred) {
deps.inflight?.inc(preferred.backend.id);
return preferred.backend;
}
}
// Stable ascending sort by ratio. // Stable ascending sort by ratio.
candidates.sort((a, b) => a.ratio - b.ratio); candidates.sort((a, b) => a.ratio - b.ratio);
const bestRatio = candidates[0]!.ratio; const bestRatio = candidates[0]!.ratio;

View File

@ -297,7 +297,13 @@ export function buildChatCompletionsHandler(deps: StreamProxyDeps) {
return; return;
} }
const backend = deps.router.pick(model); // Sticky routing hint from the worker (see router.pick): prefer the
// backend that already holds this job's KV cache when it has capacity.
const rawPreferred = req.headers['x-aao-preferred-backend'];
const preferredBackendId = typeof rawPreferred === 'string' && rawPreferred.trim().length > 0
? rawPreferred.trim()
: null;
const backend = deps.router.pick(model, preferredBackendId);
if (!backend) { if (!backend) {
emitRequestMetric(deps.metrics, { emitRequestMetric(deps.metrics, {
team: team0, backend: 'none', model, status: 'no_backend', durationMs: 0, team: team0, backend: 'none', model, status: 'no_backend', durationMs: 0,

View File

@ -281,6 +281,17 @@ export class OpenAICompatClient {
return `Request timed out (${mins} minutes)`; return `Request timed out (${mins} minutes)`;
} }
/**
 * Backend the next request should prefer (gateway sticky routing for
 * KV-cache reuse). Updated by the worker whenever the resolved backend
 * changes; per-client so concurrent jobs never share affinity.
 * A null (or empty) value means no hint: `chat()` only adds the
 * `x-aao-preferred-backend` header when this field is truthy.
 */
private preferredBackendId: string | null = null;
/**
 * Replace the sticky-routing hint used by subsequent requests.
 *
 * @param backendId Backend id to prefer, or `null` to clear the hint.
 */
setPreferredBackendId(backendId: string | null): void {
this.preferredBackendId = backendId;
}
async *chat(messages: Message[], tools?: ToolDef[], externalSignal?: AbortSignal): AsyncGenerator<LLMEvent> { async *chat(messages: Message[], tools?: ToolDef[], externalSignal?: AbortSignal): AsyncGenerator<LLMEvent> {
const controller = new AbortController(); const controller = new AbortController();
// アイドルタイムアウト: チャンク受信のたびにリセットされる // アイドルタイムアウト: チャンク受信のたびにリセットされる
@ -308,6 +319,13 @@ export class OpenAICompatClient {
if (this.apiKey) { if (this.apiKey) {
headers['Authorization'] = `Bearer ${this.apiKey}`; headers['Authorization'] = `Bearer ${this.apiKey}`;
} }
// Sticky routing hint: ask the gateway to keep serving this job from
// the backend that already holds its KV cache. The gateway only honors
// it while that backend is online with free capacity; otherwise it
// re-routes normally. Direct (non-proxy) backends ignore the header.
if (this.preferredBackendId) {
headers['x-aao-preferred-backend'] = this.preferredBackendId;
}
const body: Record<string, unknown> = { const body: Record<string, unknown> = {
messages, messages,

View File

@ -12,6 +12,7 @@ describe('activity log format', () => {
expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] [worker:worker-074] [mode:fast] Read: {"file_path":"input/a.png"}')).toEqual({ expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] [worker:worker-074] [mode:fast] Read: {"file_path":"input/a.png"}')).toEqual({
workerId: 'worker-074', workerId: 'worker-074',
mode: 'fast', mode: 'fast',
backendId: null,
}); });
}); });
@ -20,6 +21,7 @@ describe('activity log format', () => {
expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] final: completed')).toEqual({ expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] final: completed')).toEqual({
workerId: null, workerId: null,
mode: null, mode: null,
backendId: null,
}); });
}); });
@ -27,6 +29,7 @@ describe('activity log format', () => {
expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] [worker:worker-148] [mode:quality] [execute] preview: checked draft')).toEqual({ expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] [worker:worker-148] [mode:quality] [execute] preview: checked draft')).toEqual({
workerId: 'worker-148', workerId: 'worker-148',
mode: 'quality', mode: 'quality',
backendId: null,
}); });
}); });
@ -54,3 +57,24 @@ describe('activity log format', () => {
expect(formatDuration(Number.POSITIVE_INFINITY)).toBe('?'); expect(formatDuration(Number.POSITIVE_INFINITY)).toBe('?');
}); });
}); });
describe('backend tag', () => {
  it('formats and parses [backend:...] alongside worker/mode', () => {
    // Round trip: format an entry carrying all three tags, then parse it back.
    const metadata = {
      workerId: 'aao-gateway',
      mode: 'quality',
      backendId: 'gpu-rtx-a',
    };
    const line = formatActivityLogEntry('LLM call done', metadata);

    for (const tag of ['[worker:aao-gateway]', '[backend:gpu-rtx-a]']) {
      expect(line).toContain(tag);
    }

    const { workerId, backendId } = parseActivityLogMetadata(line);
    expect(workerId).toBe('aao-gateway');
    expect(backendId).toBe('gpu-rtx-a');
  });

  it('omits the backend tag when backendId is unset', () => {
    // No backendId supplied: no tag is emitted and parsing yields null.
    const line = formatActivityLogEntry('entry', { workerId: 'w1', mode: 'auto' });

    expect(line).not.toContain('[backend:');

    const { backendId } = parseActivityLogMetadata(line);
    expect(backendId).toBeNull();
  });
});

View File

@ -1,6 +1,8 @@
export interface ActivityLogMetadata { export interface ActivityLogMetadata {
workerId?: string | null; workerId?: string | null;
mode?: string | null; mode?: string | null;
/** Physical backend behind a proxy worker (gateway-resolved), if known. */
backendId?: string | null;
} }
const TOOL_SUMMARY_KEYS = [ const TOOL_SUMMARY_KEYS = [
@ -24,6 +26,7 @@ export function formatActivityLogMetadata(metadata?: ActivityLogMetadata): strin
const segments: string[] = []; const segments: string[] = [];
if (metadata.workerId) segments.push(`[worker:${metadata.workerId}]`); if (metadata.workerId) segments.push(`[worker:${metadata.workerId}]`);
if (metadata.mode) segments.push(`[mode:${metadata.mode}]`); if (metadata.mode) segments.push(`[mode:${metadata.mode}]`);
if (metadata.backendId) segments.push(`[backend:${metadata.backendId}]`);
return segments.join(' '); return segments.join(' ');
} }
@ -35,9 +38,11 @@ export function formatActivityLogEntry(entry: string, metadata?: ActivityLogMeta
export function parseActivityLogMetadata(line: string): ActivityLogMetadata { export function parseActivityLogMetadata(line: string): ActivityLogMetadata {
const workerMatch = /\[worker:([^\]]+)\]/.exec(line); const workerMatch = /\[worker:([^\]]+)\]/.exec(line);
const modeMatch = /\[mode:([^\]]+)\]/.exec(line); const modeMatch = /\[mode:([^\]]+)\]/.exec(line);
const backendMatch = /\[backend:([^\]]+)\]/.exec(line);
return { return {
workerId: workerMatch?.[1] ?? null, workerId: workerMatch?.[1] ?? null,
mode: modeMatch?.[1] ?? null, mode: modeMatch?.[1] ?? null,
backendId: backendMatch?.[1] ?? null,
}; };
} }

View File

@ -1164,10 +1164,12 @@ export class Worker {
isLocalTask, isLocalTask,
localTaskId, localTaskId,
workspacePath, workspacePath,
// Seed the sticky-backend guard with whatever was already persisted // Seed the backend tracker with whatever was already persisted
// for this job (e.g. on retry / resume from ASK). Only matters for // for this job (e.g. on retry / resume from ASK). Only matters for
// proxy workers; direct workers never produce a backend event. // proxy workers; direct workers never produce a backend event.
isProxyWorker ? (job.lastBackendId ?? null) : null, isProxyWorker ? (job.lastBackendId ?? null) : null,
llmClient,
logMetadata,
); );
// 開始コメント // 開始コメント
@ -1500,24 +1502,34 @@ export class Worker {
localTaskId: number | null, localTaskId: number | null,
workspacePath: string, workspacePath: string,
/** /**
* Initial value of jobs.last_backend_id from the DB. Used to seed the * Initial value of jobs.last_backend_id from the DB. Seeds the backend
* sticky guard so callers don't repeatedly write the same value on * tracker (and the sticky-routing hint) so a resumed/retried job goes
* every LLM iteration. Falsy/null = no backend resolved yet. * back to the backend that already holds its KV cache.
* Falsy/null = no backend resolved yet.
*/ */
initialLastBackendId: string | null = null, initialLastBackendId: string | null = null,
/** LLM client of this job — receives the sticky-routing hint per switch. */
llmClient?: { setPreferredBackendId(backendId: string | null): void },
/**
* The reporter's metadata object (shared by reference): mutating
* `backendId` here makes every subsequent activity.log line carry
* `[backend:...]` so the Progress tab can show the physical backend
* behind a proxy worker.
*/
logMetadata?: ActivityLogMetadata,
): PieceRunCallbacks { ): PieceRunCallbacks {
let movementStartTime = Date.now(); let movementStartTime = Date.now();
const toolUsageCounts = new Map<string, number>(); const toolUsageCounts = new Map<string, number>();
// Sticky-backend per design Open Question #3: take the first proxy // Backend tracker (follow-current semantics, 2026-06): persists
// backend the job sees and never overwrite it. Subsequent calls that // jobs.last_backend_id whenever the resolved backend CHANGES so the UI
// happen to land on a different deployment are ignored at this layer // (pet, badges) follows where the job actually runs. Switches are rare
// so the UI Pet doesn't flicker between sprites. The resolver also // because the gateway honors the x-aao-preferred-backend sticky hint
// guarantees that if the DB persist fails, the local sticky stays // (KV-cache reuse) — they only happen when the preferred backend goes
// unset so the next event can retry (otherwise a transient DB error // offline or saturates. The tracker still guarantees that a failed DB
// would orphan the worker → backend mapping for the lifetime of the // persist leaves the in-memory value unchanged so the next event
// job). See src/worker/sticky-backend.ts. // retries. See src/worker/sticky-backend.ts.
const workerIdLocal = this.workerId; const workerIdLocal = this.workerId;
const onBackendResolvedHandler = createStickyBackendResolver({ const backendTracker = createStickyBackendResolver({
initial: initialLastBackendId, initial: initialLastBackendId,
persist: (backendId) => this.repo.updateJob(jobId, { lastBackendId: backendId }), persist: (backendId) => this.repo.updateJob(jobId, { lastBackendId: backendId }),
logger: { logger: {
@ -1528,6 +1540,12 @@ export class Worker {
workerId: workerIdLocal, workerId: workerIdLocal,
jobId, jobId,
}); });
// Seed the sticky-routing hint + activity-log backend tag from the DB
// value (resume/retry goes straight back to the cache-warm backend).
if (initialLastBackendId) {
llmClient?.setPreferredBackendId(initialLastBackendId);
if (logMetadata) logMetadata.backendId = initialLastBackendId;
}
// Phase 3b: local copy of the sticky backend so the LLM-call metric // Phase 3b: local copy of the sticky backend so the LLM-call metric
// has a stable backend_id label even before the persist returns. // has a stable backend_id label even before the persist returns.
// Direct workers (non-proxy) never fire onBackendResolved, so we // Direct workers (non-proxy) never fire onBackendResolved, so we
@ -1616,20 +1634,23 @@ export class Worker {
} }
}, },
onBackendResolved: (info) => { onBackendResolved: (info) => {
// Phase 3b: update the sticky backend id used for LLM-call // Phase 3b: update the backend id used for LLM-call metrics.
// metrics. We capture every event (not just the first) so a
// routing change mid-job is reflected in the next iteration's
// counters; the DB-side sticky still preserves the first.
if (info.backendId) { if (info.backendId) {
metricBackendId = info.backendId; metricBackendId = info.backendId;
// Sticky routing: ask the gateway to keep using this backend on
// the next request (KV-cache affinity).
llmClient?.setPreferredBackendId(info.backendId);
// Tag subsequent activity.log lines with the physical backend so
// the Progress tab shows more than the proxy worker's name.
if (logMetadata) logMetadata.backendId = info.backendId;
} }
// Fire-and-forget: agent-loop's onBackendResolved signature is // Fire-and-forget: agent-loop's onBackendResolved signature is
// sync (void). The resolver handles persist errors internally; // sync (void). The tracker handles persist errors internally;
// we just attach a final guard to log any unexpected throw. // we just attach a final guard to log any unexpected throw.
// cacheKey is observed but not persisted at the job level — // cacheKey is observed but not persisted at the job level —
// Phase B's NodeStatusWidget will track cache hits out-of-band. // Phase B's NodeStatusWidget will track cache hits out-of-band.
onBackendResolvedHandler(info).catch(err => { backendTracker.onEvent(info).catch(err => {
logger.warn(`[worker:${this.workerId}] sticky backend resolver threw for job ${jobId}: ${err}`); logger.warn(`[worker:${this.workerId}] backend tracker threw for job ${jobId}: ${err}`);
}); });
}, },
onMovementComplete: (movementName, result) => { onMovementComplete: (movementName, result) => {

View File

@ -13,11 +13,11 @@ function makeLogger(): StickyBackendLogger & {
}; };
} }
describe('createStickyBackendResolver', () => { describe('createStickyBackendResolver (follow-current semantics)', () => {
it('persists the first backend, sets sticky, and logs at info', async () => { it('persists the first backend, advances current, and logs at info', async () => {
const logger = makeLogger(); const logger = makeLogger();
const persist = vi.fn().mockResolvedValue(undefined); const persist = vi.fn().mockResolvedValue(undefined);
const resolve = createStickyBackendResolver({ const tracker = createStickyBackendResolver({
initial: null, initial: null,
persist, persist,
logger, logger,
@ -25,18 +25,19 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1', jobId: 'j1',
}); });
await resolve({ backendId: 'gpu-a', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(1); expect(persist).toHaveBeenCalledTimes(1);
expect(persist).toHaveBeenCalledWith('gpu-a'); expect(persist).toHaveBeenCalledWith('gpu-a');
expect(tracker.current()).toBe('gpu-a');
expect(logger.calls.info).toHaveLength(1); expect(logger.calls.info).toHaveLength(1);
expect(logger.calls.info[0]).toContain('gpu-a'); expect(logger.calls.info[0]).toContain('gpu-a');
}); });
it('short-circuits subsequent events once sticky is set', async () => { it('follows backend switches: each CHANGE persists; repeats do not', async () => {
const logger = makeLogger(); const logger = makeLogger();
const persist = vi.fn().mockResolvedValue(undefined); const persist = vi.fn().mockResolvedValue(undefined);
const resolve = createStickyBackendResolver({ const tracker = createStickyBackendResolver({
initial: null, initial: null,
persist, persist,
logger, logger,
@ -44,20 +45,24 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1', jobId: 'j1',
}); });
await resolve({ backendId: 'gpu-a', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
await resolve({ backendId: 'gpu-b', cacheKey: 'sha:xyz' }); await tracker.onEvent({ backendId: 'gpu-a', cacheKey: 'sha:1' }); // same — no persist
await resolve({ backendId: 'gpu-a', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-b', cacheKey: null }); // switch — persists
await tracker.onEvent({ backendId: 'gpu-b', cacheKey: null }); // same — no persist
expect(persist).toHaveBeenCalledTimes(1); expect(persist).toHaveBeenCalledTimes(2);
// gpu-b ≠ sticky → debug; gpu-a == sticky → no log expect(persist).toHaveBeenNthCalledWith(1, 'gpu-a');
expect(logger.calls.debug).toHaveLength(1); expect(persist).toHaveBeenNthCalledWith(2, 'gpu-b');
expect(logger.calls.debug[0]).toContain('gpu-b'); expect(tracker.current()).toBe('gpu-b');
// The switch log mentions both ends
expect(logger.calls.info[1]).toContain('gpu-a');
expect(logger.calls.info[1]).toContain('gpu-b');
}); });
it('honors initial sticky from DB without re-persisting', async () => { it('honors initial value from DB: same backend does not re-persist, a switch does', async () => {
const logger = makeLogger(); const logger = makeLogger();
const persist = vi.fn().mockResolvedValue(undefined); const persist = vi.fn().mockResolvedValue(undefined);
const resolve = createStickyBackendResolver({ const tracker = createStickyBackendResolver({
initial: 'gpu-seed', initial: 'gpu-seed',
persist, persist,
logger, logger,
@ -65,21 +70,22 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1', jobId: 'j1',
}); });
await resolve({ backendId: 'gpu-other', cacheKey: null }); expect(tracker.current()).toBe('gpu-seed');
await resolve({ backendId: 'gpu-seed', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-seed', cacheKey: null });
expect(persist).not.toHaveBeenCalled(); expect(persist).not.toHaveBeenCalled();
expect(logger.calls.debug).toHaveLength(1);
expect(logger.calls.debug[0]).toContain('gpu-other'); await tracker.onEvent({ backendId: 'gpu-other', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(1);
expect(tracker.current()).toBe('gpu-other');
}); });
it('does NOT set sticky when persist fails — next event retries', async () => { it('does NOT advance when persist fails — next event retries', async () => {
const logger = makeLogger(); const logger = makeLogger();
const persist = vi const persist = vi
.fn() .fn()
.mockRejectedValueOnce(new Error('SQLITE_BUSY')) .mockRejectedValueOnce(new Error('SQLITE_BUSY'))
.mockResolvedValueOnce(undefined); .mockResolvedValueOnce(undefined);
const resolve = createStickyBackendResolver({ const tracker = createStickyBackendResolver({
initial: null, initial: null,
persist, persist,
logger, logger,
@ -87,23 +93,19 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1', jobId: 'j1',
}); });
// First call: DB write fails → sticky unset → warn logged // First call: DB write fails → current stays null → warn logged
await resolve({ backendId: 'gpu-a', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(1); expect(persist).toHaveBeenCalledTimes(1);
expect(tracker.current()).toBeNull();
expect(logger.calls.warn).toHaveLength(1); expect(logger.calls.warn).toHaveLength(1);
expect(logger.calls.warn[0]).toContain('SQLITE_BUSY'); expect(logger.calls.warn[0]).toContain('SQLITE_BUSY');
expect(logger.calls.info).toHaveLength(0); expect(logger.calls.info).toHaveLength(0);
// Second call: DB write succeeds → sticky set // Second call (same backend again): retries because current ≠ backendId
await resolve({ backendId: 'gpu-b', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(2); expect(persist).toHaveBeenCalledTimes(2);
expect(persist).toHaveBeenLastCalledWith('gpu-b'); expect(tracker.current()).toBe('gpu-a');
expect(logger.calls.info).toHaveLength(1); expect(logger.calls.info).toHaveLength(1);
expect(logger.calls.info[0]).toContain('gpu-b');
// Third call: sticky is now set → no further persist
await resolve({ backendId: 'gpu-c', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(2);
}); });
it('retries on every event until persist succeeds (multiple failures)', async () => { it('retries on every event until persist succeeds (multiple failures)', async () => {
@ -113,7 +115,7 @@ describe('createStickyBackendResolver', () => {
.mockRejectedValueOnce(new Error('fail 1')) .mockRejectedValueOnce(new Error('fail 1'))
.mockRejectedValueOnce(new Error('fail 2')) .mockRejectedValueOnce(new Error('fail 2'))
.mockResolvedValueOnce(undefined); .mockResolvedValueOnce(undefined);
const resolve = createStickyBackendResolver({ const tracker = createStickyBackendResolver({
initial: null, initial: null,
persist, persist,
logger, logger,
@ -121,13 +123,13 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1', jobId: 'j1',
}); });
await resolve({ backendId: 'gpu-a', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
await resolve({ backendId: 'gpu-b', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-b', cacheKey: null });
await resolve({ backendId: 'gpu-c', cacheKey: null }); await tracker.onEvent({ backendId: 'gpu-c', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(3); expect(persist).toHaveBeenCalledTimes(3);
expect(logger.calls.warn).toHaveLength(2); expect(logger.calls.warn).toHaveLength(2);
expect(logger.calls.info).toHaveLength(1); expect(logger.calls.info).toHaveLength(1);
expect(logger.calls.info[0]).toContain('gpu-c'); expect(tracker.current()).toBe('gpu-c');
}); });
}); });

View File

@ -1,24 +1,23 @@
/** /**
* Sticky-backend resolver for proxy worker jobs. * Backend tracker for proxy worker jobs.
* *
* Design (per * History: originally "first backend wins" (2026-05-18 design, Open Question
* docs/superpowers/specs/2026-05-18-multi-team-gpu-pool-and-node-status-design.md * #3 case 1) to keep the UI pet from flickering while the gateway rebalanced
* Open Question #3 case 1, "最初に確定したら以後 update しない"): * every request. As of 2026-06 the gateway honors `x-aao-preferred-backend`
* (client-side sticky routing for KV-cache reuse), so backend switches are
* RARE they only happen when the preferred backend goes offline or
* saturates. The tracker therefore now follows the CURRENT backend:
* *
* - For a proxy worker, every LLM call may resolve to a different * - `jobs.last_backend_id` is updated whenever the resolved backend CHANGES,
* physical backend (LiteLLM rebalances per request). The UI pet should * so the UI (pet, badges) tracks where the job actually runs.
* not flicker, so we record only the FIRST backend a job sees. * - Persistence happens via `updateJob({ lastBackendId })`. If that DB write
* - Persistence happens via `updateJob({ lastBackendId })`. If that DB * FAILS, the in-memory value is left unchanged so the next
* write FAILS, we must remain in the unset state so the next * `onBackendResolved` event retries the persist (a transient DB error must
* `onBackendResolved` event has a chance to retry. If we set the local * not permanently lose the worker backend mapping).
* sticky variable BEFORE persisting, a transient DB error would lose
* the binding permanently for the lifetime of the job (sticky check
* short-circuits all subsequent events) and the UI would never see
* the worker backend mapping.
* *
* This module isolates the "set sticky only after persist succeeds" * This module isolates the "advance only after persist succeeds" invariant
* invariant from `Worker.buildPieceRunCallbacks`, which already has a * from `Worker.buildPieceCallbacks`, which already has a dozen other
* dozen other concerns and is hard to unit-test in isolation. * concerns and is hard to unit-test in isolation.
*/ */
export interface StickyBackendLogger { export interface StickyBackendLogger {
@ -32,16 +31,20 @@ export interface StickyBackendEvent {
cacheKey: string | null; cacheKey: string | null;
} }
export interface BackendTracker {
/** The onBackendResolved callback for the agent loop (fire-and-forget safe). */
onEvent: (event: StickyBackendEvent) => Promise<void>;
/**
* The most recently persisted backend id (or the initial DB value).
* Used as the `x-aao-preferred-backend` hint on the next LLM request.
*/
current: () => string | null;
}
/** /**
* Build the `onBackendResolved` callback. `persist(backendId)` is the * Build the backend tracker. `persist(backendId)` is the DB write
* DB write (typically `repo.updateJob(jobId, { lastBackendId })`); it * (typically `repo.updateJob(jobId, { lastBackendId })`); it must reject on
* must reject on failure so we can keep sticky unset for retry. * failure so the in-memory value stays put for retry.
*
* Returns an async function the agent-loop can call without awaiting
* errors are caught internally and logged. The function resolves once
* either: (a) sticky was already set and we short-circuited, (b) the
* persist succeeded and sticky is now set, or (c) the persist failed
* and sticky remains unset for the next event to retry.
*/ */
export function createStickyBackendResolver(opts: { export function createStickyBackendResolver(opts: {
initial: string | null; initial: string | null;
@ -49,33 +52,31 @@ export function createStickyBackendResolver(opts: {
logger: StickyBackendLogger; logger: StickyBackendLogger;
workerId: string; workerId: string;
jobId: string; jobId: string;
}): (event: StickyBackendEvent) => Promise<void> { }): BackendTracker {
const { initial, persist, logger, workerId, jobId } = opts; const { initial, persist, logger, workerId, jobId } = opts;
let sticky: string | null = initial; let current: string | null = initial;
return async function onBackendResolved({ backendId, cacheKey }: StickyBackendEvent): Promise<void> { async function onEvent({ backendId, cacheKey }: StickyBackendEvent): Promise<void> {
if (sticky) { if (current === backendId) return; // unchanged — nothing to persist
if (sticky !== backendId) {
logger.debug(
`[worker:${workerId}] job ${jobId} backend re-resolved to ${backendId} (sticky=${sticky}, cache=${cacheKey ?? 'miss'}); keeping sticky`,
);
}
return;
}
try { try {
await persist(backendId); await persist(backendId);
// Only set sticky AFTER persist succeeds. If we set first and persist // Only advance AFTER persist succeeds. If we advanced first and the
// failed, the next event would short-circuit on the sticky check and // persist failed, the next identical event would short-circuit on the
// we'd never recover — the UI would render "no backend" forever. // equality check and the DB would stay stale forever.
sticky = backendId; const previous = current;
current = backendId;
logger.info( logger.info(
`[worker:${workerId}] job ${jobId} backend resolved: ${backendId} cache=${cacheKey ?? 'miss'}`, previous
? `[worker:${workerId}] job ${jobId} backend switched: ${previous}${backendId} cache=${cacheKey ?? 'miss'}`
: `[worker:${workerId}] job ${jobId} backend resolved: ${backendId} cache=${cacheKey ?? 'miss'}`,
); );
} catch (err) { } catch (err) {
logger.warn( logger.warn(
`[worker:${workerId}] failed to persist lastBackendId for job ${jobId}: ${err}sticky left unset for retry`, `[worker:${workerId}] failed to persist lastBackendId for job ${jobId}: ${err}keeping ${current ?? 'unset'} for retry`,
); );
// Intentionally do NOT set sticky. Next event retries. // Intentionally do NOT advance. Next event retries.
} }
}; }
return { onEvent, current: () => current };
} }

View File

@ -22,7 +22,7 @@ const KIND_COLORS: Record<string, { dot: string; badge: string; badgeText: strin
export const ActivityEventCard = memo(function ActivityEventCard({ event, isLast }: { event: ActivityEvent; isLast?: boolean }) { export const ActivityEventCard = memo(function ActivityEventCard({ event, isLast }: { event: ActivityEvent; isLast?: boolean }) {
const colors = KIND_COLORS[event.kind] ?? KIND_COLORS.other!; const colors = KIND_COLORS[event.kind] ?? KIND_COLORS.other!;
const meta = formatActivityMeta(event.workerId, event.mode); const meta = formatActivityMeta(event.workerId, event.mode, event.backendId);
return ( return (
<div className="grid gap-3" style={{ gridTemplateColumns: '16px minmax(0,1fr)' }}> <div className="grid gap-3" style={{ gridTemplateColumns: '16px minmax(0,1fr)' }}>

View File

@ -115,10 +115,17 @@ export interface ActivityEvent {
timestamp: string | null; timestamp: string | null;
workerId: string | null; workerId: string | null;
mode: string | null; mode: string | null;
/** Physical backend behind a proxy worker (from [backend:...] tags). */
backendId: string | null;
} }
export function formatActivityMeta(workerId: string | null, mode: string | null): string { export function formatActivityMeta(workerId: string | null, mode: string | null, backendId?: string | null): string {
return [workerId ? `worker: ${workerPill(workerId)}` : '', mode ? `mode: ${mode}` : ''].filter(Boolean).join(' · '); return [
workerId ? `worker: ${workerPill(workerId)}` : '',
// Show the physical backend when it differs from the (proxy) worker name.
backendId && backendId !== workerId ? `backend: ${workerPill(backendId)}` : '',
mode ? `mode: ${mode}` : '',
].filter(Boolean).join(' · ');
} }
export function parseActivityLog(logText: string): ActivityEvent[] { export function parseActivityLog(logText: string): ActivityEvent[] {
@ -130,13 +137,15 @@ export function parseActivityLog(logText: string): ActivityEvent[] {
const timestamp = timestampMatch?.[1] ?? null; const timestamp = timestampMatch?.[1] ?? null;
const workerId = /\[worker:([^\]]+)\]/.exec(rawLine)?.[1] ?? null; const workerId = /\[worker:([^\]]+)\]/.exec(rawLine)?.[1] ?? null;
const mode = /\[mode:([^\]]+)\]/.exec(rawLine)?.[1] ?? null; const mode = /\[mode:([^\]]+)\]/.exec(rawLine)?.[1] ?? null;
const backendId = /\[backend:([^\]]+)\]/.exec(rawLine)?.[1] ?? null;
const line = rawLine const line = rawLine
.replace(/^\[[^\]]+\]\s+/, '') .replace(/^\[[^\]]+\]\s+/, '')
.replace(/\[worker:[^\]]+\]\s*/g, '') .replace(/\[worker:[^\]]+\]\s*/g, '')
.replace(/\[mode:[^\]]+\]\s*/g, '') .replace(/\[mode:[^\]]+\]\s*/g, '')
.replace(/\[backend:[^\]]+\]\s*/g, '')
.trim(); .trim();
const base = { id: `${timestamp ?? 'line'}-${index}`, timestamp, workerId, mode }; const base = { id: `${timestamp ?? 'line'}-${index}`, timestamp, workerId, mode, backendId };
const movementStart = /^\[([^\]]+)\] (?:start|ステップ開始)$/.exec(line); const movementStart = /^\[([^\]]+)\] (?:start|ステップ開始)$/.exec(line);
if (movementStart) { if (movementStart) {