sync: update from private repo (c8b6d29)
Some checks failed
CI / build-and-test (push) Has been cancelled

This commit is contained in:
oss-sync 2026-06-10 09:09:39 +00:00
parent 5502478636
commit 25c087067a
12 changed files with 400 additions and 121 deletions

View File

@ -199,6 +199,34 @@
line-height: 1.5;
}
.auth-error {
background: #fef2f2;
border: 1px solid #fecaca;
color: #b91c1c;
border-radius: 8px;
padding: 10px 14px;
font-size: 0.8125rem;
line-height: 1.5;
margin-bottom: 16px;
}
.view-toggle {
margin-top: 16px;
font-size: 0.8125rem;
color: #64748b;
text-align: center;
}
.view-toggle a {
color: #4f46e5;
font-weight: 600;
text-decoration: none;
}
.view-toggle a:hover {
text-decoration: underline;
}
/* RESPONSIVE */
@media (max-width: 768px) {
.container {
@ -261,6 +289,20 @@
.footer-note {
color: #475569;
}
.auth-error {
background: rgba(239, 68, 68, 0.12);
border-color: rgba(239, 68, 68, 0.35);
color: #fca5a5;
}
.view-toggle {
color: #94a3b8;
}
.view-toggle a {
color: #818cf8;
}
}
</style>
</head>
@ -291,9 +333,12 @@
<!-- Right Panel -->
<div class="right-panel">
<div class="login-box">
<h2>ログイン</h2>
<p class="subtitle">アカウントでサインインしてください</p>
<h2 id="view-title">ログイン</h2>
<p class="subtitle" id="view-subtitle">アカウントでサインインしてください</p>
<div id="auth-error" class="auth-error" style="display:none;"></div>
<div id="view-login">
<!-- GOOGLE_BUTTON_START -->
<!-- Google OAuth Button -->
<a href="/auth/google" class="oauth-button oauth-button-google">
@ -339,16 +384,29 @@
</button>
</form>
<!-- LOCAL_SIGNUP_START -->
<div class="divider">アカウントが無い場合</div>
<p class="view-toggle">
アカウントが無い場合は <a href="#signup" data-show-view="signup">新規登録</a>
</p>
<!-- LOCAL_SIGNUP_END -->
<!-- LOCAL_FORM_END -->
</div><!-- /view-login -->
<!-- LOCAL_FORM_START -->
<!-- LOCAL_SIGNUP_START -->
<div id="view-signup" style="display:none;">
<form method="post" action="/auth/local/signup" style="display:flex;flex-direction:column;gap:10px;">
<input type="text" name="email" placeholder="ログインID" required autocomplete="username"
style="padding:11px 12px;border:1px solid #d0d5dd;border-radius:8px;font-size:14px;width:100%;box-sizing:border-box;">
<input type="password" name="password" placeholder="パスワード8文字以上" minlength="8" required autocomplete="new-password"
style="padding:11px 12px;border:1px solid #d0d5dd;border-radius:8px;font-size:14px;width:100%;box-sizing:border-box;">
<button type="submit" class="oauth-button" style="justify-content:center;background:#fff;color:#111827;border:1px solid #d0d5dd;cursor:pointer;">
新規登録(管理者の承認後に利用可)
<button type="submit" class="oauth-button" style="justify-content:center;background:#111827;color:#fff;border:none;cursor:pointer;">
新規登録する
</button>
</form>
<p class="view-toggle">
<a href="#" data-show-view="login">← ログインに戻る</a>
</p>
</div><!-- /view-signup -->
<!-- LOCAL_SIGNUP_END -->
<!-- LOCAL_FORM_END -->
@ -358,5 +416,58 @@
</div>
</div>
</div>
<script>
(function () {
var loginView = document.getElementById('view-login');
var signupView = document.getElementById('view-signup');
var title = document.getElementById('view-title');
var subtitle = document.getElementById('view-subtitle');
var ERRORS = {
invalid: '入力内容が正しくありません。',
credentials: 'ログインID またはパスワードが違います。',
disabled: 'このアカウントは無効化されています。管理者にお問い合わせください。',
signup: '登録に失敗しました。このログインID は既に使われている可能性があります。',
weak: 'パスワードは8文字以上で設定してください。'
};
function show(view) {
var isSignup = view === 'signup' && !!signupView;
if (loginView) loginView.style.display = isSignup ? 'none' : '';
if (signupView) signupView.style.display = isSignup ? '' : 'none';
if (title) title.textContent = isSignup ? '新規登録' : 'ログイン';
if (subtitle) {
subtitle.textContent = isSignup
? '登録後、管理者の承認を経て利用できます'
: 'アカウントでサインインしてください';
}
if (history.replaceState) {
history.replaceState(null, '',
location.pathname + location.search + (isSignup ? '#signup' : ''));
}
}
var toggles = document.querySelectorAll('[data-show-view]');
for (var i = 0; i < toggles.length; i++) {
(function (el) {
el.addEventListener('click', function (e) {
e.preventDefault();
show(el.getAttribute('data-show-view'));
});
})(toggles[i]);
}
var err = new URLSearchParams(location.search).get('error');
if (err && ERRORS[err]) {
var box = document.getElementById('auth-error');
if (box) {
box.textContent = ERRORS[err];
box.style.display = '';
}
}
// signup 由来のエラー、または #signup 直リンクは登録ビューを開く
if (location.hash === '#signup' || err === 'signup' || err === 'weak') show('signup');
})();
</script>
</body>
</html>

View File

@ -104,6 +104,67 @@ describe('createRouter.pick', () => {
expect(counts).toEqual({ a: 2, b: 2, c: 2 });
});
// ── Sticky routing (x-aao-preferred-backend) ────────────────────────────
it('honors the preferred backend even when another is idler', () => {
const r = createRouter({
getBackends: () => [bk('a', 'qwen3:8b'), bk('b', 'qwen3:8b')],
registry: makeRegistry([
status('a', true, 3, 4), // busier, but preferred (KV cache lives here)
status('b', true, 0, 4), // idler
]),
});
expect(r.pick('qwen3:8b', 'a')?.id).toBe('a');
});
it('falls back to normal ranking when the preferred backend is saturated', () => {
const r = createRouter({
getBackends: () => [bk('a', 'qwen3:8b'), bk('b', 'qwen3:8b')],
registry: makeRegistry([
status('a', true, 4, 4), // preferred but full
status('b', true, 1, 4),
]),
});
expect(r.pick('qwen3:8b', 'a')?.id).toBe('b');
});
it('falls back when the preferred backend is offline', () => {
const r = createRouter({
getBackends: () => [bk('a', 'qwen3:8b'), bk('b', 'qwen3:8b')],
registry: makeRegistry([
status('a', false, 0, 4),
status('b', true, 1, 4),
]),
});
expect(r.pick('qwen3:8b', 'a')?.id).toBe('b');
});
it('ignores a preferred backend that does not serve the requested model/role', () => {
const r = createRouter({
getBackends: () => [bk('a', 'qwen3:8b'), bk('x', 'llama:70b')],
registry: makeRegistry([
status('a', true, 2, 4),
status('x', true, 0, 4), // idle but wrong model — must not leak across pools
]),
});
expect(r.pick('qwen3:8b', 'x')?.id).toBe('a');
});
it('preferred pick still reserves an inflight slot', () => {
const inflight = createBackendInflightCounter();
const r = createRouter({
getBackends: () => [bk('a', 'qwen3:8b', 2)],
registry: makeRegistry([status('a', true, 0, 2)]),
inflight,
});
expect(r.pick('qwen3:8b', 'a')?.id).toBe('a');
expect(inflight.get('a')).toBe(1);
expect(r.pick('qwen3:8b', 'a')?.id).toBe('a');
expect(inflight.get('a')).toBe(2);
// Saturated by reservations → preferred no longer admitted → null
expect(r.pick('qwen3:8b', 'a')).toBeNull();
});
it('treats registry-cold backends as idle (avoids startup outage)', () => {
// Registry has nothing yet — first request still routes.
const r = createRouter({

View File

@ -44,7 +44,13 @@ export interface Router {
* backend matches the model OR all matching backends are
* offline/saturated.
*/
pick(model: string): GatewayBackendConfig | null;
/**
* Pick a backend for the routing key. `preferredBackendId` is the sticky
* routing hint (x-aao-preferred-backend): honored when that backend is an
* admitted candidate (right role/model, online, free capacity), otherwise
* the normal least-busy ranking applies.
*/
pick(model: string, preferredBackendId?: string | null): GatewayBackendConfig | null;
/**
* Inspection helper returns the registry-augmented view of every
* configured backend (regardless of model). Useful for /v1/models
@ -118,7 +124,7 @@ export function createRouter(deps: CreateRouterDeps): Router {
}
return {
pick(model: string): GatewayBackendConfig | null {
pick(model: string, preferredBackendId?: string | null): GatewayBackendConfig | null {
// Note registry activity so the polling cadence tightens while
// we're actively routing. Without this, a single-page burst of
// requests would still see the idle 30s cadence.
@ -190,6 +196,21 @@ export function createRouter(deps: CreateRouterDeps): Router {
const candidates = warm.length > 0 ? warm : cold;
if (candidates.length === 0) return null;
// Sticky routing (x-aao-preferred-backend): if the client asks for a
// specific backend and that backend is an ADMITTED candidate (right
// role/model, online, has free capacity), serve it without ranking —
// the job's KV cache lives there, so a least-busy re-pick would trade
// a large prefill cost for a small load-balance win. When the
// preferred backend is saturated or offline it simply isn't in the
// candidate list and we fall through to the normal ranking.
if (preferredBackendId) {
const preferred = candidates.find(c => c.backend.id === preferredBackendId);
if (preferred) {
deps.inflight?.inc(preferred.backend.id);
return preferred.backend;
}
}
// Stable ascending sort by ratio.
candidates.sort((a, b) => a.ratio - b.ratio);
const bestRatio = candidates[0]!.ratio;

View File

@ -297,7 +297,13 @@ export function buildChatCompletionsHandler(deps: StreamProxyDeps) {
return;
}
const backend = deps.router.pick(model);
// Sticky routing hint from the worker (see router.pick): prefer the
// backend that already holds this job's KV cache when it has capacity.
const rawPreferred = req.headers['x-aao-preferred-backend'];
const preferredBackendId = typeof rawPreferred === 'string' && rawPreferred.trim().length > 0
? rawPreferred.trim()
: null;
const backend = deps.router.pick(model, preferredBackendId);
if (!backend) {
emitRequestMetric(deps.metrics, {
team: team0, backend: 'none', model, status: 'no_backend', durationMs: 0,

View File

@ -281,6 +281,17 @@ export class OpenAICompatClient {
return `Request timed out (${mins} minutes)`;
}
/**
* Backend the next request should prefer (gateway sticky routing for
* KV-cache reuse). Updated by the worker whenever the resolved backend
* changes; per-client so concurrent jobs never share affinity.
*/
private preferredBackendId: string | null = null;
setPreferredBackendId(backendId: string | null): void {
this.preferredBackendId = backendId;
}
async *chat(messages: Message[], tools?: ToolDef[], externalSignal?: AbortSignal): AsyncGenerator<LLMEvent> {
const controller = new AbortController();
// アイドルタイムアウト: チャンク受信のたびにリセットされる
@ -308,6 +319,13 @@ export class OpenAICompatClient {
if (this.apiKey) {
headers['Authorization'] = `Bearer ${this.apiKey}`;
}
// Sticky routing hint: ask the gateway to keep serving this job from
// the backend that already holds its KV cache. The gateway only honors
// it while that backend is online with free capacity; otherwise it
// re-routes normally. Direct (non-proxy) backends ignore the header.
if (this.preferredBackendId) {
headers['x-aao-preferred-backend'] = this.preferredBackendId;
}
const body: Record<string, unknown> = {
messages,

View File

@ -12,6 +12,7 @@ describe('activity log format', () => {
expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] [worker:worker-074] [mode:fast] Read: {"file_path":"input/a.png"}')).toEqual({
workerId: 'worker-074',
mode: 'fast',
backendId: null,
});
});
@ -20,6 +21,7 @@ describe('activity log format', () => {
expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] final: completed')).toEqual({
workerId: null,
mode: null,
backendId: null,
});
});
@ -27,6 +29,7 @@ describe('activity log format', () => {
expect(parseActivityLogMetadata('[2026-03-13T00:00:00.000Z] [worker:worker-148] [mode:quality] [execute] preview: checked draft')).toEqual({
workerId: 'worker-148',
mode: 'quality',
backendId: null,
});
});
@ -54,3 +57,24 @@ describe('activity log format', () => {
expect(formatDuration(Number.POSITIVE_INFINITY)).toBe('?');
});
});
describe('backend tag', () => {
it('formats and parses [backend:...] alongside worker/mode', () => {
const line = formatActivityLogEntry('LLM call done', {
workerId: 'aao-gateway',
mode: 'quality',
backendId: 'gpu-rtx-a',
});
expect(line).toContain('[worker:aao-gateway]');
expect(line).toContain('[backend:gpu-rtx-a]');
const parsed = parseActivityLogMetadata(line);
expect(parsed.workerId).toBe('aao-gateway');
expect(parsed.backendId).toBe('gpu-rtx-a');
});
it('omits the backend tag when backendId is unset', () => {
const line = formatActivityLogEntry('entry', { workerId: 'w1', mode: 'auto' });
expect(line).not.toContain('[backend:');
expect(parseActivityLogMetadata(line).backendId).toBeNull();
});
});

View File

@ -1,6 +1,8 @@
export interface ActivityLogMetadata {
workerId?: string | null;
mode?: string | null;
/** Physical backend behind a proxy worker (gateway-resolved), if known. */
backendId?: string | null;
}
const TOOL_SUMMARY_KEYS = [
@ -24,6 +26,7 @@ export function formatActivityLogMetadata(metadata?: ActivityLogMetadata): strin
const segments: string[] = [];
if (metadata.workerId) segments.push(`[worker:${metadata.workerId}]`);
if (metadata.mode) segments.push(`[mode:${metadata.mode}]`);
if (metadata.backendId) segments.push(`[backend:${metadata.backendId}]`);
return segments.join(' ');
}
@ -35,9 +38,11 @@ export function formatActivityLogEntry(entry: string, metadata?: ActivityLogMeta
export function parseActivityLogMetadata(line: string): ActivityLogMetadata {
const workerMatch = /\[worker:([^\]]+)\]/.exec(line);
const modeMatch = /\[mode:([^\]]+)\]/.exec(line);
const backendMatch = /\[backend:([^\]]+)\]/.exec(line);
return {
workerId: workerMatch?.[1] ?? null,
mode: modeMatch?.[1] ?? null,
backendId: backendMatch?.[1] ?? null,
};
}

View File

@ -1164,10 +1164,12 @@ export class Worker {
isLocalTask,
localTaskId,
workspacePath,
// Seed the sticky-backend guard with whatever was already persisted
// Seed the backend tracker with whatever was already persisted
// for this job (e.g. on retry / resume from ASK). Only matters for
// proxy workers; direct workers never produce a backend event.
isProxyWorker ? (job.lastBackendId ?? null) : null,
llmClient,
logMetadata,
);
// 開始コメント
@ -1500,24 +1502,34 @@ export class Worker {
localTaskId: number | null,
workspacePath: string,
/**
* Initial value of jobs.last_backend_id from the DB. Used to seed the
* sticky guard so callers don't repeatedly write the same value on
* every LLM iteration. Falsy/null = no backend resolved yet.
* Initial value of jobs.last_backend_id from the DB. Seeds the backend
* tracker (and the sticky-routing hint) so a resumed/retried job goes
* back to the backend that already holds its KV cache.
* Falsy/null = no backend resolved yet.
*/
initialLastBackendId: string | null = null,
/** LLM client of this job — receives the sticky-routing hint per switch. */
llmClient?: { setPreferredBackendId(backendId: string | null): void },
/**
* The reporter's metadata object (shared by reference): mutating
* `backendId` here makes every subsequent activity.log line carry
* `[backend:...]` so the Progress tab can show the physical backend
* behind a proxy worker.
*/
logMetadata?: ActivityLogMetadata,
): PieceRunCallbacks {
let movementStartTime = Date.now();
const toolUsageCounts = new Map<string, number>();
// Sticky-backend per design Open Question #3: take the first proxy
// backend the job sees and never overwrite it. Subsequent calls that
// happen to land on a different deployment are ignored at this layer
// so the UI Pet doesn't flicker between sprites. The resolver also
// guarantees that if the DB persist fails, the local sticky stays
// unset so the next event can retry (otherwise a transient DB error
// would orphan the worker → backend mapping for the lifetime of the
// job). See src/worker/sticky-backend.ts.
// Backend tracker (follow-current semantics, 2026-06): persists
// jobs.last_backend_id whenever the resolved backend CHANGES so the UI
// (pet, badges) follows where the job actually runs. Switches are rare
// because the gateway honors the x-aao-preferred-backend sticky hint
// (KV-cache reuse) — they only happen when the preferred backend goes
// offline or saturates. The tracker still guarantees that a failed DB
// persist leaves the in-memory value unchanged so the next event
// retries. See src/worker/sticky-backend.ts.
const workerIdLocal = this.workerId;
const onBackendResolvedHandler = createStickyBackendResolver({
const backendTracker = createStickyBackendResolver({
initial: initialLastBackendId,
persist: (backendId) => this.repo.updateJob(jobId, { lastBackendId: backendId }),
logger: {
@ -1528,6 +1540,12 @@ export class Worker {
workerId: workerIdLocal,
jobId,
});
// Seed the sticky-routing hint + activity-log backend tag from the DB
// value (resume/retry goes straight back to the cache-warm backend).
if (initialLastBackendId) {
llmClient?.setPreferredBackendId(initialLastBackendId);
if (logMetadata) logMetadata.backendId = initialLastBackendId;
}
// Phase 3b: local copy of the sticky backend so the LLM-call metric
// has a stable backend_id label even before the persist returns.
// Direct workers (non-proxy) never fire onBackendResolved, so we
@ -1616,20 +1634,23 @@ export class Worker {
}
},
onBackendResolved: (info) => {
// Phase 3b: update the sticky backend id used for LLM-call
// metrics. We capture every event (not just the first) so a
// routing change mid-job is reflected in the next iteration's
// counters; the DB-side sticky still preserves the first.
// Phase 3b: update the backend id used for LLM-call metrics.
if (info.backendId) {
metricBackendId = info.backendId;
// Sticky routing: ask the gateway to keep using this backend on
// the next request (KV-cache affinity).
llmClient?.setPreferredBackendId(info.backendId);
// Tag subsequent activity.log lines with the physical backend so
// the Progress tab shows more than the proxy worker's name.
if (logMetadata) logMetadata.backendId = info.backendId;
}
// Fire-and-forget: agent-loop's onBackendResolved signature is
// sync (void). The resolver handles persist errors internally;
// sync (void). The tracker handles persist errors internally;
// we just attach a final guard to log any unexpected throw.
// cacheKey is observed but not persisted at the job level —
// Phase B's NodeStatusWidget will track cache hits out-of-band.
onBackendResolvedHandler(info).catch(err => {
logger.warn(`[worker:${this.workerId}] sticky backend resolver threw for job ${jobId}: ${err}`);
backendTracker.onEvent(info).catch(err => {
logger.warn(`[worker:${this.workerId}] backend tracker threw for job ${jobId}: ${err}`);
});
},
onMovementComplete: (movementName, result) => {

View File

@ -13,11 +13,11 @@ function makeLogger(): StickyBackendLogger & {
};
}
describe('createStickyBackendResolver', () => {
it('persists the first backend, sets sticky, and logs at info', async () => {
describe('createStickyBackendResolver (follow-current semantics)', () => {
it('persists the first backend, advances current, and logs at info', async () => {
const logger = makeLogger();
const persist = vi.fn().mockResolvedValue(undefined);
const resolve = createStickyBackendResolver({
const tracker = createStickyBackendResolver({
initial: null,
persist,
logger,
@ -25,18 +25,19 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1',
});
await resolve({ backendId: 'gpu-a', cacheKey: null });
await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(1);
expect(persist).toHaveBeenCalledWith('gpu-a');
expect(tracker.current()).toBe('gpu-a');
expect(logger.calls.info).toHaveLength(1);
expect(logger.calls.info[0]).toContain('gpu-a');
});
it('short-circuits subsequent events once sticky is set', async () => {
it('follows backend switches: each CHANGE persists; repeats do not', async () => {
const logger = makeLogger();
const persist = vi.fn().mockResolvedValue(undefined);
const resolve = createStickyBackendResolver({
const tracker = createStickyBackendResolver({
initial: null,
persist,
logger,
@ -44,20 +45,24 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1',
});
await resolve({ backendId: 'gpu-a', cacheKey: null });
await resolve({ backendId: 'gpu-b', cacheKey: 'sha:xyz' });
await resolve({ backendId: 'gpu-a', cacheKey: null });
await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
await tracker.onEvent({ backendId: 'gpu-a', cacheKey: 'sha:1' }); // same — no persist
await tracker.onEvent({ backendId: 'gpu-b', cacheKey: null }); // switch — persists
await tracker.onEvent({ backendId: 'gpu-b', cacheKey: null }); // same — no persist
expect(persist).toHaveBeenCalledTimes(1);
// gpu-b ≠ sticky → debug; gpu-a == sticky → no log
expect(logger.calls.debug).toHaveLength(1);
expect(logger.calls.debug[0]).toContain('gpu-b');
expect(persist).toHaveBeenCalledTimes(2);
expect(persist).toHaveBeenNthCalledWith(1, 'gpu-a');
expect(persist).toHaveBeenNthCalledWith(2, 'gpu-b');
expect(tracker.current()).toBe('gpu-b');
// The switch log mentions both ends
expect(logger.calls.info[1]).toContain('gpu-a');
expect(logger.calls.info[1]).toContain('gpu-b');
});
it('honors initial sticky from DB without re-persisting', async () => {
it('honors initial value from DB: same backend does not re-persist, a switch does', async () => {
const logger = makeLogger();
const persist = vi.fn().mockResolvedValue(undefined);
const resolve = createStickyBackendResolver({
const tracker = createStickyBackendResolver({
initial: 'gpu-seed',
persist,
logger,
@ -65,21 +70,22 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1',
});
await resolve({ backendId: 'gpu-other', cacheKey: null });
await resolve({ backendId: 'gpu-seed', cacheKey: null });
expect(tracker.current()).toBe('gpu-seed');
await tracker.onEvent({ backendId: 'gpu-seed', cacheKey: null });
expect(persist).not.toHaveBeenCalled();
expect(logger.calls.debug).toHaveLength(1);
expect(logger.calls.debug[0]).toContain('gpu-other');
await tracker.onEvent({ backendId: 'gpu-other', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(1);
expect(tracker.current()).toBe('gpu-other');
});
it('does NOT set sticky when persist fails — next event retries', async () => {
it('does NOT advance when persist fails — next event retries', async () => {
const logger = makeLogger();
const persist = vi
.fn()
.mockRejectedValueOnce(new Error('SQLITE_BUSY'))
.mockResolvedValueOnce(undefined);
const resolve = createStickyBackendResolver({
const tracker = createStickyBackendResolver({
initial: null,
persist,
logger,
@ -87,23 +93,19 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1',
});
// First call: DB write fails → sticky unset → warn logged
await resolve({ backendId: 'gpu-a', cacheKey: null });
// First call: DB write fails → current stays null → warn logged
await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(1);
expect(tracker.current()).toBeNull();
expect(logger.calls.warn).toHaveLength(1);
expect(logger.calls.warn[0]).toContain('SQLITE_BUSY');
expect(logger.calls.info).toHaveLength(0);
// Second call: DB write succeeds → sticky set
await resolve({ backendId: 'gpu-b', cacheKey: null });
// Second call (same backend again): retries because current ≠ backendId
await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(2);
expect(persist).toHaveBeenLastCalledWith('gpu-b');
expect(tracker.current()).toBe('gpu-a');
expect(logger.calls.info).toHaveLength(1);
expect(logger.calls.info[0]).toContain('gpu-b');
// Third call: sticky is now set → no further persist
await resolve({ backendId: 'gpu-c', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(2);
});
it('retries on every event until persist succeeds (multiple failures)', async () => {
@ -113,7 +115,7 @@ describe('createStickyBackendResolver', () => {
.mockRejectedValueOnce(new Error('fail 1'))
.mockRejectedValueOnce(new Error('fail 2'))
.mockResolvedValueOnce(undefined);
const resolve = createStickyBackendResolver({
const tracker = createStickyBackendResolver({
initial: null,
persist,
logger,
@ -121,13 +123,13 @@ describe('createStickyBackendResolver', () => {
jobId: 'j1',
});
await resolve({ backendId: 'gpu-a', cacheKey: null });
await resolve({ backendId: 'gpu-b', cacheKey: null });
await resolve({ backendId: 'gpu-c', cacheKey: null });
await tracker.onEvent({ backendId: 'gpu-a', cacheKey: null });
await tracker.onEvent({ backendId: 'gpu-b', cacheKey: null });
await tracker.onEvent({ backendId: 'gpu-c', cacheKey: null });
expect(persist).toHaveBeenCalledTimes(3);
expect(logger.calls.warn).toHaveLength(2);
expect(logger.calls.info).toHaveLength(1);
expect(logger.calls.info[0]).toContain('gpu-c');
expect(tracker.current()).toBe('gpu-c');
});
});

View File

@ -1,24 +1,23 @@
/**
* Sticky-backend resolver for proxy worker jobs.
* Backend tracker for proxy worker jobs.
*
* Design (per
* docs/superpowers/specs/2026-05-18-multi-team-gpu-pool-and-node-status-design.md
* Open Question #3 case 1, "最初に確定したら以後 update しない"):
* History: originally "first backend wins" (2026-05-18 design, Open Question
* #3 case 1) to keep the UI pet from flickering while the gateway rebalanced
* every request. As of 2026-06 the gateway honors `x-aao-preferred-backend`
* (client-side sticky routing for KV-cache reuse), so backend switches are
* RARE they only happen when the preferred backend goes offline or
* saturates. The tracker therefore now follows the CURRENT backend:
*
* - For a proxy worker, every LLM call may resolve to a different
* physical backend (LiteLLM rebalances per request). The UI pet should
* not flicker, so we record only the FIRST backend a job sees.
* - Persistence happens via `updateJob({ lastBackendId })`. If that DB
* write FAILS, we must remain in the unset state so the next
* `onBackendResolved` event has a chance to retry. If we set the local
* sticky variable BEFORE persisting, a transient DB error would lose
* the binding permanently for the lifetime of the job (sticky check
* short-circuits all subsequent events) and the UI would never see
* the worker backend mapping.
* - `jobs.last_backend_id` is updated whenever the resolved backend CHANGES,
* so the UI (pet, badges) tracks where the job actually runs.
* - Persistence happens via `updateJob({ lastBackendId })`. If that DB write
* FAILS, the in-memory value is left unchanged so the next
* `onBackendResolved` event retries the persist (a transient DB error must
* not permanently lose the worker backend mapping).
*
* This module isolates the "set sticky only after persist succeeds"
* invariant from `Worker.buildPieceRunCallbacks`, which already has a
* dozen other concerns and is hard to unit-test in isolation.
* This module isolates the "advance only after persist succeeds" invariant
* from `Worker.buildPieceCallbacks`, which already has a dozen other
* concerns and is hard to unit-test in isolation.
*/
export interface StickyBackendLogger {
@ -32,16 +31,20 @@ export interface StickyBackendEvent {
cacheKey: string | null;
}
export interface BackendTracker {
/** The onBackendResolved callback for the agent loop (fire-and-forget safe). */
onEvent: (event: StickyBackendEvent) => Promise<void>;
/**
* The most recently persisted backend id (or the initial DB value).
* Used as the `x-aao-preferred-backend` hint on the next LLM request.
*/
current: () => string | null;
}
/**
* Build the `onBackendResolved` callback. `persist(backendId)` is the
* DB write (typically `repo.updateJob(jobId, { lastBackendId })`); it
* must reject on failure so we can keep sticky unset for retry.
*
* Returns an async function the agent-loop can call without awaiting
* errors are caught internally and logged. The function resolves once
* either: (a) sticky was already set and we short-circuited, (b) the
* persist succeeded and sticky is now set, or (c) the persist failed
* and sticky remains unset for the next event to retry.
* Build the backend tracker. `persist(backendId)` is the DB write
* (typically `repo.updateJob(jobId, { lastBackendId })`); it must reject on
* failure so the in-memory value stays put for retry.
*/
export function createStickyBackendResolver(opts: {
initial: string | null;
@ -49,33 +52,31 @@ export function createStickyBackendResolver(opts: {
logger: StickyBackendLogger;
workerId: string;
jobId: string;
}): (event: StickyBackendEvent) => Promise<void> {
}): BackendTracker {
const { initial, persist, logger, workerId, jobId } = opts;
let sticky: string | null = initial;
let current: string | null = initial;
return async function onBackendResolved({ backendId, cacheKey }: StickyBackendEvent): Promise<void> {
if (sticky) {
if (sticky !== backendId) {
logger.debug(
`[worker:${workerId}] job ${jobId} backend re-resolved to ${backendId} (sticky=${sticky}, cache=${cacheKey ?? 'miss'}); keeping sticky`,
);
}
return;
}
async function onEvent({ backendId, cacheKey }: StickyBackendEvent): Promise<void> {
if (current === backendId) return; // unchanged — nothing to persist
try {
await persist(backendId);
// Only set sticky AFTER persist succeeds. If we set first and persist
// failed, the next event would short-circuit on the sticky check and
// we'd never recover — the UI would render "no backend" forever.
sticky = backendId;
// Only advance AFTER persist succeeds. If we advanced first and the
// persist failed, the next identical event would short-circuit on the
// equality check and the DB would stay stale forever.
const previous = current;
current = backendId;
logger.info(
`[worker:${workerId}] job ${jobId} backend resolved: ${backendId} cache=${cacheKey ?? 'miss'}`,
previous
? `[worker:${workerId}] job ${jobId} backend switched: ${previous}${backendId} cache=${cacheKey ?? 'miss'}`
: `[worker:${workerId}] job ${jobId} backend resolved: ${backendId} cache=${cacheKey ?? 'miss'}`,
);
} catch (err) {
logger.warn(
`[worker:${workerId}] failed to persist lastBackendId for job ${jobId}: ${err}sticky left unset for retry`,
`[worker:${workerId}] failed to persist lastBackendId for job ${jobId}: ${err}keeping ${current ?? 'unset'} for retry`,
);
// Intentionally do NOT set sticky. Next event retries.
// Intentionally do NOT advance. Next event retries.
}
};
}
return { onEvent, current: () => current };
}

View File

@ -22,7 +22,7 @@ const KIND_COLORS: Record<string, { dot: string; badge: string; badgeText: strin
export const ActivityEventCard = memo(function ActivityEventCard({ event, isLast }: { event: ActivityEvent; isLast?: boolean }) {
const colors = KIND_COLORS[event.kind] ?? KIND_COLORS.other!;
const meta = formatActivityMeta(event.workerId, event.mode);
const meta = formatActivityMeta(event.workerId, event.mode, event.backendId);
return (
<div className="grid gap-3" style={{ gridTemplateColumns: '16px minmax(0,1fr)' }}>

View File

@ -115,10 +115,17 @@ export interface ActivityEvent {
timestamp: string | null;
workerId: string | null;
mode: string | null;
/** Physical backend behind a proxy worker (from [backend:...] tags). */
backendId: string | null;
}
export function formatActivityMeta(workerId: string | null, mode: string | null): string {
return [workerId ? `worker: ${workerPill(workerId)}` : '', mode ? `mode: ${mode}` : ''].filter(Boolean).join(' · ');
export function formatActivityMeta(workerId: string | null, mode: string | null, backendId?: string | null): string {
return [
workerId ? `worker: ${workerPill(workerId)}` : '',
// Show the physical backend when it differs from the (proxy) worker name.
backendId && backendId !== workerId ? `backend: ${workerPill(backendId)}` : '',
mode ? `mode: ${mode}` : '',
].filter(Boolean).join(' · ');
}
export function parseActivityLog(logText: string): ActivityEvent[] {
@ -130,13 +137,15 @@ export function parseActivityLog(logText: string): ActivityEvent[] {
const timestamp = timestampMatch?.[1] ?? null;
const workerId = /\[worker:([^\]]+)\]/.exec(rawLine)?.[1] ?? null;
const mode = /\[mode:([^\]]+)\]/.exec(rawLine)?.[1] ?? null;
const backendId = /\[backend:([^\]]+)\]/.exec(rawLine)?.[1] ?? null;
const line = rawLine
.replace(/^\[[^\]]+\]\s+/, '')
.replace(/\[worker:[^\]]+\]\s*/g, '')
.replace(/\[mode:[^\]]+\]\s*/g, '')
.replace(/\[backend:[^\]]+\]\s*/g, '')
.trim();
const base = { id: `${timestamp ?? 'line'}-${index}`, timestamp, workerId, mode };
const base = { id: `${timestamp ?? 'line'}-${index}`, timestamp, workerId, mode, backendId };
const movementStart = /^\[([^\]]+)\] (?:start|ステップ開始)$/.exec(line);
if (movementStart) {