maestro/src/worker.ts

import { Repository, Job, localTaskRepoName, type JobRole } from './db/repository.js';
import { userPiecesDir } from './user-folder/paths.js';
import { BrowserSessionRepo } from './db/browser-session-repo.js';
import { assertProfileOwner } from './engine/browser-session-auth.js';
import { initMasterKey, decryptUserDek, decryptStateBlob } from './crypto/sessions.js';
import { OpenAICompatClient } from './llm/openai-compat.js';
import { llmRoutingKey, shouldRequeueForModelMismatch } from './llm/routing-key.js';
import { loadPiece, runPiece, PieceRunCallbacks, PieceDef, type PieceRunResult } from './engine/piece-runner.js';
import { LocalProgressReporter } from './progress/local-reporter.js';
import { buildLocalConversationContext } from './engine/local-context.js';
import { AppConfig, isExecutionWorker, type WorkerDef, type ReflectionConfig, DEFAULT_NOTES_INJECT, type NotesInjectConfig } from './config.js';
import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from 'fs';
import { join } from 'path';
import { logger } from './logger.js';
import { commitWorkspaceChanges, ensureWorkspaceGitRepo } from './git/workspace-manager.js';
import { ContextManager, fetchOllamaContextLimit } from './engine/context-manager.js';
import { summarizeToolInput, type ActivityLogMetadata } from './progress/log-format.js';
import { ensureKeepaGraphs } from './engine/tools/amazon.js';
import type { McpTokenManager } from './mcp/token-manager.js';
import { mergeMcpConfig } from './mcp/config.js';
import { NotesService } from './notes/notes-service.js';
import { NotesRepository } from './notes/notes-repository.js';
import { createStickyBackendResolver } from './worker/sticky-backend.js';
import { pickIdlerIndex } from './worker/idle-routing.js';
import { jobEventBus } from './bridge/job-events.js';
import { normalizeToolNameForMetric } from './metrics/tool-name-allowlist.js';

const RETRY_HANDOFF_MAX_LENGTH = 8_000;
const RETRY_DIAGNOSTICS_PREVIEW_LENGTH = 1_200;
const RETRY_LESSONS_MAX_LINES = 12;

function buildTimeContextBlock(): string {
  const now = new Date();
  const utc = now.toISOString();
  const jst = new Intl.DateTimeFormat('ja-JP', {
    timeZone: 'Asia/Tokyo',
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit',
    hour12: false,
    weekday: 'short',
  }).format(now);

  return [
    '## 実行時刻コンテキスト',
    `- Current time (UTC): ${utc}`,
    `- Current time (JST): ${jst}`,
    '- 時刻依存の判断（今日/昨日/直近◯時間/最新ニュース等）は必ずこの時刻を基準に行うこと。',
    '',
  ].join('\n');
}

function getLocalTaskId(repoName: string): number | null {
  const match = /^local\/task-(\d+)$/.exec(repoName);
  if (!match) return null;
  return Number(match[1]);
}

function getSubTaskParentJobId(repoName: string): string | null {
  const match = /^subtask\/([0-9a-f-]{36})$/.exec(repoName);
  if (!match) return null;
  return match[1]!;
}

/**
 * Browser session を keying するための「論理タスク ID」を解決する。
 *
 * - local task の直接実行 → そのタスクの ID (string)
 * - subtask 実行 → 親方向に最大 5 段まで walk up し、最初に見つかる
 *   local task の ID。subtaskDepth は config 上 2 が上限なので余裕を見て 5
 * - 親が gitea issue 等 local task でないジョブの場合 → null (BrowseWeb は
 *   noVNC モードを使えない / 旧来の頭出しに fallback する)
 */
async function resolveSessionTaskId(
  repo: Repository,
  job: Job,
): Promise<{ taskId: string | undefined; userId: string | undefined }> {
  const directLocalTaskId = getLocalTaskId(job.repo);
  if (directLocalTaskId !== null) {
    return { taskId: String(directLocalTaskId), userId: job.ownerId ?? undefined };
  }

  let cursor: string | null = getSubTaskParentJobId(job.repo);
  let hops = 0;
  while (cursor && hops < 5) {
    const parent = await repo.getJob(cursor);
    if (!parent) return { taskId: undefined, userId: job.ownerId ?? undefined };
    const parentLocalTaskId = getLocalTaskId(parent.repo);
    if (parentLocalTaskId !== null) {
      return {
        taskId: String(parentLocalTaskId),
        // owner は親と同じはずだが、念のため fallback も用意
        userId: parent.ownerId ?? job.ownerId ?? undefined,
      };
    }
    cursor = parent.parentJobId ?? getSubTaskParentJobId(parent.repo);
    hops++;
  }
  return { taskId: undefined, userId: job.ownerId ?? undefined };
}

function buildUiMetadataBlock(job: Job): string {
  return [
    '---',
    `ui_profile: ${job.requiredRole}`,
    `ui_output_format: ${/ui_output_format:\s*(text|markdown|json)/i.exec(job.instruction)?.[1]?.toLowerCase() ?? 'markdown'}`,
    `ui_ask_policy: ${/ui_ask_policy:\s*(low|high)/i.exec(job.instruction)?.[1]?.toLowerCase() ?? 'low'}`,
    `ui_priority: ${/ui_priority:\s*(low|medium|high)/i.exec(job.instruction)?.[1]?.toLowerCase() ?? 'medium'}`,
    '---',
  ].join('\n');
}

function truncateRetryText(text: string, maxLength: number): string {
  const trimmed = text.trim();
  if (trimmed.length <= maxLength) return trimmed;
  return `${trimmed.slice(0, maxLength)}...`;
}

function readRetryLessons(workspacePath: string): string[] {
  const logPath = join(workspacePath, 'logs', 'lessons.jsonl');
  if (!existsSync(logPath)) return [];
  try {
    return readFileSync(logPath, 'utf-8')
      .split('\n')
      .filter(Boolean)
      .slice(-RETRY_LESSONS_MAX_LINES)
      .map((line) => {
        try {
          const data = JSON.parse(line) as { movement?: string; lessons?: string };
          const movement = data.movement ? `[${data.movement}] ` : '';
          return `- ${movement}${truncateRetryText(String(data.lessons ?? ''), 500)}`;
        } catch {
          return `- ${truncateRetryText(line, 500)}`;
        }
      })
      .filter((line) => line.trim() !== '-');
  } catch {
    return [];
  }
}

function readLastRunDiagnostics(workspacePath: string): string[] {
  const diagnosticsPath = join(workspacePath, 'logs', 'last-run-diagnostics.json');
  if (!existsSync(diagnosticsPath)) return [];
  try {
    const data = JSON.parse(readFileSync(diagnosticsPath, 'utf-8')) as {
      status?: string;
      abortReason?: string | null;
      finalOutput?: string;
      movementHistory?: Array<{
        name?: string;
        next?: string | null;
        toolsUsed?: string[];
        outputPreview?: string;
      }>;
      contextActions?: unknown[];
    };
    const lines: string[] = [];
    if (data.status || data.abortReason) {
      lines.push(`- 前回ステータス: ${data.status ?? 'unknown'}${data.abortReason ? ` (${data.abortReason})` : ''}`);
    }
    for (const movement of data.movementHistory ?? []) {
      const tools = movement.toolsUsed && movement.toolsUsed.length > 0
        ? ` tools=${movement.toolsUsed.join(',')}`
        : '';
      lines.push(`- movement ${movement.name ?? 'unknown'} -> ${movement.next ?? 'unknown'}${tools}`);
      if (movement.outputPreview) {
        lines.push(`  - output: ${truncateRetryText(movement.outputPreview, 300)}`);
      }
    }
    if (data.finalOutput) {
      lines.push(`- 最終出力プレビュー: ${truncateRetryText(data.finalOutput, RETRY_DIAGNOSTICS_PREVIEW_LENGTH)}`);
    }
    if (data.contextActions && data.contextActions.length > 0) {
      lines.push(`- context action: ${JSON.stringify(data.contextActions.slice(-3))}`);
    }
    return lines;
  } catch {
    return [];
  }
}

export function buildRetryHandoffSummary(params: {
  workspacePath: string;
  job: Job;
  errorMsg: string;
  nextRetryAt?: string | null;
  disposition: 'requeued_unhealthy' | 'retry' | 'failed';
}): string {
  const lines: string[] = [
    '# Retry Handoff',
    '',
    `Generated: ${new Date().toISOString()}`,
    `Job: ${params.job.id}`,
    `Disposition: ${params.disposition}`,
    `Attempt: ${params.job.attempt}/${params.job.maxAttempts}`,
  ];
  if (params.nextRetryAt) lines.push(`Next retry at: ${params.nextRetryAt}`);
  lines.push('', '## 失敗理由', truncateRetryText(params.errorMsg, 2_000));

  const diagnostics = readLastRunDiagnostics(params.workspacePath);
  if (diagnostics.length > 0) {
    lines.push('', '## 前回実行の要約', ...diagnostics);
  }

  const lessons = readRetryLessons(params.workspacePath);
  if (lessons.length > 0) {
    lines.push('', '## これまでの lessons', ...lessons);
  }

  lines.push(
    '',
    '## 次のエージェントへの指示',
    '- 前回の失敗理由と movement の進捗を踏まえ、同じ探索や同じ失敗を繰り返さないこと。',
    '- 既に完了している作業・生成済みファイル・確認済み事項は再実行前に workspace とログで確認すること。',
    '- 必要な情報が不足している場合は、全体を読み直すのではなく targeted Read / Grep / Bash で範囲を絞ること。',
  );

  return `${truncateRetryText(lines.join('\n'), RETRY_HANDOFF_MAX_LENGTH)}\n`;
}

function writeRetryHandoffSummary(params: {
  workspacePath: string | null | undefined;
  job: Job;
  errorMsg: string;
  nextRetryAt?: string | null;
  disposition: 'requeued_unhealthy' | 'retry' | 'failed';
}): void {
  if (!params.workspacePath) return;
  try {
    const logsDir = join(params.workspacePath, 'logs');
    mkdirSync(logsDir, { recursive: true });
    const summary = buildRetryHandoffSummary({
      workspacePath: params.workspacePath,
      job: params.job,
      errorMsg: params.errorMsg,
      nextRetryAt: params.nextRetryAt,
      disposition: params.disposition,
    });
    writeFileSync(join(logsDir, 'retry-summary.md'), summary, 'utf-8');
  } catch (err) {
    logger.warn(`[worker] failed to write retry handoff summary: ${err}`);
  }
}

function buildRetryHandoffContext(workspacePath: string, job: Job): string {
  if (job.attempt <= 1 && !job.errorSummary) return '';
  const summaryPath = join(workspacePath, 'logs', 'retry-summary.md');
  if (!existsSync(summaryPath)) return '';
  try {
    const summary = truncateRetryText(readFileSync(summaryPath, 'utf-8'), RETRY_HANDOFF_MAX_LENGTH);
    if (!summary) return '';
    return [
      '## Retry 復帰用引き継ぎ',
      'このジョブは前回実行からの retry / 再キューです。以下を前提に、重複作業を避けて継続してください。',
      '',
      summary,
    ].join('\n');
  } catch {
    return '';
  }
}

export async function maybeEnqueueReflection(
  repo: Repository,
  job: Job,
  outcome: 'succeeded' | 'failed' | 'aborted',
  cfg: Pick<ReflectionConfig, 'enabled' | 'workerRequired' | 'perUserDailyBudgetTokens'>,
  workers: WorkerDef[] = [],
): Promise<void> {
  if (!cfg.enabled) return;
  if (job.taskKind === 'reflection') return;
  // No-auth mode runs every job with ownerId=null. Reflection is per-user
  // (memory/pieces live under data/users/{userId}/), so fall back to the same
  // 'local' namespace the rest of the no-auth path uses (ToolContext, pieces,
  // user-folder). Without this the enqueue gate skipped forever and reflection
  // silently never ran in no-auth deployments.
  const reflectionOwner = job.ownerId ?? 'local';

  // worker_required enforcement: when true, at least one worker must have 'reflection' in its roles
  if (cfg.workerRequired) {
    const hasReflectionWorker = workers.some(
      (w) => Array.isArray(w.roles) && w.roles.includes('reflection'),
    );
    if (!hasReflectionWorker) {
      logger.warn(`[reflection] enqueue skipped reason=no_reflection_worker user=${reflectionOwner}`);
      return;
    }
  }

  // Per-user daily token budget check.
  // Cap=0 means "no limit" — useful for fresh installs that haven't tuned the budget yet.
  const cap = cfg.perUserDailyBudgetTokens ?? 0;
  if (cap > 0) {
    // Compute today's start in UTC (00:00:00.000 UTC).
    const now = new Date();
    const todayStartMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
    const metrics = repo.aggregateReflectionMetrics(reflectionOwner, todayStartMs);
    const spent = metrics.tokensIn + metrics.tokensOut;
    if (spent >= cap) {
      const spentM = (spent / 1_000_000).toFixed(1);
      const capM = (cap / 1_000_000).toFixed(1);
      logger.info(`[reflection] enqueue skipped reason=budget user=${reflectionOwner} spent=${spentM}M cap=${capM}M`);
      return;
    }
  }

  const payload = JSON.stringify({
    originalJobId: job.id,
    userId: reflectionOwner,
    pieceName: job.pieceName,
    outcome,
  });
  await repo.createJob({
    repo: `local/reflection-${job.id}`,
    issueNumber: 0,
    instruction: '',
    pieceName: 'reflection',
    role: 'reflection',
    ownerId: reflectionOwner,
    visibility: 'private',
    taskKind: 'reflection',
    payload,
  } as any);
  logger.info(`[reflection] enqueued original=${job.id} owner=${reflectionOwner} piece=${job.pieceName} outcome=${outcome}`);
}

export class Worker {
  private running = false;
  private inflight = 0;
  private polling = false;
  private stopped = false;
  private pollInterval: ReturnType<typeof setInterval> | null = null;
  private healthInterval: ReturnType<typeof setInterval> | null = null;
  /** Live sibling list, injected by WorkerManager for idle-preferring claims. */
  private siblingsAccessor: (() => Worker[]) | null = null;
  /** Last initialize() result — whether we'd actually claim if poked. */
  private lastAvailable = false;
  /** Job id we deferred to an idler last round (safety net against stuck yields). */
  private lastYieldedJobId: string | null = null;
  private workerId: string;
  private endpoint: string;
  private model: string | undefined;
  private availableModels: Set<string> = new Set();
  private healthy = false;
  private lastHealthError: string | null = null;
  private contextLimitTokens: number = 128_000;
  private mcpTokenManager: McpTokenManager | null = null;
  /**
   * Phase 3b: optional Prometheus metrics handle. When set, the worker
   * emits jobs_total / active_jobs / job_duration_seconds in
   * executeJob's start/finally, llm_calls_total via the AgentLoop
   * onLLMCall callback, and tool_calls_total via the new onToolMetric
   * callback in agent-loop.ts. Wired by WorkerManager after the metric
   * registry exists.
   */
  private workerMetrics: import('./metrics/worker-metrics.js').WorkerMetrics | null = null;
  private skillCatalog: import('./engine/skills.js').SkillCatalog | null = null;
  /**
   * V2 Web Push notification service. Null when push is disabled via
   * config or when the worker is built without one (tests). Hooks fire
   * via enqueue (fire-and-forget) so a slow push service can't block job
   * execution.
   *
   * Spec: docs/superpowers/specs/2026-05-28-browser-notifications-v2-webpush.md.
   */
  private pushService: import('./push-service.js').PushService | null = null;

  constructor(
    workerId: string,
    endpoint: string,
    model: string | undefined,
    private repo: Repository,
    private config: AppConfig,
  ) {
    this.workerId = workerId;
    this.endpoint = endpoint;
    this.model = model;
  }

  public setMcpTokenManager(tm: McpTokenManager | null): void {
    this.mcpTokenManager = tm;
  }

  public setSkillCatalog(catalog: import('./engine/skills.js').SkillCatalog): void {
    this.skillCatalog = catalog;
  }

  public setPushService(svc: import('./push-service.js').PushService | null): void {
    this.pushService = svc;
  }

  /**
   * Hot-swap the global config on a still-running worker. Used by
   * WorkerManager's differential rebuild: when a config change does NOT
   * touch this worker's own def (e.g. a tool size limit changed), we keep
   * the worker — and any in-flight job — alive and just refresh the config
   * it reads for future jobs. Def-derived values (roles, endpoint,
   * maxConcurrency) are read live via getWorkerDef(), so they stay correct.
   * In-flight jobs keep the settings they captured at start, by design.
   */
  public updateConfig(config: AppConfig): void {
    this.config = config;
  }

  /** Jobs currently executing in this worker's detached loops. */
  public get inflightCount(): number {
    return this.inflight;
  }

  /** Free execution slots right now (max_concurrency − inflight). */
  public get freeSlots(): number {
    return Math.max(0, this.getMaxConcurrency() - this.inflight);
  }

  /** True when this worker would actually pick up a job if poked. */
  public get availableForClaim(): boolean {
    return this.running && !this.stopped && this.lastAvailable
      && isExecutionWorker(this.getWorkerDef());
  }

  /** Whether this worker serves jobs of the given role. */
  public canClaimRole(role: string): boolean {
    return this.supportsRole(role);
  }

  /** Nudge this worker to poll immediately (hands a yielded job to an idler). */
  public pokePoll(): void {
    void this.processNext();
  }

  /** WorkerManager injects the live sibling list so claims prefer idler workers. */
  public setSiblingsAccessor(fn: () => Worker[]): void {
    this.siblingsAccessor = fn;
  }

  /**
   * Find the idlest sibling that has strictly more free slots than us and
   * serves `role`. Returns null when we are (tied for) the most free, in which
   * case we should claim the job ourselves.
   */
  private findIdlerCompetitor(role: string): Worker | null {
    const others = (this.siblingsAccessor?.() ?? []).filter((s) => s !== this);
    const idx = pickIdlerIndex(
      this.freeSlots,
      others.map((s) => ({
        freeSlots: s.freeSlots,
        availableForClaim: s.availableForClaim,
        servesRole: s.canClaimRole(role),
      })),
    );
    return idx >= 0 ? others[idx]! : null;
  }

  /**
   * Fire a V2 push for a job status transition. Fire-and-forget — never
   * throws and never awaits the underlying queue. Skips silently when
   *   - push is disabled (pushService === null)
   *   - the job has no owner (legacy / system-issued)
   *   - the job is not a local task (sub-task pushes go to the parent owner;
   *     we still send for direct local tasks).
   * Reflection jobs are also skipped — they're an internal mechanism, not
   * user-facing work.
   */
  private enqueuePush(
    job: Job,
    event: 'running' | 'succeeded' | 'failed' | 'waiting_human',
  ): void {
    if (!this.pushService) return;
    if (job.taskKind === 'reflection') return;
    if (!job.ownerId) return;
    const localTaskId = getLocalTaskId(job.repo);
    if (localTaskId === null) return;
    // Title lookup is cheap (single-row SELECT) and synchronous via the
    // raw db handle. Falling back to a generic label is fine — the
    // push-service uses privacy-default payloads unless the user opted
    // in via include_details.
    let taskTitle = `Task #${localTaskId}`;
    try {
      const row = this.repo.getDb()
        .prepare('SELECT title FROM local_tasks WHERE id = ?')
        .get(localTaskId) as { title: string | null } | undefined;
      if (row?.title) taskTitle = row.title;
    } catch {
      // best-effort; fall through with default title
    }
    this.pushService.enqueue({
      event,
      taskId: localTaskId,
      taskTitle,
      pieceName: job.pieceName,
      ownerId: job.ownerId,
    });
  }

  /**
   * Phase 3b: install (or remove) the Prometheus metrics handle.
   * Idempotent — calling with the same handle twice is fine. Null
   * clears the handle, useful when reconfiguring at runtime.
   */
  public setWorkerMetrics(
    metrics: import('./metrics/worker-metrics.js').WorkerMetrics | null,
  ): void {
    this.workerMetrics = metrics;
  }

  private getWorkerDef(): WorkerDef {
    const workerDef = this.config.provider.workers.find((worker) => worker.id === this.workerId);
    if (!workerDef) {
      throw new Error(`Worker config not found: ${this.workerId}`);
    }
    return workerDef;
  }

  private getSupportedRoles(): string[] {
    return this.getWorkerDef().roles ?? ['auto', 'fast', 'quality'];
  }

  private getMaxConcurrency(): number {
    return Math.max(1, this.getWorkerDef().maxConcurrency ?? 1);
  }

  async initialize(): Promise<boolean> {
    const workerDef = this.getWorkerDef();
    const enabled = workerDef.enabled !== false;
    if (!enabled) {
      await this.repo.upsertWorkerNode({
        workerId: this.workerId,
        endpoint: this.endpoint,
        enabled: false,
        healthy: false,
        roles: this.getSupportedRoles(),
        availableModels: [],
        inflightJobs: this.inflight,
        maxConcurrency: this.getMaxConcurrency(),
        lastError: 'disabled by config',
      });
      this.healthy = false;
      this.lastHealthError = 'disabled by config';
      logger.info(`[worker:${this.workerId}] disabled by config; skipping polling`);
      return false;
    }

    try {
      const ollamaBase = this.endpoint.replace(/\/v1\/?$/, '');
      // Try Ollama /api/tags first, then fall back to OpenAI-compatible /v1/models.
      //
      // Forward `Authorization: Bearer <apiKey>` when the worker has one
      // configured. The discovery probes (/api/tags / /v1/models) were
      // previously sent un-authenticated, which caused 30s-interval 401
      // floods against AAO Gateway endpoints (gateway requires Bearer auth
      // on /v1/models) — the worker then logged "failed to fetch model
      // list" indefinitely and `availableModels` stayed empty.
      // Discovered during 2026-05-20 dogfooding on production aao.
      const apiKey = this.getWorkerDef().apiKey;
      const init: RequestInit = apiKey
        ? { headers: { Authorization: `Bearer ${apiKey}` } }
        : {};
      let models: string[] = [];
      const ollamaRes = await fetch(`${ollamaBase}/api/tags`, init).catch(() => null);
      if (ollamaRes?.ok) {
        const data = await ollamaRes.json() as { models?: Array<{ name: string }> };
        models = (data.models ?? []).map((m: { name: string }) => m.name);
      } else {
        const openaiBase = this.endpoint.replace(/\/?$/, '');
        const openaiRes = await fetch(`${openaiBase}/models`, init).catch(() => null);
        if (openaiRes?.ok) {
          const data = await openaiRes.json() as { data?: Array<{ id: string }> };
          models = (data.data ?? []).map((m: { id: string }) => m.id);
        } else if (this.model) {
          throw new Error(`failed to fetch model list from both /api/tags and /v1/models`);
        }
        // llama-server compat: model 未設定なら model 一覧 API は必須ではないので空配列で続行。
      }
      this.availableModels = new Set(models);
      await this.repo.upsertWorkerNode({
        workerId: this.workerId,
        endpoint: this.endpoint,
        enabled: true,
        healthy: true,
        roles: this.getSupportedRoles(),
        availableModels: [...this.availableModels],
        inflightJobs: this.inflight,
        maxConcurrency: this.getMaxConcurrency(),
        lastError: null,
      });
      if (!this.healthy || this.lastHealthError !== null) {
        logger.info(`[worker:${this.workerId}] available models: ${[...this.availableModels].join(', ')}`);
      }
      this.healthy = true;
      this.lastHealthError = null;

      // Auto-detect context limit from Ollama if not configured
      if (!this.config.context?.limitTokens) {
        if (this.model) {
          const contextLimit = await fetchOllamaContextLimit(this.endpoint, this.model);
          if (contextLimit !== this.contextLimitTokens) {
            logger.info(`[worker:${this.workerId}] context limit updated: ${contextLimit} tokens`);
            this.contextLimitTokens = contextLimit;
          }
        } else {
          // No model configured — try llama.cpp /props endpoint for context limit
          const contextLimit = await fetchOllamaContextLimit(this.endpoint, '');
          if (contextLimit !== this.contextLimitTokens) {
            logger.info(`[worker:${this.workerId}] context limit updated: ${contextLimit} tokens`);
            this.contextLimitTokens = contextLimit;
          }
        }
      } else {
        this.contextLimitTokens = this.config.context.limitTokens;
      }

      return true;
    } catch (e) {
      const errorMessage = e instanceof Error ? e.message : String(e);
      this.availableModels.clear();
      await this.repo.upsertWorkerNode({
        workerId: this.workerId,
        endpoint: this.endpoint,
        enabled: true,
        healthy: false,
        roles: this.getSupportedRoles(),
        availableModels: [],
        inflightJobs: this.inflight,
        maxConcurrency: this.getMaxConcurrency(),
        lastError: errorMessage,
      });
      if (this.healthy || this.lastHealthError !== errorMessage) {
        logger.warn(`[worker:${this.workerId}] failed to fetch model list: ${e}`);
      }
      this.healthy = false;
      this.lastHealthError = errorMessage;
      return false;
    }
  }

  start(): void {
    if (this.running) return;
    this.running = true;
    logger.info(`[worker:${this.workerId}] started`);

    const tick = () => void this.processNext();
    tick();
    const baseInterval = 5000;
    const jitter = Math.floor(Math.random() * 2000);
    this.pollInterval = setInterval(tick, baseInterval + jitter);
    const healthIntervalSeconds = Math.max(10, this.getWorkerDef().healthcheckIntervalSeconds ?? 30);
    this.healthInterval = setInterval(() => void this.initialize(), healthIntervalSeconds * 1000);
  }

  stop(): void {
    this.running = false;
    this.stopped = true;
    if (this.pollInterval) {
      clearInterval(this.pollInterval);
      this.pollInterval = null;
    }
    if (this.healthInterval) {
      clearInterval(this.healthInterval);
      this.healthInterval = null;
    }
    logger.info(`[worker:${this.workerId}] stopped`);
  }

  async waitForCompletion(timeoutMs = 30000): Promise<boolean> {
    if (this.inflight === 0) return true;
    const start = Date.now();
    while (this.inflight > 0 && (Date.now() - start) < timeoutMs) {
      await new Promise(resolve => setTimeout(resolve, 500));
    }
    return this.inflight === 0;
  }

  get id(): string { return this.workerId; }

  private async processNext(): Promise<void> {
    if (!isExecutionWorker(this.getWorkerDef()) || !this.running || this.stopped) return;
    if (this.polling) return;            // claim loop is single-flight (prevents over-claim)
    this.polling = true;
    try {
      // スタックジョブ watchdog: LLM タイムアウトの2倍を閾値にする
      try {
        const staleMinutes = Math.max(20, (this.config.provider.timeoutMinutes ?? 10) * 2);
        this.repo.recoverStuckRunningJobs(staleMinutes);
      } catch (err) {
        logger.warn(`[worker:${this.workerId}] recoverStuckRunningJobs error: ${err}`);
      }

      const available = await this.initialize();
      this.lastAvailable = available;
      if (!available) return;

      const max = this.getMaxConcurrency();
      while (this.inflight < max && this.running && !this.stopped) {
        // Idle-preferring gate (most-free-wins): if a strictly-idler sibling
        // serves the next job's role, hand it off (nudge that worker) instead
        // of piling on. Safety net: if we already deferred this exact job last
        // round and it is still here, the idler didn't take it (unhealthy /
        // raced) — claim it ourselves so a job never gets stuck.
        //
        // Only consult the gate when there are sibling workers to defer to AND
        // the repo supports peeking. Single-worker setups and unit tests skip
        // it entirely — no extra query, no added latency, original claim timing.
        const siblings = this.siblingsAccessor?.();
        if (siblings && siblings.length > 1 && this.repo.peekNextClaimable) {
          const peek = await this.repo.peekNextClaimable(this.workerId);
          if (peek && peek.id !== this.lastYieldedJobId) {
            const idler = this.findIdlerCompetitor(peek.requiredRole);
            if (idler) {
              this.lastYieldedJobId = peek.id;
              idler.pokePoll();
              break;
            }
          }
          this.lastYieldedJobId = null;
        }
        // リトライジョブを優先
        const job = await this.repo.claimNextRetryJob(this.workerId)
          ?? await this.repo.claimNextJob(this.workerId);
        if (!job) break;
        this.inflight++;
        void this.runJobTracked(job);    // 並行実行: await しない
      }
    } catch (err) {
      logger.error(`[worker:${this.workerId}] processNext error: ${err}`);
    } finally {
      this.polling = false;
    }
  }

  /** Run one job to completion, always restoring the inflight counter. */
  private async runJobTracked(job: Job): Promise<void> {
    try {
      await this.executeJob(job);
    } catch (err) {
      logger.error(`[worker:${this.workerId}] runJobTracked error job=${job.id}: ${err}`);
    } finally {
      this.inflight--;
      await this.reportInflight();
    }
  }

  /** Push the live inflight count (and current health) to the worker_nodes row. */
  private async reportInflight(): Promise<void> {
    try {
      await this.repo.updateWorkerNodeHealth(this.workerId, {
        healthy: this.healthy,
        lastError: this.lastHealthError,
        inflightJobs: this.inflight,
        availableModels: [...this.availableModels],
      });
    } catch (err) {
      logger.warn(`[worker:${this.workerId}] reportInflight failed: ${err}`);
    }
  }

  private supportsRole(role: string): boolean {
    return this.getSupportedRoles().includes(role);
  }

  private buildLogMetadata(role: JobRole): ActivityLogMetadata {
    return { workerId: this.workerId, mode: role };
  }

  /**
   * サブタスクの ASK に対して、親ジョブの文脈を使って LLM に回答を生成させる
   */
  private async answerSubtaskAsk(subtaskJob: Job, parentJobId: string, question: string): Promise<string> {
    const parentJob = await this.repo.getJob(parentJobId);
    const parentInstruction = parentJob?.instruction ?? '(不明)';

    const workerDefForAnswer = this.getWorkerDef();
    // Gateway routes by the subtask's tier; direct keeps the worker's model.
    const resolvedModel = llmRoutingKey({
      isGateway: workerDefForAnswer.proxy === true,
      role: subtaskJob.requiredRole,
      resolveDirectModel: () => this.model,
    });
    const timeoutMs = (this.config.provider.timeoutMinutes ?? 10) * 60 * 1000;
    const llmClient = new OpenAICompatClient(
      this.endpoint,
      resolvedModel,
      workerDefForAnswer.apiKey,
      this.config.provider.retry,
      timeoutMs,
      this.contextLimitTokens,
      this.config.safety?.promptGuardRatio,
      undefined,
      { proxy: workerDefForAnswer.proxy === true },
    );

    const messages: import('./llm/openai-compat.js').Message[] = [
      {
        role: 'system',
        content: [
          'あなたはタスクを管理する親エージェントです。',
          'サブタスクがユーザーに確認を求めていますが、あなたが代わりに回答してください。',
          '元の依頼の意図を汲み取り、サブタスクが作業を継続できるよう具体的に回答してください。',
          '回答のみを簡潔に返してください。',
        ].join('\n'),
      },
      {
        role: 'user',
        content: [
          '## 元の依頼',
          parentInstruction,
          '',
          '## サブタスクの指示',
          subtaskJob.instruction,
          '',
          '## サブタスクからの質問',
          question,
        ].join('\n'),
      },
    ];

    let answer = '';
    for await (const event of llmClient.chat(messages)) {
      if (event.type === 'text') {
        answer += event.text;
      } else if (event.type === 'error') {
        throw new Error(`LLM error: ${event.error}`);
      }
    }
    return answer.trim() || '特に制約はありません。あなたの判断で進めてください。';
  }

  private writeRunDiagnostics(workspacePath: string, result: PieceRunResult): void {
    try {
      const logsDir = join(workspacePath, 'logs');
      mkdirSync(logsDir, { recursive: true });
      const diagnostics = {
        generatedAt: new Date().toISOString(),
        status: result.status,
        abortReason: result.abortReason ?? null,
        finalOutput: result.finalOutput,
        movementHistory: result.movementHistory.map((entry) => ({
          name: entry.name,
          next: entry.result.next,
          toolsUsed: entry.result.toolsUsed,
          outputPreview: entry.result.output.slice(0, 600),
        })),
        contextActions: result.contextActions,
      };
      writeFileSync(join(logsDir, 'last-run-diagnostics.json'), `${JSON.stringify(diagnostics, null, 2)}\n`, 'utf-8');
    } catch (err) {
      logger.warn(`[worker:${this.workerId}] failed to write run diagnostics: ${err}`);
    }
  }

  /**
   * Resolve the workspace path for a job and ensure the directory tree
   * (input/output/logs + a git repo) exists. Persists the resolved path
   * back to the job + local-task records so downstream consumers can find
   * the workspace.
   *
   * Throws on jobs that are neither local tasks nor sub-tasks (the orchestrator
   * doesn't currently spin its own workspaces for raw repo/issue jobs).
   */
  private async prepareJobWorkspace(
    job: Job,
    isLocalTask: boolean,
    isSubTask: boolean,
    localTaskId: number | null,
  ): Promise<string> {
    const { repo: repoName, issueNumber, id: jobId } = job;
    const workspacePath = isLocalTask
      ? join(this.config.worktreeDir, 'local', String(localTaskId))
      : isSubTask
        ? (job.worktreePath ?? join(this.config.worktreeDir, 'subtasks', jobId))
        : join(this.config.worktreeDir, repoName, String(issueNumber));

    if (isLocalTask) {
      mkdirSync(workspacePath, { recursive: true });
      mkdirSync(join(workspacePath, 'input'), { recursive: true });
      mkdirSync(join(workspacePath, 'output'), { recursive: true });
      mkdirSync(join(workspacePath, 'logs'), { recursive: true });
      await ensureWorkspaceGitRepo(workspacePath);
      if (localTaskId !== null) {
        await this.repo.updateLocalTask(localTaskId, { workspacePath });
      }
    } else if (isSubTask) {
      // SpawnSubTask 経由で worktreePath が設定されている前提
      if (!job.worktreePath) {
        throw new Error(`Sub-task job ${jobId} has no worktreePath set`);
      }
      mkdirSync(job.worktreePath, { recursive: true });
      mkdirSync(join(job.worktreePath, 'output'), { recursive: true });
      mkdirSync(join(job.worktreePath, 'logs'), { recursive: true });
      await ensureWorkspaceGitRepo(job.worktreePath);
    } else {
      throw new Error(`Unsupported job type: repo="${repoName}" is neither a local task nor a sub-task`);
    }

    await this.repo.updateJob(jobId, { worktreePath: workspacePath });
    return workspacePath;
  }

  /**
   * Run the two pre-execution gates: role capability check, issue lock.
   * On failure, requeue the job (so another worker can pick it up) and
   * return false so executeJob returns early.
   */
  private async acquireJobOrRequeue(job: Job): Promise<boolean> {
    const { repo: repoName, issueNumber, id: jobId } = job;

    if (!this.supportsRole(job.requiredRole)) {
      await this.repo.updateJob(jobId, { status: 'queued', workerId: null });
      await this.repo.addAuditLog(jobId, 'job_requeued_capability_mismatch', 'worker', {
        workerId: this.workerId,
        requiredRole: job.requiredRole,
      });
      logger.info(`[worker:${this.workerId}] requeued job ${jobId} due to role mismatch (role=${job.requiredRole})`);
      return false;
    }

    const locked = await this.repo.lockIssue(repoName, issueNumber, jobId);
    if (!locked) {
      await this.repo.updateJob(jobId, { status: 'queued', workerId: null });
      await this.repo.addAuditLog(jobId, 'job_requeued_issue_locked', 'worker', {
        workerId: this.workerId,
      });
      logger.info(`[worker:${this.workerId}] job ${jobId}: issue ${repoName}#${issueNumber} already locked, skipping`);
      return false;
    }

    return true;
  }

  private async executeJob(job: Job): Promise<void> {
    const { repo: repoName, issueNumber, id: jobId } = job;
    const localTaskId = getLocalTaskId(repoName);
    const isLocalTask = localTaskId !== null;
    const parentJobId = getSubTaskParentJobId(repoName);
    const isSubTask = parentJobId !== null;

    const logMetadata = this.buildLogMetadata(job.requiredRole);
    if (!(await this.acquireJobOrRequeue(job))) return;

    // Phase 3b: job-lifecycle metrics. Inc active_jobs at start; capture
    // a terminal status + duration in the finally block. `profile` maps
    // to the assigned required role (the multi-profile / multi-piece
    // operator-facing dimension).
    const metricPiece = job.pieceName ?? 'unknown';
    const metricProfile = job.requiredRole ?? 'unknown';
    const jobStartedAtMs = Date.now();
    let metricFinalStatus: 'succeeded' | 'failed' | 'aborted' | 'cancelled' | 'waiting_human' | 'error' = 'error';
    if (this.workerMetrics) {
      try {
        this.workerMetrics.activeJobs.labels({ piece: metricPiece, profile: metricProfile }).inc();
      } catch { /* metrics never affect business logic */ }
    }

    await this.repo.updateWorkerNodeHealth(this.workerId, {
      healthy: this.healthy,
      lastError: this.lastHealthError,
      inflightJobs: this.inflight,
      availableModels: [...this.availableModels],
    });

    // claimNextJob がすでに status = 'running' にセット済み
    await this.repo.addAuditLog(jobId, 'job_started', 'worker', {});

    // V2 push: notify on the first time a job transitions queued→running.
    // Retry runs are intentionally silent — V1's 4s debounce relied on this
    // and we keep the same UX (one running notification, not one per retry).
    if (job.attempt === 1) {
      this.enqueuePush(job, 'running');
    }

    // Reflection jobs bypass workspace preparation and the agent / LLM loop.
    // task_kind='agent' (default) keeps the pre-existing piece-runner path.
    if (job.taskKind === 'reflection') {
      try {
        await this.handleReflectionJob(job);
      } finally {
        await this.repo.updateWorkerNodeHealth(this.workerId, {
          healthy: this.healthy,
          lastError: this.lastHealthError,
          inflightJobs: this.inflight,
          availableModels: [...this.availableModels],
        });
        await this.repo.unlockIssue(repoName, issueNumber);
      }
      return;
    }

    const workspacePath = await this.prepareJobWorkspace(job, isLocalTask, isSubTask, localTaskId);

    // 進捗レポーター
    // ローカルタスク・サブタスクともに activity.log を書き出す
    // サブタスクでは isSubTask=true を渡し、DB コメント書き込みをスキップする
    const reporter = new LocalProgressReporter(this.repo, localTaskId ?? issueNumber, workspacePath, logMetadata, isSubTask);

    // 会話コンテキストの組み立て
    let enrichedInstruction = `${buildTimeContextBlock()}${job.instruction}`;

    if (isLocalTask) {
      try {
        const comments = await this.repo.listLocalTaskComments(localTaskId);
        const outputFiles = this.listDir(join(workspacePath, 'output'));
        const inputFiles = this.listDir(join(workspacePath, 'input'));
        const contextBody = buildLocalConversationContext({
          comments,
          jobInstruction: job.instruction,
          inputFiles,
          outputFiles,
        });
        enrichedInstruction = `${buildTimeContextBlock()}${contextBody}`;
      } catch (err) {
        logger.warn(`[worker:${this.workerId}] failed to build local context: ${err}`);
      }
    }

    const retryHandoffContext = buildRetryHandoffContext(workspacePath, job);
    if (retryHandoffContext) {
      enrichedInstruction = `${enrichedInstruction}\n\n${retryHandoffContext}`;
    }

    // watchdog 誤検知防止: runPiece 実行中に updated_at を定期更新
    let heartbeatTimer: ReturnType<typeof setInterval> | undefined;
    try {
      // Piece 読み込み: per-user カスタムディレクトリ → global カスタムディレクトリ → builtin の順に探索
      // No-auth jobs (ownerId null) resolve pieces from data/users/local/pieces, matching
      // where no-auth POST now writes (LOCAL_OWNER='local' in pieces-api.ts).
      const userFolderRoot = this.config.userFolderRoot ?? './data/users';
      const ownerForPieces = job.ownerId ?? 'local';
      const customPieceDirs = [
        userPiecesDir(userFolderRoot, ownerForPieces),
        this.config.customPiecesDir,
      ].filter((d): d is string => !!d);
      logger.info(`[worker:${this.workerId}] job ${jobId} loadPiece piece=${job.pieceName} customDirs=[${customPieceDirs.join(', ') || 'none'}] piecesDir=pieces`);
      const piece = loadPiece(job.pieceName, 'pieces', customPieceDirs);
      // Model-mismatch requeue gate (direct mode only — gateway routes by
      // role, see shouldRequeueForModelMismatch).
      if (
        shouldRequeueForModelMismatch({
          isGateway: this.getWorkerDef().proxy === true,
          pieceModel: piece.model,
          availableModels: this.availableModels,
          workerModel: this.model,
        })
      ) {
        await this.repo.updateJob(jobId, {
          status: 'queued',
          workerId: null,
          errorSummary: `Required model ${piece.model} is not available on ${this.workerId}`,
        });
        await this.repo.addAuditLog(jobId, 'job_requeued_model_mismatch', 'worker', {
          workerId: this.workerId,
          requiredModel: piece.model,
          availableModels: [...this.availableModels],
        });
        logger.info(`[worker:${this.workerId}] requeued job ${jobId} due to model mismatch (${piece.model})`);
        return;
      }

      // MCP 認証ゲート: piece.required_mcp に記載されたサーバーのトークンがなければ park
      const missingMcp = (piece.required_mcp ?? []).filter(
        (serverId) => !this.mcpTokenManager || !this.mcpTokenManager.hasToken(job.ownerId ?? '', serverId),
      );
      if (missingMcp.length > 0) {
        await this.repo.updateJob(jobId, {
          status: 'waiting_human',
          waitReason: 'mcp_auth_required',
          resumeMovement: piece.initial_movement ?? null,
        });
        if (localTaskId !== null) {
          await this.repo.addLocalTaskComment(
            localTaskId,
            'system',
            `この piece は MCP サーバー「${missingMcp.join(', ')}」との連携が必要です。Settings → MCP 接続から連携してください。`,
            'event',
          );
        }
        logger.info(`[worker:${this.workerId}] mcp gate parked job=${jobId} missing=${missingMcp.join(',')}`);
        return;
      }

      const workerDefForLlm = this.getWorkerDef();
      const isProxyWorker = workerDefForLlm.proxy === true;
      // Gateway routes by role; direct resolves the worker's model. The
      // resolver thunk runs only in direct mode (no auto-select via gateway).
      const resolvedModel = llmRoutingKey({
        isGateway: isProxyWorker,
        role: job.requiredRole,
        resolveDirectModel: () => this.resolveModel(piece),
      });
      const timeoutMs = (this.config.provider.timeoutMinutes ?? 10) * 60 * 1000;
      const llmClient = new OpenAICompatClient(
        this.endpoint,
        resolvedModel,
        workerDefForLlm.apiKey,
        this.config.provider.retry,
        timeoutMs,
        this.contextLimitTokens,
        this.config.safety?.promptGuardRatio,
        (line) => reporter.reportPromptPreflight(line),
        { proxy: isProxyWorker },
      );

      // ASK 再開の場合、resume_movement を使用
      const pieceOptions = {
        resumeMovement: job.resumeMovement ?? undefined,
        askCount: job.askCount,
        maxAskPerJob: this.config.ask.maxPerJob,
        checkInterjections: isLocalTask && localTaskId !== null && !isSubTask
          ? async (movementName: string) => {
              const comments = await this.repo.getUninjectedComments(localTaskId);
              if (comments.length === 0) return [];
              const injected = comments.map(c => ({ id: c.id, body: c.body }));
              this.repo.markCommentsInjected(injected.map(c => c.id));
              reporter.reportInterjectionAck(injected, movementName);
              return injected;
            }
          : undefined,
        spawnSubTask: job.subtaskDepth < this.config.subtasks.maxDepth
          ? async (params: { title: string; instruction: string; piece?: string }) => {
              const subJobs = await this.repo.getSubJobs(jobId);
              const subtaskIndex = subJobs.length + 1;
              if (subJobs.length >= this.config.subtasks.maxPerParent) {
                throw new Error(`サブタスク上限 (${this.config.subtasks.maxPerParent}) に達しました。これ以上のサブタスクは作成できません。`);
              }
              const subtaskWorkspace = join(workspacePath, 'subtasks', String(subtaskIndex));
              mkdirSync(subtaskWorkspace, { recursive: true });
              mkdirSync(join(subtaskWorkspace, 'output'), { recursive: true });
              mkdirSync(join(subtaskWorkspace, 'logs'), { recursive: true });

              // 親ジョブの role を継承
              const subJobInstruction = [
                `ui_profile: ${job.requiredRole}`,
                '',
                `# ${params.title}`,
                '',
                params.instruction,
              ].join('\n');

              const subJob = await this.repo.createJob({
                repo: `subtask/${jobId}`,
                issueNumber: subtaskIndex,
                instruction: subJobInstruction,
                pieceName: params.piece ?? 'general',
                parentJobId: jobId,
                subtaskDepth: job.subtaskDepth + 1,
                maxAttempts: 2,
                role: job.requiredRole,
                ownerId: job.ownerId,
                visibility: job.visibility,
                visibilityScopeOrgId: job.visibilityScopeOrgId,
              });
              await this.repo.updateJob(subJob.id, { worktreePath: subtaskWorkspace });
              logger.info(`[worker:${this.workerId}] spawned sub-task #${subtaskIndex} depth=${job.subtaskDepth + 1} job=${subJob.id}`);
              return { jobId: subJob.id, subtaskIndex, workspacePath: subtaskWorkspace };
            }
          : undefined,
      };

      const callbacks = this.buildPieceCallbacks(
        jobId,
        reporter,
        isLocalTask,
        localTaskId,
        workspacePath,
        // Seed the backend tracker with whatever was already persisted
        // for this job (e.g. on retry / resume from ASK). Only matters for
        // proxy workers; direct workers never produce a backend event.
        isProxyWorker ? (job.lastBackendId ?? null) : null,
        llmClient,
        logMetadata,
      );

      // 開始コメント
      await reporter.reportMovementStart(`${piece.name} タスク開始`);

      // キャンセル用 AbortController
      const jobAbortController = new AbortController();

      // キャンセルチェック: DB のジョブ状態が 'cancelled' になっていたら中断する
      const cancelCheck = (): boolean => {
        const isCancelled = this.repo.getJobStatusSync(jobId) === 'cancelled';
        if (isCancelled) {
          jobAbortController.abort();
        }
        return isCancelled;
      };

      // ContextManager 初期化
      const contextManager = new ContextManager(this.config.context ?? {});
      contextManager.setContextLimit(this.contextLimitTokens);

      // Piece 実行（ハートビート開始: 5分ごとに updated_at を更新）
      heartbeatTimer = setInterval(() => {
        try { this.repo.touchJobUpdatedAt(jobId); } catch { /* ignore */ }
      }, 5 * 60 * 1000);
      // VLM 対応: worker の vlm=true なら vision 設定を worker 自身の endpoint/model で上書き
      const workerDef = this.getWorkerDef();
      const toolsConfig = workerDef.vlm
        ? { ...this.config.tools, visionBaseUrl: this.endpoint, visionModel: this.model }
        : this.config.tools;

      logger.info(`[worker:${this.workerId}] job ${jobId} runPiece start`);
      // Browser session keying: resolve the session-task identity for the
      // ToolContext. Threaded into BrowseWeb / InteractiveBrowse so each
      // task gets its own noVNC session and the Captcha Pool stays
      // separate from per-task sessions. Subtasks walk up to find the
      // root local task ID.
      const sessionIdentity = await resolveSessionTaskId(this.repo, job);

      // ── Browser session profile binding ─────────────────────────────
      // If this job is bound to a browser_session_profile, decrypt the
      // captured Playwright storageState and pass it into runPiece so
      // BrowseWeb can inject it into BrowserContext. Owner-mismatch and
      // expired-profile checks fail-fast before the agent loop starts.
      let browserSessionState: object | undefined;
      let browserSessionProfileId: number | undefined;
      let browserSessionProfile:
        | { loggedInSelector: string | null; loginUrlPatterns: string[] }
        | undefined;
      let onAuthExpired:
        | ((profileId: number, reason: string) => void)
        | undefined;

      if (job.browserSessionProfileId) {
        const sessRepo = new BrowserSessionRepo(this.repo.getDb());
        const profile = sessRepo.getProfileByIdUnsafe(job.browserSessionProfileId);
        if (!profile) {
          sessRepo.audit({
            actorUserId: job.ownerId ?? null,
            ownerId: null,
            profileId: job.browserSessionProfileId,
            action: 'use',
            result: 'error',
            reason: 'profile not found',
            jobId: job.id,
          });
          throw new Error(`Browser session profile ${job.browserSessionProfileId} not found`);
        }
        // Fail-closed owner check: a job with null/missing ownerId must not
        // be allowed to decrypt any profile, even if the profile id would
        // otherwise resolve. Helper audits + throws on rejection. Extracted
        // to src/engine/browser-session-auth.ts so the contract is unit
        // tested in isolation from the Worker class.
        assertProfileOwner(profile, job, sessRepo);
        if (profile.status !== 'active' || !profile.encryptedStateBlob) {
          sessRepo.audit({
            actorUserId: job.ownerId, ownerId: profile.ownerId, profileId: profile.id,
            action: 'use', result: 'error', reason: `status=${profile.status}`, jobId: job.id,
          });
          throw new Error(`AUTH_SESSION_EXPIRED: profile ${profile.id} status=${profile.status}`);
        }
        const masterKeyPath = this.config.secrets?.masterKeyPath ?? './data/secrets/master.key';
        const master = initMasterKey(masterKeyPath);
        const encDek = sessRepo.getUserDek(profile.ownerId);
        if (!encDek) {
          sessRepo.audit({
            actorUserId: job.ownerId,
            ownerId: profile.ownerId,
            profileId: profile.id,
            action: 'decrypt',
            result: 'error',
            reason: 'user DEK missing',
            jobId: job.id,
          });
          throw new Error('user DEK missing for browser session profile');
        }
        let dek: Buffer;
        try {
          dek = decryptUserDek(master, encDek);
        } catch (e) {
          sessRepo.audit({
            actorUserId: job.ownerId,
            ownerId: profile.ownerId,
            profileId: profile.id,
            action: 'decrypt',
            result: 'error',
            reason: `dek decrypt failed: ${(e as Error).message}`,
            jobId: job.id,
          });
          throw e;
        }
        let stateJson: string;
        try {
          stateJson = decryptStateBlob(dek, profile.encryptedStateBlob);
        } catch (e) {
          sessRepo.audit({
            actorUserId: job.ownerId,
            ownerId: profile.ownerId,
            profileId: profile.id,
            action: 'decrypt',
            result: 'error',
            reason: `state decrypt failed: ${(e as Error).message}`,
            jobId: job.id,
          });
          throw e;
        }
        browserSessionState = JSON.parse(stateJson) as object;
        browserSessionProfileId = profile.id;
        browserSessionProfile = {
          loggedInSelector: profile.loggedInSelector,
          loginUrlPatterns: profile.loginUrlPatterns,
        };
        onAuthExpired = (pid, reason) => {
          sessRepo.markProfileStatus(pid, 'expired', reason);
          sessRepo.audit({
            actorUserId: job.ownerId, ownerId: profile.ownerId, profileId: pid,
            action: 'expire', result: 'success', reason, jobId: job.id,
          });
          // Best-effort task-level notification. Subtask jobs and
          // gitea-issue jobs may not have a numeric local_task id.
          if (localTaskId !== null) {
            this.repo.addLocalTaskComment(
              localTaskId,
              'agent',
              `⚠️ Browser session "${profile.label}" expired: ${reason}. Re-login from Settings → Browser Sessions.`,
              'progress',
            ).catch(() => { /* ignore — comment posting is best-effort */ });
          }
        };
        sessRepo.audit({
          actorUserId: job.ownerId, ownerId: profile.ownerId, profileId: profile.id,
          action: 'use', result: 'success', jobId: job.id,
        });
        sessRepo.touchUsed(profile.id);
      }

      // Piece handoff: when this job continues an earlier one in the same
      // local_task, agent-loop injects a "this is a continuation of piece X"
      // block into the system prompt. We resolve the prev piece name + the
      // most recent agent result/ask comment as the LLM-visible carry-over.
      let handoffContext: import('./engine/agent-loop.js').HandoffContext | undefined;
      if (job.continuedFromJobId && isLocalTask && localTaskId !== null) {
        const prevJob = await this.repo.getJob(job.continuedFromJobId);
        if (prevJob) {
          const prevResultComment = await this.repo.getLatestResultComment(localTaskId);
          handoffContext = {
            prevPiece: prevJob.pieceName,
            prevResult: prevResultComment?.body ?? null,
          };
        } else {
          logger.warn(`[worker:${this.workerId}] continued_from_job_id=${job.continuedFromJobId} not found for job ${jobId}; skipping handoff context`);
        }
      }

      // Shared-knowledge notes: construct once per job, forwarded into
      // ToolContext so agent-loop can inject "## Subscribed Notes" into
      // the system prompt. Only active when the job has a known owner.
      let notesService: NotesService | undefined;
      let notesInjectConfig: NotesInjectConfig | undefined;
      let notesUserOrgIds: string[] | undefined;
      let notesUserRole: 'admin' | 'user' | undefined;
      if (job.ownerId) {
        try {
          const notesRepo = new NotesRepository(this.repo.getDb());
          const userFolderRoot = this.config.userFolderRoot ?? './data/users';
          notesService = new NotesService({
            db: this.repo.getDb(),
            repo: notesRepo,
            userFolderRoot,
            getUserOrgIds: (uid) => this.repo.listUserGiteaOrgs(uid).map((o) => o.orgId),
            audit: (action, actor, target) => {
              try {
                this.repo.addAuditLog(jobId, action, actor, { target });
              } catch (err) {
                logger.warn(`[notes-audit] failed: ${(err as Error).message}`);
              }
            },
          });
          const cfgNotes = this.config.notes?.inject ?? {};
          notesInjectConfig = {
            perNoteMaxKb: cfgNotes.perNoteMaxKb ?? DEFAULT_NOTES_INJECT.perNoteMaxKb,
            totalMaxKb: cfgNotes.totalMaxKb ?? DEFAULT_NOTES_INJECT.totalMaxKb,
            overBudgetStrategy: cfgNotes.overBudgetStrategy ?? DEFAULT_NOTES_INJECT.overBudgetStrategy,
          };
          notesUserOrgIds = this.repo.listUserGiteaOrgs(job.ownerId).map((o) => o.orgId);
          const ownerRow = this.repo.getUserById(job.ownerId);
          notesUserRole = ownerRow?.role === 'admin' ? 'admin' : 'user';
        } catch (err) {
          logger.warn(`[worker:${this.workerId}] job ${jobId} notes setup failed: ${(err as Error).message}`);
        }
      }

      // Parse per-task options from job payload (e.g. { options: { mcpDisabled, skillsDisabled } }).
      let jobPayloadOptions: Record<string, unknown> = {};
      if (job.payload) {
        try {
          const parsed = JSON.parse(job.payload) as Record<string, unknown>;
          if (parsed?.options && typeof parsed.options === 'object' && !Array.isArray(parsed.options)) {
            jobPayloadOptions = parsed.options as Record<string, unknown>;
          }
        } catch {
          logger.warn(`[worker:${this.workerId}] job ${jobId} failed to parse payload JSON`);
        }
      }
      const mcpDisabled = jobPayloadOptions.mcpDisabled === true;
      const skillsDisabled = jobPayloadOptions.skillsDisabled === true;

      const result = await runPiece(piece, enrichedInstruction, llmClient, workspacePath, callbacks, toolsConfig, {
        ...pieceOptions,
        cancelCheck,
        abortController: jobAbortController,
        safetyConfig: this.config.safety,
        searchFilter: this.config.searchFilter,
        customPiecesDir: customPieceDirs,
        contextManager,
        vlmEnabled: workerDef.vlm === true,
        jobId,                  // Phase 5: subtask handoff parent identity
        handoffContext,
        // Phase 5 PR2: when this run IS a subtask, pass parent identity +
        // child workspace path so the runner emits a memory-delta.json on
        // completion. Subtask workspaces follow `<parent>/subtasks/<N>`
        // where N is the subtask job's issueNumber.
        parentJobId: isSubTask && parentJobId ? parentJobId : undefined,
        childWorkspaceRelative: isSubTask ? `subtasks/${issueNumber}` : undefined,
        // Mission Brief: only wire IO when this run is bound to a local
        // task (the brief is per-LocalTask, not per-job). Subtask runs
        // and gitea-issue runs leave it unset → MissionUpdate degrades
        // to a no-op and the system prompt MISSION block is skipped.
        missionBrief: isLocalTask && localTaskId !== null && !isSubTask
          ? this.repo.makeMissionBriefIO(localTaskId)
          : undefined,
        taskId: sessionIdentity.taskId,
        userId: sessionIdentity.userId,
        browserSessionState,
        browserSessionProfileId,
        browserSessionProfile,
        onAuthExpired,
        ownerId: job.ownerId,
        mcpConfig: mergeMcpConfig(this.config.mcp),
        notesService,
        notesInjectConfig,
        notesUserOrgIds,
        notesUserRole,
        skillCatalog: this.skillCatalog ?? undefined,
        mcpDisabled,
        skillsDisabled,
      });
      logger.info(`[worker:${this.workerId}] job ${jobId} runPiece done: status=${result.status}`);
      this.writeRunDiagnostics(workspacePath, result);

      await this.handlePieceResult(result, job, reporter, workspacePath, isLocalTask, isSubTask, parentJobId);

      // Phase 3b: capture the terminal status for the jobs_total label.
      // result.status uses piece-runner's own enum
      // ('completed'|'aborted'|'error'|'waiting_human'|'waiting_subtasks'|'cancelled'); map to the
      // metric enum (waiting_subtasks stays "succeeded" for the metric
      // because the job pauses cleanly — not a failure).
      switch (result.status) {
        case 'completed': metricFinalStatus = 'succeeded'; break;
        case 'aborted': metricFinalStatus = 'aborted'; break;
        case 'cancelled': metricFinalStatus = 'cancelled'; break;
        case 'waiting_human': metricFinalStatus = 'waiting_human'; break;
        case 'waiting_subtasks': metricFinalStatus = 'succeeded'; break;
        case 'error':
        default: metricFinalStatus = 'failed'; break;
      }
    } catch (err) {
      const errorMsg = err instanceof Error ? err.message : String(err);
      const errorStack = err instanceof Error && err.stack ? err.stack : '(no stack)';
      if (errorMsg.includes('Piece not found')) {
        logger.error(`[worker:${this.workerId}] job ${jobId} loadPiece failed piece=${job.pieceName} customPiecesDir=${this.config.customPiecesDir ?? 'none'} error=${errorMsg}`);
      }
      logger.error(`[worker:${this.workerId}] job ${jobId} failed: ${errorMsg}`);
      // Always log the stack so opaque errors (e.g. SqliteError: FOREIGN KEY
      // constraint failed) can be traced to the offending insert/update.
      logger.error(`[worker:${this.workerId}] job ${jobId} stack: ${errorStack}`);
      const retryDisposition = await this.scheduleRetryOrFail(job, errorMsg, workspacePath, 'worker_exception');
      if (retryDisposition !== 'requeued_unhealthy') {
        await reporter.reportError(errorMsg);
      }
      await this.repo.addAuditLog(jobId, 'job_error', 'worker', { error: errorMsg });
    } finally {
      if (heartbeatTimer) clearInterval(heartbeatTimer);
      // Phase 3b: emit job lifecycle counters + duration histogram. The
      // active gauge always decrements (matching the inc at start) so a
      // process restart can't leak active_jobs > 0 forever.
      if (this.workerMetrics) {
        try {
          this.workerMetrics.activeJobs.labels({ piece: metricPiece, profile: metricProfile }).dec();
          this.workerMetrics.jobsTotal.labels({ piece: metricPiece, status: metricFinalStatus, profile: metricProfile }).inc();
          this.workerMetrics.jobDurationSeconds
            .labels({ piece: metricPiece, status: metricFinalStatus, profile: metricProfile })
            .observe((Date.now() - jobStartedAtMs) / 1000);
        } catch { /* metrics never affect business logic */ }
      }
      await this.repo.updateWorkerNodeHealth(this.workerId, {
        healthy: this.healthy,
        lastError: this.lastHealthError,
        inflightJobs: this.inflight,
        availableModels: [...this.availableModels],
      });
      await this.repo.unlockIssue(repoName, issueNumber);
    }
  }

  private buildPieceCallbacks(
    jobId: string,
    reporter: LocalProgressReporter,
    isLocalTask: boolean,
    localTaskId: number | null,
    workspacePath: string,
    /**
     * Initial value of jobs.last_backend_id from the DB. Seeds the backend
     * tracker (and the sticky-routing hint) so a resumed/retried job goes
     * back to the backend that already holds its KV cache.
     * Falsy/null = no backend resolved yet.
     */
    initialLastBackendId: string | null = null,
    /** LLM client of this job — receives the sticky-routing hint per switch. */
    llmClient?: { setPreferredBackendId(backendId: string | null): void },
    /**
     * The reporter's metadata object (shared by reference): mutating
     * `backendId` here makes every subsequent activity.log line carry
     * `[backend:...]` so the Progress tab can show the physical backend
     * behind a proxy worker.
     */
    logMetadata?: ActivityLogMetadata,
  ): PieceRunCallbacks {
    let movementStartTime = Date.now();
    const toolUsageCounts = new Map<string, number>();
    // Backend tracker (follow-current semantics, 2026-06): persists
    // jobs.last_backend_id whenever the resolved backend CHANGES so the UI
    // (pet, badges) follows where the job actually runs. Switches are rare
    // because the gateway honors the x-aao-preferred-backend sticky hint
    // (KV-cache reuse) — they only happen when the preferred backend goes
    // offline or saturates. The tracker still guarantees that a failed DB
    // persist leaves the in-memory value unchanged so the next event
    // retries. See src/worker/sticky-backend.ts.
    const workerIdLocal = this.workerId;
    const backendTracker = createStickyBackendResolver({
      initial: initialLastBackendId,
      persist: (backendId) => this.repo.updateJob(jobId, { lastBackendId: backendId }),
      logger: {
        debug: (m) => logger.debug(m),
        info: (m) => logger.info(m),
        warn: (m) => logger.warn(m),
      },
      workerId: workerIdLocal,
      jobId,
    });
    // Seed the sticky-routing hint + activity-log backend tag from the DB
    // value (resume/retry goes straight back to the cache-warm backend).
    if (initialLastBackendId) {
      llmClient?.setPreferredBackendId(initialLastBackendId);
      if (logMetadata) logMetadata.backendId = initialLastBackendId;
    }
    // Phase 3b: local copy of the sticky backend so the LLM-call metric
    // has a stable backend_id label even before the persist returns.
    // Direct workers (non-proxy) never fire onBackendResolved, so we
    // fall back to the worker id (`gpu-rtx-a`) as the backend identity.
    let metricBackendId = initialLastBackendId ?? workerIdLocal;
    const metricModel = this.model ?? 'unknown';
    const metricsRef = this.workerMetrics;
    // Pending tool calls awaiting result, keyed by callId.
    // On onToolResult, we pair via callId and persist a single tool_call comment.
    const pendingToolCalls = new Map<string, { name: string; args: Record<string, unknown>; movement: string }>();
    let currentMovementForTools = '';

    const ARG_PREVIEW_CAP = 8 * 1024;
    const RESULT_PREVIEW_CAP = 16 * 1024;
    const truncate = (s: string, cap: number): string =>
      s.length > cap ? s.slice(0, cap) + `\n…[truncated ${s.length - cap} bytes]` : s;

    return {
      onMovementStart: (name) => {
        movementStartTime = Date.now();
        toolUsageCounts.clear();
        currentMovementForTools = name;
        this.repo.updateJob(jobId, { currentMovement: name, currentActivity: null });
        reporter.reportMovementStart(name);
      },
      onToolUse: (toolName, input, callId) => {
        toolUsageCounts.set(toolName, (toolUsageCounts.get(toolName) ?? 0) + 1);
        const summary = summarizeToolInput(toolName, input);
        this.repo.updateJob(jobId, { currentActivity: `${toolName}: ${summary}`.slice(0, 200) });
        reporter.reportToolUse(toolName, input);
        if (callId) {
          pendingToolCalls.set(callId, { name: toolName, args: input, movement: currentMovementForTools });
        }
        if (jobEventBus.hasListeners(jobId)) {
          jobEventBus.emitJob(jobId, {
            type: 'tool_use',
            toolName,
            toolInput: summary,
            callId: callId ?? '',
          });
        }
      },
      onToolCallDelta: (callId, name, chunk) => {
        if (jobEventBus.hasListeners(jobId)) {
          jobEventBus.emitJob(jobId, { type: 'tool_use_delta', callId, name, chunk });
        }
      },
      onText: (text) => {
        if (jobEventBus.hasListeners(jobId)) {
          jobEventBus.emitJob(jobId, { type: 'text', text });
        }
      },
      onPromptProgress: (progress) => {
        if (jobEventBus.hasListeners(jobId)) {
          jobEventBus.emitJob(jobId, {
            type: 'prompt_progress',
            processed: progress.processed,
            total: progress.total,
            timeMs: progress.timeMs,
            cache: progress.cache,
          });
        }
      },
      onTextPreview: (movementName, preview) => {
        reporter.reportAssistantPreview(movementName, preview);
      },
      onContextAction: (action) => {
        reporter.reportContextAction(action);
      },
      onContextUpdate: (payload) => {
        this.repo.updateJobContext(jobId, payload).catch(err => {
          logger.warn(`[worker:${this.workerId}] failed to persist context for job ${jobId}: ${err}`);
        });
      },
      onLLMCall: (info) => {
        reporter.reportLLMCall(info);
        if (metricsRef) {
          try {
            metricsRef.llmCallsTotal
              .labels({ worker_id: workerIdLocal, backend_id: metricBackendId, model: metricModel })
              .inc();
            metricsRef.llmCallDurationSeconds
              .labels({ worker_id: workerIdLocal, backend_id: metricBackendId, model: metricModel })
              .observe(info.durationMs / 1000);
          } catch { /* metrics best-effort */ }
        }
      },
      onBackendResolved: (info) => {
        // Phase 3b: update the backend id used for LLM-call metrics.
        if (info.backendId) {
          metricBackendId = info.backendId;
          // Sticky routing: ask the gateway to keep using this backend on
          // the next request (KV-cache affinity).
          llmClient?.setPreferredBackendId(info.backendId);
          // Tag subsequent activity.log lines with the physical backend so
          // the Progress tab shows more than the proxy worker's name.
          if (logMetadata) logMetadata.backendId = info.backendId;
        }
        // Fire-and-forget: agent-loop's onBackendResolved signature is
        // sync (void). The tracker handles persist errors internally;
        // we just attach a final guard to log any unexpected throw.
        // cacheKey is observed but not persisted at the job level —
        // Phase B's NodeStatusWidget will track cache hits out-of-band.
        backendTracker.onEvent(info).catch(err => {
          logger.warn(`[worker:${this.workerId}] backend tracker threw for job ${jobId}: ${err}`);
        });
      },
      onMovementComplete: (movementName, result) => {
        const durationMs = Date.now() - movementStartTime;
        const tools: Record<string, number> = {};
        for (const [name, count] of toolUsageCounts) {
          tools[name] = count;
        }
        reporter.reportMovementComplete(movementName, result.output, result.next);

        if (isLocalTask) {
          const isTerminal = result.next === 'COMPLETE' || result.next === 'ABORT' || result.next === 'ASK';
          const summary = !isTerminal ? (result.output?.trim() || undefined) : undefined;
          const progressBody = JSON.stringify({ movement: movementName, tools, durationMs, ...(summary ? { summary } : {}) });
          this.repo.addLocalTaskComment(localTaskId!, 'agent', progressBody, 'progress')
            .catch(err => logger.warn(`[worker:${this.workerId}] failed to insert progress comment: ${err}`));
          if (isTerminal && jobEventBus.hasListeners(jobId)) {
            jobEventBus.emitJob(jobId, { type: 'done' });
          }
        }
      },
      onToolResult: (toolName, info, callId) => {
        const { isError } = info;
        reporter.reportToolResult(toolName, info);

        // Pair with pending tool_use via callId, then persist as comment + emit SSE.
        const pending = callId ? pendingToolCalls.get(callId) : undefined;
        if (callId) pendingToolCalls.delete(callId);
        if (isLocalTask && callId && pending) {
          let argsStr: string;
          try { argsStr = truncate(JSON.stringify(pending.args), ARG_PREVIEW_CAP); }
          catch { argsStr = '"<unserializable>"'; }
          const toolCallBody = JSON.stringify({
            type: 'tool_call',
            callId,
            movement: pending.movement,
            name: toolName,
            args: argsStr,
            result: truncate(info.result, RESULT_PREVIEW_CAP),
            isError,
            durationMs: info.durationMs,
            cacheHit: info.cacheHit,
          });
          this.repo.addLocalTaskComment(localTaskId!, 'agent', toolCallBody, 'progress')
            .catch(err => logger.warn(`[worker:${this.workerId}] tool_call comment failed: ${err}`));
        }
        if (jobEventBus.hasListeners(jobId) && callId) {
          jobEventBus.emitJob(jobId, {
            type: 'tool_result',
            toolName,
            callId,
            toolOutput: truncate(info.result, 2 * 1024),
            toolIsError: isError,
          });
        }
        // Phase 3b: count every tool invocation. success label is the
        // string form so Grafana queries can group by it. Same
        // best-effort guard as the LLM emission above.
        //
        // Phase 3b post-review: normalize the tool_name label so a
        // piece firing arbitrary mcp__*/user-defined names doesn't
        // explode label cardinality. MCP tools collapse to a single
        // `mcp` bucket; unknown names land in `unknown`. The full
        // tool_name is still visible in the activity log + reporter,
        // so the metric drop only affects Prometheus dimensions.
        if (metricsRef) {
          try {
            metricsRef.toolCallsTotal
              .labels({ tool_name: normalizeToolNameForMetric(toolName), success: isError ? 'false' : 'true' })
              .inc();
          } catch { /* metrics best-effort */ }
        }
        if (isLocalTask && !isError && (toolName === 'CheckItem' || toolName === 'CreateChecklist')) {
          try {
            const checklistDir = join(workspacePath, 'logs', 'checklists');
            if (existsSync(checklistDir)) {
              const files = readdirSync(checklistDir).filter(f => f.endsWith('.json'));
              if (files.length > 0) {
                let latestFile = files[0]!;
                let latestMtime = 0;
                for (const file of files) {
                  try {
                    const { mtimeMs } = statSync(join(checklistDir, file));
                    if (mtimeMs > latestMtime) {
                      latestMtime = mtimeMs;
                      latestFile = file;
                    }
                  } catch { /* skip */ }
                }
                const data = JSON.parse(readFileSync(join(checklistDir, latestFile), 'utf-8'));
                const progressBody = JSON.stringify({
                  type: 'checklist',
                  name: data.name,
                  items: data.items,
                  summary: data.summary,
                });
                this.repo.addLocalTaskComment(localTaskId!, 'agent', progressBody, 'progress')
                  .catch(err => logger.warn(`[worker:${this.workerId}] checklist progress comment failed: ${err}`));
              }
            }
          } catch (err) {
            logger.warn(`[worker:${this.workerId}] checklist read failed: ${err}`);
          }
        }
      },
    };
  }

  private async handleReflectionJob(job: Job): Promise<void> {
    const { runReflectionJob } = await import('./engine/reflection/reflection-runner.js');
    try {
      // Gateway mode routes by role: send the reflection tier as the key
      // (job.requiredRole is 'reflection'), not the worker's model name.
      const reflectionRoutingKey = llmRoutingKey({
        isGateway: this.getWorkerDef().proxy === true,
        role: job.requiredRole,
        resolveDirectModel: () => this.model,
        roleFallback: 'reflection',
      });
      const outcome = await runReflectionJob(
        {
          repo: this.repo,
          config: this.config,
          llmEndpoint: this.endpoint,
          llmModel: reflectionRoutingKey,
          // Same credential as normal task LLM calls — a key-enforcing
          // gateway 401s reflection without it.
          llmApiKey: this.getWorkerDef().apiKey,
        },
        job
      );
      await this.repo.updateJob(job.id, {
        status: outcome === 'failed' ? 'failed' : 'succeeded',
        currentActivity: null,
      });
    } catch (e) {
      logger.error(`[reflection] runner threw job=${job.id} err=${String(e)}`);
      await this.repo.updateJob(job.id, { status: 'failed', currentActivity: null });
    }
  }

  private async handlePieceResult(
    result: PieceRunResult,
    job: Job,
    reporter: LocalProgressReporter,
    workspacePath: string,
    isLocalTask: boolean,
    isSubTask: boolean,
    parentJobId: string | null,
  ): Promise<void> {
    const { repo: repoName, issueNumber, id: jobId } = job;
    const localTaskId = getLocalTaskId(repoName);

    if (result.status === 'completed') {
      if (isLocalTask) {
        await this.commitLocalWorkspace(issueNumber, workspacePath);
      }
      await this.repo.updateJob(jobId, { status: 'succeeded', currentActivity: null });
      this.enqueuePush(job, 'succeeded');
      await maybeEnqueueReflection(this.repo, job, 'succeeded', this.config.reflection, this.config.provider.workers);
      let resultBody = result.finalOutput;
      if (resultBody) {
        resultBody = ensureKeepaGraphs(resultBody);
      }
      await reporter.reportFinalResult('completed', resultBody);
    } else if (result.status === 'waiting_human') {
      if (isSubTask && parentJobId) {
        await this.repo.updateJob(jobId, { status: 'waiting_human', resumeMovement: result.resumeMovement ?? null, askCount: job.askCount + 1 });
        // Sub-task ASK is auto-answered below, so we don't notify on it.
        reporter.reportToolUse('ASK', { question: result.finalOutput });
        await this.repo.addAuditLog(jobId, 'job_ask_subtask', 'worker', { question: result.finalOutput, resumeMovement: result.resumeMovement });

        try {
          const answer = await this.answerSubtaskAsk(job, parentJobId, result.finalOutput);
          logger.info(`[worker:${this.workerId}] answered subtask ASK for job ${jobId}: ${answer.slice(0, 100)}`);

          const newJob = await this.repo.createJob({
            repo: repoName,
            issueNumber,
            instruction: answer,
            pieceName: job.pieceName,
            askCount: job.askCount + 1,
            resumeMovement: result.resumeMovement,
            parentJobId: job.parentJobId,
            subtaskDepth: job.subtaskDepth,
            maxAttempts: 2,
            role: job.requiredRole,
            ownerId: job.ownerId,
            visibility: job.visibility,
            visibilityScopeOrgId: job.visibilityScopeOrgId,
          });
          await this.repo.updateJob(newJob.id, { worktreePath: workspacePath });
          await this.repo.addAuditLog(newJob.id, 'job_queued_subtask_ask_answer', 'worker', { originalJobId: jobId, question: result.finalOutput });
        } catch (askErr) {
          logger.warn(`[worker:${this.workerId}] failed to answer subtask ASK, leaving as waiting_human: ${askErr}`);
        }
      } else {
        await this.repo.updateJob(jobId, {
          status: 'waiting_human',
          resumeMovement: result.resumeMovement ?? null,
          askCount: job.askCount + 1,
        });
        this.enqueuePush(job, 'waiting_human');
        await reporter.reportAsk(result.finalOutput);
        await this.repo.addAuditLog(jobId, 'job_ask', 'worker', {
          question: result.finalOutput,
          resumeMovement: result.resumeMovement,
        });
      }
    } else if (result.status === 'waiting_subtasks') {
      const subJobs = await this.repo.getSubJobs(jobId);
      if (subJobs.length === 0) {
        if (result.resumeMovement) {
          logger.warn(`[worker:${this.workerId}] job ${jobId} waiting_subtasks but no sub-jobs exist, re-queuing to ${result.resumeMovement}`);
          await this.repo.updateJob(jobId, {
            status: 'queued',
            resumeMovement: result.resumeMovement,
          });
        } else {
          logger.error(`[worker:${this.workerId}] job ${jobId} waiting_subtasks with no sub-jobs and no resumeMovement, failing`);
          await this.repo.updateJob(jobId, { status: 'failed' });
        }
        await this.repo.addAuditLog(jobId, 'job_requeued_no_subtasks', 'worker', {
          resumeMovement: result.resumeMovement,
          action: result.resumeMovement ? 'requeued' : 'failed',
        });
      } else {
        await this.repo.updateJob(jobId, {
          status: 'waiting_subtasks',
          resumeMovement: result.resumeMovement ?? null,
        });
        await reporter.reportMovementStart('サブタスク待機中...');
        await this.repo.addAuditLog(jobId, 'job_waiting_subtasks', 'worker', {
          resumeMovement: result.resumeMovement,
        });
      }
    } else if (result.status === 'cancelled') {
      logger.info(`[worker:${this.workerId}] job ${jobId} cancelled`);
      await reporter.reportFinalResult('cancelled', result.finalOutput);
    } else {
      const retryDisposition = await this.scheduleRetryOrFail(job, result.finalOutput, workspacePath, result.abortReason ?? null);
      if (retryDisposition !== 'requeued_unhealthy') {
        await reporter.reportFinalResult(result.status, result.finalOutput);
      }
      if (retryDisposition === 'failed') {
        const outcome = result.status === 'aborted' ? 'aborted' : 'failed';
        await maybeEnqueueReflection(this.repo, job, outcome, this.config.reflection, this.config.provider.workers);
      }
    }

    // サブタスク完了時（終端ステータスのみ）: 全兄弟ジョブが完了なら親ジョブを再キュー
    const SUBTASK_TERMINAL = ['completed', 'error', 'aborted', 'cancelled'];
    if (isSubTask && parentJobId && SUBTASK_TERMINAL.includes(result.status)) {
      try {
        const parentJob = await this.repo.getJob(parentJobId);
        if (parentJob?.worktreePath) {
          const resultDir = join(parentJob.worktreePath, 'subtasks', String(issueNumber));
          mkdirSync(resultDir, { recursive: true });
          writeFileSync(
            join(resultDir, 'result.md'),
            `# サブタスク #${issueNumber} 結果\n\nステータス: ${result.status}\n\n${result.finalOutput}\n`,
            'utf-8',
          );
        }
        const requeued = await this.repo.requeueParentJobIfAllSubtasksDone(parentJobId);
        if (requeued) {
          logger.info(`[worker:${this.workerId}] all sub-tasks done, re-queued parent job ${parentJobId}`);
        }
      } catch (subErr) {
        logger.warn(`[worker:${this.workerId}] sub-task parent re-queue error: ${subErr}`);
      }
    }

    await this.repo.addAuditLog(jobId, `job_${result.status}`, 'worker', {
      movementCount: result.movementHistory.length,
      abortReason: result.abortReason ?? null,
      contextActionCount: result.contextActions.length,
      latestContextAction: result.contextActions[result.contextActions.length - 1] ?? null,
    });
  }

  private resolveModel(piece: PieceDef): string | undefined {
    if (piece.model) {
      if (this.availableModels.size === 0 || this.availableModels.has(piece.model)) {
        return piece.model;
      }
      logger.warn(`[worker:${this.workerId}] piece model "${piece.model}" not available, falling back to ${this.model ?? '<none>'}`);
    }
    // If the configured model is not in available models, auto-select the first available one
    if (this.model && this.availableModels.size > 0 && !this.availableModels.has(this.model)) {
      const autoModel = [...this.availableModels][0]!;
      logger.info(`[worker:${this.workerId}] configured model "${this.model}" not available, auto-selecting "${autoModel}"`);
      return autoModel;
    }
    return this.model;
  }

  private async scheduleRetryOrFail(
    job: Job,
    errorMsg: string,
    workspacePath?: string,
    abortReason: string | null = null,
  ): Promise<'requeued_unhealthy' | 'retry' | 'failed'> {
    const { id: jobId, attempt, maxAttempts } = job;

    const isLlmConnectionFatal = /connection error:\s*fetch failed|econnrefused|enotfound|etimedout|network error/i.test(errorMsg);
    if (isLlmConnectionFatal) {
      this.healthy = false;
      this.lastHealthError = errorMsg;
      this.availableModels.clear();
      await this.repo.updateWorkerNodeHealth(this.workerId, {
        healthy: false,
        lastError: errorMsg,
        inflightJobs: this.inflight,
        availableModels: [],
      });
      await this.repo.updateJob(jobId, {
        status: 'queued',
        workerId: null,
        errorSummary: errorMsg,
        abortReason,
        nextRetryAt: null,
      });
      writeRetryHandoffSummary({
        workspacePath: workspacePath ?? job.worktreePath,
        job,
        errorMsg,
        nextRetryAt: null,
        disposition: 'requeued_unhealthy',
      });
      logger.warn(`[worker:${this.workerId}] job ${jobId} requeued after LLM connection error; worker marked unhealthy`);
      return 'requeued_unhealthy';
    }

    if (attempt < maxAttempts) {
      const backoffIndex = Math.min(attempt - 1, this.config.retry.backoffSeconds.length - 1);
      const backoffSec = this.config.retry.backoffSeconds[backoffIndex] ?? this.config.retry.backoffSeconds[this.config.retry.backoffSeconds.length - 1] ?? 60;
      const nextRetryAt = new Date(Date.now() + backoffSec * 1000).toISOString();
      await this.repo.updateJob(jobId, {
        status: 'retry',
        attempt: attempt + 1,
        nextRetryAt,
        errorSummary: errorMsg,
        abortReason,
      });
      writeRetryHandoffSummary({
        workspacePath: workspacePath ?? job.worktreePath,
        job,
        errorMsg,
        nextRetryAt,
        disposition: 'retry',
      });
      logger.info(`[worker:${this.workerId}] job ${jobId} scheduled for retry ${attempt + 1}/${maxAttempts} at ${nextRetryAt}`);
      return 'retry';
    } else {
      await this.repo.updateJob(jobId, { status: 'failed', errorSummary: errorMsg, abortReason });
      // V2 push: only on terminal fail. Intermediate retry attempts are
      // silenced (matches V1's 4-second debounce intent).
      this.enqueuePush(job, 'failed');
      writeRetryHandoffSummary({
        workspacePath: workspacePath ?? job.worktreePath,
        job,
        errorMsg,
        nextRetryAt: null,
        disposition: 'failed',
      });
      logger.info(`[worker:${this.workerId}] job ${jobId} failed permanently after ${maxAttempts} attempts`);
      return 'failed';
    }
  }

  private listDir(dirPath: string): string[] {
    try {
      return readdirSync(dirPath);
    } catch {
      return [];
    }
  }

  private async commitLocalWorkspace(
    taskId: number,
    workspacePath: string,
    commitMessage?: string,
  ): Promise<void> {
    const result = await commitWorkspaceChanges({
      workspacePath,
      branchName: 'main',
      commitMessage: commitMessage?.trim() || `agent: update task #${taskId}`,
      ignoreEntries: ['input/', 'logs/'],
    });
    if (!result.changed) {
      logger.info(`[worker:${this.workerId}] no local changes to commit for task #${taskId}`);
      return;
    }
    if (result.committed) {
      logger.info(`[worker:${this.workerId}] committed local workspace changes for task #${taskId}`);
    }
    if (result.pushed) {
      logger.info(`[worker:${this.workerId}] pushed local workspace changes for task #${taskId}`);
    }
  }

}