diff --git a/config.yaml.example b/config.yaml.example index a5bad8a..d2e92d5 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -64,7 +64,11 @@ llm: # max_concurrency: 2 # enabled: true - # 例: タイトル生成専用ワーカー (chat ジョブは受け付けない) + # 例: タイトル生成用ワーカー (chat ジョブは受け付けない)。 + # 注意: タスク作成時にタイトル用 LLM はもう呼ばれない (実行中にエージェントが + # Mission Brief の goal からタイトルを派生する)。このワーカーは + # 「AIでタイトルを再生成」ボタン (POST /api/local/tasks/:id/regenerate-title) + # のオンデマンド生成にのみ使われる。未設定なら先頭ワーカーが代替する。 # - id: title-worker # connection_type: direct # endpoint: http://localhost:11434/v1 @@ -265,6 +269,7 @@ tools: # office_pdf_max_size_mb: 10 # ReadPdf 上限 # office_pptx_max_size_mb: 50 # ReadPPTX 上限 # office_pptx_max_uncompressed_mb: 200 # PPTX ZIP 展開後上限 (zip-bomb 検知) + # office_msg_max_size_mb: 25 # ReadMsg 上限 (default 25) # speech_server_url: http://localhost:8000/v1 # speech_timeout: 300 # speech_language: ja @@ -307,6 +312,29 @@ tools: # total_max_kb: 32 # over_budget_strategy: skip_remaining # truncate_last / skip_remaining (default) / degrade_to_search +# ─── サーバー TLS (オプション) ─────────────────────────────── +# 【フレッシュインストール】setup.sh が server.tls.enabled: true を自動書き込む。 +# 【アップグレード】server ブロック未記載の場合は false のまま(既存デプロイを壊さない)。 +# 【リバースプロキシ構成】このブロックを省略するか enabled: false のままにすること。 +# プロキシが TLS を終端しているので、ここで有効にすると二重終端になり接続が壊れる。 +# +# server: +# tls: +# enabled: true # フレッシュインストールのデフォルト; ブロック未記載=アップグレード時 false +# cert_file: null # PEM 証明書パス (任意); cert_file と key_file は両方設定するか両方省略 +# key_file: null +# min_version: TLSv1.2 +# self_signed_dir: ./data/tls +# self_signed_hosts: [] # localhost / 127.0.0.1 / ::1 / hostname は常に含まれる +# http_redirect: true +# http_redirect_port: 9080 # HTTPS ポートと異なる値にすること +# redirect_host: null # リダイレクト先のホスト; null = バインドホストを使用 +# # リバースプロキシ構成: このブロックを省略するか enabled: false のままにすること。 +# # プロキシが TLS を終端している場合に native TLS を有効にすると二重終端になり壊れる。 +# # noVNC / SSH コンソールを wss で使う場合は「信頼済み証明書」が必要 +# # (自己署名の wss はブラウザに click-through がない) — +# # cert_file / key_file で実証明書を指定するか、OS の信頼ストアに自己署名 CA 
を登録すること。 + # ─── 認証 (オプション) ──────────────────────────────────────── # 未設定なら認証なしで動作 (従来互換)。 # auth: diff --git a/docs/tools/office.md b/docs/tools/office.md index 2b810a6..6d13254 100644 --- a/docs/tools/office.md +++ b/docs/tools/office.md @@ -1,4 +1,4 @@ -# Office ファイル系ツール(ReadPdf / ReadExcel / ReadDocx / ReadPPTX / PdfToImages / SplitExcelSheets / SplitDocxSections) +# Office ファイル系ツール(ReadPdf / ReadExcel / ReadDocx / ReadPPTX / ReadMsg / PdfToImages / SplitExcelSheets / SplitDocxSections) ローカル workspace の Office 文書・PDF を読み込むツール群。 @@ -126,6 +126,38 @@ ReadPPTX({ file_path: "input/slides.pptx" }) // → 各スライドのテキスト・表・スピーカーノートを返す ``` +### ReadMsg + +Outlook の `.msg`(OLE2 / CFBF 複合バイナリ)メールを読む。 + +```js +ReadMsg({ file_path: "input/inquiry.msg" }) +// → 件名・差出人・宛先・CC・日時・本文(テキスト)と添付一覧を返す +``` + +返却テキストの構成: + +``` +Subject: 見積もりのご依頼 +From: 山田太郎 +To: sales@example.com +Date: ... + +(本文テキスト) + +Attachments (2): +- 見積書.pdf (20480 bytes) -> input/見積書.pdf +- data.xlsx (8192 bytes) -> input/data.xlsx +``` + +ポイント: + +- 本文は plain text を優先。HTML メールはタグを除去して整形。どちらも取れない場合は `(no text body)` +- **添付は `input/` に自動保存される**。保存後は種別ごとのツールで開く(PDF → ReadPdf、Excel → ReadExcel、画像 → ReadImage など) +- ファイル名は basename に正規化し、パス区切りや制御文字を除去(ディレクトリトラバーサル防止)。同名衝突時は連番を付与 +- 添付に埋め込まれた `.msg`(メール in メール)は保存せず一覧に注記のみ。必要なら個別に扱う +- 不正な `.msg`(CFBF シグネチャ不一致)は中身を読まずエラーを返す。バイナリがそのまま出力に混ざることはない + ## 変換・分割系 ### PdfToImages diff --git a/package-lock.json b/package-lock.json index 70b1044..c2be0b4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "0.1.0", "license": "Apache-2.0", "dependencies": { + "@kenjiuno/msgreader": "^1.28.0", "@modelcontextprotocol/sdk": "^1.29.0", "@novnc/novnc": "^1.6.0", "@types/ssh2": "^1.15.5", @@ -33,6 +34,7 @@ "pptxgenjs": "^4.0.1", "prom-client": "^15.1.3", "proper-lockfile": "^4.1.2", + "selfsigned": "^2.4.1", "sharp": "^0.34.5", "ssh2": "^1.17.0", "undici": "^7.25.0", @@ -675,6 +677,37 @@ "dev": true, "license": "MIT" }, + 
"node_modules/@kenjiuno/decompressrtf": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/@kenjiuno/decompressrtf/-/decompressrtf-0.1.4.tgz", + "integrity": "sha512-v9c/iFz17jRWyd2cRnrvJg4VOg/4I/VCk+bG8JnoX2gJ9sAesPzo3uTqcmlVXdpasTI8hChpBVw00pghKe3qTQ==", + "license": "BSD-2-Clause" + }, + "node_modules/@kenjiuno/msgreader": { + "version": "1.28.0", + "resolved": "https://registry.npmjs.org/@kenjiuno/msgreader/-/msgreader-1.28.0.tgz", + "integrity": "sha512-+iv2rWCGRHmX/3sBwXZzkThEuuywGJjnYsvxj6Kp1L/FDMICQcFrtqN+6MFrnh2d+umtfGtX904wxaYEDZ52MQ==", + "license": "Apache-2.0", + "dependencies": { + "@kenjiuno/decompressrtf": "^0.1.3", + "iconv-lite": "^0.6.3" + }, + "engines": { + "node": ">= 10" + } + }, + "node_modules/@kenjiuno/msgreader/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/@modelcontextprotocol/sdk": { "version": "1.29.0", "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.29.0.tgz", @@ -1709,6 +1742,15 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/node-forge": { + "version": "1.3.14", + "resolved": "https://registry.npmjs.org/@types/node-forge/-/node-forge-1.3.14.tgz", + "integrity": "sha512-mhVF2BnD4BO+jtOp7z1CdzaK4mbuK0LLQYAvdOLqHTavxFNq4zA1EmYkpnFjP8HOUzedfQkRnp0E2ulSAYSzAw==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/oauth": { "version": "0.9.6", "resolved": "https://registry.npmjs.org/@types/oauth/-/oauth-0.9.6.tgz", @@ -4518,6 +4560,15 @@ "node": ">=10" } }, + "node_modules/node-forge": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-1.4.0.tgz", + "integrity": 
"sha512-LarFH0+6VfriEhqMMcLX2F7SwSXeWwnEAJEsYm5QKWchiVYVvJyV9v7UDvUv+w5HO23ZpQTXDv/GxdDdMyOuoQ==", + "license": "(BSD-3-Clause OR GPL-2.0)", + "engines": { + "node": ">= 6.13.0" + } + }, "node_modules/normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", @@ -5314,6 +5365,19 @@ "node": ">=4" } }, + "node_modules/selfsigned": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/selfsigned/-/selfsigned-2.4.1.tgz", + "integrity": "sha512-th5B4L2U+eGLq1TVh7zNRGBapioSORUeymIydxgFpwww9d2qyKvtuPU2jJuHvYAwwqi2Y596QBL3eEqcPEYL8Q==", + "license": "MIT", + "dependencies": { + "@types/node-forge": "^1.3.0", + "node-forge": "^1" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/semver": { "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", diff --git a/package.json b/package.json index a311ab5..dfeb02d 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "vapid-rotate": "tsx scripts/vapid-rotate.ts" }, "dependencies": { + "@kenjiuno/msgreader": "^1.28.0", "@modelcontextprotocol/sdk": "^1.29.0", "@novnc/novnc": "^1.6.0", "@types/ssh2": "^1.15.5", @@ -51,6 +52,7 @@ "pptxgenjs": "^4.0.1", "prom-client": "^15.1.3", "proper-lockfile": "^4.1.2", + "selfsigned": "^2.4.1", "sharp": "^0.34.5", "ssh2": "^1.17.0", "undici": "^7.25.0", diff --git a/pieces/chat.yaml b/pieces/chat.yaml index 0e7eb22..c87ba3c 100644 --- a/pieces/chat.yaml +++ b/pieces/chat.yaml @@ -66,7 +66,7 @@ movements: - `result` がそのままユーザーに表示される最終出力。途中のメモや作業ログは入れない - **ユーザー確認が必要**: `complete({status: "needs_user_input", missing_info: "確認したい内容", why_no_default: "デフォルトで進められない理由"})` - **技術的失敗で打ち切り**: `complete({status: "aborted", abort_reason: "失敗の理由"})` - allowed_tools: [Read, Write, Edit, Glob, Grep, WebSearch, WebFetch, DownloadFile, ReadImage, AnnotateImage, ReadPdf, PdfToImages, ReadExcel, ReadDocx, ReadPPTX, SQLite, Bash, XSearch, XUserPosts, XPostDetail, XTimeline, XFetchCardMedia, 
BrowseWeb, SearchPlaces, GetDirections, ReverseGeocode, GetYouTubeTranscript, SearchYouTube, SearchAmazon, TranscribeAudio, ListPieces, GetPiece, CreatePiece, UpdatePiece, SearchKnowledge, ListNamespaces, ListDocuments, SearchNotes, ReadNote, WriteNote, SearchMicrosoftLearn, FetchMicrosoftLearn, SearchMicrosoftLearnCache, RefreshMicrosoftLearnCache, ReadToolDoc, UpdateDashboardWidget, 'mcp__*'] + allowed_tools: [Read, Write, Edit, Glob, Grep, WebSearch, WebFetch, DownloadFile, ReadImage, AnnotateImage, ReadPdf, PdfToImages, ReadExcel, ReadDocx, ReadPPTX, ReadMsg, SQLite, Bash, XSearch, XUserPosts, XPostDetail, XTimeline, XFetchCardMedia, BrowseWeb, SearchPlaces, GetDirections, ReverseGeocode, GetYouTubeTranscript, SearchYouTube, SearchAmazon, TranscribeAudio, ListPieces, GetPiece, CreatePiece, UpdatePiece, SearchKnowledge, ListNamespaces, ListDocuments, SearchNotes, ReadNote, WriteNote, SearchMicrosoftLearn, FetchMicrosoftLearn, SearchMicrosoftLearnCache, RefreshMicrosoftLearnCache, ReadToolDoc, UpdateDashboardWidget, 'mcp__*'] # default_next is the engine-internal fallback for context overflow / ASK # limit reached / SpawnSubTask unavailable. It is NOT exposed to the LLM. 
default_next: COMPLETE diff --git a/pieces/data-process.yaml b/pieces/data-process.yaml index 7c41383..76aaee0 100644 --- a/pieces/data-process.yaml +++ b/pieces/data-process.yaml @@ -62,7 +62,7 @@ movements: - **次の report へ**: `transition({next_step: "report"})` - **処理対象が特定できずユーザー確認が必要**: `complete({status: "needs_user_input", missing_info: "...", why_no_default: "..."})` - **データが壊れている / 読み取れない / エラー発生で打ち切り**: `complete({status: "aborted", abort_reason: "..."})` - allowed_tools: [Read, Write, Bash, Glob, Grep, SQLite, WebSearch, WebFetch, DownloadFile, ReadExcel, ReadDocx, ReadPdf, ReadPPTX, SplitExcelSheets, PdfToImages, ReadImage, AnnotateImage, TranscribeAudio, SearchKnowledge, ListNamespaces, ListDocuments, ReadToolDoc, 'mcp__*'] + allowed_tools: [Read, Write, Bash, Glob, Grep, SQLite, WebSearch, WebFetch, DownloadFile, ReadExcel, ReadDocx, ReadPdf, ReadPPTX, ReadMsg, SplitExcelSheets, PdfToImages, ReadImage, AnnotateImage, TranscribeAudio, SearchKnowledge, ListNamespaces, ListDocuments, ReadToolDoc, 'mcp__*'] default_next: report rules: - condition: output/ に結果を書き出した diff --git a/pieces/general.yaml b/pieces/general.yaml index b93a155..d96a6fe 100644 --- a/pieces/general.yaml +++ b/pieces/general.yaml @@ -121,7 +121,7 @@ movements: - **並列分解が効率的 → decompose へ**: `transition({next_step: "decompose"})` - **必須情報が不足し確認が必要**: `complete({status: "needs_user_input", missing_info: "...", why_no_default: "..."})` - **技術的失敗で打ち切り**: `complete({status: "aborted", abort_reason: "..."})` - allowed_tools: [Read, Write, Bash, Glob, Grep, WebSearch, WebFetch, BrowseWeb, DownloadFile, ReadImage, AnnotateImage, ReadPdf, PdfToImages, BatchReviewTextWithLLM, MergeReviewedResults, SearchPlaces, GetDirections, ReverseGeocode, GetYouTubeTranscript, SearchYouTube, SearchAmazon, TranscribeAudio, SearchKnowledge, ListNamespaces, ListDocuments, IngestDocument, IngestStatus, SearchNotes, ReadNote, WriteNote, SearchMicrosoftLearn, FetchMicrosoftLearn, SearchMicrosoftLearnCache, 
RefreshMicrosoftLearnCache, 'mcp__*'] + allowed_tools: [Read, Write, Bash, Glob, Grep, WebSearch, WebFetch, BrowseWeb, DownloadFile, ReadImage, AnnotateImage, ReadPdf, PdfToImages, ReadMsg, BatchReviewTextWithLLM, MergeReviewedResults, SearchPlaces, GetDirections, ReverseGeocode, GetYouTubeTranscript, SearchYouTube, SearchAmazon, TranscribeAudio, SearchKnowledge, ListNamespaces, ListDocuments, IngestDocument, IngestStatus, SearchNotes, ReadNote, WriteNote, SearchMicrosoftLearn, FetchMicrosoftLearn, SearchMicrosoftLearnCache, RefreshMicrosoftLearnCache, 'mcp__*'] default_next: verify rules: - condition: 2つ以上の独立したテーマがあり、並列分解が効率的と判断した @@ -177,7 +177,7 @@ movements: - 合格: `complete({status: "success", result: "ユーザー向け最終回答"})` - 修正必要: `transition({next_step: "execute", summary: "差し戻し指摘"})` (上記形式で) - 技術的失敗: `complete({status: "aborted", abort_reason: "..."})` - allowed_tools: [Read, Glob, Grep, WebSearch, WebFetch, ReadImage, AnnotateImage, ReadPdf, ReadExcel, ReadDocx, ReadPPTX, SearchNotes, ReadNote, SearchMicrosoftLearn, FetchMicrosoftLearn, SearchMicrosoftLearnCache, RefreshMicrosoftLearnCache] + allowed_tools: [Read, Glob, Grep, WebSearch, WebFetch, ReadImage, AnnotateImage, ReadPdf, ReadExcel, ReadDocx, ReadPPTX, ReadMsg, SearchNotes, ReadNote, SearchMicrosoftLearn, FetchMicrosoftLearn, SearchMicrosoftLearnCache, RefreshMicrosoftLearnCache] default_next: COMPLETE rules: - condition: 成果物がない、または内容に不足・誤りがある(追加質問への回答に検索根拠が不足している場合も含む) diff --git a/pieces/office-process.yaml b/pieces/office-process.yaml index a2d6d04..3316c63 100644 --- a/pieces/office-process.yaml +++ b/pieces/office-process.yaml @@ -38,6 +38,10 @@ movements: - テキストが抽出できた場合 → そのまま加工に進む - 全ページが空テキスト(スキャン PDF)の場合 → PdfToImages でページ画像化し、ReadImage で内容を確認する(ReadImage は VLM 対応 worker でのみ利用可能) + **Outlook メール (.msg)**: + - ReadMsg で件名・差出人・宛先・本文を取得。添付は input/ に保存される + - 保存された添付は ReadPdf / ReadExcel / ReadImage など種別ごとのツールで開く + ## Office ファイルの加工方針 Excel (.xlsx) の編集: @@ -63,7 +67,7 @@ movements: - **追加情報が必要で同じ 
process を続行**: `transition({next_step: "process", summary: "..."})` - **対象が特定できずユーザー確認が必要**: `complete({status: "needs_user_input", missing_info: "...", why_no_default: "..."})` - **読み取り不能・対応外フォーマット等の技術的失敗**: `complete({status: "aborted", abort_reason: "..."})` - allowed_tools: [Read, Write, Bash, Glob, Grep, ReadExcel, ReadDocx, ReadPdf, ReadPPTX, SplitExcelSheets, SplitDocxSections, PdfToImages, ReadImage, WebSearch, WebFetch, DownloadFile, SQLite, TranscribeAudio, SearchKnowledge, ListNamespaces, ListDocuments, ReadToolDoc, 'mcp__*'] + allowed_tools: [Read, Write, Bash, Glob, Grep, ReadExcel, ReadDocx, ReadPdf, ReadPPTX, ReadMsg, SplitExcelSheets, SplitDocxSections, PdfToImages, ReadImage, WebSearch, WebFetch, DownloadFile, SQLite, TranscribeAudio, SearchKnowledge, ListNamespaces, ListDocuments, ReadToolDoc, 'mcp__*'] default_next: verify rules: - condition: output/ に成果物を書き出した(または既存ファイルを編集した) @@ -112,7 +116,7 @@ movements: - 合格: `complete({status: "success", result: "ユーザー向け最終回答"})` - 修正必要: `transition({next_step: "process", summary: "差し戻し指摘"})` (上記形式で) - 技術的失敗: `complete({status: "aborted", abort_reason: "..."})` - allowed_tools: [Read, Glob, Grep, ReadPdf, ReadImage, ReadExcel, ReadDocx, ReadPPTX, ReadToolDoc] + allowed_tools: [Read, Glob, Grep, ReadPdf, ReadImage, ReadExcel, ReadDocx, ReadPPTX, ReadMsg, ReadToolDoc] default_next: COMPLETE rules: - condition: 成果物がない、または内容に不足・誤りがある diff --git a/scripts/server.sh b/scripts/server.sh index fe2a0f3..0063b4e 100755 --- a/scripts/server.sh +++ b/scripts/server.sh @@ -5,6 +5,53 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_DIR="$(dirname "$SCRIPT_DIR")" PID_FILE="$PROJECT_DIR/.server.pid" LOG_FILE="$PROJECT_DIR/logs/server.log" + +# Optional project-root .env (gitignored; setup.sh writes credentials here, +# operators can add e.g. HOST=0.0.0.0 for reverse-proxy deployments so a bare +# `server.sh restart` keeps the bind address). Lines are `KEY=value` or +# `export KEY='value'`. 
Precedence: explicit environment > .env > defaults, +# so `HOST=127.0.0.1 scripts/server.sh restart` still overrides the file. +if [[ -f "$PROJECT_DIR/.env" ]]; then + # Values are parsed LITERALLY (no eval/source): a value is either bare, or + # single-quoted in setup.sh's format ('\'' encodes a literal quote), or + # double-quoted (quotes stripped, contents kept literal — no $ expansion). + # One KEY=value per line; full-line comments only. + # + # Keys this loader itself set from .env are tracked space-delimited (keys are + # validated identifiers so this is unambiguous — no Bash-4 associative + # arrays, macOS ships bash 3.2). Only EXTERNAL pre-set environment is + # protected; duplicate keys within .env keep normal source semantics (last + # line wins) because earlier lines are recorded here and may be overridden. + _envfile_set=" " + _q=\' + _esc=$'\x01' + while IFS= read -r _line || [[ -n "$_line" ]]; do + _line="${_line#"${_line%%[![:space:]]*}"}" # ltrim + [[ -z "$_line" || "$_line" == \#* ]] && continue + _kv="${_line#export }" + _key="${_kv%%=*}" + [[ "$_key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue + [[ "$_kv" == *=* ]] || continue + _val="${_kv#*=}" + case "$_val" in + "$_q"*) + _val="${_val//${_q}\\${_q}${_q}/${_esc}}" # '\'' -> literal-quote marker + _val="${_val//${_q}/}" # drop quoting quotes + _val="${_val//${_esc}/${_q}}" # restore literal quotes + ;; + \"*\") + _val="${_val#\"}" + _val="${_val%\"}" + ;; + esac + if [[ "$_envfile_set" == *" $_key "* || -z "${!_key+x}" ]]; then + export "$_key=$_val" + _envfile_set="${_envfile_set}${_key} " + fi + done < "$PROJECT_DIR/.env" + unset _line _kv _key _val _envfile_set _q _esc +fi + PORT="${PORT:-9876}" cd "$PROJECT_DIR" diff --git a/scripts/setup.sh b/scripts/setup.sh index 5c52c24..6454745 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -130,6 +130,16 @@ path.write_text(text.replace(needle, replacement)) PY fi +python3 - <<'PY' +from pathlib import Path +p = Path("config.yaml") +text = 
p.read_text() +# Append the default TLS block only when no line starts a top-level "server:" key (first line included). +if not any(line.startswith("server:") for line in text.splitlines()): + text += "\nserver:\n  tls:\n    enabled: true\n" + p.write_text(text) +PY + echo " config.yaml を生成/更新しました" echo "" @@ -191,7 +201,8 @@ chmod 600 "${ENV_FILE}" echo "" echo " 認証情報を ${ENV_FILE} (権限 0600) に書き出しました。" -echo " 起動前に以下で読み込んでください:" +echo " scripts/server.sh は起動時に ${ENV_FILE} を自動で読み込みます。" +echo " 手動起動 (npm start 等) の場合のみ、事前に読み込んでください:" echo "" echo " source ${ENV_FILE}" echo "" diff --git a/src/bridge/console-ws-api.ts b/src/bridge/console-ws-api.ts index 093194c..59fc8ff 100644 --- a/src/bridge/console-ws-api.ts +++ b/src/bridge/console-ws-api.ts @@ -1,4 +1,5 @@ import type { IncomingMessage, Server as HttpServer } from 'node:http'; +import type { Server as HttpsServer } from 'node:https'; import type { Socket } from 'node:net'; import { WebSocketServer, type WebSocket } from 'ws'; import { Router, json, type Request, type Response } from 'express'; @@ -81,7 +82,9 @@ const PATH_RE = /^\/+api\/local\/tasks\/([^/]+)\/console\/ws$/; * (the client gets a 1006 abnormal close) so we don't leak failure * reasons over the upgrade channel. The reason is always logged. */ -export function attachConsoleWs(server: HttpServer, deps: ConsoleWsDeps): void { +// Both http.Server and https.Server emit the 'upgrade' event used for WSS, +// so either type is a valid host for the console WebSocket upgrade handler. 
+export function attachConsoleWs(server: HttpServer | HttpsServer, deps: ConsoleWsDeps): void { const wss = new WebSocketServer({ noServer: true }); server.on('upgrade', async (req, socket, head) => { diff --git a/src/bridge/local-files-api.test.ts b/src/bridge/local-files-api.test.ts index 211e90e..3a539f0 100644 --- a/src/bridge/local-files-api.test.ts +++ b/src/bridge/local-files-api.test.ts @@ -37,7 +37,11 @@ function makeUser(overrides: Partial = {}): Express.User { }; } -function makeApp(repo: Repository, user?: Express.User): express.Application { +function makeApp( + repo: Repository, + user?: Express.User, + opts: { authActive?: boolean } = {}, +): express.Application { const app = express(); if (user) { app.use((req, _res, next) => { @@ -45,7 +49,7 @@ function makeApp(repo: Repository, user?: Express.User): express.Application { next(); }); } - mountLocalFilesApi(app, repo); + mountLocalFilesApi(app, repo, { authActive: opts.authActive ?? true }); return app; } @@ -55,6 +59,7 @@ beforeEach(() => { mkdirSync(join(ws, 'output', 'sub'), { recursive: true }); writeFileSync(join(ws, 'input', 'data.csv'), 'a,b\n1,2\n'); writeFileSync(join(ws, 'output', 'report.md'), '# report'); + writeFileSync(join(ws, 'output', 'report.html'), '

report

'); writeFileSync(join(ws, 'output', 'sub', 'nested.txt'), 'nested'); // A file just outside the workspace that traversal must never reach. writeFileSync(join(ws, '..', `outside-${process.pid}.txt`), 'secret'); @@ -153,6 +158,80 @@ describe('GET /api/local/tasks/:taskId/files/raw', () => { expect(res.headers['content-type']).toContain('markdown'); }); + it('sandboxes raw HTML by default', async () => { + const res = await request(makeApp(makeRepo(), makeUser())) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBe('sandbox'); + }); + + it('allows the task OWNER to open trusted raw HTML without the sandbox header', async () => { + const res = await request(makeApp(makeRepo(), makeUser())) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBeUndefined(); + expect(res.headers['content-type']).toContain('html'); + }); + + it("keeps trusted raw HTML sandboxed even for an admin on another user's task (user→admin lure)", async () => { + const res = await request(makeApp(makeRepo(), makeUser({ id: 'admin-9', role: 'admin' }))) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBe('sandbox'); + }); + + it('keeps trusted raw HTML sandboxed for a NON-owner viewer of a shared task', async () => { + const repo = makeRepo({ + getLocalTask: vi.fn().mockResolvedValue({ + id: 1, + ownerId: 'user-1', + visibility: 'public', + workspacePath: ws, + }), + } as Partial); + const res = await request(makeApp(repo, makeUser({ id: 'user-2' }))) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBe('sandbox'); + }); + + it('keeps trusted raw HTML 
sandboxed for an ownerless task even when authenticated', async () => { + const repo = makeRepo({ + getLocalTask: vi.fn().mockResolvedValue({ + id: 1, + ownerId: null, + visibility: 'public', + workspacePath: ws, + }), + } as Partial); + const res = await request(makeApp(repo, makeUser())) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBe('sandbox'); + }); + + it('keeps trusted raw HTML sandboxed when auth is on but no user is present', async () => { + const res = await request(makeApp(makeRepo())) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBe('sandbox'); + }); + + it('allows trusted raw HTML in no-auth mode (sole operator owns every task)', async () => { + const res = await request(makeApp(makeRepo(), undefined, { authActive: false })) + .get('/api/local/tasks/1/files/raw?section=output&path=report.html&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBeUndefined(); + expect(res.headers['content-type']).toContain('html'); + }); + + it('still sandboxes non-HTML in no-auth mode even with trusted=1', async () => { + const res = await request(makeApp(makeRepo(), undefined, { authActive: false })) + .get('/api/local/tasks/1/files/raw?section=output&path=report.md&trusted=1'); + expect(res.status).toBe(200); + expect(res.headers['content-security-policy']).toBe('sandbox'); + }); + it('rejects traversal reads with 400 and never serves outside files', async () => { const res = await request(makeApp(makeRepo(), makeUser())) .get(`/api/local/tasks/1/files/raw?section=input&path=..%2F..%2Foutside-${process.pid}.txt`); diff --git a/src/bridge/local-files-api.ts b/src/bridge/local-files-api.ts index 7847532..3bf0a3c 100644 --- a/src/bridge/local-files-api.ts +++ b/src/bridge/local-files-api.ts 
@@ -6,7 +6,20 @@ import { logger } from '../logger.js'; import { parseTaskId } from './validation.js'; import { ensurePathWithin, isPathEscapeError, serializeLocalFileEntry, checkTaskOwnership, canViewTask, setUntrustedFileResponseHeaders } from './local-api-helpers.js'; -export function mountLocalFilesApi(app: Application, repo: Repository): void { +export interface LocalFilesApiOptions { + /** Whether the auth subsystem is wired. When false (no-auth single-user + * deployment) there is no req.user, so the sole local operator owns every + * task and is allowed to open their own generated HTML with trusted=1. + * Defaults to true (owner identity comes from req.user). */ + authActive?: boolean; +} + +export function mountLocalFilesApi( + app: Application, + repo: Repository, + opts: LocalFilesApiOptions = {}, +): void { + const authActive = opts.authActive ?? true; app.get('/api/local/tasks/:taskId/files', async (req: Request, res: Response) => { try { @@ -125,7 +138,23 @@ export function mountLocalFilesApi(app: Application, repo: Repository): void { res.status(400).json({ error: 'path must point to a file' }); return; } - setUntrustedFileResponseHeaders(res); + // trusted=1 drops the CSP sandbox so the owner's own generated HTML can + // run on the app origin. STRICTLY owner-only — self-XSS at worst: + // - org/public visibility lets other users VIEW the task, but serving + // someone else's HTML unsandboxed here would be stored XSS against + // the viewer; + // - admins are excluded too: another user's HTML running in an ADMIN + // session would be a user→admin privilege-escalation lure. + // No-auth single-user mode has no req.user; the sole operator owns every + // task, so they are the owner for this purpose (self-XSS only — there is + // no second principal to attack). + const trustedAllowed = authActive + ? 
!!viewer && task.ownerId != null && viewer.id === task.ownerId + : true; + const trustedHtml = req.query.trusted === '1' && /\.html?$/i.test(filePath) && trustedAllowed; + if (!trustedHtml) { + setUntrustedFileResponseHeaders(res); + } res.type(extname(filePath) || 'application/octet-stream'); res.send(readFileSync(filePath)); } catch (err) { diff --git a/src/bridge/local-tasks-api.test.ts b/src/bridge/local-tasks-api.test.ts index eec007e..226c447 100644 --- a/src/bridge/local-tasks-api.test.ts +++ b/src/bridge/local-tasks-api.test.ts @@ -7,6 +7,7 @@ import { tmpdir } from 'os'; import { Repository, localTaskRepoName } from '../db/repository.js'; import { BrowserSessionRepo } from '../db/browser-session-repo.js'; import { mountLocalTasksApi } from './local-tasks-api.js'; +import { buildLocalConversationContext } from '../engine/local-context.js'; describe('POST /api/local/tasks with visibility', () => { let tempDir = ''; @@ -888,6 +889,41 @@ describe('POST /api/local/tasks/:id/continue', () => { expect(handoff?.body).toContain('ssh-ops'); }); + it('persists the switch instruction as the latest user request so the agent follows it', async () => { + const { task } = await setupTaskWithTerminalJob(); + const res = await request(app) + .post(`/api/local/tasks/${task.id}/continue`) + .send({ piece: 'ssh-ops', instruction: 'use output/manual.md to set up foo' }); + expect(res.status).toBe(201); + const comments = await repo.listLocalTaskComments(task.id); + // The switch text must exist as a user 'request' comment... + const userRequests = comments.filter((c) => c.author === 'user' && c.kind === 'request'); + const switchComment = userRequests.find((c) => c.body === 'use output/manual.md to set up foo'); + expect(switchComment).toBeTruthy(); + // ...and be the LATEST user instruction (newer than the original 'b' body and + // the prior agent result), which is what buildLocalConversationContext keys on. 
+ const userInstructionKinds = ['comment', 'request', 'interjection']; + const latestUserInstruction = [...comments] + .reverse() + .find((c) => c.author === 'user' && userInstructionKinds.includes(c.kind)); + expect(latestUserInstruction?.body).toBe('use output/manual.md to set up foo'); + + // End-to-end: feeding the resulting comments + the continued job's + // instruction into the worker's context builder must put the switch text + // under the active "## タスク" heading, NOT the demoted + // "## オリジナルタスク (参考、対応済みの可能性あり)" slot that caused the + // agent to re-follow earlier instructions. + const ctx = buildLocalConversationContext({ + comments, + jobInstruction: 'use output/manual.md to set up foo', + inputFiles: [], + outputFiles: [], + }); + expect(ctx).toContain('## タスク'); + expect(ctx).toContain('use output/manual.md to set up foo'); + expect(ctx).not.toContain('## オリジナルタスク'); + }); + it('returns 409 job_in_progress when prev job is running', async () => { const { task } = await setupTaskWithTerminalJob({ status: 'running' }); const res = await request(app) diff --git a/src/bridge/local-tasks-api.ts b/src/bridge/local-tasks-api.ts index f2af72a..693db83 100644 --- a/src/bridge/local-tasks-api.ts +++ b/src/bridge/local-tasks-api.ts @@ -8,11 +8,12 @@ import { resolveJobScheduling } from '../scheduling.js'; import { parseTaskId, validateCreateTaskBody, validateCommentBody, validateFeedbackBody } from './validation.js'; import { getLocalWorkspacePath, checkTaskOwnership, canViewTask } from './local-api-helpers.js'; import { jobEventBus, type JobStreamEvent } from './job-events.js'; +import { buildTitleFallback } from '../title-generation.js'; export interface LocalTasksApiOptions { repo: Repository; worktreeDir?: string; - generateTitle?: (body: string) => Promise; + generateTitle?: (body: string, ownerId?: string) => Promise; selectPiece?: (body: string, fileNames: string[], userId?: string) => Promise; /** * Server-side validator for piece names accepted by the 
@@ -126,28 +127,23 @@ export function mountLocalTasksApi(app: Application, opts: LocalTasksApiOptions) browserSessionProfileId = n; } - let taskTitle = (body.title ?? '').trim(); + const userTitle = (body.title ?? '').trim(); const rawPiece = (body.piece ?? 'auto').trim(); const attachmentNames = (body.attachments ?? []).map((a: { name?: string }) => a.name).filter(Boolean) as string[]; - // タイトル生成と piece 分類を並列実行 - const [generatedTitle, autoSelectedPiece] = await Promise.all([ - // タイトル生成 - (!taskTitle && opts.generateTitle) - ? Promise.race([ - opts.generateTitle(body.body.trim()), - new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 8000)), - ]).catch((e: unknown) => { logger.warn(`Title generation failed: ${e}`); return ''; }) - : Promise.resolve(''), - // piece 分類('auto' の場合のみ); userId を渡し per-user カタログを使用 - (rawPiece === 'auto' && opts.selectPiece) - ? opts.selectPiece(body.body.trim(), attachmentNames, (req.user as Express.User | undefined)?.id).catch((e: unknown) => { logger.warn(`Piece classification failed: ${e}`); return 'chat'; }) - : Promise.resolve(rawPiece), - ]); + // Title is NOT generated by an LLM at creation time anymore — that fired a + // second concurrent LLM request per task and churned gateway backend + // slots. Instead we set a cheap synchronous fallback now, and the agent + // upgrades it during the run by deriving from the Mission Brief goal + // (see Repository.updateMissionBriefSync). On-demand AI regeneration is + // available via POST /api/local/tasks/:id/regenerate-title. + const autoSelectedPiece = (rawPiece === 'auto' && opts.selectPiece) + ? 
await opts.selectPiece(body.body.trim(), attachmentNames, (req.user as Express.User | undefined)?.id) + .catch((e: unknown) => { logger.warn(`Piece classification failed: ${e}`); return 'chat'; }) + : rawPiece; - if (!taskTitle) { - taskTitle = generatedTitle || body.body.trim().slice(0, 40).replace(/\n/g, ' '); - } + const taskTitle = userTitle || buildTitleFallback(body.body.trim()); + const titleSource: 'auto' | 'user' = userTitle ? 'user' : 'auto'; const piece = autoSelectedPiece; const profile = body.profile ?? 'auto'; const outputFormat = body.outputFormat ?? 'markdown'; @@ -168,6 +164,7 @@ export function mountLocalTasksApi(app: Application, opts: LocalTasksApiOptions) const task = await repo.createLocalTask({ title: taskTitle, + titleSource, body: body.body.trim(), pieceName: piece, profile, @@ -406,7 +403,18 @@ export function mountLocalTasksApi(app: Application, opts: LocalTasksApiOptions) const task = await repo.getLocalTask(taskId, { viewer: req.user as Express.User | undefined }); if (!checkTaskOwnership(req, res, task)) return; - const updates: { visibility?: 'private' | 'org' | 'public'; visibilityScopeOrgId?: string | null } = {}; + const updates: { title?: string; titleSource?: 'user'; visibility?: 'private' | 'org' | 'public'; visibilityScopeOrgId?: string | null } = {}; + if (req.body.title !== undefined) { + if (typeof req.body.title !== 'string') { + res.status(400).json({ error: 'title must be a string' }); return; + } + const trimmed = req.body.title.trim(); + if (!trimmed) { res.status(400).json({ error: 'title must not be empty' }); return; } + if (trimmed.length > 200) { res.status(400).json({ error: 'title must be 200 characters or less' }); return; } + // Manual edit pins the title: the agent never auto-overwrites a user title. 
+ updates.title = trimmed; + updates.titleSource = 'user'; + } if (req.body.visibility !== undefined) { const v = req.body.visibility; if (!['private', 'org', 'public'].includes(v)) { @@ -443,6 +451,41 @@ export function mountLocalTasksApi(app: Application, opts: LocalTasksApiOptions) } }); + // On-demand AI title regeneration. Unlike the old creation-time path this + // only fires when the user explicitly asks (a button), so it never adds a + // concurrent LLM request to the task-creation hot path. Owner/admin only. + app.post('/api/local/tasks/:taskId/regenerate-title', async (req: Request, res: Response) => { + try { + const taskId = parseTaskId(req.params.taskId); + if (taskId === null) { res.status(400).json({ error: 'Invalid task ID' }); return; } + const task = await repo.getLocalTask(taskId, { viewer: req.user as Express.User | undefined }); + if (!checkTaskOwnership(req, res, task)) return; + if (!opts.generateTitle) { res.status(503).json({ error: 'Title generation is not configured' }); return; } + + let title = ''; + try { + title = await Promise.race([ + // Ownerless (no-auth) tasks attribute to 'local', matching the + // worker/piece-runner convention (ownerId ?? 'local'). + opts.generateTitle(task!.body, task!.ownerId ?? 'local'), + new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 8000)), + ]); + } catch (e) { + logger.warn(`Title regeneration failed (task=${taskId}): ${e}`); + res.status(502).json({ error: 'Title generation failed' }); return; + } + // Empty model output is not an error: fall back to the cheap synchronous + // title so the button always yields something (matching the old creation + // path's behaviour). + title = (title ?? 
'').trim() || buildTitleFallback(task!.body); + await repo.updateLocalTask(taskId, { title, titleSource: 'agent' }); + res.json({ title }); + } catch (err) { + logger.error(`Regenerate title API error: ${err}`); + res.status(500).json({ error: 'Failed to regenerate title' }); + } + }); + app.delete('/api/local/tasks/:taskId', async (req: Request, res: Response) => { try { const taskId = parseTaskId(req.params.taskId); @@ -556,6 +599,15 @@ export function mountLocalTasksApi(app: Application, opts: LocalTasksApiOptions) await repo.updateLocalTask(taskId, { pieceName: piece }); + // Persist the switch-time instruction as a user request. Without this it + // lives only in job.instruction, and buildLocalConversationContext picks + // the *latest user comment* as the current instruction — so a stale older + // comment would win and the switch text would be demoted to the "original + // task (possibly already handled)" slot, making the agent re-follow prior + // instructions instead of the new one. Mirrors the create path, which + // also persists the body as a 'request' comment. + await repo.addLocalTaskComment(taskId, 'user', instruction.trim(), 'request'); + // Surface the handoff in the timeline so the user (and the LLM, when // it later inspects task comments) can see when piece switches happened. 
await repo.addLocalTaskComment( diff --git a/src/bridge/novnc-proxy.ts b/src/bridge/novnc-proxy.ts index e6d68e3..20c97c2 100644 --- a/src/bridge/novnc-proxy.ts +++ b/src/bridge/novnc-proxy.ts @@ -6,6 +6,7 @@ import { fileURLToPath } from 'url'; import { existsSync } from 'fs'; import express from 'express'; import type { Server } from 'http'; +import type { Server as HttpsServer } from 'https'; import type { SessionManager, BrowserSession } from '../engine/browser-session.js'; import type { UpgradeAuthChecker } from './auth.js'; import { logger } from '../logger.js'; @@ -78,7 +79,9 @@ export function createNovncRouter(): Router { * - authenticateUpgrade 未設定 (dev モード) は session 存在確認だけで通す */ export function setupNovncWebSocketProxy( - server: Server, + // Both http.Server and https.Server emit the 'upgrade' event used for WSS, + // so either type works here as a WebSocket proxy host. + server: Server | HttpsServer, getSessionManager: () => SessionManager | null, authenticateUpgrade?: UpgradeAuthChecker, authorizeSession?: NovncSessionAuthorizer, diff --git a/src/bridge/server-tls-listener.test.ts b/src/bridge/server-tls-listener.test.ts new file mode 100644 index 0000000..3f724df --- /dev/null +++ b/src/bridge/server-tls-listener.test.ts @@ -0,0 +1,81 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { createServer as createHttpsServer, type Server } from 'https'; +import { request as httpsRequest } from 'https'; +import { mkdtempSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { resolveTlsOptions } from '../net/tls-options.js'; +import { SERVER_TLS_DEFAULTS } from '../server/config.js'; + +describe('native HTTPS listener (self-signed)', () => { + let server: Server | undefined; + let dir: string | undefined; + afterEach(async () => { + if (server) await new Promise((r) => server!.close(() => r())); + if (dir) rmSync(dir, { recursive: true, force: true }); + server = undefined; + dir = undefined; + }); 
+ + it('completes a TLS>=1.2 handshake and serves the app over https', { timeout: 15000 }, async () => { + dir = mkdtempSync(join(tmpdir(), 'tls-listener-')); + const resolved = resolveTlsOptions({ ...SERVER_TLS_DEFAULTS, enabled: true, selfSignedDir: dir }); + server = createHttpsServer( + { cert: resolved.cert, key: resolved.key, minVersion: resolved.minVersion }, + (_req, res) => { + res.writeHead(200); + res.end('ok'); + }, + ); + await new Promise((r) => server!.listen(0, '127.0.0.1', r)); + const addr = server.address(); + const port = typeof addr === 'object' && addr ? addr.port : 0; + + const result = await new Promise<{ code: number; body: string; proto: string | null }>( + (resolve, reject) => { + const req = httpsRequest( + { host: '127.0.0.1', port, path: '/', rejectUnauthorized: false }, + (res) => { + // Capture protocol before the socket is torn down (socket may be + // null by the time 'end' fires so we snapshot it on 'response'). + const proto = (res.socket as import('tls').TLSSocket | null)?.getProtocol?.() ?? null; + let d = ''; + res.on('data', (c) => (d += c)); + res.on('end', () => + resolve({ code: res.statusCode ?? 0, body: d, proto }), + ); + }, + ); + req.on('error', reject); + req.end(); + }, + ); + expect(result.code).toBe(200); + expect(result.body).toBe('ok'); + expect(['TLSv1.2', 'TLSv1.3']).toContain(result.proto); + }); + + it('a strict client rejects the self-signed cert', async () => { + dir = mkdtempSync(join(tmpdir(), 'tls-listener-strict-')); + const resolved = resolveTlsOptions({ ...SERVER_TLS_DEFAULTS, enabled: true, selfSignedDir: dir }); + server = createHttpsServer( + { cert: resolved.cert, key: resolved.key, minVersion: resolved.minVersion }, + (_req, res) => { + res.writeHead(200); + res.end('ok'); + }, + ); + await new Promise((r) => server!.listen(0, '127.0.0.1', r)); + const addr = server.address(); + const port = typeof addr === 'object' && addr ? 
addr.port : 0; + const outcome = await new Promise((resolve) => { + const req = httpsRequest( + { host: '127.0.0.1', port, path: '/', rejectUnauthorized: true }, + () => resolve('UNEXPECTED_OK'), + ); + req.on('error', (e) => resolve('rejected:' + (e as NodeJS.ErrnoException).code)); + req.end(); + }); + expect(outcome).toMatch(/^rejected:/); + }); +}); diff --git a/src/bridge/server.tls.test.ts b/src/bridge/server.tls.test.ts new file mode 100644 index 0000000..aadee40 --- /dev/null +++ b/src/bridge/server.tls.test.ts @@ -0,0 +1,26 @@ +import { describe, it, expect } from 'vitest'; +import { computeEffectiveSecureCookie, shouldWarnDoubleTls } from './server.js'; + +describe('computeEffectiveSecureCookie', () => { + it('is true when secure_cookie is on (proxy mode)', () => { + expect(computeEffectiveSecureCookie(true, false)).toBe(true); + }); + it('is true when native TLS is on even if secure_cookie is off', () => { + expect(computeEffectiveSecureCookie(false, true)).toBe(true); + }); + it('is false when neither', () => { + expect(computeEffectiveSecureCookie(false, false)).toBe(false); + }); +}); + +describe('shouldWarnDoubleTls', () => { + it('warns when native TLS and secure_cookie (proxy signal) are both on', () => { + expect(shouldWarnDoubleTls(true, true)).toBe(true); + }); + it('does not warn for a plain native-TLS install (secure_cookie off)', () => { + expect(shouldWarnDoubleTls(true, false)).toBe(false); + }); + it('does not warn when TLS is disabled', () => { + expect(shouldWarnDoubleTls(false, true)).toBe(false); + }); +}); diff --git a/src/bridge/server.ts b/src/bridge/server.ts index 6e7afac..8d3d0ba 100644 --- a/src/bridge/server.ts +++ b/src/bridge/server.ts @@ -16,6 +16,7 @@ import { mountBrandingApi, resolveBranding } from './branding-api.js'; import { createBrowserApi } from './browser-api.js'; import { createBrowserSessionApi } from './browser-session-api.js'; import { createSubtaskActivityRouter } from './subtask-activity-api.js'; +import { 
createUsageRouter } from './usage-api.js'; import { SessionManager } from '../engine/browser-session.js'; import { createNovncRouter, setupNovncWebSocketProxy } from './novnc-proxy.js'; import { setSessionManager } from '../engine/tools/browser.js'; @@ -104,6 +105,11 @@ import { createNotesApi } from './notes-api.js'; import { mountGateway, type GatewayMountHandle } from './gateway-mount.js'; import { readGatewayConfig } from '../gateway/config.js'; import { createAdminGatewayStatusRouter } from './admin-gateway-status-api.js'; +import { createServer as createHttpsServer } from 'https'; +import { X509Certificate } from 'crypto'; +import { mergeServerConfig } from '../server/config.js'; +import { resolveTlsOptions } from '../net/tls-options.js'; +import { createHttpRedirectServer } from '../net/http-redirect.js'; const __filenameServer = fileURLToPath(import.meta.url); const __dirnameServer = dirname(__filenameServer); @@ -112,7 +118,7 @@ export interface CoreServerOptions { repo: Repository; worktreeDir?: string; configuredRepos?: string[]; - generateTitle?: (body: string) => Promise; + generateTitle?: (body: string, ownerId?: string) => Promise; selectPiece?: (body: string, fileNames: string[], userId?: string) => Promise; configManager?: ConfigManager; piecesDir?: string; @@ -170,6 +176,14 @@ export function createCoreServer(opts: CoreServerOptions): { gatewayMount: GatewayMountHandle | null; /** True when an OAuth provider or local auth is active. False = no-auth mode. */ authActive: boolean; + /** + * Resolved server config snapshot — computed once with the real listen port + * so both the cookie-secure decision (inside createCoreServer) and the + * listener branch (inside startCoreServer) share the SAME object. + * A config hot-reload between the two would otherwise cause tls.enabled to + * disagree between the cookie flag and the actual listener type. 
+ */ + serverConfig: ReturnType; } { const { repo, worktreeDir } = opts; const app = express(); @@ -276,6 +290,16 @@ export function createCoreServer(opts: CoreServerOptions): { } let authenticateUpgrade: import('./auth.js').UpgradeAuthChecker | undefined; + // Resolve server config ONCE here with the real listen port (threaded in via + // opts.listenPort by startCoreServer). Both the cookie-secure decision below + // and the listener branch in startCoreServer consume this same snapshot so a + // config hot-reload between the two calls cannot produce a mismatch between + // tls.enabled and the cookie secure flag. + const serverCfg = mergeServerConfig(loadConfig().server, { + freshInstall: false, + httpsPort: opts.listenPort ?? Number(process.env['PORT'] ?? 9876), + }); + if (authActive) { // Idempotently seed the shared `local` system admin (id='local', the same // owner the no-auth path uses) so an existing single-user / no-auth @@ -286,9 +310,21 @@ export function createCoreServer(opts: CoreServerOptions): { logger.info(`[auth] seeded local system admin id=local email=${bootstrap.email}`); } + // Compose effective secureCookie: native TLS termination also requires + // the secure flag on session cookies, even when no upstream proxy is + // present. IMPORTANT: trust-proxy (line ~191) stays keyed on the + // ORIGINAL opts.authConfig.secureCookie — native TLS must NOT enable it. + const effectiveSecureCookie = computeEffectiveSecureCookie( + !!opts.authConfig?.secureCookie, + serverCfg.tls.enabled, + ); + const authConfigForSetup = opts.authConfig + ? 
{ ...opts.authConfig, secureCookie: effectiveSecureCookie } + : opts.authConfig; + const auth = setupAuth( repo, - opts.authConfig!, + authConfigForSetup!, () => { const b = resolveBranding(opts.configManager); return { appName: b.appName, loginPageTitle: b.loginPageTitle }; @@ -330,6 +366,7 @@ export function createCoreServer(opts: CoreServerOptions): { // per-piece write authz (built-in/global-custom → admin, user-custom → owner) // is enforced inside pieces-api.ts handlers. app.use('/api/pieces', requireAuth); + app.use('/api/usage', requireAuth); // Scheduled tasks: any authenticated user can create/list (visibility-filtered). // PATCH/DELETE owner-or-admin enforcement lives in the handlers (Task 14). app.use('/api/scheduled-tasks', requireAuth); @@ -893,10 +930,11 @@ export function createCoreServer(opts: CoreServerOptions): { }); // --- Local files API --- - mountLocalFilesApi(app, repo); + mountLocalFilesApi(app, repo, { authActive }); // --- Subtask activity API --- app.use('/api/local/tasks', createSubtaskActivityRouter(repo)); + app.use('/api/usage', createUsageRouter(repo, { authActive })); // --- Subtask files API (listing MUST come before wildcard) --- mountSubtaskFilesApi(app, repo); @@ -1176,7 +1214,19 @@ export function createCoreServer(opts: CoreServerOptions): { return isOwner || user.role === 'admin'; }; - return { app, browserSessionManager, authenticateUpgrade, authorizeNovncSession, sshConsole, backendStatusRegistry, workerMetrics, gatewayMount, authActive }; + return { app, browserSessionManager, authenticateUpgrade, authorizeNovncSession, sshConsole, backendStatusRegistry, workerMetrics, gatewayMount, authActive, serverConfig: serverCfg }; +} + +/** Cookie `secure` must be set whenever the user-facing scheme is https — + * via an upstream TLS proxy (secureCookie) OR native TLS termination. 
*/ +export function computeEffectiveSecureCookie(secureCookie: boolean, tlsEnabled: boolean): boolean { + return secureCookie || tlsEnabled; +} + +/** Heuristic: native TLS + the proxy signal (secure_cookie) likely means a + * reverse proxy is also terminating TLS → double-TLS misconfiguration. */ +export function shouldWarnDoubleTls(tlsEnabled: boolean, secureCookie: boolean): boolean { + return tlsEnabled && secureCookie; } export function finalizeServer(app: express.Application): express.Application { @@ -1231,6 +1281,7 @@ export function startCoreServer(opts: CoreServerOptions, port: number = 9876): v workerMetrics, gatewayMount, authActive, + serverConfig, // Forward the actual port to createCoreServer so the admin gateway // status endpoint reports the real bind port (not the PORT env // guess). See `listenPort` doc on CoreServerOptions. @@ -1255,18 +1306,65 @@ export function startCoreServer(opts: CoreServerOptions, port: number = 9876): v // 127.0.0.1:9876 port mapping instead. const host = process.env['HOST'] ?? '127.0.0.1'; const isLoopbackBind = host === '127.0.0.1' || host === '::1' || host === 'localhost'; - const server = finalApp.listen(port, host, () => { - logger.info(`Core server listening on ${host}:${port}`); - if (!isLoopbackBind && !authActive) { - logger.warn( - `[security] Listening on ${host} with authentication DISABLED. The agent API ` + - `(including the Bash tool) is reachable by anyone who can reach this host — ` + - `this is effectively unauthenticated remote code execution. Enable auth in ` + - `config.yaml (auth.local or an OAuth provider) before exposing a non-loopback ` + - `interface, or unset HOST to bind 127.0.0.1.`, + // Use the config snapshot already resolved in createCoreServer (same port, + // same loadConfig() call) — no second read so a hot-reload between the two + // cannot make the cookie-secure flag disagree with the listener type. 
+ const tls = serverConfig.tls; + let server: import('http').Server | import('https').Server; + if (tls.enabled) { + // Augment the self-signed SAN list with the redirect target host and the + // (non-wildcard) bind host so that browsers following the HTTP→HTTPS redirect + // always land on a hostname that is covered by the certificate. Provided-cert + // deployments are unaffected because resolveTlsOptions ignores selfSignedHosts + // when cert_file/key_file are set. + const extraSan = [ + tls.redirectHost, + host && host !== '0.0.0.0' && host !== '::' ? host : null, + ].filter((h): h is string => !!h); + const tlsForResolve = extraSan.length + ? { ...tls, selfSignedHosts: [...tls.selfSignedHosts, ...extraSan] } + : tls; + const resolved = resolveTlsOptions(tlsForResolve); // fatal throw on bad operator cert + server = createHttpsServer({ cert: resolved.cert, key: resolved.key, minVersion: resolved.minVersion }, finalApp); + server.listen(port, host, () => { + const source = tls.certFile ? `provided(${tls.certFile})` : 'self-signed'; + const fp = new X509Certificate(resolved.cert).fingerprint256; + logger.info(`Core server listening on https://${host}:${port} cert=${source} sha256=${fp}`); + if (shouldWarnDoubleTls(true, !!opts.authConfig?.secureCookie)) { + logger.warn( + `[security] server.tls.enabled is ON while auth.secure_cookie is also ON (reverse-proxy signal). ` + + `If a TLS-terminating proxy is in front of this app, set server.tls.enabled: false to avoid double TLS.`, + ); + } + }); + if (tls.httpRedirect) { + if (tls.redirectHost == null && (host === '0.0.0.0' || host === '::')) { + logger.warn( + `[server] HTTP->HTTPS redirect host falls back to the wildcard bind address (${host}); browsers cannot follow it. ` + + `Set server.tls.redirect_host to the externally reachable hostname.`, + ); + } + const pinnedHost = tls.redirectHost ?? (isLoopbackBind ? 
'localhost' : host); + const redirector = createHttpRedirectServer({ httpsPort: port, pinnedHost }); + redirector.on('error', (e) => logger.warn(`[server] HTTP redirect listener error: ${(e as Error).message}`)); + redirector.listen(tls.httpRedirectPort, host, () => + logger.info(`HTTP->HTTPS redirect listening on http://${host}:${tls.httpRedirectPort}`), ); } - }); + } else { + server = finalApp.listen(port, host, () => { + logger.info(`Core server listening on ${host}:${port}`); + if (!isLoopbackBind && !authActive) { + logger.warn( + `[security] Listening on ${host} with authentication DISABLED. The agent API ` + + `(including the Bash tool) is reachable by anyone who can reach this host — ` + + `this is effectively unauthenticated remote code execution. Enable auth in ` + + `config.yaml (auth.local or an OAuth provider) before exposing a non-loopback ` + + `interface, or unset HOST to bind 127.0.0.1.`, + ); + } + }); + } // 起動と同時に CAPTCHA Pool の idle GC を回す (task session を 5 分アイドルで GC) if (browserSessionManager) browserSessionManager.startIdleGc(); diff --git a/src/bridge/usage-api.test.ts b/src/bridge/usage-api.test.ts new file mode 100644 index 0000000..14deaaa --- /dev/null +++ b/src/bridge/usage-api.test.ts @@ -0,0 +1,135 @@ +/** + * Usage dashboard API (GET /api/usage/daily) tests. 
+ * + * Coverage: + * - admin sees all users + byUser breakdown; non-admin scoped to own rows + * - no-auth (authActive=false) sees everyone (scope 'all') + * - day / week / month bucketing collapses model/route correctly + * - inclusive range, default range, from>to → 400, range-too-large → 400 + * - invalid dates fall back to defaults (not 500) + * + * Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + */ +import { describe, it, expect, beforeEach } from 'vitest'; +import express from 'express'; +import request from 'supertest'; +import { Repository } from '../db/repository.js'; +import { createUsageRouter } from './usage-api.js'; + +function makeApp(repo: Repository, opts: { authActive: boolean; user?: { id: string; role?: string } }) { + const app = express(); + app.use((req, _res, next) => { + if (opts.user) (req as unknown as { user: unknown }).user = opts.user; + next(); + }); + app.use('/api/usage', createUsageRouter(repo, { authActive: opts.authActive })); + return app; +} + +function seed(repo: Repository, rows: Array<{ day: string; userId: string; source: 'gateway' | 'direct'; model?: string; route?: string; tin: number; tout: number; req?: number }>) { + for (const r of rows) { + repo.incrementLlmUsage({ + day: r.day, userId: r.userId, source: r.source, + model: r.model ?? 'm', route: r.route ?? 'r', + tokensIn: r.tin, tokensOut: r.tout, requests: r.req ?? 
1, + }); + } +} + +describe('GET /api/usage/daily', () => { + let repo: Repository; + beforeEach(() => { + repo = new Repository(':memory:'); + seed(repo, [ + { day: '2026-06-10', userId: 'u1', source: 'gateway', tin: 100, tout: 40 }, + { day: '2026-06-10', userId: 'u1', source: 'direct', model: 'x', route: 'h', tin: 10, tout: 5 }, + { day: '2026-06-11', userId: 'u1', source: 'gateway', tin: 7, tout: 3 }, + { day: '2026-06-11', userId: 'u2', source: 'direct', tin: 1000, tout: 500 }, + ]); + }); + + it('non-admin sees only their own rows (scope=self, no byUser)', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'u1', role: 'user' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-01&to=2026-06-30'); + expect(res.status).toBe(200); + expect(res.body.scope).toBe('self'); + expect(res.body.byUser).toBeUndefined(); + // u1 only: gateway 100+40+7+3=150, direct 10+5=15 + expect(res.body.totals.gateway).toMatchObject({ tokensIn: 107, tokensOut: 43, requests: 2 }); + expect(res.body.totals.direct).toMatchObject({ tokensIn: 10, tokensOut: 5, requests: 1 }); + }); + + it('admin sees all users with a byUser breakdown', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'admin1', role: 'admin' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-01&to=2026-06-30'); + expect(res.status).toBe(200); + expect(res.body.scope).toBe('all'); + expect(res.body.totals.direct.tokensIn).toBe(1010); // u1 10 + u2 1000 + const users = (res.body.byUser as Array<{ userId: string }>).map((u) => u.userId).sort(); + expect(users).toEqual(['u1', 'u2']); + // sorted by total tokens desc → u2 (1500) first + expect(res.body.byUser[0].userId).toBe('u2'); + }); + + it('resolves byUser display names (real users → name, sentinels verbatim)', async () => { + const u = repo.createUser({ email: 'alice@example.com', name: 'Alice', role: 'user', status: 'active' }); + seed(repo, [ + { day: '2026-06-11', userId: u.id, 
source: 'direct', tin: 5, tout: 5 }, + { day: '2026-06-11', userId: 'local', source: 'direct', tin: 1, tout: 1 }, + ]); + const app = makeApp(repo, { authActive: true, user: { id: 'admin1', role: 'admin' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-01&to=2026-06-30'); + const byId = Object.fromEntries((res.body.byUser as Array<{ userId: string; displayName: string }>).map((r) => [r.userId, r.displayName])); + expect(byId[u.id]).toBe('Alice'); + expect(byId['local']).toBe('local'); // sentinel returned verbatim for UI localization + }); + + it('no-auth mode (authActive=false) sees everyone', async () => { + const app = makeApp(repo, { authActive: false }); // no req.user + const res = await request(app).get('/api/usage/daily?from=2026-06-01&to=2026-06-30'); + expect(res.status).toBe(200); + expect(res.body.scope).toBe('all'); + expect(res.body.byUser.length).toBe(2); + }); + + it('day granularity yields one bucket per active day', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'admin1', role: 'admin' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-10&to=2026-06-11&granularity=day'); + expect(res.body.series.map((b: { bucket: string }) => b.bucket)).toEqual(['2026-06-10', '2026-06-11']); + }); + + it('month granularity collapses days into a YYYY-MM bucket', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'admin1', role: 'admin' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-01&to=2026-06-30&granularity=month'); + expect(res.body.series).toHaveLength(1); + expect(res.body.series[0].bucket).toBe('2026-06'); + }); + + it('week granularity uses an ISO YYYY-Www bucket', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'admin1', role: 'admin' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-08&to=2026-06-14&granularity=week'); + // 2026-06-10 / -11 fall in ISO week 24 of 2026 + 
expect(res.body.series).toHaveLength(1); + expect(res.body.series[0].bucket).toBe('2026-W24'); + }); + + it('rejects from > to with 400', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'u1', role: 'user' } }); + const res = await request(app).get('/api/usage/daily?from=2026-06-30&to=2026-06-01'); + expect(res.status).toBe(400); + }); + + it('rejects an absurdly large range with 400', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'u1', role: 'user' } }); + const res = await request(app).get('/api/usage/daily?from=2000-01-01&to=2026-06-30'); + expect(res.status).toBe(400); + }); + + it('falls back to defaults for invalid dates (no 500)', async () => { + const app = makeApp(repo, { authActive: true, user: { id: 'u1', role: 'user' } }); + const res = await request(app).get('/api/usage/daily?from=2026-99-99'); + expect(res.status).toBe(200); + // default window is the last 30 days, ending today + expect(res.body.to).toMatch(/^\d{4}-\d{2}-\d{2}$/); + }); +}); diff --git a/src/bridge/usage-api.ts b/src/bridge/usage-api.ts new file mode 100644 index 0000000..c1cfd21 --- /dev/null +++ b/src/bridge/usage-api.ts @@ -0,0 +1,171 @@ +import { Router, Request, Response } from 'express'; +import type { Repository, LlmUsageDailyAgg } from '../db/repository.js'; +import { logger } from '../logger.js'; + +/** + * Per-user LLM usage dashboard API. Reads the llm_usage_daily ledger + * (gateway + direct, recorded at the OpenAICompatClient completion + * boundary) and shapes a time series for the Usage tab. + * + * Visibility: admin (and the no-auth single-user local mode) see every + * user's usage; a non-admin authenticated user sees only their own rows. + * This is a separate lens from the gateway per-key billing view — the two + * are never summed. 
+ * + * Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + */ + +const DAY_RE = /^\d{4}-\d{2}-\d{2}$/; + +/** True only for a real calendar day in 'YYYY-MM-DD' (rejects 2026-99-99). */ +function isValidDay(s: unknown): s is string { + if (typeof s !== 'string' || !DAY_RE.test(s)) return false; + const d = new Date(`${s}T00:00:00.000Z`); + return !Number.isNaN(d.getTime()) && d.toISOString().slice(0, 10) === s; +} +const MAX_RANGE_DAYS = 800; // ~2y guard so a hand-crafted range can't scan unbounded +type Granularity = 'day' | 'week' | 'month'; + +interface Counters { + tokensIn: number; + tokensOut: number; + requests: number; +} + +function emptyCounters(): Counters { + return { tokensIn: 0, tokensOut: 0, requests: 0 }; +} + +function addInto(target: Counters, row: LlmUsageDailyAgg): void { + target.tokensIn += row.tokensIn; + target.tokensOut += row.tokensOut; + target.requests += row.requests; +} + +function utcToday(): string { + return new Date().toISOString().slice(0, 10); +} + +/** day shifted by deltaDays calendar days (negative = earlier), as 'YYYY-MM-DD' (UTC). */ +function shiftDay(day: string, deltaDays: number): string { + const d = new Date(`${day}T00:00:00.000Z`); + d.setUTCDate(d.getUTCDate() + deltaDays); + return d.toISOString().slice(0, 10); +} + +/** Signed number of whole days between two 'YYYY-MM-DD' days (UTC), i.e. to minus from: 0 for the same day, so this is a difference, not an inclusive count. */ +function dayDiff(from: string, to: string): number { + const a = Date.parse(`${from}T00:00:00.000Z`); + const b = Date.parse(`${to}T00:00:00.000Z`); + return Math.round((b - a) / 86_400_000); +} + +/** ISO-8601 week key 'YYYY-Www' for a 'YYYY-MM-DD' day (UTC). */ +function isoWeekKey(day: string): string { + const d = new Date(`${day}T00:00:00.000Z`); + // ISO week: Thursday of the current week decides the year. + const dayNum = (d.getUTCDay() + 6) % 7; // Mon=0 .. 
Sun=6 + d.setUTCDate(d.getUTCDate() - dayNum + 3); + const firstThursday = new Date(Date.UTC(d.getUTCFullYear(), 0, 4)); + const firstDayNum = (firstThursday.getUTCDay() + 6) % 7; + firstThursday.setUTCDate(firstThursday.getUTCDate() - firstDayNum + 3); + const week = 1 + Math.round((d.getTime() - firstThursday.getTime()) / (7 * 86_400_000)); + return `${d.getUTCFullYear()}-W${String(week).padStart(2, '0')}`; +} + +function bucketKey(day: string, granularity: Granularity): string { + if (granularity === 'month') return day.slice(0, 7); + if (granularity === 'week') return isoWeekKey(day); + return day; +} + +/** + * Human-friendly label for a usage owner id. Real users resolve to their + * name (or email) so the admin breakdown isn't a wall of opaque ids; the + * 'local' / 'system' sentinels are returned verbatim so the UI can localize + * them. Falls back to the raw id when no user row exists. + */ +function resolveDisplayName(repo: Repository, userId: string): string { + if (userId === 'local' || userId === 'system') return userId; + const u = repo.getUserById(userId); + return u?.name || u?.email || userId; +} + +export function createUsageRouter(repo: Repository, opts: { authActive: boolean }): Router { + const router = Router(); + + // GET /daily?from=YYYY-MM-DD&to=YYYY-MM-DD&granularity=day|week|month + router.get('/daily', (req: Request, res: Response) => { + try { + const to = isValidDay(req.query['to']) ? req.query['to'] : utcToday(); + const from = isValidDay(req.query['from']) ? req.query['from'] : shiftDay(to, -29); + if (from > to) { + res.status(400).json({ error: 'from must be on or before to' }); + return; + } + if (dayDiff(from, to) > MAX_RANGE_DAYS) { + res.status(400).json({ error: `range too large (max ${MAX_RANGE_DAYS} days)` }); + return; + } + const gq = req.query['granularity']; + const granularity: Granularity = + gq === 'week' || gq === 'month' ? 
gq : 'day'; + + // Visibility: a non-admin authenticated user is scoped to their own + // rows. Admin and the no-auth local mode see everyone. + const user = req.user as Express.User | undefined; + const isAdmin = !opts.authActive || user?.role === 'admin'; + const scopeUserId = isAdmin ? undefined : (user?.id ?? 'local'); + + const rows = repo.queryLlmUsageDaily({ from, to, userId: scopeUserId }); + + // Bucket by (bucketKey, source). Buckets are sparse — only days with + // usage appear; the client fills gaps for the chart. + const buckets = new Map(); + const totals = { gateway: emptyCounters(), direct: emptyCounters() }; + const byUser = new Map(); + + for (const row of rows) { + const key = bucketKey(row.day, granularity); + let b = buckets.get(key); + if (!b) { + b = { gateway: emptyCounters(), direct: emptyCounters() }; + buckets.set(key, b); + } + const sourceKey = row.source === 'gateway' ? 'gateway' : 'direct'; + addInto(b[sourceKey], row); + addInto(totals[sourceKey], row); + if (isAdmin) { + let u = byUser.get(row.userId); + if (!u) { u = emptyCounters(); byUser.set(row.userId, u); } + addInto(u, row); + } + } + + const series = Array.from(buckets.entries()) + .sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0)) + .map(([bucket, c]) => ({ bucket, gateway: c.gateway, direct: c.direct })); + + res.json({ + from, + to, + granularity, + scope: isAdmin ? 'all' : 'self', + series, + totals, + ...(isAdmin + ? 
{ + byUser: Array.from(byUser.entries()) + .map(([userId, c]) => ({ userId, displayName: resolveDisplayName(repo, userId), ...c })) + .sort((a, b) => (b.tokensIn + b.tokensOut) - (a.tokensIn + a.tokensOut)), + } + : {}), + }); + } catch (e) { + logger.error(`[usage-api] /daily failed: ${String(e)}`); + res.status(500).json({ error: 'Failed to load usage' }); + } + }); + + return router; +} diff --git a/src/config.ts b/src/config.ts index 2a0900f..aa9a851 100644 --- a/src/config.ts +++ b/src/config.ts @@ -4,6 +4,7 @@ import { logger } from './logger.js'; import { normalizeConfig } from './config-normalize.js'; import type { McpRuntimeConfig } from './mcp/config.js'; import type { SshRuntimeConfig } from './ssh/config.js'; +import type { ServerConfig } from './server/config.js'; export interface AskConfig { maxPerJob: number; // default: 2 @@ -60,6 +61,7 @@ export interface ToolsConfig { officePdfMaxSizeMb?: number; // ReadPdf の最大ファイルサイズ (default: 10) officePptxMaxSizeMb?: number; // ReadPPTX の最大ファイルサイズ (default: 50) officePptxMaxUncompressedMb?: number; // ReadPPTX の ZIP 展開後サイズ上限 (default: 200) + officeMsgMaxSizeMb?: number; // ReadMsg の最大ファイルサイズ (default: 25) /** * Max request body size (MB) for the POST /api/local/tasks and * /api/local/tasks/:id/comments endpoints (includes base64-encoded @@ -502,6 +504,7 @@ export interface AppConfig { ssh?: Partial; notes?: NotesConfig; notifications?: NotificationsConfig; + server?: Partial; } const DEFAULT_REFLECTION: ReflectionConfig = { diff --git a/src/db/migrate.reflection-columns.test.ts b/src/db/migrate.reflection-columns.test.ts index eb47d19..8cee761 100644 --- a/src/db/migrate.reflection-columns.test.ts +++ b/src/db/migrate.reflection-columns.test.ts @@ -338,27 +338,38 @@ describe('runReflectionJob integration', () => { }), } as any); - // Mock fetch so callReflectionLlm returns a valid abstain result - const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: async () => ({ - choices: [{ - message: { - 
tool_calls: [{ - function: { - arguments: JSON.stringify({ - memory_changes: [], - piece_changes: { should_edit: false }, - reasoning: 'nothing to learn', - abstain_reason: 'task completed successfully without issues', - }), - }, - }], - }, - }], - usage: { prompt_tokens: 123, completion_tokens: 45 }, - }), - } as any); + // Mock fetch so callReflectionLlm (now streaming via OpenAICompatClient) + // returns a valid abstain result as an SSE submit_reflection tool_call. + const reflectionArgs = JSON.stringify({ + memory_changes: [], + piece_changes: { should_edit: false }, + reasoning: 'nothing to learn', + abstain_reason: 'task completed successfully without issues', + }); + const sseLines = [ + `data: ${JSON.stringify({ model: 'reflect-model', choices: [{ delta: { tool_calls: [{ index: 0, id: 'r1', function: { name: 'submit_reflection', arguments: reflectionArgs } }] }, finish_reason: null }] })}\n\n`, + `data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: 'tool_calls' }] })}\n\n`, + `data: ${JSON.stringify({ choices: [], usage: { prompt_tokens: 123, completion_tokens: 45 } })}\n\n`, + 'data: [DONE]\n\n', + ]; + const mockFetch = vi.fn().mockImplementation(async () => { + const encoder = new TextEncoder(); + let i = 0; + return { + ok: true, + status: 200, + headers: { get: () => null }, + body: { + getReader: () => ({ + read: async () => + i < sseLines.length + ? 
{ done: false, value: encoder.encode(sseLines[i++]) } + : { done: true, value: undefined }, + releaseLock: () => {}, + }), + }, + } as any; + }); vi.stubGlobal('fetch', mockFetch); const { runReflectionJob } = await import('../engine/reflection/reflection-runner.js'); diff --git a/src/db/migrate.ts b/src/db/migrate.ts index a50c22a..6ed1d9c 100644 --- a/src/db/migrate.ts +++ b/src/db/migrate.ts @@ -90,12 +90,44 @@ export function runMigrations(db: Database.Database): void { db.exec("ALTER TABLE local_tasks ADD COLUMN options TEXT DEFAULT '{}'"); }); + // Title provenance: 'auto' (creation fallback) / 'agent' (derived from + // Mission Brief goal) / 'user' (manual edit, never auto-overwritten). + addColumnIfMissing(db, 'local_tasks', 'title_source', () => { + db.exec("ALTER TABLE local_tasks ADD COLUMN title_source TEXT NOT NULL DEFAULT 'auto'"); + }); + migrateMcpTables(db); migrateSshTables(db); migrateNotesTables(db); migrateDashboardWidgets(db); migrateGatewayVirtualKeys(db); migratePushNotificationsTables(db); + migrateLlmUsageDaily(db); +} + +/** + * Per-user daily LLM usage aggregation (gateway + direct). Idempotent. + * Mirrors schema.sql + Repository.initSchema (dual-path rule: + * project_db_migration_dual_path). Additive table, no mixed-version risk. + * Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md. 
+ */ +function migrateLlmUsageDaily(db: Database.Database): void { + db.exec(` + CREATE TABLE IF NOT EXISTS llm_usage_daily ( + day TEXT NOT NULL, + user_id TEXT NOT NULL, + source TEXT NOT NULL, + model TEXT NOT NULL, + route TEXT NOT NULL, + tokens_in INTEGER NOT NULL DEFAULT 0, + tokens_out INTEGER NOT NULL DEFAULT 0, + requests INTEGER NOT NULL DEFAULT 0, + last_updated_at TEXT NOT NULL, + PRIMARY KEY (day, user_id, source, model, route) + ); + CREATE INDEX IF NOT EXISTS idx_llm_usage_daily_user_day + ON llm_usage_daily (user_id, day); + `); } /** diff --git a/src/db/repository.llm-usage.test.ts b/src/db/repository.llm-usage.test.ts new file mode 100644 index 0000000..1161a89 --- /dev/null +++ b/src/db/repository.llm-usage.test.ts @@ -0,0 +1,105 @@ +/** + * Per-user daily LLM usage ledger (llm_usage_daily) repository tests. + * + * Coverage: + * - incrementLlmUsage UPSERTs on first call, accumulates on second + * - requests defaults to +1; a usage-less call still bumps requests + * - negative deltas clamp to zero + * - distinct (model) and (route) produce distinct rows on the same day + * - 'system' / 'local' sentinels aggregate as single rows (NULL trap avoided) + * - queryLlmUsageDaily collapses model/route, groups by (day, user, source) + * - day filter is an inclusive range; userId filter scopes a single user + * - UTC day boundary splits into separate buckets + * + * Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + */ +import { describe, expect, it, beforeEach } from 'vitest'; +import { Repository } from './repository.js'; + +function makeRepo(): Repository { + return new Repository(':memory:'); +} + +describe('llm_usage_daily repository', () => { + let repo: Repository; + beforeEach(() => { + repo = makeRepo(); + }); + + it('UPSERTs first call and accumulates on the same grain', () => { + const grain = { day: '2026-06-11', userId: 'u1', source: 'direct' as const, model: 'm', route: 'r' }; + repo.incrementLlmUsage({ ...grain, 
tokensIn: 100, tokensOut: 40 }); + repo.incrementLlmUsage({ ...grain, tokensIn: 10, tokensOut: 5 }); + const rows = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11' }); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + day: '2026-06-11', userId: 'u1', source: 'direct', + tokensIn: 110, tokensOut: 45, requests: 2, + }); + }); + + it('a usage-less call still bumps requests (0-token request != failure)', () => { + repo.incrementLlmUsage({ day: '2026-06-11', userId: 'u1', source: 'gateway', model: 'm', route: 'r' }); + const rows = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11' }); + expect(rows[0]).toMatchObject({ tokensIn: 0, tokensOut: 0, requests: 1 }); + }); + + it('clamps negative deltas to zero', () => { + repo.incrementLlmUsage({ day: '2026-06-11', userId: 'u1', source: 'direct', model: 'm', route: 'r', tokensIn: -5, tokensOut: -9, requests: -3 }); + const rows = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11' }); + expect(rows[0]).toMatchObject({ tokensIn: 0, tokensOut: 0, requests: 0 }); + }); + + it('distinct model and route are separate rows but collapse in the query', () => { + const base = { day: '2026-06-11', userId: 'u1', source: 'direct' as const, tokensIn: 10, tokensOut: 5 }; + repo.incrementLlmUsage({ ...base, model: 'big', route: 'host-a' }); + repo.incrementLlmUsage({ ...base, model: 'small', route: 'host-a' }); + repo.incrementLlmUsage({ ...base, model: 'big', route: 'host-b' }); + // 3 distinct (model,route) rows underneath, collapsed to one (day,user,source). 
+ const rows = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11' }); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ tokensIn: 30, tokensOut: 15, requests: 3 }); + }); + + it("'system' and 'local' sentinels each aggregate as a single row", () => { + const g = { day: '2026-06-11', source: 'direct' as const, model: 'm', route: 'r', tokensIn: 1, tokensOut: 1 }; + repo.incrementLlmUsage({ ...g, userId: 'system' }); + repo.incrementLlmUsage({ ...g, userId: 'system' }); + repo.incrementLlmUsage({ ...g, userId: 'local' }); + const rows = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11' }); + const byUser = Object.fromEntries(rows.map((r) => [r.userId, r.requests])); + expect(byUser).toEqual({ system: 2, local: 1 }); + }); + + it('gateway and direct are distinct rows for the same user/day', () => { + repo.incrementLlmUsage({ day: '2026-06-11', userId: 'u1', source: 'gateway', model: 'm', route: 'r', tokensIn: 7, tokensOut: 3 }); + repo.incrementLlmUsage({ day: '2026-06-11', userId: 'u1', source: 'direct', model: 'm', route: 'r', tokensIn: 2, tokensOut: 1 }); + const rows = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11' }); + expect(rows).toHaveLength(2); + expect(rows.map((r) => r.source).sort()).toEqual(['direct', 'gateway']); + }); + + it('queryLlmUsageDaily honours an inclusive day range', () => { + for (const day of ['2026-06-09', '2026-06-10', '2026-06-11', '2026-06-12']) { + repo.incrementLlmUsage({ day, userId: 'u1', source: 'direct', model: 'm', route: 'r', tokensIn: 1, tokensOut: 0 }); + } + const rows = repo.queryLlmUsageDaily({ from: '2026-06-10', to: '2026-06-11' }); + expect(rows.map((r) => r.day)).toEqual(['2026-06-10', '2026-06-11']); + }); + + it('userId filter scopes the query to one user', () => { + repo.incrementLlmUsage({ day: '2026-06-11', userId: 'u1', source: 'direct', model: 'm', route: 'r', tokensIn: 1, tokensOut: 0 }); + repo.incrementLlmUsage({ day: '2026-06-11', userId: 'u2', source: 
'direct', model: 'm', route: 'r', tokensIn: 9, tokensOut: 0 }); + const mine = repo.queryLlmUsageDaily({ from: '2026-06-11', to: '2026-06-11', userId: 'u1' }); + expect(mine).toHaveLength(1); + expect(mine[0]).toMatchObject({ userId: 'u1', tokensIn: 1 }); + }); + + it('derives the UTC day from `at` when day is omitted; boundary splits buckets', () => { + const grain = { userId: 'u1', source: 'direct' as const, model: 'm', route: 'r', tokensIn: 1, tokensOut: 0 }; + repo.incrementLlmUsage({ ...grain, at: '2026-06-11T23:59:59.000Z' }); + repo.incrementLlmUsage({ ...grain, at: '2026-06-12T00:00:01.000Z' }); + const rows = repo.queryLlmUsageDaily({ from: '2026-06-01', to: '2026-06-30' }); + expect(rows.map((r) => r.day)).toEqual(['2026-06-11', '2026-06-12']); + }); +}); diff --git a/src/db/repository.test.ts b/src/db/repository.test.ts index 9ee343a..2caddc4 100644 --- a/src/db/repository.test.ts +++ b/src/db/repository.test.ts @@ -1653,3 +1653,74 @@ describe('Repository browser notifications V2', () => { }); }); }); + +describe('Repository title derivation from Mission Brief goal', () => { + let tempDir = ''; + + afterEach(() => { + if (tempDir) { + rmSync(tempDir, { recursive: true, force: true }); + tempDir = ''; + } + }); + + function makeRepo(): Repository { + tempDir = mkdtempSync(join(tmpdir(), 'maestro-title-')); + return new Repository(join(tempDir, 'orchestrator.db')); + } + + it('derives the title from the goal when title is auto, and marks it agent', async () => { + const repo = makeRepo(); + try { + const task = await repo.createLocalTask({ title: '仮タイトル', titleSource: 'auto', body: 'b' }); + repo.makeMissionBriefIO(task.id).update({ goal: '議事録を作成する\n背景...' 
}); + const after = await repo.getLocalTask(task.id); + expect(after?.title).toBe('議事録を作成する'); + expect(after?.titleSource).toBe('agent'); + } finally { + repo.close(); + } + }); + + it('re-derives on a later goal update while still agent-owned', async () => { + const repo = makeRepo(); + try { + const task = await repo.createLocalTask({ title: 'x', titleSource: 'auto', body: 'b' }); + const io = repo.makeMissionBriefIO(task.id); + io.update({ goal: '最初の目標' }); + io.update({ goal: '更新された目標' }); + const after = await repo.getLocalTask(task.id); + expect(after?.title).toBe('更新された目標'); + expect(after?.titleSource).toBe('agent'); + } finally { + repo.close(); + } + }); + + it('never overwrites a user-edited title', async () => { + const repo = makeRepo(); + try { + const task = await repo.createLocalTask({ title: 'x', titleSource: 'auto', body: 'b' }); + await repo.updateLocalTask(task.id, { title: '手動タイトル', titleSource: 'user' }); + repo.makeMissionBriefIO(task.id).update({ goal: 'エージェントの目標' }); + const after = await repo.getLocalTask(task.id); + expect(after?.title).toBe('手動タイトル'); + expect(after?.titleSource).toBe('user'); + } finally { + repo.close(); + } + }); + + it('does not touch the title when the patch has no goal', async () => { + const repo = makeRepo(); + try { + const task = await repo.createLocalTask({ title: '仮', titleSource: 'auto', body: 'b' }); + repo.makeMissionBriefIO(task.id).update({ done: '- step 1' }); + const after = await repo.getLocalTask(task.id); + expect(after?.title).toBe('仮'); + expect(after?.titleSource).toBe('auto'); + } finally { + repo.close(); + } + }); +}); diff --git a/src/db/repository.ts b/src/db/repository.ts index 8a87c01..cb1b71d 100644 --- a/src/db/repository.ts +++ b/src/db/repository.ts @@ -6,6 +6,7 @@ import { randomUUID, scryptSync, randomBytes, timingSafeEqual } from 'crypto'; import { v4 as uuidv4 } from 'uuid'; import { logger } from '../logger.js'; import { buildVisibilityWhere } from '../bridge/visibility.js'; 
+import { buildTitleFromGoal } from '../title-generation.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); @@ -146,9 +147,13 @@ export interface SubtaskInfo { childCompleted?: number; } +export type TitleSource = 'auto' | 'agent' | 'user'; + export interface LocalTask { id: number; title: string; + /** Provenance of `title`. 'user' is never auto-overwritten by the agent. */ + titleSource: TitleSource; body: string; pieceName: string; profile: 'auto' | 'fast' | 'quality' | string; @@ -335,6 +340,33 @@ function rowToGatewayKeyUsage(row: GatewayKeyUsageRow): GatewayKeyUsage { }; } +/** Per-call delta for the per-user daily LLM usage ledger. */ +export interface LlmUsageIncrement { + /** UTC day bucket 'YYYY-MM-DD'. When omitted, taken from the first 10 chars of `at`, or of the current UTC ISO timestamp if `at` is also omitted. */ + day?: string; + /** Owner id, or 'local' (no-auth) / 'system' (ownerless) sentinel. */ + userId: string; + source: 'gateway' | 'direct'; + /** Real model name (chunk.model), routing-key fallback, or 'unknown'. */ + model: string; + /** Backend server name (gateway backendId / direct host), or 'unknown'. */ + route: string; + tokensIn?: number; + tokensOut?: number; + requests?: number; + at?: string; +} + +/** Daily-grouped aggregate row (model/route collapsed) for the usage API. */ +export interface LlmUsageDailyAgg { + day: string; + userId: string; + source: string; + tokensIn: number; + tokensOut: number; + requests: number; +} + +/** + * Coerce an optional limit (tokens_budget / rate_limit_rpm) to either + * a positive integer or null. Anything else (undefined, null, 0, @@ -432,6 +464,8 @@ export interface UpsertWorkerNodeParams { export interface CreateLocalTaskParams { title: string; + /** Defaults to 'auto'. Pass 'user' when the caller supplied an explicit title. 
*/ + titleSource?: TitleSource; body: string; pieceName?: string; profile?: 'auto' | 'fast' | 'quality'; @@ -650,6 +684,7 @@ interface JobRow { interface LocalTaskRow { id: number; title: string; + title_source: string | null; body: string; piece_name: string; profile: string; @@ -802,6 +837,7 @@ function rowToLocalTask(row: LocalTaskRow): LocalTask { return { id: row.id, title: row.title, + titleSource: (row.title_source as TitleSource | null) ?? 'auto', body: row.body, pieceName: row.piece_name, profile: row.profile, @@ -1133,6 +1169,26 @@ export class Repository { CREATE INDEX IF NOT EXISTS idx_gateway_usage_key ON gateway_key_usage (key_id); `); + + // Per-user daily LLM usage (gateway + direct). Mirrors schema.sql + + // migrate.ts (dual-path rule). Separate lens from gateway_key_usage. + // Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + this.db.exec(` + CREATE TABLE IF NOT EXISTS llm_usage_daily ( + day TEXT NOT NULL, + user_id TEXT NOT NULL, + source TEXT NOT NULL, + model TEXT NOT NULL, + route TEXT NOT NULL, + tokens_in INTEGER NOT NULL DEFAULT 0, + tokens_out INTEGER NOT NULL DEFAULT 0, + requests INTEGER NOT NULL DEFAULT 0, + last_updated_at TEXT NOT NULL, + PRIMARY KEY (day, user_id, source, model, route) + ); + CREATE INDEX IF NOT EXISTS idx_llm_usage_daily_user_day + ON llm_usage_daily (user_id, day); + `); } private ensureColumn(tableName: string, columnName: string, definition: string): void { @@ -1247,11 +1303,12 @@ export class Repository { async createLocalTask(params: CreateLocalTaskParams): Promise { const result = this.db .prepare( - `INSERT INTO local_tasks (title, body, piece_name, profile, output_format, ask_policy, priority, workspace_path, owner_id, visibility, visibility_scope_org_id, browser_session_profile_id, options) - VALUES (@title, @body, @pieceName, @profile, @outputFormat, @askPolicy, @priority, @workspacePath, @ownerId, @visibility, @visibilityScopeOrgId, @browserSessionProfileId, @options)` + `INSERT 
INTO local_tasks (title, title_source, body, piece_name, profile, output_format, ask_policy, priority, workspace_path, owner_id, visibility, visibility_scope_org_id, browser_session_profile_id, options) + VALUES (@title, @titleSource, @body, @pieceName, @profile, @outputFormat, @askPolicy, @priority, @workspacePath, @ownerId, @visibility, @visibilityScopeOrgId, @browserSessionProfileId, @options)` ) .run({ title: params.title, + titleSource: params.titleSource ?? 'auto', body: params.body, pieceName: params.pieceName ?? 'chat', profile: params.profile ?? 'auto', @@ -1365,9 +1422,10 @@ export class Repository { * from sync paths (e.g. buildSystemPrompt). better-sqlite3 is sync * underneath anyway. */ updateMissionBriefSync(taskId: number, patch: Partial): MissionBrief | null { - const existing = parseMissionBrief( - (this.db.prepare(`SELECT mission_brief FROM local_tasks WHERE id = ?`).get(taskId) as { mission_brief: string | null } | undefined)?.mission_brief ?? null, - ); + const row = this.db + .prepare(`SELECT mission_brief, title_source FROM local_tasks WHERE id = ?`) + .get(taskId) as { mission_brief: string | null; title_source: string | null } | undefined; + const existing = parseMissionBrief(row?.mission_brief ?? null); const next: MissionBrief = { goal: patch.goal !== undefined ? patch.goal : existing?.goal ?? '', done: patch.done !== undefined ? patch.done : existing?.done ?? '', @@ -1376,9 +1434,29 @@ export class Repository { }; const allEmpty = !next.goal && !next.done && !next.open && !next.clarifications; const stored = allEmpty ? null : JSON.stringify(next); - this.db.prepare( - `UPDATE local_tasks SET mission_brief = ?, updated_at = datetime('now') WHERE id = ?` - ).run(stored, taskId); + + // Derive the task title from the agent's goal (no LLM call). 
Only when the + // goal value actually changed (agents re-send an unchanged brief across + // iterations — re-deriving every time would churn updated_at and flicker + // the title) and the user hasn't taken manual control (a user edit pins + // title_source='user' and is never overwritten). + const goalChanged = patch.goal !== undefined && patch.goal !== (existing?.goal ?? ''); + const derivedTitle = (goalChanged && (row?.title_source ?? 'auto') !== 'user') + ? buildTitleFromGoal(next.goal) + : ''; + + // Atomic: persist the brief and the derived title as one unit so a crash + // between them can't leave the title out of sync with the goal. + this.db.transaction(() => { + this.db.prepare( + `UPDATE local_tasks SET mission_brief = ?, updated_at = datetime('now') WHERE id = ?` + ).run(stored, taskId); + if (derivedTitle) { + this.db.prepare( + `UPDATE local_tasks SET title = ?, title_source = 'agent' WHERE id = ?` + ).run(derivedTitle, taskId); + } + })(); return allEmpty ? null : next; } @@ -1636,6 +1714,7 @@ export class Repository { const params: Record = { taskId }; const fieldMap: Record = { title: 'title', + titleSource: 'title_source', body: 'body', pieceName: 'piece_name', profile: 'profile', @@ -3490,6 +3569,83 @@ export class Repository { return rows.map(rowToGatewayKeyUsage); } + // ── Per-user daily LLM usage (gateway + direct) ────────────────────── + // + // Recorded at the OpenAICompatClient completion boundary for every + // successful chat completion. UPSERT on the (day, user_id, source, + // model, route) grain. Separate lens from gateway_key_usage — never + // summed across the two tables. Spec: + // docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + + /** + * UPSERT: bump per-(day, user, source, model, route) counters. Deltas + * are clamped at zero. `day` defaults to the UTC day of `at` (or now). 
+ * Called once per successful stream completion; `usage`-less completions + * still bump `requests` (tokens 0) so a 0-token request is distinct from + * a failed/aborted one (which is never recorded). + */ + incrementLlmUsage(params: LlmUsageIncrement): void { + const tIn = Math.max(0, Math.floor(params.tokensIn ?? 0)); + const tOut = Math.max(0, Math.floor(params.tokensOut ?? 0)); + const reqs = Math.max(0, Math.floor(params.requests ?? 1)); + const ts = params.at ?? new Date().toISOString(); + const day = params.day ?? ts.slice(0, 10); + this.db + .prepare( + `INSERT INTO llm_usage_daily + (day, user_id, source, model, route, tokens_in, tokens_out, requests, last_updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (day, user_id, source, model, route) DO UPDATE SET + tokens_in = tokens_in + excluded.tokens_in, + tokens_out = tokens_out + excluded.tokens_out, + requests = requests + excluded.requests, + last_updated_at = excluded.last_updated_at`, + ) + .run(day, params.userId, params.source, params.model, params.route, tIn, tOut, reqs, ts); + } + + /** + * Daily time series for the usage dashboard, grouped by (day, user_id, + * source) with model/route collapsed. `userId` filter scopes a non-admin + * to their own rows; omit it for the admin all-users view (callers can + * collapse user_id afterwards). Inclusive `from`/`to` are 'YYYY-MM-DD'. 
+ */ + queryLlmUsageDaily(opts: { from: string; to: string; userId?: string }): LlmUsageDailyAgg[] { + const where = ['day >= ?', 'day <= ?']; + const args: unknown[] = [opts.from, opts.to]; + if (opts.userId !== undefined) { + where.push('user_id = ?'); + args.push(opts.userId); + } + const rows = this.db + .prepare( + `SELECT day, user_id, source, + SUM(tokens_in) AS tokens_in, + SUM(tokens_out) AS tokens_out, + SUM(requests) AS requests + FROM llm_usage_daily + WHERE ${where.join(' AND ')} + GROUP BY day, user_id, source + ORDER BY day ASC`, + ) + .all(...args) as Array<{ + day: string; + user_id: string; + source: string; + tokens_in: number; + tokens_out: number; + requests: number; + }>; + return rows.map((r) => ({ + day: r.day, + userId: r.user_id, + source: r.source, + tokensIn: r.tokens_in, + tokensOut: r.tokens_out, + requests: r.requests, + })); + } + /** Return the underlying Database instance (needed by migrate.ts and session store) */ getDb(): Database.Database { return this.db; diff --git a/src/db/schema.sql b/src/db/schema.sql index eec3b27..5d33be9 100644 --- a/src/db/schema.sql +++ b/src/db/schema.sql @@ -65,7 +65,11 @@ CREATE TABLE IF NOT EXISTS local_tasks ( mission_brief TEXT, -- Per-task options (JSON blob). Controls runtime behaviour toggles such as -- { mcpDisabled: true, skillsDisabled: true }. Default '{}' = all enabled. - options TEXT DEFAULT '{}' + options TEXT DEFAULT '{}', + -- Provenance of `title`: 'auto' = cheap fallback set at creation, + -- 'agent' = derived from the Mission Brief goal during the run, + -- 'user' = manually edited (never overwritten by agent/regeneration). 
+ title_source TEXT NOT NULL DEFAULT 'auto' ); CREATE INDEX IF NOT EXISTS idx_local_tasks_updated_at ON local_tasks (updated_at DESC); @@ -586,6 +590,29 @@ CREATE TABLE IF NOT EXISTS gateway_key_usage ( CREATE INDEX IF NOT EXISTS idx_gateway_usage_key ON gateway_key_usage (key_id); +-- ── LLM usage: per-user daily aggregation (gateway + direct) ──────────── +-- Daily UPSERT buckets recorded at the OpenAICompatClient completion +-- boundary, covering BOTH gateway-routed and direct LLM calls. This is a +-- separate lens from gateway_key_usage (which is per-virtual-key / billing). +-- grain = (day, user_id, source, model, route); user_id is NOT NULL with a +-- 'system' / 'local' sentinel so ON CONFLICT keys never hit the SQLite +-- NULL != NULL trap. Spec: +-- docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md +CREATE TABLE IF NOT EXISTS llm_usage_daily ( + day TEXT NOT NULL, -- 'YYYY-MM-DD' (UTC) + user_id TEXT NOT NULL, -- owner id / 'local' / 'system' + source TEXT NOT NULL, -- 'gateway' | 'direct' + model TEXT NOT NULL, -- real model name (chunk.model), routing key fallback + route TEXT NOT NULL, -- backend server name (gateway backendId / direct host) + tokens_in INTEGER NOT NULL DEFAULT 0, + tokens_out INTEGER NOT NULL DEFAULT 0, + requests INTEGER NOT NULL DEFAULT 0, + last_updated_at TEXT NOT NULL, + PRIMARY KEY (day, user_id, source, model, route) +); +CREATE INDEX IF NOT EXISTS idx_llm_usage_daily_user_day + ON llm_usage_daily (user_id, day); + -- ── Browser Notifications V2: Web Push subscriptions + per-user prefs ─── -- Spec: docs/superpowers/specs/2026-05-28-browser-notifications-v2-webpush.md -- endpoint is globally UNIQUE so logging into a different user in the same diff --git a/src/engine/agent-loop.ts b/src/engine/agent-loop.ts index b08c1d6..889f1d8 100644 --- a/src/engine/agent-loop.ts +++ b/src/engine/agent-loop.ts @@ -1963,7 +1963,7 @@ export async function executeMovement( { role: 'user', content: taskInstruction }, ]; const 
runIsolatedLlm = (isolatedMessages: Message[]): Promise => - runIsolatedLlmHelper(client, isolatedMessages, cancelSignal); + runIsolatedLlmHelper(client, isolatedMessages, cancelSignal, { userId: ctx.userId }); // Traceability T-1: ensure eventLogger is non-undefined for the // duration of the movement. Production callers (piece-runner) always @@ -2166,6 +2166,7 @@ export async function executeMovement( }, }, `movement=${movement.name} `, + { userId: ctx.userId }, ); const llmDurationMs = Date.now() - llmStartedAt; let { accumulatedText } = consumed; diff --git a/src/engine/llm-stream.ts b/src/engine/llm-stream.ts index 65a46b7..498958b 100644 --- a/src/engine/llm-stream.ts +++ b/src/engine/llm-stream.ts @@ -4,6 +4,7 @@ import type { ToolCall, OpenAICompatClient, LLMEvent, + LlmCallContext, } from '../llm/openai-compat.js'; import { logger } from '../logger.js'; import { stripThinkingTokens } from './strip-thinking.js'; @@ -22,9 +23,10 @@ export async function runIsolatedLlm( client: OpenAICompatClient, messages: Message[], cancelSignal?: AbortSignal, + context?: LlmCallContext, ): Promise { let output = ''; - for await (const event of client.chat(messages, undefined, cancelSignal)) { + for await (const event of client.chat(messages, undefined, cancelSignal, context)) { if (event.type === 'text') { output += event.text; continue; @@ -107,8 +109,9 @@ export async function consumeLlmStream( idleTimeoutMs: number, callbacks: ConsumeStreamCallbacks = {}, contextLabel: string = '', + context?: LlmCallContext, ): Promise { - const stream = client.chat(messages, tools, cancelSignal); + const stream = client.chat(messages, tools, cancelSignal, context); const accumulator: ConsumedLLMResponse = { accumulatedText: '', pendingToolCalls: [], diff --git a/src/engine/piece-classifier.ts b/src/engine/piece-classifier.ts index 789c87b..8a81486 100644 --- a/src/engine/piece-classifier.ts +++ b/src/engine/piece-classifier.ts @@ -79,6 +79,7 @@ export async function classifyPiece( 
pieces: PieceDescription[], fileNames: string[], timeoutMs: number = 8000, + userId?: string, ): Promise { const prompt = buildClassificationPrompt(taskText, pieces, fileNames); logger.debug(`[piece-classifier] candidates=[${pieces.map(p => p.name).join(', ')}] textLen=${taskText.length}`); @@ -87,7 +88,7 @@ export async function classifyPiece( const llmCall = async (): Promise => { let result = ''; try { - for await (const event of client.chat(messages)) { + for await (const event of client.chat(messages, undefined, undefined, { userId })) { if (event.type === 'text') result += event.text; else if (event.type === 'error') return null; else if (event.type === 'done') break; diff --git a/src/engine/reflection/llm-client.test.ts b/src/engine/reflection/llm-client.test.ts index eec9741..e15dbc4 100644 --- a/src/engine/reflection/llm-client.test.ts +++ b/src/engine/reflection/llm-client.test.ts @@ -13,26 +13,57 @@ const validResult = { reasoning: 'x', }; -const okResponse = { - ok: true, - json: () => Promise.resolve({ - choices: [ - { - message: { - tool_calls: [ - { - function: { - name: 'submit_reflection', - arguments: JSON.stringify(validResult), - }, - }, - ], +/** + * The reflection client now routes through OpenAICompatClient, which speaks + * streaming SSE. Build a fake streaming `Response` that emits the given SSE + * `data:` payloads, then `[DONE]`. + */ +function sseResponse(chunks: unknown[]): Response { + const lines = chunks.map((c) => `data: ${JSON.stringify(c)}\n\n`); + lines.push('data: [DONE]\n\n'); + const encoder = new TextEncoder(); + let i = 0; + return { + ok: true, + status: 200, + headers: { get: () => null }, + body: { + getReader: () => ({ + read: async () => + i < lines.length + ? { done: false, value: encoder.encode(lines[i++]) } + : { done: true, value: undefined }, + releaseLock: () => {}, + }), + }, + } as unknown as Response; +} + +/** A complete, valid submit_reflection tool-call stream with usage. 
*/ +function okStream(args: unknown = validResult): Response { + return sseResponse([ + { + model: 'test-model', + choices: [ + { + delta: { tool_calls: [{ index: 0, id: 'c1', function: { name: 'submit_reflection', arguments: JSON.stringify(args) } }] }, + finish_reason: null, }, - }, - ], - usage: { prompt_tokens: 42, completion_tokens: 17 }, - }), -}; + ], + }, + { choices: [{ delta: {}, finish_reason: 'tool_calls' }] }, + { choices: [], usage: { prompt_tokens: 42, completion_tokens: 17 } }, + ]); +} + +function httpError(status: number, bodyText: string): Response { + return { + ok: false, + status, + headers: { get: () => null }, + text: () => Promise.resolve(bodyText), + } as unknown as Response; +} beforeEach(() => { // No real backoff sleeps in tests. @@ -45,7 +76,7 @@ afterEach(() => { describe('callReflectionLlm', () => { it('happy path: parses tool_call arguments and extracts token usage', async () => { - vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse)); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okStream())); const result = await callReflectionLlm(cfg, 'system prompt', 'user prompt'); @@ -59,12 +90,8 @@ describe('callReflectionLlm', () => { it('retries a 5xx (backend tool-call parse failure) and succeeds on resample', async () => { const fetchMock = vi.fn() - .mockResolvedValueOnce({ - ok: false, - status: 500, - text: () => Promise.resolve('{"error":{"message":"Failed to parse input at pos 41: ..."}}'), - }) - .mockResolvedValueOnce(okResponse); + .mockResolvedValueOnce(httpError(500, '{"error":{"message":"Failed to parse input at pos 41"}}')) + .mockResolvedValueOnce(okStream()); vi.stubGlobal('fetch', fetchMock); const result = await callReflectionLlm(cfg, 's', 'u'); @@ -73,11 +100,7 @@ describe('callReflectionLlm', () => { }); it('gives up after 3 attempts of persistent 5xx', async () => { - const fetchMock = vi.fn().mockResolvedValue({ - ok: false, - status: 500, - text: () => Promise.resolve('parse error'), - }); + const 
fetchMock = vi.fn().mockResolvedValue(httpError(500, 'parse error')); vi.stubGlobal('fetch', fetchMock); await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 500'); @@ -85,38 +108,62 @@ describe('callReflectionLlm', () => { }); it('does NOT retry a 4xx (deterministic config error, e.g. invalid api key)', async () => { - const fetchMock = vi.fn().mockResolvedValue({ - ok: false, - status: 401, - text: () => Promise.resolve('invalid api key'), - }); + const fetchMock = vi.fn().mockResolvedValue(httpError(401, 'invalid api key')); vi.stubGlobal('fetch', fetchMock); await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('HTTP 401'); expect(fetchMock).toHaveBeenCalledTimes(1); }); - it('retries when no tool_calls present, then throws after exhaustion', async () => { - const fetchMock = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ choices: [{ message: {} }] }), - }); + it('retries when no tool_call present, then throws after exhaustion', async () => { + // A stream that yields only text and finishes — no submit_reflection call. + const noToolStream = () => sseResponse([ + { choices: [{ delta: { content: 'just text' }, finish_reason: 'stop' }] }, + ]); + const fetchMock = vi.fn().mockImplementation(async () => noToolStream()); vi.stubGlobal('fetch', fetchMock); await expect(callReflectionLlm(cfg, 'system prompt', 'user prompt')) - .rejects.toThrow('no tool_call'); + .rejects.toThrow('no submit_reflection tool_call'); expect(fetchMock).toHaveBeenCalledTimes(3); }); - it('retries malformed tool_call arguments JSON', async () => { + it('does NOT retry a budget_exhausted gateway sentinel (fail fast)', async () => { + // SSE sentinel error → client yields gatewayErrorType=budget_exhausted. 
+ const fetchMock = vi.fn().mockResolvedValue( + sseResponse([{ error: { type: 'budget_exhausted', message: 'over quota' } }]), + ); + vi.stubGlobal('fetch', fetchMock); + + await expect(callReflectionLlm(cfg, 's', 'u')).rejects.toThrow('budget_exhausted'); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it('does NOT retry a prompt-size preflight block (fail fast)', async () => { + // Tiny context window forces the client preflight guard to block before + // any fetch; resampling the identical prompt cannot help. + const fetchMock = vi.fn().mockResolvedValue(okStream()); + vi.stubGlobal('fetch', fetchMock); + + await expect(callReflectionLlm({ ...cfg, contextLimitTokens: 1 }, 'system', 'user')) + .rejects.toThrow('blocked before send'); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it('retries malformed tool_call arguments (client yields empty input)', async () => { + // First stream carries broken JSON args → client parses to {} → structural + // guard treats it as malformed → resample. Second stream is valid. 
+ const brokenStream = () => sseResponse([ + { + choices: [ + { delta: { tool_calls: [{ index: 0, id: 'c1', function: { name: 'submit_reflection', arguments: '{broken' } }] }, finish_reason: null }, + ], + }, + { choices: [{ delta: {}, finish_reason: 'tool_calls' }] }, + ]); const fetchMock = vi.fn() - .mockResolvedValueOnce({ - ok: true, - json: () => Promise.resolve({ - choices: [{ message: { tool_calls: [{ function: { name: 'submit_reflection', arguments: '{broken' } }] } }], - }), - }) - .mockResolvedValueOnce(okResponse); + .mockImplementationOnce(async () => brokenStream()) + .mockImplementationOnce(async () => okStream()); vi.stubGlobal('fetch', fetchMock); const result = await callReflectionLlm(cfg, 's', 'u'); diff --git a/src/engine/reflection/llm-client.ts b/src/engine/reflection/llm-client.ts index 8d3e141..aa97ef4 100644 --- a/src/engine/reflection/llm-client.ts +++ b/src/engine/reflection/llm-client.ts @@ -1,4 +1,6 @@ import { logger } from '../../logger.js'; +import { getDefaultProviderRetryConfig } from '../../config.js'; +import { OpenAICompatClient, type LLMEvent, type Message, type ToolDef } from '../../llm/openai-compat.js'; import type { ReflectionResult } from './types.js'; import { REFLECTION_TOOL_SCHEMA } from './reflection-schema.js'; @@ -6,6 +8,17 @@ export interface ReflectionLlmConfig { endpoint: string; model: string | undefined; apiKey?: string; + /** True when the reflection worker routes through the AAO Gateway (proxy). */ + proxy?: boolean; + /** Reflection target user — recorded as the usage owner. */ + userId?: string; + /** + * Model context window in tokens. Passed to the shared client's + * prompt-size preflight guard. Reflection prompts can be large (uncapped + * memory snapshot), so use the worker's real limit rather than the + * client's conservative 32k default, which would block valid prompts. 
+ */ + contextLimitTokens?: number; } export interface ReflectionLlmResult { @@ -62,54 +75,104 @@ export async function callReflectionLlm( throw lastErr ?? new Error('reflection LLM failed'); } +/** + * Classify an OpenAICompatClient error for the reflection resample loop. + * - HTTP 5xx (incl. tool-call parse errors on malformed model output) and + * gateway_shutdown / gateway_timeout: transient → resample. + * - HTTP 4xx (bad key / request shape), budget_exhausted / rate_limited + * (won't pass until the period resets), and the client-side + * "blocked before send" prompt-size guard: deterministic → fail fast. + * - Everything else (transport / parse / idle timeout): stochastic → resample. + */ +function classifyClientError(message: string, gatewayErrorType?: string): Error { + if (gatewayErrorType === 'budget_exhausted' || gatewayErrorType === 'rate_limited') { + return new Error(message); + } + if (gatewayErrorType === 'gateway_shutdown' || gatewayErrorType === 'gateway_timeout') { + return new RetryableLlmError(message); + } + // Client-side preflight rejection — the prompt is too large; resampling the + // identical prompt cannot help. 
+ if (message.includes('blocked before send')) { + return new Error(message); + } + const m = /HTTP (\d{3})/.exec(message); + if (m) { + const status = Number(m[1]); + if (status >= 500) return new RetryableLlmError(message); + return new Error(message); + } + return new RetryableLlmError(message); +} + async function callOnce( cfg: ReflectionLlmConfig, systemPrompt: string, userPrompt: string, start: number, ): Promise { - const body: Record = { - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: userPrompt }, - ], - tools: [REFLECTION_TOOL_SCHEMA], - tool_choice: { type: 'function', function: { name: 'submit_reflection' } }, - temperature: 0.2, - }; - if (cfg.model) { - body['model'] = cfg.model; + // Route through the shared client so usage lands in the single + // per-user ledger (gateway + direct) like every other LLM call. + // maxAttempts=1: the outer callReflectionLlm loop owns resampling. + const client = new OpenAICompatClient( + cfg.endpoint, + cfg.model, + cfg.apiKey, + { ...getDefaultProviderRetryConfig(), maxAttempts: 1 }, + undefined, + cfg.contextLimitTokens, // real model window; avoid the 32k default blocking large reflection prompts + undefined, + undefined, + { proxy: cfg.proxy === true }, + ); + const messages: Message[] = [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userPrompt }, + ]; + + let parsed: ReflectionResult | null = null; + let usage: { prompt_tokens: number; completion_tokens: number } | undefined; + let errorMsg: string | null = null; + let errorGatewayType: string | undefined; + + for await (const event of client.chat( + messages, + [REFLECTION_TOOL_SCHEMA as unknown as ToolDef], + undefined, + { userId: cfg.userId }, + { temperature: 0.2, toolChoice: { type: 'function', function: { name: 'submit_reflection' } } }, + ) as AsyncGenerator) { + if (event.type === 'tool_use') { + if (event.name === 'submit_reflection' && parsed === null) { + parsed = event.input as unknown 
as ReflectionResult; + } + } else if (event.type === 'done') { + usage = event.usage; + } else if (event.type === 'error') { + errorMsg = event.error; + errorGatewayType = event.gatewayErrorType; + } } - const resp = await fetch(`${cfg.endpoint}/chat/completions`, { - method: 'POST', - headers: { - 'content-type': 'application/json', - ...(cfg.apiKey ? { authorization: `Bearer ${cfg.apiKey}` } : {}), - }, - body: JSON.stringify(body), - }); - if (!resp.ok) { - const text = await resp.text(); - const msg = `reflection LLM HTTP ${resp.status}: ${text}`; - // 5xx: backend-side failure (incl. tool-call parse errors on malformed - // model output) — resample. 4xx: deterministic config error — fail fast. - if (resp.status >= 500) throw new RetryableLlmError(msg); - throw new Error(msg); + + if (errorMsg !== null) { + throw classifyClientError(`reflection LLM ${errorMsg}`, errorGatewayType); } - const data = await resp.json() as any; - const toolCall = data.choices?.[0]?.message?.tool_calls?.[0]; - if (!toolCall) throw new RetryableLlmError('reflection LLM returned no tool_call'); - let parsed: ReflectionResult; - try { - parsed = JSON.parse(toolCall.function.arguments) as ReflectionResult; - } catch { - throw new RetryableLlmError('reflection LLM tool_call arguments were not valid JSON'); + if (parsed === null) { + throw new RetryableLlmError('reflection LLM returned no submit_reflection tool_call'); + } + // The shared client swallows tool-argument JSON parse errors and yields an + // empty `{}` input. Preserve the old resample-on-malformed behaviour with a + // shallow structural check against the tool schema's required fields — a + // genuinely-empty object means the model emitted broken tool markup. + const p = parsed as unknown as Record; + if (p['piece_changes'] === undefined || p['reasoning'] === undefined) { + throw new RetryableLlmError('reflection LLM tool_call arguments were malformed or incomplete'); } return { parsed, - tokensIn: data.usage?.prompt_tokens ?? 
0, - tokensOut: data.usage?.completion_tokens ?? 0, + tokensIn: usage?.prompt_tokens ?? 0, + tokensOut: usage?.completion_tokens ?? 0, durationMs: Date.now() - start, - raw: data, + raw: { usage }, }; } diff --git a/src/engine/reflection/reflection-runner.ts b/src/engine/reflection/reflection-runner.ts index bd1de90..286db2f 100644 --- a/src/engine/reflection/reflection-runner.ts +++ b/src/engine/reflection/reflection-runner.ts @@ -24,6 +24,10 @@ export interface RunReflectionDeps { * (normal task calls always send the worker's key — reflection must too). */ llmApiKey?: string; + /** True when the reflection worker routes through the AAO Gateway (proxy). */ + llmProxy?: boolean; + /** Reflection worker's model context window (tokens) for the prompt guard. */ + llmContextLimitTokens?: number; } export async function runReflectionJob( @@ -86,6 +90,9 @@ export async function runReflectionJob( endpoint: deps.llmEndpoint, model: deps.llmModel, apiKey: deps.llmApiKey, + proxy: deps.llmProxy === true, + userId: meta.userId, + contextLimitTokens: deps.llmContextLimitTokens, }; let llmResult; diff --git a/src/engine/tools/__fixtures__/attachmentFiles.msg b/src/engine/tools/__fixtures__/attachmentFiles.msg new file mode 100644 index 0000000..349789d Binary files /dev/null and b/src/engine/tools/__fixtures__/attachmentFiles.msg differ diff --git a/src/engine/tools/core.ts b/src/engine/tools/core.ts index 6323430..c84ef73 100644 --- a/src/engine/tools/core.ts +++ b/src/engine/tools/core.ts @@ -51,6 +51,7 @@ export interface ToolsConfig { officePdfMaxSizeMb?: number; // ReadPdf の最大ファイルサイズ (default: 10) officePptxMaxSizeMb?: number; // ReadPPTX の最大ファイルサイズ (default: 50) officePptxMaxUncompressedMb?: number; // ReadPPTX の ZIP 展開後サイズ上限 (default: 200) + officeMsgMaxSizeMb?: number; // ReadMsg の最大ファイルサイズ (default: 25) webfetchScreenshot?: boolean; // WebFetch で vlmEnabled 時にスクショを添付するか (default: true) webfetchScreenshotTimeoutMs?: number; // スクショ取得のタイムアウト (default: 15000) } diff --git 
a/src/engine/tools/docs.ts b/src/engine/tools/docs.ts index 47c5cee..0f06000 100644 --- a/src/engine/tools/docs.ts +++ b/src/engine/tools/docs.ts @@ -52,6 +52,7 @@ const TOOL_DOC_ALIASES: Record = { readexcel: 'office', readdocx: 'office', readpptx: 'office', + readmsg: 'office', pdftoimages: 'office', splitexcelsheets: 'office', splitdocxsections: 'office', diff --git a/src/engine/tools/msg.test.ts b/src/engine/tools/msg.test.ts new file mode 100644 index 0000000..01f0d36 --- /dev/null +++ b/src/engine/tools/msg.test.ts @@ -0,0 +1,324 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { fileURLToPath } from 'url'; +import { + formatAddress, + stripHtml, + selectMsgBody, + sanitizeAttachmentName, + formatMsgOutput, + assembleMsgOutput, + pickEmail, + isParsedMsgValid, + executeReadMsg, + type MsgView, +} from './msg.js'; +import type { ToolContext } from './core.js'; +import { executeTool as officeExecuteTool, TOOL_DEFS as OFFICE_TOOL_DEFS } from './office.js'; + +const FIXTURE = path.join( + path.dirname(fileURLToPath(import.meta.url)), + '__fixtures__', + 'attachmentFiles.msg', +); + +describe('formatAddress', () => { + it('renders name and email together', () => { + expect(formatAddress({ name: 'Alice', email: 'alice@example.com' })).toBe( + 'Alice ', + ); + }); + + it('renders name only when email is missing', () => { + expect(formatAddress({ name: 'Alice' })).toBe('Alice'); + }); + + it('renders email only when name is missing', () => { + expect(formatAddress({ email: 'alice@example.com' })).toBe('alice@example.com'); + }); + + it('falls back to a placeholder when both are missing', () => { + expect(formatAddress({})).toBe('(unknown)'); + }); +}); + +describe('stripHtml', () => { + it('removes tags and decodes entities', () => { + expect(stripHtml('

Hello world & co

')).toBe('Hello world & co'); + }); + + it('drops script and style content', () => { + const html = '

Keep

'; + expect(stripHtml(html)).toBe('Keep'); + }); + + it('turns block boundaries into newlines', () => { + expect(stripHtml('
line1
line2
')).toBe('line1\nline2'); + }); + + it('decodes valid numeric entities', () => { + expect(stripHtml('

AB

')).toBe('AB'); + }); + + it('does not throw on out-of-range numeric entities', () => { + expect(() => stripHtml('

')).not.toThrow(); + expect(stripHtml('A�B')).toBe('A�B'); + }); +}); + +describe('selectMsgBody', () => { + it('prefers the plain-text body', () => { + expect(selectMsgBody({ body: 'plain text', bodyHtml: '

html

' })).toEqual({ + text: 'plain text', + format: 'plain', + }); + }); + + it('falls back to stripped HTML when no plain body exists', () => { + expect(selectMsgBody({ bodyHtml: '

html body

' })).toEqual({ + text: 'html body', + format: 'html', + }); + }); + + it('reports none when no body is present', () => { + expect(selectMsgBody({})).toEqual({ text: '', format: 'none' }); + }); + + it('decodes PidTagHtml (html) when body and bodyHtml are absent', () => { + const html = new TextEncoder().encode('

from pidtag

'); + expect(selectMsgBody({ html })).toEqual({ text: 'from pidtag', format: 'html' }); + }); + + it('prefers plain body over the PidTagHtml field', () => { + const html = new TextEncoder().encode('

html

'); + expect(selectMsgBody({ body: 'plain', html })).toEqual({ text: 'plain', format: 'plain' }); + }); + + it('falls back to PidTagHtml when bodyHtml is empty/whitespace', () => { + const html = new TextEncoder().encode('

pidtag body

'); + expect(selectMsgBody({ bodyHtml: ' ', html })).toEqual({ + text: 'pidtag body', + format: 'html', + }); + }); +}); + +describe('pickEmail', () => { + it('prefers a real SMTP address over a legacy EX DN', () => { + expect(pickEmail('/O=EX/OU=x/CN=alice', 'alice@example.com')).toBe('alice@example.com'); + expect(pickEmail('alice@example.com', '/O=EX/OU=x/CN=alice')).toBe('alice@example.com'); + }); + + it('falls back to the EX DN when no SMTP-looking address exists', () => { + expect(pickEmail(undefined, '/O=EX/OU=x/CN=alice')).toBe('/O=EX/OU=x/CN=alice'); + }); + + it('returns undefined when nothing usable is provided', () => { + expect(pickEmail(undefined, undefined)).toBeUndefined(); + expect(pickEmail('', ' ')).toBeUndefined(); + }); +}); + +describe('isParsedMsgValid', () => { + it('accepts a parsed Outlook message', () => { + expect(isParsedMsgValid({ dataType: 'msg' })).toBe(true); + }); + + it('rejects an unsupported CFBF result (old .doc/.xls, corrupted compound file)', () => { + expect(isParsedMsgValid({ error: 'Unsupported file type!', dataType: null })).toBe(false); + expect(isParsedMsgValid({ dataType: null })).toBe(false); + expect(isParsedMsgValid({ dataType: 'attachment' })).toBe(false); + }); + + it('treats a whitespace-only plain body as empty and uses HTML', () => { + expect(selectMsgBody({ body: ' \n ', bodyHtml: '

real

' })).toEqual({ + text: 'real', + format: 'html', + }); + }); +}); + +describe('sanitizeAttachmentName', () => { + it('keeps a normal filename unchanged', () => { + expect(sanitizeAttachmentName('report.pdf', 0)).toBe('report.pdf'); + }); + + it('strips directory components to prevent path traversal', () => { + expect(sanitizeAttachmentName('../../etc/passwd', 0)).toBe('passwd'); + expect(sanitizeAttachmentName('foo/bar/baz.txt', 0)).toBe('baz.txt'); + expect(sanitizeAttachmentName('a\\b\\c.doc', 0)).toBe('c.doc'); + }); + + it('removes control characters and null bytes', () => { + expect(sanitizeAttachmentName('na\x00me.txt', 0)).toBe('name.txt'); + expect(sanitizeAttachmentName('tab\tname.txt', 0)).toBe('tabname.txt'); + }); + + it('preserves spaces inside the filename', () => { + expect(sanitizeAttachmentName('my report.pdf', 0)).toBe('my report.pdf'); + }); + + it('falls back to an indexed name when the result is empty', () => { + expect(sanitizeAttachmentName('', 2)).toBe('attachment-3'); + expect(sanitizeAttachmentName('...', 0)).toBe('attachment-1'); + }); +}); + +describe('formatMsgOutput', () => { + const baseView: MsgView = { + subject: 'Quarterly report', + from: { name: 'Alice', email: 'alice@example.com' }, + to: [{ name: 'Bob', email: 'bob@example.com' }], + cc: [], + date: 'Mon, 1 Jun 2026 10:00:00 +0900', + body: { text: 'See attached.', format: 'plain' }, + attachments: [], + }; + + it('renders the header block and body', () => { + const out = formatMsgOutput(baseView); + expect(out).toContain('Subject: Quarterly report'); + expect(out).toContain('From: Alice '); + expect(out).toContain('To: Bob '); + expect(out).toContain('Date: Mon, 1 Jun 2026 10:00:00 +0900'); + expect(out).toContain('See attached.'); + }); + + it('lists saved attachments with their paths and sizes', () => { + const out = formatMsgOutput({ + ...baseView, + attachments: [{ fileName: 'report.pdf', contentLength: 2048, savedPath: 'input/report.pdf' }], + }); + 
expect(out).toContain('Attachments (1)'); + expect(out).toContain('report.pdf'); + expect(out).toContain('input/report.pdf'); + expect(out).toContain('2048'); + }); + + it('shows a skip reason for attachments that were not saved', () => { + const out = formatMsgOutput({ + ...baseView, + attachments: [{ fileName: 'huge.bin', skipped: 'exceeds size limit' }], + }); + expect(out).toContain('huge.bin'); + expect(out).toContain('exceeds size limit'); + }); + + it('notes when the body could not be extracted', () => { + const out = formatMsgOutput({ ...baseView, body: { text: '', format: 'none' } }); + expect(out).toContain('(no text body)'); + }); + + it('omits the CC line when there are no CC recipients', () => { + expect(formatMsgOutput(baseView)).not.toContain('Cc:'); + }); + + it('keeps the attachment list when the body is truncated to budget', () => { + const longBody = 'word '.repeat(20000); + const out = assembleMsgOutput( + { + ...baseView, + body: { text: longBody, format: 'plain' }, + attachments: [{ fileName: 'a.pdf', contentLength: 10, savedPath: 'input/a.pdf' }], + }, + 100, + 'mail.msg', + ); + expect(out).toContain('input/a.pdf'); + expect(out).toContain('Subject: Quarterly report'); + expect(out.length).toBeLessThan(longBody.length); + }); + + it('includes the CC line when CC recipients exist', () => { + const out = formatMsgOutput({ ...baseView, cc: [{ email: 'carol@example.com' }] }); + expect(out).toContain('Cc: carol@example.com'); + }); +}); + +describe('executeReadMsg (integration)', () => { + let workspace: string; + const ctx = (): ToolContext => ({ workspacePath: workspace, editAllowed: true }); + + beforeEach(() => { + workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'readmsg-')); + fs.copyFileSync(FIXTURE, path.join(workspace, 'mail.msg')); + }); + + afterEach(() => { + fs.rmSync(workspace, { recursive: true, force: true }); + }); + + it('extracts headers and body from a real .msg file', async () => { + const result = await executeReadMsg({ 
file_path: 'mail.msg' }, ctx()); + expect(result.isError).toBeFalsy(); + expect(result.output).toContain('Subject: attachmentFiles'); + expect(result.output).toContain('From: hmailuser '); + expect(result.output).toContain('To: hmailuser@hmailserver.test'); + expect(result.output).toContain('attachmentFiles'); + }); + + it('saves attachments to input/ and lists them', async () => { + const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx()); + expect(result.output).toContain('Attachments (3)'); + for (const [name, size] of [ + ['jpg.jpg', 726], + ['png.png', 134], + ['tif.tif', 664], + ] as const) { + const saved = path.join(workspace, 'input', name); + expect(fs.existsSync(saved)).toBe(true); + expect(fs.statSync(saved).size).toBe(size); + expect(result.output).toContain(path.join('input', name)); + } + }); + + it('rejects paths outside the workspace', async () => { + const result = await executeReadMsg({ file_path: '../../etc/passwd' }, ctx()); + expect(result.isError).toBe(true); + }); + + it('reports a clear error for a non-.msg file', async () => { + fs.writeFileSync(path.join(workspace, 'junk.msg'), 'not a real msg file'); + const result = await executeReadMsg({ file_path: 'junk.msg' }, ctx()); + expect(result.isError).toBe(true); + expect(result.output).toContain('ReadMsg'); + }); + + it('does not write attachments in a read-only phase', async () => { + const result = await executeReadMsg( + { file_path: 'mail.msg' }, + { workspacePath: workspace, editAllowed: false }, + ); + expect(result.isError).toBeFalsy(); + expect(fs.existsSync(path.join(workspace, 'input', 'jpg.jpg'))).toBe(false); + expect(result.output).toContain('read-only'); + }); + + it('does not overwrite an existing file in input/', async () => { + fs.mkdirSync(path.join(workspace, 'input'), { recursive: true }); + fs.writeFileSync(path.join(workspace, 'input', 'jpg.jpg'), 'pre-existing'); + const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx()); + 
expect(fs.readFileSync(path.join(workspace, 'input', 'jpg.jpg'), 'utf8')).toBe('pre-existing'); + expect(fs.existsSync(path.join(workspace, 'input', 'jpg-1.jpg'))).toBe(true); + expect(result.output).toContain('jpg-1.jpg'); + }); + + it('rejects files exceeding the configured size limit', async () => { + const result = await executeReadMsg( + { file_path: 'mail.msg' }, + { workspacePath: workspace, editAllowed: true, toolsConfig: { officeMsgMaxSizeMb: 0.001 } }, + ); + expect(result.isError).toBe(true); + expect(result.output).toMatch(/size|limit|too large/i); + }); + + it('is registered and routed through the office module dispatch', async () => { + expect(OFFICE_TOOL_DEFS.ReadMsg).toBeDefined(); + const result = await officeExecuteTool('ReadMsg', { file_path: 'mail.msg' }, ctx()); + expect(result?.isError).toBeFalsy(); + expect(result?.output).toContain('Subject: attachmentFiles'); + }); +}); diff --git a/src/engine/tools/msg.ts b/src/engine/tools/msg.ts new file mode 100644 index 0000000..f5e8f0d --- /dev/null +++ b/src/engine/tools/msg.ts @@ -0,0 +1,416 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import MsgReaderImport from '@kenjiuno/msgreader'; +import { ToolDef } from '../../llm/openai-compat.js'; +import type { ToolContext, ToolResult } from './core.js'; +import { resolveAndGuard, truncateToBudget, getToolOutputBudgetTokens } from './core.js'; +import { logger } from '../../logger.js'; + +// CJS/ESM interop: under native Node ESM (the built dist), a default import of +// this CommonJS package resolves to the module.exports namespace object, not the +// class — so `new MsgReaderImport()` throws "is not a constructor". Vitest/tsx +// hide this via __esModule interop. Pick the real constructor for both worlds. +const MsgReader = ( + typeof MsgReaderImport === 'function' + ? 
MsgReaderImport + : (MsgReaderImport as unknown as { default: typeof MsgReaderImport }).default +) as typeof MsgReaderImport; +type MsgReaderInstance = InstanceType; + +const DEFAULT_MSG_MAX_SIZE_MB = 25; + +export interface MsgAddress { + name?: string; + email?: string; +} + +export interface MsgAttachmentMeta { + fileName: string; + contentLength?: number; + /** Relative path the attachment was written to (when saved). */ + savedPath?: string; + /** Reason the attachment was not saved (mutually exclusive with savedPath). */ + skipped?: string; +} + +export interface MsgView { + subject?: string; + from?: MsgAddress; + to: MsgAddress[]; + cc: MsgAddress[]; + date?: string; + body: { text: string; format: 'plain' | 'html' | 'none' }; + attachments: MsgAttachmentMeta[]; +} + +/** Render a single address as `Name `, falling back gracefully. */ +export function formatAddress(a: MsgAddress): string { + const name = a.name?.trim(); + const email = a.email?.trim(); + if (name && email) return `${name} <${email}>`; + if (name) return name; + if (email) return email; + return '(unknown)'; +} + +const NAMED_ENTITIES: Record = { + ' ': ' ', + '&': '&', + '<': '<', + '>': '>', + '"': '"', + ''': "'", + ''': "'", +}; + +// Decode a numeric character reference, preserving the original entity if the +// code point is out of range (broken email HTML must not crash the whole read). 
+function safeFromCodePoint(code: number, original: string): string { + if (!Number.isFinite(code) || code < 0 || code > 0x10ffff || (code >= 0xd800 && code <= 0xdfff)) { + return original; + } + try { + return String.fromCodePoint(code); + } catch { + return original; + } +} + +function decodeEntities(s: string): string { + let out = s.replace(/ |&|<|>|"|'|'/g, (m) => NAMED_ENTITIES[m]); + out = out.replace(/&#(\d+);/g, (m, code) => safeFromCodePoint(Number(code), m)); + out = out.replace(/&#x([0-9a-fA-F]+);/g, (m, code) => safeFromCodePoint(parseInt(code, 16), m)); + return out; +} + +/** Convert an HTML fragment into readable plain text. */ +export function stripHtml(html: string): string { + let s = html; + // Drop script/style blocks including their contents. + s = s.replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1>/gi, ''); + // Treat
and block-level boundaries as newlines. + s = s.replace(//gi, '\n'); + s = s.replace(/<\/(p|div|li|tr|h[1-6]|ul|ol|table|blockquote|section|article)\s*>/gi, '\n'); + // Remove all remaining tags. + s = s.replace(/<[^>]+>/g, ''); + s = decodeEntities(s); + // Normalize whitespace: collapse intra-line runs, trim each line, collapse blank runs. + s = s + .split('\n') + .map((line) => line.replace(/[^\S\n]+/g, ' ').trim()) + .join('\n') + .replace(/\n{3,}/g, '\n\n') + .trim(); + return s; +} + +/** Choose the best available body text, preferring plain over HTML. */ +export function selectMsgBody(fields: { + body?: string; + bodyHtml?: string; + // PidTagHtml: some HTML-only messages carry the body here as raw bytes. + html?: Uint8Array | string; +}): { + text: string; + format: 'plain' | 'html' | 'none'; +} { + const plain = fields.body?.trim(); + if (plain) return { text: plain, format: 'plain' }; + // Try each HTML source in order; an empty/whitespace bodyHtml must not block + // the PidTagHtml fallback, so we check the stripped result of each. + const htmlSources = [ + fields.bodyHtml, + fields.html != null + ? typeof fields.html === 'string' + ? fields.html + : Buffer.from(fields.html).toString('utf8') + : undefined, + ]; + for (const source of htmlSources) { + if (!source) continue; + const stripped = stripHtml(source); + if (stripped) return { text: stripped, format: 'html' }; + } + return { text: '', format: 'none' }; +} + +/** + * Pick the most usable email address from candidates, preferring a real SMTP + * address (contains '@') over an Exchange legacy EX DN (`/O=.../CN=...`). + */ +export function pickEmail(...candidates: (string | undefined)[]): string | undefined { + const valid = candidates.map((c) => c?.trim()).filter((c): c is string => !!c); + return valid.find((c) => c.includes('@')) ?? valid[0]; +} + +/** Reduce an attachment name to a safe basename, never escaping the target dir. 
*/ +export function sanitizeAttachmentName(name: string, index: number): string { + // Take the last path segment across both separators (defends path traversal). + const base = name.split(/[/\\]/).pop() ?? ''; + // Strip control characters and null bytes. + // eslint-disable-next-line no-control-regex + const cleaned = base.replace(/[\x00-\x1f\x7f]/g, '').trim(); + // Reject names that are empty or consist only of dots/spaces. + if (!cleaned || /^[.\s]*$/.test(cleaned)) { + return `attachment-${index + 1}`; + } + return cleaned; +} + +/** Find a filename that collides with neither this run nor existing files on disk. */ +function resolveFreeName(dir: string, name: string, used: Set): string { + const ext = path.extname(name); + const stem = name.slice(0, name.length - ext.length); + let candidate = name; + let n = 1; + while (used.has(candidate) || fs.existsSync(path.join(dir, candidate))) { + candidate = `${stem}-${n}${ext}`; + n += 1; + } + return candidate; +} + +/** Build the human-readable text output for a parsed message. */ +export function formatMsgOutput(view: MsgView): string { + const lines: string[] = []; + if (view.subject) lines.push(`Subject: ${view.subject}`); + if (view.from) lines.push(`From: ${formatAddress(view.from)}`); + if (view.to.length) lines.push(`To: ${view.to.map(formatAddress).join(', ')}`); + if (view.cc.length) lines.push(`Cc: ${view.cc.map(formatAddress).join(', ')}`); + if (view.date) lines.push(`Date: ${view.date}`); + + const parts: string[] = [lines.join('\n')]; + + parts.push(view.body.format === 'none' ? '(no text body)' : view.body.text); + + if (view.attachments.length) { + const attLines = [`Attachments (${view.attachments.length}):`]; + for (const att of view.attachments) { + if (att.savedPath) { + const size = att.contentLength != null ? ` (${att.contentLength} bytes)` : ''; + attLines.push(`- ${att.fileName}${size} -> ${att.savedPath}`); + } else { + attLines.push(`- ${att.fileName} - skipped: ${att.skipped ?? 
'not saved'}`); + } + } + parts.push(attLines.join('\n')); + } + + return parts.join('\n\n'); +} + +/** + * Assemble the final output, truncating ONLY the body to the token budget. + * Headers and the attachment list (with saved input/ paths) always survive — + * attachments are already written to disk and the caller needs their paths. + */ +export function assembleMsgOutput(view: MsgView, budgetTokens: number, sourceLabel: string): string { + const shell = formatMsgOutput({ ...view, body: { text: '', format: 'plain' } }); + const reserveTokens = Math.ceil(shell.length / 4) + 64; + const bodyBudget = Math.max(500, budgetTokens - reserveTokens); + const bodyText = view.body.format === 'none' ? '' : view.body.text; + const truncatedBody = truncateToBudget(bodyText, bodyBudget, { sourceLabel }).text; + return formatMsgOutput({ ...view, body: { text: truncatedBody, format: view.body.format } }); +} + +/** + * msgreader's getFileData() returns `{ error: 'Unsupported file type!' }` (not a + * throw) for CFBF files that aren't Outlook messages (legacy .doc/.xls, broken + * compound files). Treat anything whose root isn't a 'msg' as a read failure. + */ +export function isParsedMsgValid(fields: { error?: string; dataType?: string | null }): boolean { + return !fields.error && fields.dataType === 'msg'; +} + +export const READ_MSG_DEF: ToolDef = { + type: 'function', + function: { + name: 'ReadMsg', + description: + 'Read an Outlook .msg email file, extracting subject/sender/recipients/body and saving attachments to input/. 
詳細は ReadToolDoc({ name: "ReadMsg" }) で取得可能。', + parameters: { + type: 'object', + properties: { + file_path: { type: 'string', description: 'Path to the .msg file' }, + }, + required: ['file_path'], + }, + }, +}; + +interface FieldsLike { + error?: string; + dataType?: string | null; + subject?: string; + senderName?: string; + senderEmail?: string; + senderSmtpAddress?: string; + body?: string; + bodyHtml?: string; + html?: Uint8Array; + messageDeliveryTime?: string; + clientSubmitTime?: string; + recipients?: { name?: string; email?: string; smtpAddress?: string; recipType?: string }[]; + attachments?: { + fileName?: string; + fileNameShort?: string; + contentLength?: number; + innerMsgContent?: boolean; + dataType?: string | null; + }[]; +} + +export async function executeReadMsg( + input: Record, + ctx: ToolContext, +): Promise { + const filePath = String(input.file_path ?? ''); + if (!filePath) { + return { output: 'ReadMsg: file_path is required', isError: true }; + } + + let resolved: string; + try { + resolved = resolveAndGuard(ctx.workspacePath, filePath); + } catch (e) { + return { output: `ReadMsg: ${(e as Error).message}`, isError: true }; + } + + // Enforce a size cap before loading the whole file into memory (matches the + // other office tools, which each guard against oversized inputs). + const maxMb = + typeof ctx.toolsConfig?.officeMsgMaxSizeMb === 'number' && + Number.isFinite(ctx.toolsConfig.officeMsgMaxSizeMb) && + ctx.toolsConfig.officeMsgMaxSizeMb > 0 + ? 
ctx.toolsConfig.officeMsgMaxSizeMb + : DEFAULT_MSG_MAX_SIZE_MB; + try { + const sizeMb = fs.statSync(resolved).size / 1024 / 1024; + if (sizeMb > maxMb) { + return { + output: `ReadMsg: file size ${sizeMb.toFixed(1)}MB exceeds limit of ${maxMb}MB`, + isError: true, + }; + } + } catch (e) { + return { output: `ReadMsg: cannot stat file: ${(e as Error).message}`, isError: true }; + } + + let buffer: Buffer; + try { + buffer = fs.readFileSync(resolved); + } catch (e) { + return { output: `ReadMsg: cannot read file: ${(e as Error).message}`, isError: true }; + } + + // .msg is an OLE2 / CFBF compound file. Validate the magic header up front: + // MsgReader silently returns an empty result for non-CFBF data instead of throwing. + const CFBF_MAGIC = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]); + if (buffer.length < 8 || !buffer.subarray(0, 8).equals(CFBF_MAGIC)) { + return { + output: `ReadMsg: not a valid Outlook .msg file (bad signature): ${path.basename(resolved)}`, + isError: true, + }; + } + + let reader: MsgReaderInstance; + let fields: FieldsLike; + try { + // Copy into a standalone ArrayBuffer (MsgReader rejects Node Buffers). + const arrayBuffer = new Uint8Array(buffer).buffer; + reader = new MsgReader(arrayBuffer); + fields = reader.getFileData() as unknown as FieldsLike; + } catch (e) { + return { + output: `ReadMsg: failed to parse .msg (is this a valid Outlook message?): ${(e as Error).message}`, + isError: true, + }; + } + + if (!isParsedMsgValid(fields)) { + return { + output: `ReadMsg: not a parseable Outlook message${fields.error ? ` (${fields.error})` : ''}: ${path.basename(resolved)}`, + isError: true, + }; + } + + const recipients = fields.recipients ?? []; + const to = recipients + .filter((r) => (r.recipType ?? 
'to') === 'to') + .map((r) => ({ name: r.name, email: pickEmail(r.smtpAddress, r.email) })); + const cc = recipients + .filter((r) => r.recipType === 'cc') + .map((r) => ({ name: r.name, email: pickEmail(r.smtpAddress, r.email) })); + + const inputDir = path.join(ctx.workspacePath, 'input'); + const attachments: MsgAttachmentMeta[] = []; + const rawAttachments = fields.attachments ?? []; + const usedNames = new Set(); + + rawAttachments.forEach((att, i) => { + const rawName = att.fileName || att.fileNameShort || ''; + const baseName = sanitizeAttachmentName(rawName, i); + + // Read-only movements (verify etc.) must not mutate the workspace. + if (!ctx.editAllowed) { + attachments.push({ + fileName: baseName, + contentLength: att.contentLength, + skipped: 'read-only phase (attachment not saved)', + }); + return; + } + + if (att.innerMsgContent) { + attachments.push({ + fileName: baseName, + contentLength: att.contentLength, + skipped: 'embedded message (open separately)', + }); + return; + } + + // Resolve a name that collides with neither an earlier attachment this run + // nor a file already present in input/ (user uploads, prior extractions). 
+ const name = resolveFreeName(inputDir, baseName, usedNames); + usedNames.add(name); + + try { + const data = reader.getAttachment(att as never); + fs.mkdirSync(inputDir, { recursive: true }); + const dest = path.join(inputDir, name); + fs.writeFileSync(dest, Buffer.from(data.content)); + attachments.push({ + fileName: name, + contentLength: data.content.length, + savedPath: path.join('input', name), + }); + } catch (e) { + logger.warn(`[ReadMsg] failed to save attachment ${name}: ${(e as Error).message}`); + attachments.push({ + fileName: name, + contentLength: att.contentLength, + skipped: `extraction failed: ${(e as Error).message}`, + }); + } + }); + + const view: MsgView = { + subject: fields.subject, + from: (() => { + const email = pickEmail(fields.senderSmtpAddress, fields.senderEmail); + return fields.senderName || email ? { name: fields.senderName, email } : undefined; + })(), + to, + cc, + date: fields.messageDeliveryTime || fields.clientSubmitTime, + body: selectMsgBody(fields), + attachments, + }; + + const output = assembleMsgOutput(view, getToolOutputBudgetTokens(ctx), path.basename(resolved)); + logger.info( + `[ReadMsg] ${path.basename(resolved)}: attachments=${attachments.length} bodyFormat=${view.body.format}`, + ); + return { output, isError: false }; +} diff --git a/src/engine/tools/office.ts b/src/engine/tools/office.ts index 4ff64a5..565af86 100644 --- a/src/engine/tools/office.ts +++ b/src/engine/tools/office.ts @@ -10,6 +10,7 @@ import { ToolDef } from '../../llm/openai-compat.js'; import type { ToolContext, ToolResult } from './core.js'; import { resolveAndGuard, resolveOutputPathWithin, truncateToBudget, getToolOutputBudgetTokens } from './core.js'; import { resolveThemePalette, extractSheetStyles } from './excel-styles.js'; +import { READ_MSG_DEF, executeReadMsg } from './msg.js'; import { logger } from '../../logger.js'; import { callVisionModel, resolveImagePath } from './image.js'; import type { @@ -351,6 +352,7 @@ export const 
TOOL_DEFS: Record = { ReadDocx: READ_DOCX_DEF, ReadPdf: READ_PDF_DEF, ReadPPTX: READ_PPTX_DEF, + ReadMsg: READ_MSG_DEF, SplitExcelSheets: SPLIT_EXCEL_SHEETS_DEF, SplitDocxSections: SPLIT_DOCX_SECTIONS_DEF, PdfToImages: PDF_TO_IMAGES_DEF, @@ -2284,6 +2286,8 @@ export async function executeTool( return executeReadPdf(input, ctx); case 'ReadPPTX': return executeReadPptx(input, ctx); + case 'ReadMsg': + return executeReadMsg(input, ctx); case 'SplitExcelSheets': return executeSplitExcelSheets(input, ctx); case 'SplitDocxSections': diff --git a/src/llm/openai-compat.test.ts b/src/llm/openai-compat.test.ts index 0923ac4..799e37d 100644 --- a/src/llm/openai-compat.test.ts +++ b/src/llm/openai-compat.test.ts @@ -758,3 +758,43 @@ describe('OpenAICompatClient gateway sentinel error events', () => { expect(events.at(-1)?.type).toBe('done'); }); }); + +describe('OpenAICompatClient tool-call flush at stream end', () => { + afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + }); + + // Some OpenAI-compat backends finish a forced/named tool call with + // finish_reason 'stop' (or omit it) instead of 'tool_calls'. The client must + // still surface the accumulated tool_use at the done boundary, not drop it. 
+ it('emits tool_use when the stream ends on finish_reason "stop" (via [DONE])', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(makeSseResponse([ + { choices: [{ delta: { tool_calls: [{ index: 0, id: 'c1', function: { name: 'do_it', arguments: '{"a":1}' } }] }, finish_reason: null }] }, + { choices: [{ delta: {}, finish_reason: 'stop' }] }, + '[DONE]', + ]))); + const events = await collectEvents(new OpenAICompatClient('http://h:1/v1', 'm'), [{ role: 'user', content: 'q' }]); + const toolUse = events.find((e) => e.type === 'tool_use'); + expect(toolUse).toMatchObject({ type: 'tool_use', name: 'do_it', input: { a: 1 } }); + // done still follows the tool_use + expect(events[events.length - 1].type).toBe('done'); + }); + + it('emits tool_use when the stream ends at EOF without [DONE]', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(makeSseResponse([ + { choices: [{ delta: { tool_calls: [{ index: 0, id: 'c2', function: { name: 'fn', arguments: '{}' } }] }, finish_reason: null }] }, + ]))); + const events = await collectEvents(new OpenAICompatClient('http://h:1/v1', 'm'), [{ role: 'user', content: 'q' }]); + expect(events.filter((e) => e.type === 'tool_use')).toHaveLength(1); + }); + + it('does not double-emit when finish_reason "tool_calls" already flushed', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(makeSseResponse([ + { choices: [{ delta: { tool_calls: [{ index: 0, id: 'c3', function: { name: 'fn', arguments: '{}' } }] }, finish_reason: 'tool_calls' }] }, + '[DONE]', + ]))); + const events = await collectEvents(new OpenAICompatClient('http://h:1/v1', 'm'), [{ role: 'user', content: 'q' }]); + expect(events.filter((e) => e.type === 'tool_use')).toHaveLength(1); + }); +}); diff --git a/src/llm/openai-compat.ts b/src/llm/openai-compat.ts index 6547a3d..3c3222d 100644 --- a/src/llm/openai-compat.ts +++ b/src/llm/openai-compat.ts @@ -1,5 +1,6 @@ import { getDefaultProviderRetryConfig, type ProviderRetryConfig } 
from '../config.js'; import { logger } from '../logger.js'; +import { recordLlmUsage } from './usage-recorder.js'; import { IMAGE_CONTENT_TOKENS, estimateMessageTokens, @@ -238,6 +239,33 @@ interface ToolCallAccumulator { }; } +/** + * Emit accumulated tool calls as `tool_use` events (sorted by index) and + * clear the accumulator. Called both on `finish_reason === 'tool_calls'` and + * at stream end — some OpenAI-compat backends finish a forced/named tool call + * with finish_reason 'stop', so draining at the done boundary keeps the call + * from being silently dropped. Returns an empty array when nothing is pending, + * so the done-site flush is a no-op for the normal 'tool_calls' path (the map + * is already cleared). + */ +function drainToolCalls(accumulators: Map): LLMEvent[] { + if (accumulators.size === 0) return []; + const events: LLMEvent[] = []; + const sortedIndices = Array.from(accumulators.keys()).sort((a, b) => a - b); + for (const idx of sortedIndices) { + const acc = accumulators.get(idx)!; + let input: Record = {}; + try { + input = JSON.parse(acc.function.arguments) as Record; + } catch { + logger.warn(`OpenAICompatClient: failed to parse tool arguments: ${acc.function.arguments}`); + } + events.push({ type: 'tool_use', id: acc.id, name: acc.function.name, input }); + } + accumulators.clear(); + return events; +} + export interface OpenAICompatClientOptions { /** * When true, this client treats its endpoint as an LLM gateway / proxy @@ -252,6 +280,26 @@ export interface OpenAICompatClientOptions { proxy?: boolean; } +/** + * Per-call attribution context. Threaded from each call site so the + * usage recorder can attribute the completion to a MAESTRO user. Absent + * userId falls back to the 'system' sentinel (never NULL). + */ +export interface LlmCallContext { + userId?: string; +} + +/** + * Per-call request-shaping overrides. Used by callers that need to force a + * tool (reflection's forced submit_reflection) or pin sampling temperature. 
+ * Kept off the hot agent path (which leaves these unset). + */ +export interface LlmRequestOptions { + temperature?: number; + /** OpenAI tool_choice (e.g. `{ type: 'function', function: { name } }`). */ + toolChoice?: unknown; +} + export class OpenAICompatClient { private retryConfig: ProviderRetryConfig; readonly timeoutMs: number; @@ -292,7 +340,43 @@ export class OpenAICompatClient { this.preferredBackendId = backendId; } - async *chat(messages: Message[], tools?: ToolDef[], externalSignal?: AbortSignal): AsyncGenerator { + /** + * Record one successful completion to the per-user daily usage ledger. + * Called from the single done funnel (both the `[DONE]` and EOF exits) + * so the two terminal paths can never double-count. `source` is the + * client's proxy flag, `model` is the first observed chunk.model (routing + * key fallback), `route` is the gateway backendId (proxy) or endpoint host + * (direct). Never records on abort / timeout / error (those don't `done`). + */ + private finalizeDone( + usage: { prompt_tokens: number; completion_tokens: number } | undefined, + observedModel: string, + observedBackendId: string, + context?: LlmCallContext, + ): void { + const source: 'gateway' | 'direct' = this.proxy ? 'gateway' : 'direct'; + const model = observedModel || this.model || 'unknown'; + let route = 'unknown'; + if (this.proxy) { + route = observedBackendId || 'unknown'; + } else { + try { + route = new URL(this.baseUrl).host || 'unknown'; + } catch { + route = 'unknown'; + } + } + recordLlmUsage({ + userId: context?.userId || 'system', + source, + model, + route, + tokensIn: usage?.prompt_tokens ?? 0, + tokensOut: usage?.completion_tokens ?? 
0, + }); + } + + async *chat(messages: Message[], tools?: ToolDef[], externalSignal?: AbortSignal, context?: LlmCallContext, requestOptions?: LlmRequestOptions): AsyncGenerator { const controller = new AbortController(); // アイドルタイムアウト: チャンク受信のたびにリセットされる let timeoutId = setTimeout(() => controller.abort(), this.timeoutMs); @@ -338,6 +422,12 @@ export class OpenAICompatClient { if (tools && tools.length > 0) { body['tools'] = tools; } + if (requestOptions?.temperature != null) { + body['temperature'] = requestOptions.temperature; + } + if (requestOptions?.toolChoice != null) { + body['tool_choice'] = requestOptions.toolChoice; + } // Block oversized prompts before the HTTP request so callers see a // structured error instead of an opaque HTTP 400. The runtime context // limit is fetched per-model (see fetchOllamaContextLimit) and passed @@ -358,6 +448,10 @@ export class OpenAICompatClient { for (let attempt = 1; attempt <= maxAttempts; attempt++) { let response: Response | null = null; + // Usage attribution captured during this attempt's stream. Reset + // per attempt; only the attempt that reaches `done` records. + let observedModel = ''; + let observedBackendId = ''; try { response = await fetch(`${this.baseUrl}/chat/completions`, { @@ -440,6 +534,7 @@ export class OpenAICompatClient { const rawBackendId = response.headers.get('x-litellm-model-id'); const backendId = rawBackendId ? rawBackendId.trim() : ''; if (backendId.length > 0) { + observedBackendId = backendId; const rawCacheKey = response.headers.get('x-litellm-cache-key'); const cacheKey = rawCacheKey ? rawCacheKey.trim() : ''; yield { type: 'backend', backendId, cacheKey: cacheKey.length > 0 ? cacheKey : null }; @@ -475,7 +570,13 @@ export class OpenAICompatClient { const data = trimmed.slice('data: '.length); if (data === '[DONE]') { + // Flush any tool calls the backend left un-finished (some + // OpenAI-compat servers end a forced/named tool call with + // finish_reason 'stop' instead of 'tool_calls'). 
Without this + // the accumulated call would be silently dropped. + yield* drainToolCalls(toolCallAccumulators); // usage 付きで done を emit + this.finalizeDone(usage, observedModel, observedBackendId, context); yield { type: 'done', usage }; return; } @@ -488,6 +589,15 @@ export class OpenAICompatClient { continue; } + // Real model name for usage attribution. The gateway passes + // chunks through byte-for-byte, so chunk.model is the actual + // backend model for both direct and gateway paths. First + // non-empty value wins. + if (!observedModel) { + const m = chunk['model']; + if (typeof m === 'string' && m.length > 0) observedModel = m; + } + // AAO Gateway / LiteLLM sentinel error event: // data: {"error":{"type":"gateway_shutdown","message":"..."}} // gateway_shutdown / gateway_timeout は他 worker に retry すれば @@ -596,23 +706,7 @@ export class OpenAICompatClient { // tool_calls が完了したら emit if (finishReason === 'tool_calls') { - const sortedIndices = Array.from(toolCallAccumulators.keys()).sort((a, b) => a - b); - for (const idx of sortedIndices) { - const acc = toolCallAccumulators.get(idx)!; - let input: Record = {}; - try { - input = JSON.parse(acc.function.arguments) as Record; - } catch { - logger.warn(`OpenAICompatClient: failed to parse tool arguments: ${acc.function.arguments}`); - } - yield { - type: 'tool_use', - id: acc.id, - name: acc.function.name, - input, - }; - } - toolCallAccumulators.clear(); + yield* drainToolCalls(toolCallAccumulators); } } } @@ -644,6 +738,8 @@ export class OpenAICompatClient { } // [DONE] なしにストリームが終了した場合 + yield* drainToolCalls(toolCallAccumulators); + this.finalizeDone(usage, observedModel, observedBackendId, context); yield { type: 'done', usage }; return; } diff --git a/src/llm/usage-recorder.ts b/src/llm/usage-recorder.ts new file mode 100644 index 0000000..1c9e095 --- /dev/null +++ b/src/llm/usage-recorder.ts @@ -0,0 +1,52 @@ +import { logger } from '../logger.js'; + +/** + * Per-user LLM usage event, emitted by 
OpenAICompatClient once per + * successful chat completion (gateway-routed and direct alike). The + * recorder is a process-global sink so every call site — agent-loop, + * title generation, piece classification, reflection — is covered + * without threading a Repository handle through every client + * construction. This is the single chokepoint the design relies on to + * avoid the propagation leaks this codebase has repeatedly hit. + * + * Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + */ +export interface LlmUsageEvent { + /** Owner id, or 'local' (no-auth) / 'system' (ownerless) sentinel. NOT NULL. */ + userId: string; + source: 'gateway' | 'direct'; + /** Real model name (chunk.model) with routing-key / 'unknown' fallback. */ + model: string; + /** Backend server name (gateway backendId / direct host) or 'unknown'. */ + route: string; + tokensIn: number; + tokensOut: number; +} + +export type LlmUsageRecorder = (event: LlmUsageEvent) => void; + +let recorder: LlmUsageRecorder | null = null; + +/** + * Install (or clear with null) the process-global usage recorder. Called + * once during bootstrap with a thin wrapper over Repository.incrementLlmUsage. + */ +export function setLlmUsageRecorder(fn: LlmUsageRecorder | null): void { + recorder = fn; +} + +/** + * Record one successful completion. No-op when no recorder is installed + * (e.g. unit tests, gateway server process). Writes are best-effort: a DB + * hiccup must never kill the agent's stream, so failures are swallowed + * with a warn. + */ +export function recordLlmUsage(event: LlmUsageEvent): void { + if (!recorder) return; + try { + recorder(event); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + logger.warn(`[usage-recorder] record failed (non-fatal): ${msg}`); + } +} diff --git a/src/llm/usage-recording.test.ts b/src/llm/usage-recording.test.ts new file mode 100644 index 0000000..7b8b27c --- /dev/null +++ b/src/llm/usage-recording.test.ts @@ -0,0 +1,125 @@ +/** + * Usage recording at the OpenAICompatClient completion boundary. + * + * Verifies the single finalizeDone funnel: + * - a successful stream records exactly one event with model (chunk.model), + * tokens from usage, source=direct, route=endpoint host + * - a usage-less but successful stream still records (tokens 0) + * - an error stream records nothing (abort/timeout/error never `done`) + * - proxy mode records source=gateway, route=backendId (x-litellm-model-id) + * - no recorder installed = no-op (never throws) + * + * Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + */ +import { describe, it, expect, vi, afterEach } from 'vitest'; +import { OpenAICompatClient } from './openai-compat.js'; +import { getDefaultProviderRetryConfig } from '../config.js'; +import { setLlmUsageRecorder, type LlmUsageEvent } from './usage-recorder.js'; + +const NO_RETRY = { ...getDefaultProviderRetryConfig(), maxAttempts: 1 }; + +function sseResponse(chunks: unknown[], headers: Record = {}): Response { + const lines = chunks.map((c) => `data: ${JSON.stringify(c)}\n\n`); + lines.push('data: [DONE]\n\n'); + const encoder = new TextEncoder(); + let i = 0; + return { + ok: true, + status: 200, + headers: { get: (k: string) => headers[k.toLowerCase()] ?? null }, + body: { + getReader: () => ({ + read: async () => + i < lines.length + ? 
{ done: false, value: encoder.encode(lines[i++]) } + : { done: true, value: undefined }, + releaseLock: () => {}, + }), + }, + } as unknown as Response; +} + +const textChunk = (model: string) => ({ model, choices: [{ delta: { content: 'hi' }, finish_reason: null }] }); +const usageChunk = (pin: number, pout: number) => ({ choices: [], usage: { prompt_tokens: pin, completion_tokens: pout } }); + +async function drain(client: OpenAICompatClient, ctx?: { userId?: string }): Promise { + for await (const _ of client.chat([{ role: 'user', content: 'q' }], undefined, undefined, ctx)) { + // discard + } +} + +afterEach(() => { + setLlmUsageRecorder(null); + vi.unstubAllGlobals(); +}); + +describe('LLM usage recording', () => { + it('records one direct event with model, tokens, and endpoint-host route', async () => { + const events: LlmUsageEvent[] = []; + setLlmUsageRecorder((e) => events.push(e)); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(sseResponse([textChunk('llama-3.1-8b'), usageChunk(120, 30)]))); + + const client = new OpenAICompatClient('http://gpu-1.local:11434/v1', 'role-auto'); + await drain(client, { userId: 'u1' }); + + expect(events).toHaveLength(1); + expect(events[0]).toEqual({ + userId: 'u1', source: 'direct', model: 'llama-3.1-8b', route: 'gpu-1.local:11434', + tokensIn: 120, tokensOut: 30, + }); + }); + + it('records a usage-less success with zero tokens', async () => { + const events: LlmUsageEvent[] = []; + setLlmUsageRecorder((e) => events.push(e)); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(sseResponse([textChunk('m')]))); + + await drain(new OpenAICompatClient('http://h:1/v1', 'm'), { userId: 'u1' }); + expect(events).toHaveLength(1); + expect(events[0]).toMatchObject({ tokensIn: 0, tokensOut: 0 }); + }); + + it('falls back userId to system and model to routing key', async () => { + const events: LlmUsageEvent[] = []; + setLlmUsageRecorder((e) => events.push(e)); + // No `model` field in the chunk → fall back to the client's 
routing key. + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(sseResponse([{ choices: [{ delta: { content: 'x' }, finish_reason: null }] }, usageChunk(1, 1)]))); + + await drain(new OpenAICompatClient('http://h:1/v1', 'routing-key')); // no context + expect(events[0]).toMatchObject({ userId: 'system', model: 'routing-key' }); + }); + + it('records nothing on an error stream', async () => { + const events: LlmUsageEvent[] = []; + setLlmUsageRecorder((e) => events.push(e)); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: false, status: 500, headers: { get: () => null }, text: () => Promise.resolve('boom'), + } as unknown as Response)); + + await drain(new OpenAICompatClient('http://h:1/v1', 'm', undefined, NO_RETRY), { userId: 'u1' }); + expect(events).toHaveLength(0); + }); + + it('records source=gateway with backendId route in proxy mode', async () => { + const events: LlmUsageEvent[] = []; + setLlmUsageRecorder((e) => events.push(e)); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue( + sseResponse([textChunk('qwen-72b'), usageChunk(5, 2)], { 'x-litellm-model-id': 'pool-a-node-3' }), + )); + + const client = new OpenAICompatClient( + 'http://gateway:4000/v1', 'role-quality', undefined, undefined, undefined, undefined, undefined, undefined, + { proxy: true }, + ); + await drain(client, { userId: 'u9' }); + + expect(events).toHaveLength(1); + expect(events[0]).toMatchObject({ source: 'gateway', model: 'qwen-72b', route: 'pool-a-node-3' }); + }); + + it('is a no-op when no recorder is installed', async () => { + setLlmUsageRecorder(null); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(sseResponse([textChunk('m'), usageChunk(1, 1)]))); + await expect(drain(new OpenAICompatClient('http://h:1/v1', 'm'), { userId: 'u1' })).resolves.toBeUndefined(); + }); +}); diff --git a/src/metrics/tool-name-allowlist.test.ts b/src/metrics/tool-name-allowlist.test.ts index e0a3f43..c358c58 100644 --- a/src/metrics/tool-name-allowlist.test.ts +++ 
b/src/metrics/tool-name-allowlist.test.ts @@ -7,6 +7,7 @@ import { normalizeToolNameForMetric, BUILTIN_TOOL_NAMES } from './tool-name-allo import { createWorkerMetrics } from './worker-metrics.js'; import { TOOL_DEFS as SLIDE_DEFS } from '../engine/tools/slide.js'; import { TOOL_DEFS as MSLEARN_DEFS } from '../engine/tools/ms-learn.js'; +import { TOOL_DEFS as OFFICE_DEFS } from '../engine/tools/office.js'; describe('normalizeToolNameForMetric', () => { it('passes built-in tool names through verbatim', () => { @@ -69,15 +70,19 @@ describe('normalizeToolNameForMetric', () => { // so every real call collapsed to 'unknown' in metrics. Pin the allowlist to // the actual TOOL_DEFS so it can't drift again. describe('metrics allowlist ↔ real tool definitions (audit regression)', () => { - const realNames = [...Object.keys(SLIDE_DEFS), ...Object.keys(MSLEARN_DEFS)]; + const realNames = [ + ...Object.keys(SLIDE_DEFS), + ...Object.keys(MSLEARN_DEFS), + ...Object.keys(OFFICE_DEFS), + ]; - it('every real slide/ms-learn tool normalizes to itself (not "unknown")', () => { + it('every real slide/ms-learn/office tool normalizes to itself (not "unknown")', () => { for (const name of realNames) { expect(normalizeToolNameForMetric(name)).toBe(name); } }); - it('every real slide/ms-learn tool is present in BUILTIN_TOOL_NAMES', () => { + it('every real slide/ms-learn/office tool is present in BUILTIN_TOOL_NAMES', () => { for (const name of realNames) { expect(BUILTIN_TOOL_NAMES.has(name)).toBe(true); } diff --git a/src/metrics/tool-name-allowlist.ts b/src/metrics/tool-name-allowlist.ts index ccf83a6..1469d1a 100644 --- a/src/metrics/tool-name-allowlist.ts +++ b/src/metrics/tool-name-allowlist.ts @@ -43,7 +43,7 @@ const BUILTIN_TOOL_NAMES_LIST: ReadonlyArray = [ // image.ts 'AnnotateImage', 'ReadImage', // office.ts - 'PdfToImages', 'ReadDocx', 'ReadExcel', 'ReadPdf', 'ReadPPTX', + 'PdfToImages', 'ReadDocx', 'ReadExcel', 'ReadMsg', 'ReadPdf', 'ReadPPTX', 'SplitDocxSections', 
'SplitExcelSheets', // data.ts 'SQLite', diff --git a/src/net/http-redirect.test.ts b/src/net/http-redirect.test.ts new file mode 100644 index 0000000..be5556a --- /dev/null +++ b/src/net/http-redirect.test.ts @@ -0,0 +1,70 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import type { Server } from 'http'; +import * as http from 'http'; +import { buildRedirectLocation, createHttpRedirectServer } from './http-redirect.js'; + +describe('buildRedirectLocation', () => { + it('pins the host and preserves path + query', () => { + expect(buildRedirectLocation('app.lan', 9876, '/tasks?id=5')).toBe('https://app.lan:9876/tasks?id=5'); + }); + + it('buildRedirectLocation never echoes a caller-supplied host string', () => { + // The function takes pinnedHost as a parameter — it cannot echo an attacker host + // because the host is always taken from the first argument, not from reqUrl. + // Pass a reqUrl that looks like it contains a host to confirm it is ignored. + const loc = buildRedirectLocation('app.lan', 9876, '/'); + expect(loc).toBe('https://app.lan:9876/'); + expect(loc).not.toContain('evil.attacker.com'); + }); + + it('strips CR/LF/space from the path to prevent header injection', () => { + const loc = buildRedirectLocation('app.lan', 9876, '/a\r\nSet-Cookie: x=1 b'); + expect(loc).not.toMatch(/[\r\n ]/); + }); + + it('omits the port for 443', () => { + expect(buildRedirectLocation('app.lan', 443, '/')).toBe('https://app.lan/'); + }); + + it('prefixes a leading slash when the url lacks one', () => { + expect(buildRedirectLocation('app.lan', 9876, 'x')).toBe('https://app.lan:9876/x'); + }); +}); + +describe('createHttpRedirectServer', () => { + let server: Server; + afterEach(() => new Promise((r) => (server ? 
server.close(() => r()) : r()))); + + it('responds 301 with the pinned https Location', async () => { + server = createHttpRedirectServer({ httpsPort: 9876, pinnedHost: 'app.lan' }); + await new Promise((r) => server.listen(0, '127.0.0.1', r)); + const addr = server.address(); + const port = typeof addr === 'object' && addr ? addr.port : 0; + const res = await fetch(`http://127.0.0.1:${port}/x?y=1`, { redirect: 'manual' }); + expect(res.status).toBe(301); + expect(res.headers.get('location')).toBe('https://app.lan:9876/x?y=1'); + }); + + it('ignores an attacker-controlled Host header (open-redirect guard)', async () => { + const srv = createHttpRedirectServer({ httpsPort: 9876, pinnedHost: 'app.lan' }); + await new Promise((r) => srv.listen(0, '127.0.0.1', r)); + try { + const addr = srv.address(); + const port = typeof addr === 'object' && addr ? addr.port : 0; + // Use node's http.request so we can set the Host header explicitly. + // (Some fetch implementations silently drop Host overrides.) + const loc = await new Promise((resolve, reject) => { + const req = http.request( + { hostname: '127.0.0.1', port, path: '/x', method: 'GET', headers: { Host: 'evil.attacker.com' } }, + (res) => resolve(res.headers['location'] ?? ''), + ); + req.on('error', reject); + req.end(); + }); + expect(loc).toBe('https://app.lan:9876/x'); + expect(loc).not.toContain('evil.attacker.com'); + } finally { + await new Promise((r) => srv.close(() => r())); + } + }); +}); diff --git a/src/net/http-redirect.ts b/src/net/http-redirect.ts new file mode 100644 index 0000000..be45563 --- /dev/null +++ b/src/net/http-redirect.ts @@ -0,0 +1,21 @@ +import { createServer, type Server } from 'http'; + +/** Build the redirect target from a PINNED host — never the request Host header. */ +export function buildRedirectLocation(pinnedHost: string, httpsPort: number, reqUrl: string): string { + const safePath = reqUrl.replace(/[\r\n ]/g, ''); + const hostPort = httpsPort === 443 ? 
pinnedHost : `${pinnedHost}:${httpsPort}`; + return `https://${hostPort}${safePath.startsWith('/') ? safePath : `/${safePath}`}`; +} + +export interface RedirectServerOpts { + httpsPort: number; + pinnedHost: string; +} + +export function createHttpRedirectServer(opts: RedirectServerOpts): Server { + return createServer((req, res) => { + const location = buildRedirectLocation(opts.pinnedHost, opts.httpsPort, req.url ?? '/'); + res.writeHead(301, { Location: location }); + res.end(); + }); +} diff --git a/src/net/self-signed.test.ts b/src/net/self-signed.test.ts new file mode 100644 index 0000000..a440a77 --- /dev/null +++ b/src/net/self-signed.test.ts @@ -0,0 +1,72 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtempSync, rmSync, existsSync, writeFileSync, statSync, readFileSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { X509Certificate } from 'crypto'; +import { ensureSelfSignedCert } from './self-signed.js'; + +let dir: string; +beforeEach(() => { dir = mkdtempSync(join(tmpdir(), 'selfsigned-')); }); +afterEach(() => { rmSync(dir, { recursive: true, force: true }); }); + +describe('ensureSelfSignedCert', () => { + it('generates a cert+key pair with localhost/127.0.0.1 in the SAN', () => { + const { cert, key } = ensureSelfSignedCert(dir, []); + expect(cert).toContain('BEGIN CERTIFICATE'); + expect(key).toContain('PRIVATE KEY'); + const san = new X509Certificate(cert).subjectAltName ?? ''; + expect(san).toContain('localhost'); + expect(san).toContain('127.0.0.1'); + }); + + it('includes extra SAN hosts', () => { + const { cert } = ensureSelfSignedCert(dir, ['maestro.lan']); + expect(new X509Certificate(cert).subjectAltName ?? 
'').toContain('maestro.lan'); + }); + + it('includes a redirect-style hostname in the SAN (guards server.ts augmentation)', () => { + // When an operator sets redirect_host: app.lan the server folds that name + // into selfSignedHosts before calling resolveTlsOptions — this test guards + // that the augmented host is actually present in the emitted certificate. + const { cert } = ensureSelfSignedCert(dir, ['redirect.example']); + expect(new X509Certificate(cert).subjectAltName ?? '').toContain('redirect.example'); + }); + + it('reuses an existing valid pair (stable across calls)', () => { + const a = ensureSelfSignedCert(dir, []); + const b = ensureSelfSignedCert(dir, []); + expect(b.cert).toBe(a.cert); + expect(b.key).toBe(a.key); + }); + + it('regenerates when the existing pair is mismatched/corrupt', () => { + const a = ensureSelfSignedCert(dir, []); + writeFileSync(join(dir, 'cert.pem'), 'not a cert'); + const b = ensureSelfSignedCert(dir, []); + expect(b.cert).toContain('BEGIN CERTIFICATE'); + expect(b.cert).not.toBe(a.cert); + }); + + it('writes the key file with 0600 permissions', () => { + ensureSelfSignedCert(dir, []); + expect(statSync(join(dir, 'key.pem')).mode & 0o777).toBe(0o600); + }); + + it('writes the cert file with 0600 permissions', () => { + ensureSelfSignedCert(dir, []); + expect(statSync(join(dir, 'cert.pem')).mode & 0o777).toBe(0o600); + }); + + it('treats host:port as a DNS name, not an IP address (regression for isIP fix)', () => { + const { cert } = ensureSelfSignedCert(dir, ['myhost:8443']); + const san = new X509Certificate(cert).subjectAltName ?? ''; + // Must appear as a DNS-typed SAN entry, not an IP Address entry + expect(san).toContain('DNS:myhost:8443'); + expect(san).not.toContain('IP Address:myhost:8443'); + }); + + it('always includes localhost even when hostname detection is junk', () => { + const { cert } = ensureSelfSignedCert(dir, [], { hostname: '' }); + expect(new X509Certificate(cert).subjectAltName ?? 
/** Optional knobs for self-signed certificate generation. */
export interface EnsureCertOpts {
  /** Override host detection (tests). */
  hostname?: string;
}

/** File names of the persisted pair inside the self-signed directory. */
const CERT_FILE = 'cert.pem';
const KEY_FILE = 'key.pem';

/**
 * Compute the de-duplicated list of names to place in the certificate SAN.
 *
 * The loopback names (`localhost`, `127.0.0.1`, `::1`) always come first,
 * followed by the machine hostname (when non-blank and not `localhost`),
 * followed by the operator-supplied extras.
 *
 * @param extra - Additional host names / IP literals; blank entries are dropped.
 * @param hostname - Detected (or overridden) machine hostname.
 * @returns Unique names in insertion order.
 */
function sanHosts(extra: string[], hostname: string): string[] {
  const names = ['localhost', '127.0.0.1', '::1'];
  const host = hostname.trim();
  if (host && host !== 'localhost') names.push(host);
  for (const raw of extra) {
    // Store the trimmed form: a padded entry would otherwise end up verbatim
    // in the SAN and would not de-duplicate against its clean twin.
    const name = raw.trim();
    if (name) names.push(name);
  }
  return [...new Set(names)];
}

/** True when `value` is a valid IPv4 or IPv6 literal (so `host:port` is NOT an IP). */
function isIp(value: string): boolean {
  return isIP(value) !== 0;
}

// readValidPair only validates that the stored cert and key match each other;
// certificate expiry is intentionally NOT checked there.
/**
 * Load the persisted cert/key pair from `dir` if both files exist, parse, and
 * belong together. Expiry is not inspected here, so an expired but matching
 * pair is still returned.
 *
 * @returns The PEM pair, or `null` when it is missing, unreadable or mismatched.
 */
function readValidPair(dir: string): { cert: string; key: string } | null {
  const certPath = join(dir, CERT_FILE);
  const keyPath = join(dir, KEY_FILE);
  const bothPresent = existsSync(certPath) && existsSync(keyPath);
  if (!bothPresent) return null;

  try {
    const pair = {
      cert: readFileSync(certPath, 'utf-8'),
      key: readFileSync(keyPath, 'utf-8'),
    };
    const belongsTogether = new X509Certificate(pair.cert).checkPrivateKey(createPrivateKey(pair.key));
    return belongsTogether ? pair : null;
  } catch (err) {
    logger.warn(`[self-signed] existing cert/key unreadable, will regenerate: ${(err as Error).message}`);
    return null;
  }
}

/**
 * Return a self-signed cert/key pair stored under `dir`, generating and
 * persisting a new one only when no usable pair is already there.
 *
 * @param dir - Directory holding `cert.pem` / `key.pem` (created if needed).
 * @param extraHosts - Extra SAN entries on top of the loopback names and hostname.
 * @param opts - Test hooks (hostname override).
 * @returns The PEM-encoded certificate and private key.
 */
export function ensureSelfSignedCert(
  dir: string,
  extraHosts: string[],
  opts: EnsureCertOpts = {},
): { cert: string; key: string } {
  const reusable = readValidPair(dir);
  if (reusable !== null) return reusable;

  mkdirSync(dir, { recursive: true });
  const names = sanHosts(extraHosts, opts.hostname ?? osHostname());
  // GeneralName type 7 = iPAddress, type 2 = dNSName.
  const altNames = names.map((name) => {
    if (isIp(name)) return { type: 7, ip: name };
    return { type: 2, value: name };
  });
  const generated = selfsigned.generate([{ name: 'commonName', value: names[0] }], {
    keySize: 2048,
    days: 825,
    algorithm: 'sha256',
    extensions: [{ name: 'subjectAltName', altNames }],
  });

  // Write both files to temporary names first, then rename into place
  // (key before cert) so a reader never sees a half-written file.
  const certPath = join(dir, CERT_FILE);
  const keyPath = join(dir, KEY_FILE);
  const certTmp = `${certPath}.tmp`;
  const keyTmp = `${keyPath}.tmp`;
  writeFileSync(certTmp, generated.cert, { mode: 0o600 });
  writeFileSync(keyTmp, generated.private, { mode: 0o600 });
  renameSync(keyTmp, keyPath);
  renameSync(certTmp, certPath);

  return { cert: generated.cert, key: generated.private };
}
'./tls-options.js'; +import { SERVER_TLS_DEFAULTS } from '../server/config.js'; +import { ensureSelfSignedCert } from './self-signed.js'; + +let dir: string; +beforeEach(() => { dir = mkdtempSync(join(tmpdir(), 'tlsopt-')); }); +afterEach(() => { rmSync(dir, { recursive: true, force: true }); }); + +describe('resolveTlsOptions', () => { + it('returns a self-signed pair when no cert_file is configured', () => { + const opts = resolveTlsOptions({ ...SERVER_TLS_DEFAULTS, enabled: true, selfSignedDir: dir }); + expect(opts.cert).toContain('BEGIN CERTIFICATE'); + expect(opts.key).toContain('PRIVATE KEY'); + expect(opts.minVersion).toBe('TLSv1.2'); + }); + + it('uses provided PEM files when cert_file and key_file are set', () => { + const { cert, key } = ensureSelfSignedCert(dir, []); + writeFileSync(join(dir, 'my.crt'), cert); + writeFileSync(join(dir, 'my.key'), key); + const opts = resolveTlsOptions({ + ...SERVER_TLS_DEFAULTS, enabled: true, + certFile: join(dir, 'my.crt'), keyFile: join(dir, 'my.key'), + }); + expect(opts.cert).toBe(cert); + expect(opts.key).toBe(key); + }); + + it('fails closed (throws) when cert_file is unreadable — never self-signs', () => { + expect(() => + resolveTlsOptions({ + ...SERVER_TLS_DEFAULTS, enabled: true, + certFile: join(dir, 'missing.crt'), keyFile: join(dir, 'missing.key'), + }), + ).toThrow(); + }); + + it('fails closed when the provided cert is malformed PEM', () => { + writeFileSync(join(dir, 'bad.crt'), 'garbage'); + writeFileSync(join(dir, 'bad.key'), 'garbage'); + expect(() => + resolveTlsOptions({ + ...SERVER_TLS_DEFAULTS, enabled: true, + certFile: join(dir, 'bad.crt'), keyFile: join(dir, 'bad.key'), + }), + ).toThrow(); + }); + + it('fails closed when provided cert and key do not match', () => { + const a = ensureSelfSignedCert(dir, []); + const dir2 = mkdtempSync(join(tmpdir(), 'tlsopt2-')); + const b = ensureSelfSignedCert(dir2, ['other']); + writeFileSync(join(dir, 'a.crt'), a.cert); + writeFileSync(join(dir, 'b.key'), 
/** TLS material ready to hand to `https.createServer`. */
export interface ResolvedTlsOptions {
  cert: string;
  key: string;
  minVersion: 'TLSv1.2' | 'TLSv1.3';
}

/**
 * Resolve TLS material. Any failure on an operator-provided cert is FATAL
 * (throws) — never silently fall back to self-signed or HTTP, which would
 * downgrade an operator who deliberately configured a real certificate.
 *
 * @param tls - Merged TLS configuration.
 * @returns PEM cert/key plus the minimum protocol version.
 * @throws When only one of `certFile`/`keyFile` is set, when either file is
 *   unreadable or malformed, or when the certificate and key do not match.
 */
export function resolveTlsOptions(tls: ServerTlsConfig): ResolvedTlsOptions {
  const hasCert = !!tls.certFile;
  const hasKey = !!tls.keyFile;
  // A half-configured pair is still an operator-provided certificate: fail
  // closed here instead of quietly generating a self-signed one.
  if (hasCert !== hasKey) {
    throw new Error('server.tls: set both cert_file and key_file, or neither');
  }

  if (tls.certFile && tls.keyFile) {
    const cert = readFileSync(tls.certFile, 'utf-8');
    const key = readFileSync(tls.keyFile, 'utf-8');
    // Both constructors throw on malformed PEM, which is the desired fail-closed path.
    const x509 = new X509Certificate(cert);
    const priv = createPrivateKey(key);
    if (!x509.checkPrivateKey(priv)) {
      throw new Error('server.tls: cert_file and key_file do not match');
    }
    return { cert, key, minVersion: tls.minVersion };
  }

  const { cert, key } = ensureSelfSignedCert(tls.selfSignedDir, tls.selfSignedHosts);
  return { cert, key, minVersion: tls.minVersion };
}
mergeServerConfig(undefined, { freshInstall: false }); + expect(cfg.tls.enabled).toBe(false); + }); + + it('fresh install with absent block enables TLS', () => { + const cfg = mergeServerConfig(undefined, { freshInstall: true }); + expect(cfg.tls.enabled).toBe(true); + }); + + it('an explicit enabled value always wins over the freshInstall default', () => { + expect(mergeServerConfig({ tls: { enabled: false } }, { freshInstall: true }).tls.enabled).toBe(false); + expect(mergeServerConfig({ tls: { enabled: true } }, { freshInstall: false }).tls.enabled).toBe(true); + }); + + it('fills defaults for unspecified tls fields', () => { + const cfg = mergeServerConfig({ tls: { enabled: true } }, { freshInstall: false }); + expect(cfg.tls.minVersion).toBe(SERVER_TLS_DEFAULTS.minVersion); + expect(cfg.tls.selfSignedDir).toBe(SERVER_TLS_DEFAULTS.selfSignedDir); + expect(cfg.tls.httpRedirect).toBe(true); + expect(cfg.tls.httpRedirectPort).toBe(9080); + expect(cfg.tls.selfSignedHosts).toEqual([]); + }); + + it('throws when http_redirect_port equals the https port', () => { + expect(() => + mergeServerConfig({ tls: { enabled: true, httpRedirectPort: 9876 } }, { freshInstall: false, httpsPort: 9876 }), + ).toThrow(/redirect.*port/i); + }); + + it('throws when only one of cert_file/key_file is set', () => { + expect(() => + mergeServerConfig({ tls: { enabled: true, certFile: '/x/cert.pem' } }, { freshInstall: false }), + ).toThrow(/cert_file.*key_file|both/i); + }); + + it('throws when key_file is set without cert_file', () => { + expect(() => + mergeServerConfig({ tls: { enabled: true, keyFile: '/x/key.pem' } }, { freshInstall: false }), + ).toThrow(/cert_file.*key_file|both/i); + }); + + it('does not throw on port collision when httpRedirect is false', () => { + expect(() => + mergeServerConfig( + { tls: { enabled: true, httpRedirect: false, httpRedirectPort: 9876 } }, + { freshInstall: false, httpsPort: 9876 }, + ), + ).not.toThrow(); + }); + + it('disabled TLS with a lone 
/** Fully-resolved `server.tls` settings. */
export interface ServerTlsConfig {
  enabled: boolean;
  certFile: string | null;
  keyFile: string | null;
  minVersion: 'TLSv1.2' | 'TLSv1.3';
  selfSignedDir: string;
  selfSignedHosts: string[];
  httpRedirect: boolean;
  httpRedirectPort: number;
  /**
   * Host used to build the HTTP→HTTPS redirect `Location` header.
   * When null the bind host is used instead.
   * Intentionally NOT taken from the request Host header to prevent open-redirect attacks.
   */
  redirectHost: string | null;
}

/** Fully-resolved `server` block. */
export interface ServerConfig {
  tls: ServerTlsConfig;
}

/** Defaults applied to every `server.tls` field the operator did not set. */
export const SERVER_TLS_DEFAULTS: ServerTlsConfig = {
  enabled: false,
  certFile: null,
  keyFile: null,
  minVersion: 'TLSv1.2',
  selfSignedDir: './data/tls',
  selfSignedHosts: [],
  httpRedirect: true,
  httpRedirectPort: 9080,
  redirectHost: null,
};

/** Context needed to merge the `server` block. */
export interface MergeServerOpts {
  /** Default for `tls.enabled` when the operator did not set it explicitly. */
  freshInstall: boolean;
  /** HTTPS listen port, used to detect a clash with the redirect port. */
  httpsPort?: number;
}

/** Coerce a raw `selfSignedHosts` value into a fresh array of strings. */
function normalizeSelfSignedHosts(raw: unknown): string[] {
  // A scalar string must not be spread into single characters.
  if (typeof raw === 'string') return [raw];
  if (!Array.isArray(raw)) return [...SERVER_TLS_DEFAULTS.selfSignedHosts];
  return raw.filter((h): h is string => typeof h === 'string');
}

/**
 * Merge an operator-supplied (possibly absent or partial) `server` block with
 * the defaults and validate the result.
 *
 * An explicit `enabled` always wins; otherwise `opts.freshInstall` decides.
 * Fields that are absent, `undefined` or `null` fall back to their default
 * instead of overwriting it.
 *
 * @param partial - Parsed `server` block, or `undefined` when not configured.
 * @param opts - Install mode and (optionally) the HTTPS port.
 * @returns A complete configuration; `selfSignedHosts` is always a fresh array.
 * @throws When TLS is enabled and only one of cert/key is set, or when the
 *   redirect listener would collide with the HTTPS port.
 */
export function mergeServerConfig(
  partial: { tls?: Partial<ServerTlsConfig> | null } | null | undefined,
  opts: MergeServerOpts,
): ServerConfig {
  const overrides: Partial<ServerTlsConfig> = partial?.tls ?? {};
  const defaults = SERVER_TLS_DEFAULTS;

  // Field-by-field `??` rather than an object spread: a spread would let an
  // explicit `undefined`/`null` override silently erase the default.
  const tls: ServerTlsConfig = {
    enabled: overrides.enabled ?? opts.freshInstall,
    certFile: overrides.certFile ?? defaults.certFile,
    keyFile: overrides.keyFile ?? defaults.keyFile,
    minVersion: overrides.minVersion ?? defaults.minVersion,
    selfSignedDir: overrides.selfSignedDir ?? defaults.selfSignedDir,
    selfSignedHosts: normalizeSelfSignedHosts(overrides.selfSignedHosts),
    httpRedirect: overrides.httpRedirect ?? defaults.httpRedirect,
    httpRedirectPort: overrides.httpRedirectPort ?? defaults.httpRedirectPort,
    redirectHost: overrides.redirectHost ?? defaults.redirectHost,
  };

  // Validation only applies when TLS is actually going to be used.
  if (tls.enabled) {
    const hasCert = !!tls.certFile;
    const hasKey = !!tls.keyFile;
    if (hasCert !== hasKey) {
      throw new Error('server.tls: set both cert_file and key_file, or neither');
    }
    if (opts.httpsPort != null && tls.httpRedirect && tls.httpRedirectPort === opts.httpsPort) {
      throw new Error(`server.tls: http_redirect_port (${tls.httpRedirectPort}) must differ from the HTTPS port`);
    }
  }
  return { tls };
}
/**
 * Derive a concise task title from the Mission Brief `goal` set by the agent.
 * Used at runtime (no LLM call): the agent already restates the user's
 * requirement as `goal` via MissionUpdate, so we take its first non-empty
 * line, strip markdown list/heading markers and URLs, and cap the length.
 *
 * @param goal - Free-form goal text, possibly multi-line markdown.
 * @returns At most 40 characters (Unicode code points) of title text, or ''
 *   when nothing usable remains so callers can skip the update.
 */
export function buildTitleFromGoal(goal: string): string {
  const firstLine = goal
    .split('\n')
    .map((line) => line.trim())
    .find((line) => line.length > 0) ?? '';
  const cleaned = firstLine.replace(/^#{1,6}\s*/, '').replace(/^[-*+]\s+/, '');
  if (isUrlOnlyTitleInput(cleaned)) return '';
  // Cap by code points, not UTF-16 code units: `String.prototype.slice` could
  // cut an emoji / astral character in half and leave a lone surrogate.
  return Array.from(stripUrlsForTitle(cleaned)).slice(0, 40).join('').trim();
}
lands in the per-user daily ledger. Best-effort: the recorder helper + // swallows write errors so a DB hiccup never kills an agent stream. + // Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md + setLlmUsageRecorder((event) => { + repo.incrementLlmUsage({ + userId: event.userId, + source: event.source, + model: event.model, + route: event.route, + tokensIn: event.tokensIn, + tokensOut: event.tokensOut, + requests: 1, + }); + }); + // 起動時に孤立ジョブを回復 await repo.recoverOrphanedJobs(); @@ -135,7 +153,7 @@ export async function start(opts: StartWorkerOptions = {}): Promise { config.provider.workers.find(w => w.enabled !== false && w.roles?.includes('title')) ?? config.provider.workers[0]; let titleClient: OpenAICompatClient | null = null; - let generateTitle: ((body: string) => Promise) | undefined; + let generateTitle: ((body: string, ownerId?: string) => Promise) | undefined; if (titleWorker) { const titleModel = titleWorker.model ?? config.provider.model; @@ -156,18 +174,24 @@ export async function start(opts: StartWorkerOptions = {}): Promise { titleClient = new OpenAICompatClient( titleWorker.endpoint, titleRoutingKey, - undefined, + titleWorker.apiKey, config.provider.retry, (config.provider.timeoutMinutes ?? 10) * 60 * 1000, + undefined, + undefined, + undefined, + // proxy mode so title/classification usage is recorded as + // source='gateway' with the backendId route (not mislabeled 'direct'). 
+ { proxy: titleWorker.proxy === true }, ); - generateTitle = async (body: string): Promise => { + generateTitle = async (body: string, ownerId?: string): Promise => { const fallback = buildTitleFallback(body); const prompt = buildTitlePrompt(body); if (!prompt) return fallback; let title = ''; try { - for await (const event of titleClient!.chat([{ role: 'user', content: prompt }])) { + for await (const event of titleClient!.chat([{ role: 'user', content: prompt }], undefined, undefined, { userId: ownerId })) { if (event.type === 'text') title += event.text; if (event.type === 'error') return fallback; if (event.type === 'done') break; @@ -232,7 +256,7 @@ export async function start(opts: StartWorkerOptions = {}): Promise { const selectPiece = titleClient ? async (body: string, fileNames: string[], userId?: string): Promise => { const pieces = pieceCatalog.getForUser(userId ?? 'local'); - const result = await classifyPiece(titleClient!, body, pieces, fileNames); + const result = await classifyPiece(titleClient!, body, pieces, fileNames, undefined, userId ?? 'local'); return result ?? 'chat'; } : undefined; diff --git a/src/worker.ts b/src/worker.ts index c2e2e6d..f35079c 100644 --- a/src/worker.ts +++ b/src/worker.ts @@ -818,7 +818,7 @@ export class Worker { ]; let answer = ''; - for await (const event of llmClient.chat(messages)) { + for await (const event of llmClient.chat(messages, undefined, undefined, { userId: parentJob?.ownerId ?? 'local' })) { if (event.type === 'text') { answer += event.text; } else if (event.type === 'error') { @@ -1779,6 +1779,8 @@ export class Worker { // Same credential as normal task LLM calls — a key-enforcing // gateway 401s reflection without it. 
llmApiKey: this.getWorkerDef().apiKey, + llmProxy: this.getWorkerDef().proxy === true, + llmContextLimitTokens: this.contextLimitTokens, }, job ); diff --git a/ui/src/App.tsx b/ui/src/App.tsx index e724783..ac40ba8 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -21,7 +21,7 @@ import { useBackdropClose } from './lib/useBackdropClose'; import { TopBar } from './components/layout/TopBar'; import { NavDrawer } from './components/layout/NavDrawer'; import { useEdgeSwipe } from './hooks/useEdgeSwipe'; -import { visibleNavItemsFor, useCompactNav } from './components/layout/TopBar'; +import { visibleNavItemsFor } from './components/layout/TopBar'; import { ResizeHandle } from './components/layout/ResizeHandle'; import { TaskListPanel } from './components/list/TaskListPanel'; import { ChatPane } from './components/chat/ChatPane'; @@ -40,6 +40,7 @@ import { UsersPage } from './pages/UsersPage'; import { AdminCaptchaPage } from './pages/AdminCaptchaPage'; import { SharedView } from './pages/SharedView'; import { UserFolderTab } from './components/userfolder/UserFolderTab'; +import { UsagePage } from './components/usage/UsagePage'; import { HelpPage } from './pages/HelpPage'; import { TaskListWithSidePanel } from './components/dashboard/TaskListWithSidePanel'; import type { ConsoleStatus } from './lib/ssh-console-types'; @@ -163,7 +164,9 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable const tabletDetailBackdrop = useBackdropClose(() => setTabletDetailOpen(false)); const [navDrawerOpen, setNavDrawerOpen] = useState(false); const hamburgerRef = useRef(null); - const compactMode = useCompactNav(isAdmin, authEnabled); + // compactMode is measured by TopBar (actual fit) and reported up here so the + // nav drawer / edge-swipe stay in sync with whether the hamburger is shown. 
+ const [compactMode, setCompactMode] = useState(false); const visibleNav = visibleNavItemsFor(isAdmin, authEnabled); const openNavDrawer = () => { @@ -355,10 +358,10 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable sortMode: sort, searchQuery: search, activeTaskId: localTaskId, - // Owner scope (自分/すべて). Only active when auth is on — in no-auth mode + // Owner scope (自分/他のユーザ). Only active when auth is on — in no-auth mode // every task is owned by 'local' and the toggle would be meaningless. scope: urlState.scope, - onScopeChange: (scope: 'mine' | 'all') => setUrlState(prev => ({ ...prev, scope })), + onScopeChange: (scope: 'mine' | 'others') => setUrlState(prev => ({ ...prev, scope })), currentUserId: user?.id ?? null, scopeEnabled: authEnabled && !!user, onStatusChange: (s: string) => setUrlState(prev => ({ ...prev, status: s as typeof status })), @@ -451,6 +454,7 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable hamburgerButtonRef={hamburgerRef} navDrawerOpen={navDrawerOpen} onOpenCommandK={() => setCmdkOpen(true)} + onCompactChange={setCompactMode} />
@@ -471,6 +475,7 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable {page === 'users' && isAdmin && authEnabled &&
} {page === 'captcha' &&
} {page === 'userfolder' &&
} + {page === 'usage' &&
} {page === 'help' &&
{ setCreateInitialPiece('help'); setShowCreateDialog(true); }} selectedId={urlState.help} onSelect={(id) => setUrlState(prev => ({ ...prev, help: id }))} />
} {page === 'tasks' &&
@@ -563,8 +568,10 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable )}
- {/* タブレット: 2カラム (sm 〜 lg) */} -
+ {/* タブレット: 2カラム (sm 〜 xl)。3列デスクトップは横幅が足りないと詳細列が + 狭まりタブが折り返すため、切替を xl(1280px) まで上げて中間帯はこの + 2列+詳細オーバーレイで運用する。 */} +
} @@ -583,9 +590,9 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable
- {/* デスクトップ: >= lg (1024px). normal=3 列、focused=rail/chat/handle/ws=4 列 */} + {/* デスクトップ: >= xl (1280px). normal=3 列、focused=rail/chat/handle/ws=4 列 */}
@@ -642,7 +649,7 @@ function AppInner({ isAdmin, authEnabled, user }: { isAdmin: boolean; authEnable {/* Tablet: detail overlay */} {tabletDetailOpen && panelOpen && ( -
+
/**
 * Trigger on-demand AI title regeneration. Owner/admin only.
 *
 * @param taskId - Local task id.
 * @returns The new title.
 * @throws Error with the server's `error` message when present, otherwise
 *   'Failed to regenerate title' (also for non-JSON or title-less responses).
 */
export async function regenerateTaskTitle(taskId: number): Promise<string> {
  const res = await fetch(`${BASE}/local/tasks/${taskId}/regenerate-title`, { method: 'POST' });

  // Parse defensively: an error response may carry a non-JSON body, and that
  // must surface as the intended message rather than a JSON SyntaxError.
  let data: unknown = null;
  try {
    data = await res.json();
  } catch {
    data = null;
  }
  const payload: { error?: unknown; title?: unknown } =
    typeof data === 'object' && data !== null ? data : {};

  if (!res.ok) {
    throw new Error(typeof payload.error === 'string' ? payload.error : 'Failed to regenerate title');
  }
  if (typeof payload.title !== 'string') {
    throw new Error('Failed to regenerate title');
  }
  return payload.title;
}

/**
 * Build the raw-file URL for a task file with the `trusted=1` query flag set.
 *
 * @param taskId - Local task id.
 * @param section - Task file section.
 * @param path - File path inside the section.
 */
export function getTrustedLocalHtmlUrl(
  taskId: number,
  section: 'workspace' | 'input' | 'output' | 'logs',
  path: string,
): string {
  const params = new URLSearchParams({ section, path, trusted: '1' });
  return `${BASE}/local/tasks/${taskId}/files/raw?${params.toString()}`;
}

// ============================================================
// LLM usage dashboard (per-user, gateway + direct).
// Spec: docs/superpowers/specs/2026-06-11-llm-usage-aggregation-design.md

/** Token / request counters for one slice of usage. */
export interface UsageCounters {
  tokensIn: number;
  tokensOut: number;
  requests: number;
}

/** Usage for one time bucket, split by source. */
export interface UsageBucket {
  bucket: string; // 'YYYY-MM-DD' | 'YYYY-Www' | 'YYYY-MM'
  gateway: UsageCounters;
  direct: UsageCounters;
}

/** Usage totals for one user. */
export interface UsageByUser extends UsageCounters {
  userId: string;
  /** Resolved display name (real users); 'local' / 'system' for sentinels. */
  displayName: string;
}

/** Response body of `GET /usage/daily`. */
export interface UsageDailyResponse {
  from: string;
  to: string;
  granularity: 'day' | 'week' | 'month';
  scope: 'all' | 'self';
  series: UsageBucket[];
  totals: { gateway: UsageCounters; direct: UsageCounters };
  byUser?: UsageByUser[]; // admin / local mode only
}

/**
 * Fetch aggregated LLM usage.
 *
 * @param params - Optional date range and bucket granularity; unset fields are
 *   left out of the query string.
 * @throws Error including the HTTP status when the request fails.
 */
export async function getUsageDaily(params: {
  from?: string;
  to?: string;
  granularity?: 'day' | 'week' | 'month';
}): Promise<UsageDailyResponse> {
  const qs = new URLSearchParams();
  if (params.from) qs.set('from', params.from);
  if (params.to) qs.set('to', params.to);
  if (params.granularity) qs.set('granularity', params.granularity);
  const q = qs.toString();
  const res = await fetch(`${BASE}/usage/daily${q ? `?${q}` : ''}`);
  if (!res.ok) throw new Error(`Failed to load usage (${res.status})`);
  return res.json();
}
-
+
{tabs.map(tab => { const active = activeTab === tab.id; const pending = active && tabTransitionPending; @@ -237,7 +237,7 @@ export function DetailHeader({ title, subtitle, tabs, activeTab, tabTransitionPe role="tab" aria-selected={active} onClick={() => onTabChange(tab.id)} - className={`pb-2.5 text-xs border-b-2 active:scale-[0.97] transition-[transform,color,border-color] duration-100 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent-ring inline-flex items-center gap-1.5 ${ + className={`whitespace-nowrap pb-2.5 text-xs border-b-2 active:scale-[0.97] transition-[transform,color,border-color] duration-100 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent-ring inline-flex items-center gap-1.5 ${ active ? 'border-accent text-slate-900 font-semibold' : 'border-transparent text-slate-500 font-medium hover:text-slate-800' diff --git a/ui/src/components/detail/DetailPanel.tsx b/ui/src/components/detail/DetailPanel.tsx index f3ea1ea..f36587d 100644 --- a/ui/src/components/detail/DetailPanel.tsx +++ b/ui/src/components/detail/DetailPanel.tsx @@ -104,7 +104,27 @@ export function LocalDetailPanel({ refetchInterval: 5000, }); const showSshTab = consoleStatus?.active === true; - const visibleTabs = LOCAL_TABS.filter((t) => t.id !== 'ssh' || showSshTab); + + // Browser tab visibility: mirror SSH — show only once a viewable browser + // session is live for this task. `available: true` means a noVNC session + // exists (the agent has actually used the browser); every other case + // (no_session / headless_mode / display_unavailable / novnc_not_installed) + // is non-viewable, so the tab would show nothing useful and stays hidden. + // Shares the ['task-session', id] query with BrowserTab (deduped by key). 
+ const { data: browserSession } = useQuery<{ available: boolean }>({ + queryKey: ['task-session', task?.id], + queryFn: async () => { + const r = await fetch(`/api/local/browser/sessions/task-session/${task!.id}`); + return r.ok ? r.json() : { available: false }; + }, + enabled: !!task, + refetchInterval: 5000, + }); + const showBrowserTab = browserSession?.available === true; + + const visibleTabs = LOCAL_TABS.filter( + (t) => (t.id !== 'ssh' || showSshTab) && (t.id !== 'browser' || showBrowserTab), + ); const handleStartEdit = () => { if (!task) return; diff --git a/ui/src/components/detail/tabs/OverviewTab.tsx b/ui/src/components/detail/tabs/OverviewTab.tsx index 9289930..5fba769 100644 --- a/ui/src/components/detail/tabs/OverviewTab.tsx +++ b/ui/src/components/detail/tabs/OverviewTab.tsx @@ -1,7 +1,7 @@ import { useEffect, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { useMutation, useQueryClient } from '@tanstack/react-query'; -import { LocalTask, MissionBrief, SubtaskActivity, putFeedback, updateMissionBrief } from '../../../api'; +import { LocalTask, MissionBrief, SubtaskActivity, putFeedback, updateMissionBrief, updateLocalTask, regenerateTaskTitle } from '../../../api'; import { StatusBadge } from '../../shared/StatusBadge'; import { SubtasksPanel, type SubtaskFilePreviewHandler } from './SubtasksPanel'; import { ContextUsageGauge } from '../ContextUsageGauge'; @@ -272,6 +272,116 @@ function MissionCard({ task }: { task: LocalTask }) { ); } +/** + * Editable task title with on-demand AI regeneration. The title is set to a + * cheap fallback at creation and upgraded by the agent (derived from the + * Mission Brief goal) during the run. A manual edit pins it (title_source = + * 'user') so the agent never overwrites it afterwards. 
+ */ +function TaskTitleRow({ task }: { task: LocalTask }) { + const { t } = useTranslation('detail'); + const qc = useQueryClient(); + const [editing, setEditing] = useState(false); + const [draft, setDraft] = useState(task.title); + const [error, setError] = useState(null); + + // Sync the displayed title with server-side updates (the agent rewrites it + // mid-run) unless the user is actively editing. + useEffect(() => { + if (!editing) setDraft(task.title); + }, [task.title, editing]); + + const invalidate = () => { + qc.invalidateQueries({ queryKey: ['localTaskDetail', task.id] }); + qc.invalidateQueries({ queryKey: ['localTasks'] }); + }; + + const saveMutation = useMutation({ + mutationFn: () => updateLocalTask(task.id, { title: draft.trim() }), + onSuccess: () => { setEditing(false); setError(null); invalidate(); }, + onError: (err: unknown) => setError(err instanceof Error ? err.message : 'Failed to save title'), + }); + + const regenMutation = useMutation({ + mutationFn: () => regenerateTaskTitle(task.id), + onSuccess: () => { setError(null); invalidate(); }, + onError: (err: unknown) => setError(err instanceof Error ? err.message : 'Failed to regenerate title'), + }); + + const save = () => { if (draft.trim()) saveMutation.mutate(); }; + + if (editing) { + return ( +
+ setDraft(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') { e.preventDefault(); save(); } + if (e.key === 'Escape') { setEditing(false); setError(null); setDraft(task.title); } + }} + className="w-full px-2.5 py-1.5 text-lg font-extrabold text-slate-900 border border-hairline rounded-md focus:outline-none focus:ring-2 focus:ring-accent-ring focus:border-accent" + /> + {error &&
{error}
} +
+ + +
+
+ ); + } + + return ( +
+
+
{task.title}
+
+ + +
+
+ {error &&
{error}
} +
+ ); +} + interface OverviewTabProps { task: LocalTask; subtaskActivities?: SubtaskActivity[]; @@ -284,7 +394,7 @@ export function OverviewTab({ task, subtaskActivities, onSubtaskFilePreview }: O return (
-
{task.title}
+
{task.pieceName} diff --git a/ui/src/components/files/FileBrowser.tsx b/ui/src/components/files/FileBrowser.tsx index 3efb789..bc1d5a1 100644 --- a/ui/src/components/files/FileBrowser.tsx +++ b/ui/src/components/files/FileBrowser.tsx @@ -2,6 +2,8 @@ import { useEffect, useMemo, useRef, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { LocalFileEntry, getLocalFileRawUrl } from '../../api'; import { isPreviewable, formatFileDate } from '../../lib/utils'; +import { splitFileName } from '../../lib/fileType'; +import { FileTypeIcon } from './FileTypeIcon'; interface FileBrowserProps { section: 'workspace' | 'input' | 'output' | 'logs'; @@ -211,20 +213,30 @@ export function FileBrowser({ key={`${entry.kind}:${entry.path}`} className="flex items-center gap-2 px-2.5 py-1.5 rounded-md bg-canvas border border-hairline hover:bg-surface transition-colors" > -