1831 lines
75 KiB
TypeScript
1831 lines
75 KiB
TypeScript
import { ToolDef } from '../../llm/openai-compat.js';
|
||
import type { ToolContext, ToolResult } from './core.js';
|
||
import { resolveOutputPathWithin } from './core.js';
|
||
import { logger } from '../../logger.js';
|
||
import { recorder } from '../browser-recorder.js';
|
||
import type { RecordedAction, FrameChainEntry } from '../browser-recorder.js';
|
||
import * as dns from 'dns';
|
||
import * as path from 'path';
|
||
import { existsSync, mkdirSync, appendFileSync, statSync } from 'fs';
|
||
import { fileURLToPath, pathToFileURL } from 'url';
|
||
import type { Browser, BrowserContext, Download, Frame, Locator, Page } from 'playwright';
|
||
import { SessionManager, type BrowserSession, CAPTCHA_POOL_SESSION_ID } from '../browser-session.js';
|
||
import { loadConfig } from '../../config.js';
|
||
import { buildNovncPath } from '../../bridge/novnc-proxy.js';
|
||
import { checkSSRF, isPrivateIPv4, isPrivateIPv6, isHostAllowed } from './shared/ssrf.js';
|
||
import { htmlToText } from './shared/html.js';
|
||
import { detectAuthExpiry } from '../browser-session-expiry.js';
|
||
|
||
export { detectAuthExpiry as runAuthCheck } from '../browser-session-expiry.js';
|
||
|
||
/**
 * Runs the auth-expiry heuristic against the page's current state.
 *
 * Does nothing (returns null) unless the job has both a browser session
 * profile id and a profile bound to it.
 *
 * @param page - Page to inspect; its current URL is used as the final URL.
 * @param ctx - Tool context carrying the session profile and the optional
 *   `onAuthExpired` callback.
 * @returns The expiry reason when the session is judged expired (after
 *   notifying `ctx.onAuthExpired`), otherwise null.
 */
async function checkAuthExpiry(page: Page, ctx: ToolContext): Promise<string | null> {
  const { browserSessionProfileId: profileId, browserSessionProfile: profile } = ctx;
  if (!profileId || !profile) return null;

  // Without a configured selector the "logged in" marker counts as present.
  let loggedInSelectorPresent = true;
  if (profile.loggedInSelector) {
    // A failing lookup is treated the same as "element not found".
    const handle = await page.$(profile.loggedInSelector).catch(() => null);
    loggedInSelectorPresent = Boolean(handle);
  }

  // The real status code is hard to obtain reliably at this point, so 200 is
  // passed as a placeholder to keep detectAuthExpiry from flagging a 401; the
  // verdict rests on the URL pattern and the selector.
  const verdict = detectAuthExpiry({
    profile,
    finalUrl: page.url(),
    statusCode: 200,
    loggedInSelectorPresent,
  });
  if (!verdict.expired) return null;

  ctx.onAuthExpired?.(profileId, verdict.reason);
  return verdict.reason;
}
|
||
|
||
/**
 * Reports whether `child` is `parent` itself or lies somewhere beneath it.
 *
 * Both arguments are resolved to absolute paths first, so relative inputs
 * and `..` segments are normalised before the comparison. This is a purely
 * lexical check; symlinks are not followed.
 *
 * @param parent - Directory that is supposed to contain `child`.
 * @param child - Path to test.
 * @returns true when `child` equals `parent` or is a descendant of it.
 */
function isPathWithin(parent: string, child: string): boolean {
  const relative = path.relative(path.resolve(parent), path.resolve(child));
  if (relative === '') return true;
  // On Windows a path on another drive comes back absolute.
  if (path.isAbsolute(relative)) return false;
  // Only a leading ".." *segment* escapes the parent. A child whose first
  // segment merely starts with two dots (e.g. "..hidden/file") is still inside.
  return relative !== '..' && !relative.startsWith(`..${path.sep}`);
}
|
||
|
||
/**
 * Maps a parsed `file:` URL onto a path inside the workspace.
 *
 * @param parsed - Already-parsed URL; converted with `fileURLToPath`.
 * @param workspacePath - Workspace root the file must live under.
 * @returns `{ filePath, url }` with the normalised file URL (the original
 *   query string and fragment are carried over), or `{ error }` when the URL
 *   cannot be converted or points outside the workspace.
 */
function resolveWorkspaceFileUrl(parsed: URL, workspacePath: string): { filePath: string; url: string } | { error: string } {
  let target: string;
  try {
    target = fileURLToPath(parsed);
  } catch (e) {
    return { error: `Invalid file URL: ${(e as Error).message}` };
  }

  const root = path.resolve(workspacePath);
  // Backwards compatibility: `file:///workspace/...` used to be documented as
  // a virtual workspace root. It is no longer advertised, but in-flight jobs
  // and LLMs trained on the old convention may still emit it, so it is
  // remapped silently and a deprecation warning makes the pattern visible
  // in the logs.
  const legacyRoot = path.resolve('/workspace');
  const underLegacyRootOnly = !isPathWithin(root, target) && isPathWithin(legacyRoot, target);
  if (underLegacyRootOnly) {
    logger.warn(`[BrowseWeb] deprecated /workspace virtual path used: ${target}. Use a workspace-relative path instead (e.g., "output/foo.html").`);
    target = path.resolve(root, path.relative(legacyRoot, target));
  }

  if (!isPathWithin(workspacePath, target)) {
    return { error: `file:// URL is only allowed within workspace: ${workspacePath}` };
  }

  const fileUrl = pathToFileURL(target);
  fileUrl.search = parsed.search;
  fileUrl.hash = parsed.hash;
  return { filePath: target, url: fileUrl.href };
}
|
||
|
||
/**
 * Checks whether a `file:` URL may be opened for the given workspace.
 *
 * @returns The error message from `resolveWorkspaceFileUrl` when access is
 *   denied, or null when the URL resolves inside the workspace.
 */
function validateFileUrlAccess(parsed: URL, workspacePath: string): string | null {
  const outcome = resolveWorkspaceFileUrl(parsed, workspacePath);
  if ('error' in outcome) return outcome.error;
  return null;
}
|
||
|
||
/**
 * Decides whether an input looks like a workspace-relative path rather than
 * a URL.
 *
 * Scheme-less inputs are accepted so the LLM can pass `output/viewer.html`
 * directly (the recommended form). Protocol-relative strings (`//host/...`)
 * and anything starting with a URL scheme (`http:`, `file:`, `about:`,
 * `data:`, ...) are left to the URL parser.
 *
 * To avoid swallowing genuinely malformed URLs such as "example.com" or
 * "not-a-url", the input must contain a `/`. A bare token without a slash is
 * left to the URL parser, which rejects it as "Invalid URL".
 *
 * @param url - Raw string supplied by the caller.
 * @returns true when the string should be treated as a path.
 */
function looksLikeWorkspaceRelativePath(url: string): boolean {
  if (!url) return false;

  const protocolRelative = url.startsWith('//');
  const hasScheme = /^[a-zA-Z][a-zA-Z0-9+\-.]*:/.test(url);
  if (protocolRelative || hasScheme) return false;

  // "./x" and "../x" both contain a slash, so this single test covers the
  // explicit-relative forms as well as plain "dir/file" inputs.
  return url.includes('/');
}
|
||
|
||
/**
 * Normalises a BrowseWeb target into a URL that is safe to navigate to.
 *
 * - Workspace-relative paths (the recommended form, e.g. "output/viewer.html")
 *   are resolved against the workspace root and converted to a `file://` URL.
 *   Absolute paths and paths escaping the workspace are rejected.
 * - `file:` URLs are validated and normalised by `resolveWorkspaceFileUrl`.
 * - Any other parseable URL is returned unchanged.
 *
 * @param url - Raw URL or workspace-relative path.
 * @param workspacePath - Workspace root directory.
 * @returns `{ url }` on success or `{ error }` with a message for the caller.
 */
export function normalizeFileUrlForWorkspace(url: string, workspacePath: string): { url: string } | { error: string } {
  if (!looksLikeWorkspaceRelativePath(url)) {
    let target: URL;
    try {
      target = new URL(url);
    } catch {
      return { error: `Invalid URL: "${url}"` };
    }
    // Non-file URLs pass through untouched.
    if (target.protocol !== 'file:') return { url };

    const outcome = resolveWorkspaceFileUrl(target, workspacePath);
    return 'error' in outcome ? { error: outcome.error } : { url: outcome.url };
  }

  // Path form: every absolute path is refused, relative ones must stay inside.
  if (path.isAbsolute(url)) {
    return { error: `BrowseWeb URL "${url}" は workspace 外の絶対パスです。workspace ルートからの相対パス (例: "output/foo.html") または完全な URL (https://...) を使ってください。` };
  }
  const absolute = path.resolve(workspacePath, url);
  if (!isPathWithin(workspacePath, absolute)) {
    return { error: `Path "${url}" is outside workspace: ${workspacePath}` };
  }
  return { url: pathToFileURL(absolute).href };
}
|
||
|
||
/**
 * Checks whether a URL may be visited.
 *
 * `file:` URLs are validated against the workspace; every other URL has its
 * hostname passed to `checkSSRF` together with the allow-list.
 *
 * @param url - URL to check.
 * @param allowedHosts - Hosts forwarded to `checkSSRF`.
 * @param workspacePath - Workspace root used for `file:` URLs.
 * @returns An error message if the URL is blocked or unparsable, or null if
 *   it is allowed.
 */
async function ssrfCheck(url: string, allowedHosts: string[], workspacePath: string): Promise<string | null> {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    return `Invalid URL: "${url}"`;
  }

  if (target.protocol === 'file:') {
    return validateFileUrlAccess(target, workspacePath);
  }

  try {
    await checkSSRF(target.hostname, allowedHosts);
    return null;
  } catch (e) {
    // checkSSRF signals a blocked host by throwing; surface its message.
    return (e as Error).message;
  }
}
|
||
|
||
// --- Tool definitions ---
|
||
|
||
/** One step of a BrowseWeb `actions` array. */
interface BrowseWebAction {
  /** Kind of action to run. */
  type:
    | 'goto'
    | 'click'
    | 'fill'
    | 'screenshot'
    | 'getText'
    | 'wait'
    | 'dumpHtml';
  /** CSS selector (click, fill, getText, dumpHtml); not needed when `ref` is given. */
  selector?: string;
  /** Element ref assigned by the previous snapshot (e1, e2, ...), usable instead of `selector`. */
  ref?: string;
  /** Input value (fill) or file name (screenshot). */
  value?: string;
  /** Target URL (goto). */
  url?: string;
  /** Wait time in milliseconds (wait). */
  ms?: number;
  /** dumpHtml: number of descendant levels to include. Defaults to 3. */
  depth?: number;
  /** dumpHtml: length of the returned preview (the file keeps the full HTML). Defaults to 5000. */
  maxChars?: number;
}
|
||
|
||
/**
 * Tool definition (function-calling schema) for the `BrowseWeb` tool.
 *
 * The description and parameter texts are shown to the model verbatim, so
 * they are runtime strings and are kept in their original language.
 */
const BROWSEWEB_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'BrowseWeb',
    // One array element per description line; joined with "\n".
    description: [
      'ヘッドレスブラウザでWebページを操作する。同一ジョブ内ではセッション(Cookie・ログイン状態等)が維持される。',
      '基本モード: url を指定してページのテキストを取得。screenshot でスクリーンショットも保存可能。',
      'アクションモード: actions 配列で goto/click/fill/screenshot/getText/wait/dumpHtml を連続実行。',
      '出力には操作可能要素が {e1 button "..."} 形式の ref 注釈付きで埋め込まれ、click/fill で ref を直接指定できる。<div role="button"> 等の ARIA ベース要素・addEventListener で click handler が後付けされた要素・open shadow DOM・iframe (cross-origin 含む) の中身も検出される。iframe 内の ref は {f1.e3 ...} のように frame ID で prefix される。状態属性 (expanded/checked/selected/pressed/disabled/haspopup) は注釈末尾に列挙。',
      'ref で当たらない or 構造を直接見たいときは dumpHtml アクションで該当要素の outerHTML を取得できる(脱出口)。',
      'click が繰り返し空振り / ログイン or CAPTCHA / ドラッグ&ドロップや canvas など DOM では操作できない UI に当たったら、InteractiveBrowse でユーザーに noVNC 経由で手動操作してもらい、その後 BrowseWithSession で続きを引き継げる。',
      'ページから発生したファイルダウンロードは自動的に output/ に保存され、戻り値末尾に [download] saved output/<filename> として通知される。',
      '詳細な使い方・ワークフロー例は ReadToolDoc({ name: "BrowseWeb" }) で取得可能。',
    ].join('\n'),
    parameters: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: '取得する URL(アクションモード時は省略可)。ローカルファイルを開く場合は workspace ルートからの相対パスを指定 (例: "output/viewer.html")',
        },
        waitFor: {
          type: 'string',
          description: '待機する CSS セレクタ(省略時は load イベント完了まで待機)',
        },
        extractSelector: {
          type: 'string',
          description: '特定要素のテキストだけ抽出する CSS セレクタ(省略時はページ全体)',
        },
        screenshot: {
          type: 'string',
          description: 'スクリーンショットを保存するファイル名(例: "page.png")。output/ に保存される',
        },
        actions: {
          type: 'array',
          description: '実行するアクションの配列(指定時は基本モードのパラメータは無視される)',
          items: {
            type: 'object',
            properties: {
              type: {
                type: 'string',
                enum: ['goto', 'click', 'fill', 'screenshot', 'getText', 'wait', 'dumpHtml'],
                description: 'アクション種別',
              },
              selector: {
                type: 'string',
                description: 'CSS セレクタ (click, fill, getText, dumpHtml) — ref があれば不要',
              },
              ref: {
                type: 'string',
                description: '前回スナップショットで割り振られた要素 ref (e1, e2, ...) — click/fill/dumpHtml で selector の代わりに使える',
              },
              value: {
                type: 'string',
                description: '入力値 (fill) またはファイル名 (screenshot)',
              },
              url: {
                type: 'string',
                description: 'URL (goto)。ローカルファイルを開く場合は workspace ルートからの相対パス (例: "output/viewer.html")',
              },
              ms: {
                type: 'number',
                description: '待機ミリ秒 (wait)',
              },
              depth: {
                type: 'number',
                description: 'dumpHtml: 包含する子孫の階層数 (デフォルト 3)',
              },
              maxChars: {
                type: 'number',
                description: 'dumpHtml: 戻り値プレビュー長 (デフォルト 5000)。フル HTML は logs/browse/ に保存',
              },
            },
            required: ['type'],
          },
        },
        timeout: {
          type: 'number',
          description: 'タイムアウト(ミリ秒、デフォルト: 60000)',
        },
        recordTo: {
          type: 'string',
          description:
            '省略すると記録しない。指定すると、本ジョブで成功した各アクションを buffer に記録し、タスク終了時に data/users/{userId}/recordings/{recordTo}.json として保存する(Task 3.5 の Save as Script で使う)。',
        },
      },
    },
  },
};
|
||
|
||
// --- Session manager (injected from server.ts) ---
|
||
|
||
/** SessionManager instance shared by this module; null until one is injected. */
let _sessionManager: SessionManager | null = null;

/**
 * Injects the SessionManager (called from server.ts).
 *
 * @param sm - Manager to use, or null to clear the current one.
 */
export function setSessionManager(sm: SessionManager | null): void {
  _sessionManager = sm;
}

/** Returns the injected SessionManager, or null when none has been set. */
export function getSessionManager(): SessionManager | null {
  return _sessionManager;
}
|
||
|
||
// --- Browser lifecycle ---
//
// 2026-05 redesign: the single shared Browser was split into three kinds:
//   1. CAPTCHA Pool (kind='pool'): a shared noVNC session in which an admin
//      solves CAPTCHAs. Used by cross-task processing such as WebSearch /
//      WebFetch screenshots. Fixed sessionId `__captcha_pool__`.
//   2. Task Session (kind='task'): a noVNC session isolated per task.
//      BrowseWeb / InteractiveBrowse obtain and reuse it keyed by ctx.taskId.
//      Access is authorized based on task visibility. Subject to LRU
//      eviction and idle GC.
//   3. Shared headless (skip mode): a single Browser used when the config's
//      captchaSolve != 'novnc', or on the fallback path when noVNC cannot be
//      started.
|
||
|
||
/** Shared headless Browser, once launched. */
let _headlessBrowser: Browser | null = null;
/** In-flight launch, so concurrent callers share a single launch attempt. */
let _headlessInitPromise: Promise<Browser> | null = null;
/** Set once a launch failed because the browser is unavailable; later calls fail fast with this message. */
let _browserUnavailable: string | null = null;

/**
 * Launches Chromium headless and stores it in `_headlessBrowser`.
 * Clears `_headlessInitPromise` when finished, whether it succeeded or not.
 */
async function launchHeadlessBrowser(): Promise<Browser> {
  try {
    const { chromium } = await import('playwright');
    const { buildLaunchOptions } = await import('../browser-launch.js');
    const launched = await chromium.launch(buildLaunchOptions(loadConfig().browser, true));
    _headlessBrowser = launched;
    logger.debug('[browser] launched chromium headless');
    return launched;
  } catch (e) {
    const msg = (e as Error).message ?? String(e);
    const looksUnavailable = msg.includes("Executable doesn't exist") || msg.includes('browserType.launch');
    if (looksUnavailable) {
      // Remember the failure so subsequent calls do not retry the launch.
      _browserUnavailable = `Playwright browser unavailable: ${msg}`;
      logger.warn(`[browser] ${_browserUnavailable}`);
    }
    throw e;
  } finally {
    _headlessInitPromise = null;
  }
}

/**
 * Returns the shared headless Browser (used in skip mode and as the noVNC
 * fallback), launching it on first use or after it has disconnected.
 *
 * @throws Error with the recorded message once the browser has been marked
 *   unavailable; otherwise rethrows whatever the launch threw.
 */
async function getHeadlessBrowser(): Promise<Browser> {
  if (_browserUnavailable) throw new Error(_browserUnavailable);
  if (_headlessBrowser?.isConnected()) return _headlessBrowser;
  if (_headlessInitPromise) return _headlessInitPromise;

  _headlessInitPromise = launchHeadlessBrowser();
  return _headlessInitPromise;
}
|
||
|
||
/**
 * Returns the Browser backing the CAPTCHA pool.
 *
 * - noVNC mode: started through `SessionManager.createPoolSession()`.
 * - Skip mode, or when the pool cannot be started: falls back to the shared
 *   headless Browser.
 *
 * The pool serves processing that wants to reuse the same cookies and
 * authentication across tasks (WebSearch, WebFetch screenshots, ...). Once an
 * admin solves a CAPTCHA by hand over noVNC, the cookies stay in the pool's
 * context and later WebSearch calls can use them as they are.
 */
export async function getCaptchaPoolBrowser(): Promise<Browser> {
  if (loadConfig().browser?.captchaSolve !== 'novnc') {
    return getHeadlessBrowser();
  }

  const manager = getSessionManager();
  if (!manager) {
    logger.warn('[browser] noVNC deps missing (Xvfb/x11vnc/websockify), falling back to headless');
    return getHeadlessBrowser();
  }

  try {
    const pool = await manager.createPoolSession();
    if (pool.browser?.isConnected()) return pool.browser;
    logger.warn('[browser] CAPTCHA pool browser disconnected, falling back to headless');
  } catch (e) {
    logger.warn(`[browser] CAPTCHA pool creation failed: ${e}, falling back to headless`);
  }
  return getHeadlessBrowser();
}
|
||
|
||
/**
 * Returns the Browser for the task described by `ctx`.
 *
 * - noVNC mode with `ctx.taskId` set: obtained through
 *   `SessionManager.getOrCreateTaskSession()`.
 * - Otherwise, or when the session is unusable: falls back to the shared
 *   headless Browser.
 *
 * Task sessions are used by BrowseWeb / InteractiveBrowse. Consecutive calls
 * for the same taskId reuse the same Browser, so cookies and login state are
 * preserved.
 */
export async function getTaskSessionBrowser(ctx: ToolContext): Promise<Browser> {
  const taskId = ctx.taskId;
  if (loadConfig().browser?.captchaSolve !== 'novnc' || !taskId) {
    return getHeadlessBrowser();
  }

  const manager = getSessionManager();
  if (!manager) {
    logger.warn('[browser] noVNC deps missing, falling back to headless for task session');
    return getHeadlessBrowser();
  }

  try {
    const session = await manager.getOrCreateTaskSession(taskId, ctx.userId);
    manager.touchSession(session.id);
    if (session.browser?.isConnected()) return session.browser;
    logger.warn(`[browser] task session ${taskId} browser disconnected, falling back to headless`);
  } catch (e) {
    logger.warn(`[browser] task session ${taskId} creation failed: ${e}, falling back to headless`);
  }
  return getHeadlessBrowser();
}
|
||
|
||
/**
 * Helper for the UI to look up the CAPTCHA pool's noVNC path.
 *
 * The original note marks this as admin-only; no authorization check is
 * performed inside this function.
 *
 * @returns Session id, noVNC path, display and pending-CAPTCHA flag of the
 *   pool session, or null when there is no SessionManager or no pool session.
 */
export function getCaptchaPoolInfo(): {
  sessionId: string;
  novncPath: string;
  display: string;
  captchaPending: boolean;
} | null {
  const pool = getSessionManager()?.getSession(CAPTCHA_POOL_SESSION_ID);
  if (!pool) return null;

  const sessionId = pool.id;
  const novncPath = buildNovncPath(pool.id);
  return {
    sessionId,
    novncPath,
    display: pool.display,
    // Anything other than a literal `true` is reported as not pending.
    captchaPending: pool.captchaPending === true,
  };
}
|
||
|
||
/**
 * Helper for the UI to look up the noVNC path of the task session that
 * belongs to `taskId`.
 *
 * @param taskId - Task whose session should be located.
 * @returns Session id, noVNC path and display of the first matching task
 *   session, or null when there is no SessionManager or no such session.
 */
export function getTaskSessionInfo(taskId: string): {
  sessionId: string;
  novncPath: string;
  display: string;
} | null {
  const manager = getSessionManager();
  if (!manager) return null;

  const match = [...manager.listSessions()].find(
    (session) => session.kind === 'task' && session.taskId === taskId,
  );
  if (!match) return null;

  return {
    sessionId: match.id,
    novncPath: buildNovncPath(match.id),
    display: match.display,
  };
}
|
||
|
||
/**
 * Tears down every browser resource owned by this module, in order:
 * persistent contexts of the web module, headless job contexts, all noVNC
 * sessions, and finally the shared headless Browser. Cleanup errors are
 * ignored at every step.
 */
export async function closeBrowser(): Promise<void> {
  try {
    const web = await import('./web.js') as unknown as { clearPersistentContexts?: () => void };
    web.clearPersistentContexts?.();
  } catch {
    // Ignored when web.js cannot be loaded.
  }

  // Close every headless job context.
  for (const entry of _jobContexts) {
    const [workspaceKey, context] = entry;
    await context.close().catch(() => {});
    _jobContexts.delete(workspaceKey);
  }

  // Destroy all noVNC sessions (pool + task).
  const manager = getSessionManager();
  if (manager) {
    await manager.destroyAll().catch(() => {});
  }

  if (!_headlessBrowser) return;
  try {
    await _headlessBrowser.close();
  } catch {
    // Ignore cleanup errors.
  }
  _headlessBrowser = null;
  logger.debug('[browser] headless closed');
}
|
||
|
||
// --- Per-job persistent browser context ---
|
||
|
||
/**
 * BrowserContexts used in headless mode, keyed by workspace path.
 *
 * In noVNC mode the task session owns its context, so nothing is stored
 * here for it (`session.context` is used directly).
 */
const _jobContexts: Map<string, BrowserContext> = new Map();

/** Pages this module has already marked as having route interception set up. */
const _interceptedPages: WeakSet<Page> = new WeakSet();

/** Contexts that already carry the 'page' listener installed by ensureContextHooks. */
const _hookedContexts: WeakSet<BrowserContext> = new WeakSet();
|
||
|
||
/**
 * Target of a snapshot ref: the frame that owns the element plus the
 * selector that locates it inside that frame.
 *
 * Main-frame refs are named `e1, e2, ...`; refs inside child frames carry a
 * frame prefix (`f1.e1, f1.e2, ...`, where `f1` is the first iframe found in
 * page.frames(), main frame excluded).
 *
 * The Frame object itself is stored instead of a frame id so that click /
 * fill / dumpHtml can be dispatched on the right execution context
 * (Playwright's Frame offers the same surface as Page for these methods and
 * bridges cross-origin iframes transparently via CDP).
 */
type RefTarget = {
  frame: Frame;
  selector: string;
};

/** Ref name → target lookup table, kept separately for every page. */
const _pageRefs: WeakMap<Page, Map<string, RefTarget>> = new WeakMap();
|
||
|
||
// --- Download capture (Playwright `page.on('download')`) ---
//
// Downloads triggered by clicks and similar actions are saved into the
// workspace's output/ directory so the agent can continue working on them
// with Read / ReadPdf / etc. Each BrowseWeb / BrowseWithSession result gets
// `[download] saved output/foo.csv (12345 bytes)` appended at the end.
|
||
|
||
/** Outcome of one browser-initiated download, successful or not. */
export interface BrowserDownloadEntry {
  /** File name of the download. */
  filename: string;
  /** Workspace-relative path of the saved file (e.g. "output/report.csv"). */
  savedRelPath: string;
  /** Whether the file was saved successfully. */
  ok: boolean;
  /** Size of the saved file in bytes. */
  bytes?: number;
  /** Error message of a failed download. */
  error?: string;
  /** ISO 8601 timestamp string. */
  timestamp: string;
}
|
||
|
||
/** Pages that already have the 'download' listener attached. */
const _downloadHookedPages: WeakSet<Page> = new WeakSet();

/** Finished download entries per page, waiting to be drained. */
const _pageDownloads: WeakMap<Page, BrowserDownloadEntry[]> = new WeakMap();

/** Still-running download handlers per page. */
const _pageDownloadPromises: WeakMap<Page, Set<Promise<void>>> = new WeakMap();
|
||
|
||
/**
 * Turns a suggested download name into a safe basename: directory parts are
 * dropped, reserved characters and whitespace become `_`, and the result is
 * capped at 200 characters. (Exported for tests.)
 *
 * @param name - File name suggested for the download; may be missing.
 * @returns A non-empty file name. Falls back to "download" when nothing
 *   usable remains, including the directory references "." and "..".
 */
export function sanitizeDownloadFilename(name: string | undefined | null): string {
  const raw = (name ?? '').toString() || 'download';
  const base = path.basename(raw);
  const cleaned = base.replace(/[\\/:*?"<>|\s]/g, '_').slice(0, 200);
  // "." and ".." survive basename() and the character filter but are not
  // file names: joined onto a directory they refer to that directory or its
  // parent, so they are replaced by the default name.
  if (cleaned === '' || cleaned === '.' || cleaned === '..') return 'download';
  return cleaned;
}
|
||
|
||
/**
 * Picks a path under `<workspace>/output` for `filename`, creating the
 * directory if needed. When the name is taken, "foo-1.csv", "foo-2.csv", ...
 * are tried in turn. (Exported for tests.)
 *
 * @param workspacePath - Workspace root directory.
 * @param filename - Desired file name (already sanitised by the caller).
 * @returns Absolute or workspace-rooted path of the first free candidate; the
 *   search gives up after suffix 999 and returns that last candidate.
 */
export function pickUniqueOutputPath(workspacePath: string, filename: string): string {
  const outputDir = path.join(workspacePath, 'output');
  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true });
  }

  const { name: stem, ext } = path.parse(filename);
  let candidate = path.join(outputDir, filename);
  let suffix = 1;
  while (existsSync(candidate) && suffix < 1000) {
    candidate = path.join(outputDir, `${stem}-${suffix}${ext}`);
    suffix += 1;
  }
  return candidate;
}
|
||
|
||
/**
 * Appends one download record to `<workspace>/logs/downloads.jsonl`.
 *
 * The record is the entry plus `source: 'BrowseWeb'`, written as a single
 * JSON line. Failures are logged as warnings and never thrown.
 *
 * @param workspacePath - Workspace root directory.
 * @param entry - Download outcome to record.
 */
function logBrowserDownload(workspacePath: string, entry: BrowserDownloadEntry): void {
  try {
    const historyDir = path.join(workspacePath, 'logs');
    if (!existsSync(historyDir)) {
      mkdirSync(historyDir, { recursive: true });
    }
    const historyFile = path.join(historyDir, 'downloads.jsonl');
    const record = { ...entry, source: 'BrowseWeb' };
    appendFileSync(historyFile, JSON.stringify(record) + '\n');
  } catch (e) {
    logger.warn(`[BrowseWeb] failed to write download history: ${(e as Error).message}`);
  }
}
|
||
|
||
/**
 * Attaches a 'download' listener to `page` (at most once per page) that
 * saves each download under the workspace's output/ directory and records
 * the outcome in the page's download queue and in the download history log.
 *
 * @param page - Page to watch.
 * @param workspacePath - Workspace root used for the save location and log.
 */
function setupDownloadHandler(page: Page, workspacePath: string): void {
  if (_downloadHookedPages.has(page)) return;
  _downloadHookedPages.add(page);
  if (!_pageDownloads.has(page)) _pageDownloads.set(page, []);
  if (!_pageDownloadPromises.has(page)) _pageDownloadPromises.set(page, new Set());

  /** Saves one download and pushes its success or failure entry onto `queue`. */
  const saveDownload = async (
    download: Download,
    queue: BrowserDownloadEntry[],
    filename: string,
    timestamp: string,
  ): Promise<void> => {
    const record = (entry: BrowserDownloadEntry): void => {
      queue.push(entry);
      logBrowserDownload(workspacePath, entry);
    };
    try {
      const savePath = pickUniqueOutputPath(workspacePath, filename);
      await download.saveAs(savePath);
      const bytes = statSync(savePath).size;
      const savedRelPath = path.relative(workspacePath, savePath);
      record({
        // The saved name may differ from the suggested one after de-duplication.
        filename: path.basename(savePath),
        savedRelPath,
        ok: true,
        bytes,
        timestamp,
      });
      logger.debug(`[BrowseWeb] downloaded ${savedRelPath} (${bytes} bytes)`);
    } catch (e) {
      record({
        filename,
        savedRelPath: '',
        ok: false,
        error: (e as Error).message,
        timestamp,
      });
    }
  };

  page.on('download', (download: Download) => {
    const pending = _pageDownloadPromises.get(page)!;
    const queue = _pageDownloads.get(page)!;
    const filename = sanitizeDownloadFilename(download.suggestedFilename());
    const timestamp = new Date().toISOString();

    // Track the handler while it runs so drainDownloads can wait for it.
    const task = saveDownload(download, queue, filename, timestamp);
    pending.add(task);
    void task.finally(() => pending.delete(task));
  });
}
|
||
|
||
/**
 * Waits up to `timeoutMs` for the page's in-flight downloads, then removes
 * and returns the entries recorded so far.
 *
 * The queue array is emptied in place rather than replaced: download
 * handlers keep a reference to the array they saw when the download
 * started, so a download that finishes after the timeout still lands in the
 * live queue and is returned by the next drain instead of being lost.
 *
 * @param page - Page whose downloads should be collected.
 * @param timeoutMs - Maximum time to wait for in-flight downloads.
 * @returns The recorded entries (possibly empty), oldest first.
 */
async function drainDownloads(page: Page, timeoutMs: number = 30_000): Promise<BrowserDownloadEntry[]> {
  const pending = _pageDownloadPromises.get(page);
  if (pending && pending.size > 0) {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<void>((resolve) => {
      timer = setTimeout(resolve, timeoutMs);
    });
    try {
      await Promise.race([Promise.all(Array.from(pending)), timeout]);
    } finally {
      // Do not leave the timer pending once the downloads have settled.
      if (timer !== undefined) clearTimeout(timer);
    }
  }

  const queue = _pageDownloads.get(page);
  if (!queue) return [];
  return queue.splice(0, queue.length);
}
|
||
|
||
/**
 * Renders download entries as the `[download] ...` lines appended to tool
 * results, one line per entry.
 *
 * @param entries - Entries to render.
 * @returns Newline-joined lines, or an empty string for no entries.
 */
function formatDownloadLines(entries: BrowserDownloadEntry[]): string {
  const lines: string[] = [];
  for (const entry of entries) {
    if (entry.ok) {
      lines.push(`[download] saved ${entry.savedRelPath} (${entry.bytes} bytes)`);
    } else {
      lines.push(`[download] FAILED ${entry.filename}: ${entry.error}`);
    }
  }
  return lines.join('\n');
}
|
||
|
||
/**
 * Returns the BrowserContext to use for the job described by `ctx`.
 *
 * - noVNC mode with `ctx.taskId` set: the context of the task session
 *   obtained from the SessionManager (the same screen an admin or the task
 *   owner is watching over noVNC).
 * - Otherwise, or when that session is unavailable: a context on the shared
 *   headless Browser, created once per workspace path and cached in
 *   `_jobContexts`.
 *
 * A cached headless context is reused only while its Browser is still
 * connected. A context whose Browser has gone away can no longer open pages,
 * so it is replaced by a fresh one on the (re)launched headless Browser.
 *
 * @param ctx - Tool context; supplies taskId, userId, workspacePath and the
 *   optional stored session state / profile id.
 */
async function getJobContext(
  ctx: ToolContext,
): Promise<BrowserContext> {
  const config = loadConfig();
  if (config.browser?.captchaSolve === 'novnc' && ctx.taskId) {
    const sm = getSessionManager();
    if (sm) {
      try {
        const session = await sm.getOrCreateTaskSessionWithState(
          ctx.taskId,
          ctx.userId,
          ctx.browserSessionState ?? null,
          ctx.browserSessionProfileId ?? null,
        );
        sm.touchSession(session.id);
        if (session.context) return session.context;
      } catch (e) {
        logger.warn(`[browser] task session for taskId=${ctx.taskId} unavailable: ${(e as Error).message}, falling back to headless`);
      }
    }
  }

  const cached = _jobContexts.get(ctx.workspacePath);
  // browser() being null or disconnected both mean the context is unusable.
  if (cached?.browser()?.isConnected()) return cached;

  const browser = await getHeadlessBrowser();
  const jobContext = await browser.newContext(
    ctx.browserSessionState
      ? { storageState: ctx.browserSessionState as never }
      : {},
  );
  const { applyStealthInitScript, applyAgentSnapshotHooks } = await import('../browser-launch.js');
  await applyStealthInitScript(jobContext);
  await applyAgentSnapshotHooks(jobContext);
  _jobContexts.set(ctx.workspacePath, jobContext);
  return jobContext;
}
|
||
|
||
/**
 * Installs this module's per-page hooks on a BrowserContext, once per
 * context: route interception, download capture and ref-map invalidation
 * for every page opened afterwards.
 *
 * Pages that already exist when this runs receive only the download handler
 * here.
 *
 * @param context - Context to hook.
 * @param allowedHosts - Hosts passed through to route interception.
 * @param workspacePath - Workspace root passed through to the hooks.
 */
function ensureContextHooks(
  context: BrowserContext,
  allowedHosts: string[],
  workspacePath: string,
): void {
  if (_hookedContexts.has(context)) return;
  _hookedContexts.add(context);

  const hookNewPage = (newPage: Page): void => {
    if (!_interceptedPages.has(newPage)) {
      _interceptedPages.add(newPage);
      // Fire-and-forget: failures to install interception are ignored here.
      setupRouteInterception(newPage, allowedHosts, workspacePath).catch(() => {});
    }
    setupDownloadHandler(newPage, workspacePath);
    // Refs point into the previous document; drop them on main-frame navigation.
    newPage.on('framenavigated', (frame) => {
      if (frame === newPage.mainFrame()) _pageRefs.delete(newPage);
    });
  };
  context.on('page', hookNewPage);

  // Also cover pages that were open before the listener was attached.
  context.pages().forEach((openPage) => {
    setupDownloadHandler(openPage, workspacePath);
  });
}
|
||
|
||
/**
 * Returns the Page that BrowseWeb / InteractiveBrowse should operate on.
 *
 * The most recently opened page of the job's context is reused when one
 * exists, otherwise a new page is opened. Within the same taskId /
 * workspacePath the session (cookies, login state) is therefore preserved.
 *
 * Every returned page is guaranteed to have route interception and ref-map
 * invalidation installed. This includes a page that already existed in the
 * context before this module hooked it: such a page is hooked here, awaited,
 * before being handed out.
 *
 * @param ctx - Tool context identifying the job.
 * @param allowedHosts - Hosts passed through to route interception.
 * @param timeout - Default timeout (ms) applied to the page.
 * @throws Whatever `setupRouteInterception` throws; the page is not returned
 *   without interception in place.
 */
async function getJobPage(
  ctx: ToolContext,
  allowedHosts: string[],
  timeout: number,
): Promise<Page> {
  const context = await getJobContext(ctx);
  ensureContextHooks(context, allowedHosts, ctx.workspacePath);

  const pages = context.pages();
  const reused = pages.length > 0 ? pages[pages.length - 1] : undefined;
  if (reused) {
    reused.setDefaultTimeout(timeout);
    if (!_interceptedPages.has(reused)) {
      // Pre-existing page that no hook has seen yet: install them now.
      await setupRouteInterception(reused, allowedHosts, ctx.workspacePath);
      _interceptedPages.add(reused);
      reused.on('framenavigated', (frame) => {
        if (frame === reused.mainFrame()) _pageRefs.delete(reused);
      });
    }
    return reused;
  }

  const page = await context.newPage();
  page.setDefaultTimeout(timeout);
  // Awaited here so interception is in place before the caller navigates.
  await setupRouteInterception(page, allowedHosts, ctx.workspacePath);
  _interceptedPages.add(page);
  page.on('framenavigated', (frame) => {
    if (frame === page.mainFrame()) _pageRefs.delete(page);
  });
  return page;
}
|
||
|
||
/**
|
||
* ページの DOM をスキャンし、表示テキスト + 操作可能要素のリファレンス注釈を返す。
|
||
* 注釈は {e1 button "ログイン"} のような形式で本文中に埋め込まれる。
|
||
* 各 ref は Playwright が解釈できるセレクタ(属性ベース優先、fallback で
|
||
* XPath 風 nth-of-type)にマップされる。
|
||
*
|
||
* 検出する操作可能要素("div クリック" 系を取り逃さないための拡張):
|
||
* - 標準タグ: A, BUTTON, INPUT, SELECT, TEXTAREA, LABEL, SUMMARY, DETAILS, OPTION
|
||
* - ARIA role: button, link, menuitem, menuitemcheckbox/radio, tab, option,
|
||
* checkbox, radio, switch, combobox, listbox, slider, spinbutton,
|
||
* textbox, searchbox, treeitem
|
||
* - [onclick] / [tabindex >= 0] / [contenteditable=true]
|
||
*
|
||
* 状態属性 (aria-expanded / aria-checked / aria-selected / aria-pressed /
|
||
* aria-disabled / aria-haspopup) は注釈末尾に列挙される。
|
||
*
|
||
* Open shadow DOM も走査する。IFRAME と <svg> 内部は走査しない(前者は別
|
||
* frame、後者はノイズが多い。<svg role="button"> 自体は親側で interactive
|
||
* 検出される)。
|
||
*
|
||
* page.evaluate に渡す関数はブラウザ側で実行されるため、DOM API を使用する。
|
||
* Node サイドの tsconfig には DOM 型がないため、関数を文字列として渡す。
|
||
*/
|
||
const SNAPSHOT_SCRIPT = `(() => {
|
||
// IFRAME is intentionally NOT in HARD_SKIP — encountering an <iframe> emits
|
||
// an inline placeholder so the agent sees where the frame sits. The actual
|
||
// frame contents are walked separately on the Node side via Playwright's
|
||
// page.frames() API and merged into the final snapshot.
|
||
const HARD_SKIP_TAGS = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'TEMPLATE', 'SVG']);
|
||
const INTERACTIVE_TAGS = new Set(['A', 'BUTTON', 'INPUT', 'SELECT', 'TEXTAREA', 'LABEL', 'SUMMARY', 'DETAILS', 'OPTION']);
|
||
const INTERACTIVE_ROLES = new Set([
|
||
'button', 'link', 'menuitem', 'menuitemcheckbox', 'menuitemradio',
|
||
'tab', 'option', 'checkbox', 'radio', 'switch', 'combobox',
|
||
'listbox', 'slider', 'spinbutton', 'textbox', 'searchbox', 'treeitem'
|
||
]);
|
||
const SIMPLE_LABEL_TAGS = new Set(['A', 'BUTTON', 'INPUT', 'SELECT', 'OPTION', 'SUMMARY']);
|
||
const SIMPLE_LABEL_ROLES = new Set(['button', 'link', 'menuitem', 'option', 'tab', 'checkbox', 'radio', 'switch']);
|
||
const BLOCK_TAGS = new Set([
|
||
'DIV', 'P', 'SECTION', 'ARTICLE', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
|
||
'LI', 'TR', 'BR', 'BLOCKQUOTE', 'PRE', 'NAV', 'HEADER', 'FOOTER',
|
||
'ASIDE', 'MAIN', 'FORM', 'FIELDSET', 'TABLE', 'UL', 'OL', 'DL', 'DT', 'DD'
|
||
]);
|
||
|
||
const refs = [];
|
||
const lines = [];
|
||
let counter = 0;
|
||
|
||
// ── Selector generation ───────────────────────────────────
|
||
// Quote-free attribute values only — if a value contains " or \\, skip
|
||
// that selector and fall back to the next strategy. Avoids escaping
|
||
// nightmares in CSS attribute selectors.
|
||
function safeAttr(s) { return typeof s === 'string' && s.length > 0 && !/["\\\\]/.test(s); }
|
||
function uniqueIn(root, sel) {
|
||
try { return root.querySelectorAll(sel).length === 1; } catch (e) { return false; }
|
||
}
|
||
function buildSelector(el) {
|
||
const root = el.getRootNode();
|
||
const tagLower = el.tagName.toLowerCase();
|
||
const candidates = [];
|
||
const tid = el.getAttribute('data-testid');
|
||
if (safeAttr(tid)) candidates.push('[data-testid="' + tid + '"]');
|
||
const tid2 = el.getAttribute('data-test');
|
||
if (safeAttr(tid2)) candidates.push('[data-test="' + tid2 + '"]');
|
||
const tidQa = el.getAttribute('data-qa');
|
||
if (safeAttr(tidQa)) candidates.push('[data-qa="' + tidQa + '"]');
|
||
const id = el.id;
|
||
if (id && /^[A-Za-z][\\w-]*$/.test(id)) candidates.push('#' + id);
|
||
const name = el.getAttribute('name');
|
||
if (safeAttr(name) && (tagLower === 'input' || tagLower === 'select' || tagLower === 'textarea')) {
|
||
candidates.push(tagLower + '[name="' + name + '"]');
|
||
}
|
||
const aria = el.getAttribute('aria-label');
|
||
if (safeAttr(aria) && aria.length < 80) candidates.push('[aria-label="' + aria + '"]');
|
||
|
||
for (const sel of candidates) {
|
||
if (uniqueIn(root, sel)) return sel;
|
||
}
|
||
|
||
// Fallback: nth-of-type CSS chain. Only resolves in the document tree
|
||
// (not across shadow boundaries). For shadow DOM elements without a
|
||
// unique attribute, this fallback won't pierce — caller should use
|
||
// dumpHtml then a more specific selector.
|
||
const parts = [];
|
||
let cur = el;
|
||
while (cur && cur.nodeType === 1 && cur !== document.body && cur.tagName !== 'HTML') {
|
||
let index = 1;
|
||
let sib = cur.previousElementSibling;
|
||
while (sib) {
|
||
if (sib.tagName === cur.tagName) index++;
|
||
sib = sib.previousElementSibling;
|
||
}
|
||
parts.unshift(cur.tagName.toLowerCase() + ':nth-of-type(' + index + ')');
|
||
cur = cur.parentElement;
|
||
}
|
||
return 'body > ' + parts.join(' > ');
|
||
}
|
||
|
||
// ── Visibility & interactivity ────────────────────────────
|
||
function isHidden(el) {
|
||
if (el.getAttribute && el.getAttribute('aria-hidden') === 'true') return true;
|
||
if (el.hasAttribute && el.hasAttribute('hidden')) return true;
|
||
let cur = el;
|
||
while (cur && cur.hasAttribute) {
|
||
if (cur.hasAttribute('inert')) return true;
|
||
cur = cur.parentElement;
|
||
}
|
||
return false;
|
||
}
|
||
function isVisible(el) {
|
||
const style = window.getComputedStyle(el);
|
||
if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return false;
|
||
const rect = el.getBoundingClientRect();
|
||
return rect.width > 0 && rect.height > 0;
|
||
}
|
||
function isContenteditable(el) {
|
||
const v = el.getAttribute && el.getAttribute('contenteditable');
|
||
return v === 'true' || v === '';
|
||
}
|
||
function isInteractive(el) {
|
||
if (INTERACTIVE_TAGS.has(el.tagName)) return true;
|
||
const role = el.getAttribute && el.getAttribute('role');
|
||
if (role && INTERACTIVE_ROLES.has(role)) return true;
|
||
if (el.hasAttribute && el.hasAttribute('onclick')) return true;
|
||
// Tagged by browser-launch.applyAgentSnapshotHooks when JS code calls
|
||
// addEventListener('click'|'mousedown'|'pointerdown', ...). Captures the
|
||
// "naked clickable <div>" pattern common in jQuery / vanilla / Vue apps.
|
||
if (el.hasAttribute && el.hasAttribute('data-ao-click')) return true;
|
||
const tabindex = el.getAttribute && el.getAttribute('tabindex');
|
||
if (tabindex !== null && tabindex !== undefined && parseInt(tabindex, 10) >= 0) return true;
|
||
if (isContenteditable(el)) return true;
|
||
return false;
|
||
}
|
||
|
||
// ── Description ───────────────────────────────────────────
|
||
function elementName(el) {
|
||
const aria = el.getAttribute && el.getAttribute('aria-label');
|
||
if (aria) return aria.trim().slice(0, 80);
|
||
const labelledBy = el.getAttribute && el.getAttribute('aria-labelledby');
|
||
if (labelledBy) {
|
||
const ids = labelledBy.split(/\\s+/);
|
||
const parts = [];
|
||
for (const lid of ids) {
|
||
const target = document.getElementById(lid);
|
||
if (target) parts.push((target.textContent || '').trim());
|
||
}
|
||
const joined = parts.join(' ').trim();
|
||
if (joined) return joined.slice(0, 80);
|
||
}
|
||
const text = (el.textContent || '').replace(/\\s+/g, ' ').trim();
|
||
if (text) return text.slice(0, 80);
|
||
const placeholder = el.getAttribute && el.getAttribute('placeholder');
|
||
if (placeholder) return placeholder.trim().slice(0, 80);
|
||
const title = el.getAttribute && el.getAttribute('title');
|
||
if (title) return title.trim().slice(0, 80);
|
||
const alt = el.getAttribute && el.getAttribute('alt');
|
||
if (alt) return alt.trim().slice(0, 80);
|
||
return '';
|
||
}
|
||
function elementRole(el) {
|
||
const explicit = el.getAttribute && el.getAttribute('role');
|
||
if (explicit) return explicit;
|
||
const tag = el.tagName.toLowerCase();
|
||
if (tag === 'a' && el.getAttribute && el.getAttribute('href')) return 'link';
|
||
if (tag === 'button') return 'button';
|
||
if (tag === 'input') {
|
||
const t = (el.getAttribute('type') || 'text').toLowerCase();
|
||
if (t === 'checkbox' || t === 'radio') return t;
|
||
if (t === 'submit' || t === 'button' || t === 'reset' || t === 'image') return 'button';
|
||
if (t === 'search') return 'searchbox';
|
||
return 'textbox';
|
||
}
|
||
if (tag === 'select') return 'combobox';
|
||
if (tag === 'textarea') return 'textbox';
|
||
if (tag === 'summary') return 'button';
|
||
if (tag === 'details') return 'group';
|
||
if (tag === 'option') return 'option';
|
||
if (isContenteditable(el)) return 'textbox';
|
||
return tag;
|
||
}
|
||
function elementStates(el) {
|
||
const states = [];
|
||
const expanded = el.getAttribute && el.getAttribute('aria-expanded');
|
||
if (expanded === 'true') states.push('expanded');
|
||
else if (expanded === 'false') states.push('collapsed');
|
||
if (el.tagName === 'DETAILS' && el.open) states.push('expanded');
|
||
const pressed = el.getAttribute && el.getAttribute('aria-pressed');
|
||
if (pressed === 'true') states.push('pressed');
|
||
const selected = el.getAttribute && el.getAttribute('aria-selected');
|
||
if (selected === 'true') states.push('selected');
|
||
const checked = el.getAttribute && el.getAttribute('aria-checked');
|
||
if (checked === 'true') states.push('checked');
|
||
else if (checked === 'mixed') states.push('mixed');
|
||
if (el.tagName === 'INPUT' && (el.type === 'checkbox' || el.type === 'radio') && el.checked && !states.includes('checked')) {
|
||
states.push('checked');
|
||
}
|
||
if (el.disabled === true || (el.getAttribute && el.getAttribute('aria-disabled') === 'true')) states.push('disabled');
|
||
if (el.required === true) states.push('required');
|
||
const haspopup = el.getAttribute && el.getAttribute('aria-haspopup');
|
||
if (haspopup && haspopup !== 'false') states.push('haspopup');
|
||
return states;
|
||
}
|
||
function describeElement(el, ref) {
|
||
const role = elementRole(el);
|
||
const name = elementName(el).replace(/"/g, "'");
|
||
const states = elementStates(el);
|
||
const tag = el.tagName.toLowerCase();
|
||
const parts = [ref, role];
|
||
if (name) parts.push('"' + name + '"');
|
||
if (tag === 'input' || tag === 'textarea') {
|
||
const v = el.value || '';
|
||
if (v) parts.push('value="' + String(v).slice(0, 30).replace(/"/g, "'") + '"');
|
||
}
|
||
if (tag === 'a') {
|
||
const href = el.getAttribute('href');
|
||
if (href) parts.push('href="' + href.slice(0, 60).replace(/"/g, "'") + '"');
|
||
}
|
||
if (states.length) parts.push(states.join(' '));
|
||
return '{' + parts.join(' ') + '}';
|
||
}
|
||
|
||
// ── Walk ─────────────────────────────────────────────────
|
||
function walk(node) {
|
||
if (node.nodeType === 3) {
|
||
const t = (node.textContent || '').replace(/\\s+/g, ' ').trim();
|
||
if (t) lines.push(t);
|
||
return;
|
||
}
|
||
if (node.nodeType !== 1) return;
|
||
const el = node;
|
||
if (HARD_SKIP_TAGS.has(el.tagName)) return;
|
||
if (isHidden(el)) return;
|
||
if (!isVisible(el)) return;
|
||
|
||
// <iframe> emits an inline placeholder so the agent sees its position.
|
||
// The Node side walks the frame's contents separately and replaces the
|
||
// matching placeholder with the merged frame snapshot.
|
||
if (el.tagName === 'IFRAME') {
|
||
const src = el.getAttribute('src') || '';
|
||
const name = el.getAttribute('name') || '';
|
||
const title = el.getAttribute('title') || '';
|
||
const labelParts = [];
|
||
if (name) labelParts.push('name=' + name);
|
||
if (title) labelParts.push('title=' + title.slice(0, 60));
|
||
if (src) labelParts.push('src=' + src.slice(0, 120));
|
||
lines.push('\\n[[IFRAME ' + (labelParts.join(' ') || '?') + ']]\\n');
|
||
return;
|
||
}
|
||
|
||
if (BLOCK_TAGS.has(el.tagName)) lines.push('\\n');
|
||
|
||
if (isInteractive(el)) {
|
||
counter++;
|
||
const ref = 'e' + counter;
|
||
refs.push({ ref: ref, selector: buildSelector(el) });
|
||
lines.push(describeElement(el, ref));
|
||
const role = el.getAttribute && el.getAttribute('role');
|
||
const skip = SIMPLE_LABEL_TAGS.has(el.tagName) || (role && SIMPLE_LABEL_ROLES.has(role));
|
||
if (skip) return;
|
||
}
|
||
|
||
for (const child of Array.from(node.childNodes)) walk(child);
|
||
if (el.shadowRoot && el.shadowRoot.mode === 'open') {
|
||
for (const child of Array.from(el.shadowRoot.childNodes)) walk(child);
|
||
}
|
||
}
|
||
|
||
if (document.body) walk(document.body);
|
||
|
||
let text = lines.join(' ')
|
||
.replace(/ \\n/g, '\\n')
|
||
.replace(/\\n /g, '\\n')
|
||
.replace(/\\n{3,}/g, '\\n\\n')
|
||
.trim();
|
||
|
||
return { text: text, refs: refs };
|
||
})()`;
|
||
|
||
/** Value produced by SNAPSHOT_SCRIPT for one frame: the rendered text plus its ref → selector table. */
type FrameSnapshotResult = { text: string; refs: Array<{ ref: string; selector: string }> };

/** Upper bound, in milliseconds, for evaluating the snapshot script in a single frame. */
const FRAME_SNAPSHOT_TIMEOUT_MS = 5000;

/**
 * Evaluate SNAPSHOT_SCRIPT inside `frame`, giving up after
 * FRAME_SNAPSHOT_TIMEOUT_MS.
 *
 * @param frame Frame in which the snapshot script is evaluated.
 * @returns The `{ text, refs }` object returned by the script.
 * @throws Error `frame snapshot timeout <N>ms` when the evaluation does not
 *   settle in time; any rejection from `frame.evaluate` is propagated as-is.
 */
async function evaluateFrameSnapshot(frame: Frame): Promise<FrameSnapshotResult> {
  const evaluation = frame.evaluate(SNAPSHOT_SCRIPT) as unknown as Promise<FrameSnapshotResult>;

  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`frame snapshot timeout ${FRAME_SNAPSHOT_TIMEOUT_MS}ms`)),
      FRAME_SNAPSHOT_TIMEOUT_MS,
    );
  });

  try {
    return await Promise.race<FrameSnapshotResult>([evaluation, deadline]);
  } finally {
    // Without this, every successful snapshot would leave a pending timer
    // behind for the full timeout duration.
    clearTimeout(timer);
  }
}
|
||
|
||
/**
 * Produce a text snapshot of `page` that covers the main frame and every
 * attached child frame, and remember where each ref points.
 *
 * Output layout: the main-frame text comes first, then one delimited section
 * per child frame (`--- iframe fN url="..." ---` … `--- end iframe fN ---`).
 * Refs found in a child frame are rewritten from `eM` to `fN.eM`, both in the
 * text and in the ref table stored in `_pageRefs` for this page.
 *
 * Frames are evaluated one after another through evaluateFrameSnapshot. A
 * failing main frame yields a `[main frame snapshot failed: …]` line; a
 * failing child frame yields a `[cannot inspect: …]` section. Cross-origin
 * frames are walked as well (Playwright's Frame.evaluate is relied on to
 * reach them).
 *
 * @param page Page to snapshot.
 * @returns The combined snapshot text.
 */
async function snapshotPage(page: Page): Promise<string> {
  const targets = new Map<string, RefTarget>();
  const root = page.mainFrame();

  let rootText: string;
  try {
    const rootSnapshot = await evaluateFrameSnapshot(root);
    rootText = rootSnapshot.text;
    rootSnapshot.refs.forEach((entry) => {
      targets.set(entry.ref, { frame: root, selector: entry.selector });
    });
  } catch (err) {
    rootText = `[main frame snapshot failed: ${(err as Error).message}]`;
  }

  const sections: string[] = [rootText];
  let childCount = 0;

  for (const child of page.frames()) {
    if (child === root || child.isDetached()) continue;

    childCount += 1;
    const id = `f${childCount}`;
    const childUrl = child.url();
    const childName = child.name();
    const nameAttr = childName ? ` name="${childName}"` : '';
    const header = `--- iframe ${id} url="${childUrl}"${nameAttr} ---`;
    const footer = `--- end iframe ${id} ---`;

    let body: string;
    let entries: Array<{ ref: string; selector: string }>;
    try {
      ({ text: body, refs: entries } = await evaluateFrameSnapshot(child));
    } catch (err) {
      sections.push(`\n${header}\n[cannot inspect: ${(err as Error).message}]\n${footer}`);
      continue;
    }

    for (const { ref, selector } of entries) {
      targets.set(`${id}.${ref}`, { frame: child, selector });
    }
    // Prefix every `{eN` marker so refs stay unique across frames.
    const prefixed = body.replace(/\{e(\d+)\b/g, `{${id}.e$1`);
    sections.push(`\n${header}\n${prefixed || '[empty]'}\n${footer}`);
  }

  _pageRefs.set(page, targets);
  return sections.join('\n');
}
|
||
|
||
/**
 * Default number of characters of a long BrowseWeb text result that is
 * returned inline before the remainder is spilled to a file.
 */
const BROWSE_TEXT_PREVIEW_CHARS = 5000;

/**
 * Keep long BrowseWeb text results (getText / snapshot / dumpHtml) from being
 * lost to truncation.
 *
 * When `text` is at most `previewLimit` characters it is returned unchanged
 * and no file is written. Otherwise the full text is written to
 * `<workspace>/logs/browse/{timestamp}-{hash}.txt` and the return value is the
 * leading preview followed by a notice that names the saved file, so the LLM
 * can fetch the rest with `Read({ file_path, offset, limit })`.
 *
 * Per the original author's note, this replaced an older hard cut at 10k/15k
 * characters that dropped the body of long pages.
 *
 * @param ctx Only `workspacePath` is used; the log directory is created beneath it.
 * @param url Page URL; together with `label` it seeds the 8-hex-digit file hash.
 * @param text Full text to return or persist.
 * @param label Optional discriminator mixed into the hash.
 * @param previewLimit Maximum inline length; defaults to BROWSE_TEXT_PREVIEW_CHARS.
 * @returns `text` itself, or the preview plus the truncation notice.
 */
async function saveBrowseText(
  ctx: { workspacePath: string },
  url: string,
  text: string,
  label?: string,
  previewLimit: number = BROWSE_TEXT_PREVIEW_CHARS,
): Promise<string> {
  if (text.length <= previewLimit) return text;

  const ts = new Date().toISOString().replace(/[:.]/g, '-');
  const crypto = await import('crypto');
  const hash = crypto.createHash('sha1').update(url + '\n' + (label ?? '')).digest('hex').slice(0, 8);
  const dir = path.join(ctx.workspacePath, 'logs', 'browse');
  const { mkdirSync, writeFileSync } = await import('fs');
  mkdirSync(dir, { recursive: true });
  const filename = `${ts}-${hash}.txt`;
  const filepath = path.join(dir, filename);
  writeFileSync(filepath, text, 'utf-8');
  const relPath = path.posix.join('logs', 'browse', filename);

  // Never end the preview on the first half of a surrogate pair: a lone high
  // surrogate is not valid text and turns into U+FFFD (or worse) downstream.
  let previewEnd = previewLimit;
  if (previewEnd > 0) {
    const lastUnit = text.charCodeAt(previewEnd - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) previewEnd -= 1;
  }
  const preview = text.slice(0, previewEnd);

  return (
    preview +
    `\n... (truncated; full ${text.length} chars saved to ${relPath} —` +
    ` Read({file_path:"${relPath}", offset, limit}) で続きを取得可能)`
  );
}
|
||
|
||
/**
 * Helper for the dumpHtml action: serialise the element matched by `selector`
 * (or `document.body` when no selector is given) to sanitised HTML.
 *
 * Inside the frame the element is cloned, every `script`, `style`, `noscript`
 * and `template` descendant is removed, and any element sitting at nesting
 * level `depth` or deeper that still has child elements gets its content
 * replaced by a `<!-- truncated: … -->` comment.
 *
 * @param frame Target frame (main frame or an iframe) in which to evaluate.
 * @param selector CSS selector of the root element; `undefined` means body.
 * @param depth Nesting level at which descendants are cut off.
 * @returns The full sanitised `outerHTML`, or null when no element matched.
 */
async function dumpElementHtml(
  frame: Frame,
  selector: string | undefined,
  depth: number,
): Promise<string | null> {
  const payload = { selector: selector ?? null, depth };
  return await frame.evaluate((opts: { selector: string | null; depth: number }) => {
    const source = opts.selector ? document.querySelector(opts.selector) : document.body;
    if (!source) return null;

    const copy = source.cloneNode(true) as Element;

    // Drop tags that only add noise to the dump.
    copy.querySelectorAll('script, style, noscript, template').forEach((node) => node.remove());

    // Walk the clone with an explicit stack; subtrees are independent, so the
    // visiting order does not affect the result.
    const pending: Array<{ node: Element; level: number }> = [{ node: copy, level: 0 }];
    for (let item = pending.pop(); item; item = pending.pop()) {
      const { node, level } = item;
      if (level >= opts.depth) {
        const childCount = node.children.length;
        if (childCount > 0) {
          node.innerHTML = `<!-- truncated: ${childCount} children at depth ${level} -->`;
        }
        continue;
      }
      for (const kid of Array.from(node.children)) {
        pending.push({ node: kid, level: level + 1 });
      }
    }

    return copy.outerHTML;
  }, payload);
}
|
||
|
||
/**
 * Resolve a snapshot ref to its (frame, selector) pair using the table stored
 * in `_pageRefs` for `page`. Returns null when the page has no table or the
 * ref is not in it.
 */
function resolveRef(page: Page, ref: string): RefTarget | null {
  const refsForPage = _pageRefs.get(page);
  if (!refsForPage) return null;
  return refsForPage.get(ref) ?? null;
}
|
||
|
||
/**
 * Escape `s` so it can be embedded between double quotes in a CSS attribute
 * selector (e.g. `iframe[name="…"]`).
 *
 * Follows the CSSOM "serialize a string" rules:
 * - `"` and `\` are prefixed with a backslash;
 * - U+0000 becomes U+FFFD;
 * - other control characters (U+0001–U+001F, U+007F) are written as a hex
 *   escape followed by a space, because a raw newline inside a quoted CSS
 *   string makes the whole selector invalid.
 *
 * @param s Raw attribute value.
 * @returns The escaped value, without the surrounding quotes.
 */
function cssEscapeAttr(s: string): string {
  return s.replace(/["\\\u0000-\u001f\u007f]/g, (ch) => {
    if (ch === '"' || ch === '\\') return '\\' + ch;
    if (ch === '\u0000') return '\ufffd';
    return '\\' + ch.charCodeAt(0).toString(16) + ' ';
  });
}
|
||
|
||
/**
 * Build the iframe traversal chain from the main frame down to `targetFrame`.
 * Returns [] when the target IS the main frame.
 *
 * For each iframe boundary the first of `id`, `name`, `src` whose attribute
 * selector matches exactly one element in the parent frame is used. When none
 * is unique, or when anything in the lookup throws (e.g. the frame detached
 * mid-way), a positional entry `{ selector: 'iframe', index: N }` is used
 * instead, where N is the frame's position in `parent.childFrames()`.
 *
 * The element handle obtained from `frameElement()` is disposed once its
 * attributes have been read, so repeated recordings do not accumulate handles.
 *
 * @param targetFrame Frame whose ancestry should be described.
 * @returns Chain entries ordered from the outermost iframe to the target.
 */
async function buildFrameChain(targetFrame: Frame): Promise<FrameChainEntry[]> {
  const chain: FrameChainEntry[] = [];
  const mainFrame = targetFrame.page().mainFrame();

  // Positional fallback shared by the "no unique attribute" and error paths.
  const positionalEntry = (parent: Frame, child: Frame): FrameChainEntry => {
    const idx = parent.childFrames().indexOf(child);
    return { selector: 'iframe', index: idx >= 0 ? idx : 0 };
  };

  let current: Frame | null = targetFrame;
  while (current && current !== mainFrame) {
    const parent: Frame | null = current.parentFrame();
    if (!parent) break;

    let entry: FrameChainEntry;
    try {
      const el = await current.frameElement();
      try {
        const id = await el.getAttribute('id').catch(() => null);
        const name = await el.getAttribute('name').catch(() => null);
        const src = await el.getAttribute('src').catch(() => null);

        const candidates: string[] = [];
        if (id) candidates.push(`iframe[id="${cssEscapeAttr(id)}"]`);
        if (name) candidates.push(`iframe[name="${cssEscapeAttr(name)}"]`);
        if (src) candidates.push(`iframe[src="${cssEscapeAttr(src)}"]`);

        let chosen: string | null = null;
        for (const sel of candidates) {
          const count = await parent.locator(sel).count().catch(() => 0);
          if (count === 1) {
            chosen = sel;
            break;
          }
        }

        entry = chosen ? { selector: chosen } : positionalEntry(parent, current);
      } finally {
        // Best-effort release; a failure here must not discard the entry.
        try {
          await el.dispose();
        } catch {
          /* handle already gone */
        }
      }
    } catch {
      entry = positionalEntry(parent, current);
    }

    chain.unshift(entry);
    current = parent;
  }
  return chain;
}
|
||
|
||
/**
 * Frame chain for `frame`: an empty array when it is the page's main frame,
 * otherwise whatever buildFrameChain computes for it.
 * Exported for testing.
 */
export async function captureFrameChain(frame: Frame): Promise<FrameChainEntry[]> {
  const isMainFrame = frame.page().mainFrame() === frame;
  return isMainFrame ? [] : buildFrameChain(frame);
}
|
||
|
||
/**
 * Call when a job finishes. Closes only the BrowserContext registered for
 * that job's workspace path and removes it from `_jobContexts`; errors from
 * `close()` are ignored. Does nothing when no context is registered.
 */
export async function cleanupJobContext(workspacePath: string): Promise<void> {
  const jobContext = _jobContexts.get(workspacePath);
  if (!jobContext) return;

  await jobContext.close().catch(() => {});
  _jobContexts.delete(workspacePath);
  logger.debug(`[browser] cleaned up job context for ${workspacePath}`);
}
|
||
|
||
/**
 * Install a catch-all route handler on `page` that blocks requests to private
 * addresses (SSRF protection).
 *
 * Decision order per request:
 * 1. Unparseable URL → abort.
 * 2. `file:` → allowed only when normalizeFileUrlForWorkspace accepts it; the
 *    request continues with the normalised URL.
 * 3. `data:` / `blob:` → continue.
 * 4. Host accepted by isHostAllowed → continue without further checks.
 * 5. Literal `localhost` → abort.
 * 6. Otherwise the host is resolved and the request is aborted when ANY of
 *    the returned addresses is private, or when resolution fails.
 *
 * Every address is checked rather than just the first one, because the
 * browser resolves the name on its own and may connect to any of them.
 *
 * @param page Page to protect.
 * @param allowedHosts Hosts exempt from the private-address check.
 * @param workspacePath Workspace root used to validate `file:` URLs.
 */
async function setupRouteInterception(page: Page, allowedHosts: string[], workspacePath: string): Promise<void> {
  await page.route('**/*', async (route) => {
    const reqUrl = route.request().url();
    let parsed: URL;
    try {
      parsed = new URL(reqUrl);
    } catch {
      await route.abort('blockedbyclient');
      return;
    }
    const hostname = parsed.hostname;

    if (parsed.protocol === 'file:') {
      const normalized = normalizeFileUrlForWorkspace(reqUrl, workspacePath);
      if ('error' in normalized) {
        logger.warn(`[browser] blocked file URL outside workspace: ${reqUrl}`);
        await route.abort('blockedbyclient');
        return;
      }
      await route.continue({ url: normalized.url });
      return;
    }

    if (parsed.protocol === 'data:' || parsed.protocol === 'blob:') {
      await route.continue();
      return;
    }

    // Skip check for allowed hosts
    if (isHostAllowed(hostname, allowedHosts)) {
      await route.continue();
      return;
    }

    // Block localhost explicitly
    if (hostname === 'localhost') {
      logger.warn(`[browser] SSRF blocked navigation to localhost: ${reqUrl}`);
      await route.abort('blockedbyclient');
      return;
    }

    // DNS resolve and check every returned address for private ranges
    try {
      const resolved = await dns.promises.lookup(hostname, { all: true });
      if (resolved.length === 0) {
        // Nothing to vet: fail closed, same as a lookup error.
        await route.abort('blockedbyclient');
        return;
      }
      const privateHit = resolved.find(
        ({ address }) => isPrivateIPv4(address) || isPrivateIPv6(address),
      );
      if (privateHit) {
        logger.warn(`[browser] SSRF blocked: ${hostname} -> ${privateHit.address}`);
        await route.abort('blockedbyclient');
        return;
      }
    } catch {
      // DNS failure for network URLs: block the request.
      await route.abort('blockedbyclient');
      return;
    }

    await route.continue();
  });
}
|
||
|
||
// --- BrowseWeb implementation ---
|
||
|
||
/**
 * Entry point of the BrowseWeb tool.
 *
 * Requires `url` or a non-empty `actions` list. When `recordTo` is given and
 * the context carries both a task id and a user id, the recorder is enabled
 * for the task unless it already has a destination (so a second call does not
 * reset an active buffer). A non-empty `actions` list is dispatched to
 * executeActions; otherwise the call goes to executeSimple.
 *
 * @param input Raw tool arguments (`url`, `actions`, `recordTo`, …).
 * @param ctx Tool execution context.
 * @returns The tool result of the selected mode, or an error result.
 */
async function executeBrowseWeb(
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult> {
  const url = input['url'] as string | undefined;
  const actions = input['actions'] as BrowseWebAction[] | undefined;
  const recordTo = input['recordTo'] as string | undefined;

  const nothingRequested = !url && (!actions || actions.length === 0);
  if (nothingRequested) {
    return { output: 'BrowseWeb error: url または actions のいずれかが必要です', isError: true };
  }

  // Idempotent enable: only start recording when the task has no destination yet.
  if (recordTo && ctx.taskId && ctx.userId && !recorder.recordTo(ctx.taskId)) {
    recorder.enable(ctx.taskId, recordTo);
    logger.debug(`[BrowseWeb] recorder enabled taskId=${ctx.taskId} recordTo=${recordTo}`);
  }

  // Action mode when actions were supplied, basic mode otherwise.
  return actions && actions.length > 0
    ? executeActions(actions, ctx, recordTo)
    : executeSimple(input, ctx);
}
|
||
|
||
/**
 * Basic BrowseWeb mode: open one URL and return its content.
 *
 * Steps: validate the URL (workspace file-URL normalisation, then the SSRF
 * check), navigate, optionally wait for `waitFor`, run the auth-expiry check,
 * then extract content — either the text of `extractSelector` or a
 * ref-annotated snapshot of the whole page. Long content goes through
 * saveBrowseText. An optional full-page screenshot is written under
 * `output/`, and downloads reported by drainDownloads are appended.
 *
 * @param input Raw tool arguments: `url`, `waitFor`, `extractSelector`,
 *   `screenshot`, `timeout`.
 * @param ctx Tool execution context.
 * @returns The page content, or an error result (`BrowseWeb error: …` /
 *   `AUTH_SESSION_EXPIRED: …`).
 */
async function executeSimple(
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult> {
  const url = input['url'] as string;
  const waitFor = input['waitFor'] as string | undefined;
  const extractSelector = input['extractSelector'] as string | undefined;
  const screenshotFile = input['screenshot'] as string | undefined;
  const requestedTimeout = input['timeout'];
  const pageTimeout =
    typeof requestedTimeout === 'number'
      ? requestedTimeout
      : (ctx.toolsConfig?.browserPageTimeout ?? 60000);
  const allowedHosts = ctx.toolsConfig?.webfetchAllowedHosts ?? [];

  const fail = (detail: unknown): ToolResult => ({
    output: `BrowseWeb error: ${detail}`,
    isError: true,
  });

  // Pre-navigation SSRF check
  const target = normalizeFileUrlForWorkspace(url, ctx.workspacePath);
  if ('error' in target) return fail(target.error);

  const blocked = await ssrfCheck(target.url, allowedHosts, ctx.workspacePath);
  if (blocked) return fail(blocked);

  try {
    const page = await getJobPage(ctx, allowedHosts, pageTimeout);
    await page.goto(target.url, { waitUntil: 'load', timeout: pageTimeout });
    if (waitFor) {
      await page.waitForSelector(waitFor, { timeout: pageTimeout });
    }

    const expiredReason = await checkAuthExpiry(page, ctx);
    if (expiredReason) {
      return { output: `AUTH_SESSION_EXPIRED: ${expiredReason}`, isError: true };
    }

    let content: string;
    if (!extractSelector) {
      // Whole-page snapshot with ref annotations for interactive elements.
      const snapshot = await snapshotPage(page);
      content = await saveBrowseText(ctx, page.url(), snapshot, 'snapshot');
    } else {
      const handle = await page.$(extractSelector);
      if (handle) {
        const extracted = htmlToText(await handle.innerHTML());
        content = await saveBrowseText(ctx, page.url(), extracted, `selector:${extractSelector}`);
      } else {
        content = `(selector "${extractSelector}" not found on page)`;
      }
    }

    // Optional screenshot; a failure is reported inline, not thrown.
    if (screenshotFile) {
      try {
        const savePath = resolveOutputPathWithin(
          ctx.workspacePath,
          path.join('output', screenshotFile),
          ['output'],
        );
        const fsModule = await import('fs');
        fsModule.mkdirSync(path.dirname(savePath), { recursive: true });
        await page.screenshot({ path: savePath, fullPage: true });
        content += `\n\n[Screenshot saved to output/${screenshotFile}]`;
      } catch (err) {
        content += `\n\n[Screenshot error: ${(err as Error).message}]`;
      }
    }

    // Report file downloads that happened while the page was open.
    const dlSummary = formatDownloadLines(await drainDownloads(page));
    if (dlSummary) content += `\n\n${dlSummary}`;

    return { output: content, isError: false };
  } catch (err) {
    const msg = (err as Error).message ?? String(err);
    logger.warn(`[BrowseWeb] error: ${msg}`);
    return fail(msg);
  }
}
|
||
|
||
/**
 * Action mode of BrowseWeb: run `actions` in order on the job's page and
 * return one result line per action.
 *
 * Supported types: `goto`, `click`, `fill`, `screenshot`, `getText`, `wait`,
 * `dumpHtml`; anything else yields an "unknown action type" line. Recoverable
 * problems (missing url/selector, unknown ref, SSRF block, selector not
 * found) are reported in the output and the loop continues. A thrown error
 * aborts the run with `BrowseWeb error: …`. A `goto` that lands on an expired
 * auth session returns `AUTH_SESSION_EXPIRED` immediately.
 *
 * For `click`, `fill` and `dumpHtml`, an explicit `selector` targets the main
 * frame and wins over `ref`; `ref` is only resolved (possibly into an iframe)
 * when no selector is given.
 *
 * Successful actions are passed to the recorder when recording is active for
 * the task; recorder failures never fail the action.
 *
 * @param actions Actions to execute, in order.
 * @param ctx Tool execution context.
 * @param recordTo Recording destination passed to this call, if any.
 * @returns Joined result lines, or an error result.
 */
async function executeActions(
  actions: BrowseWebAction[],
  ctx: ToolContext,
  recordTo?: string,
): Promise<ToolResult> {
  const actionTimeout = ctx.toolsConfig?.browserActionTimeout ?? 30000;
  const allowedHosts = ctx.toolsConfig?.webfetchAllowedHosts ?? [];

  // Record a successful action. Errors in recording must never fail the BrowseWeb action.
  const tryRecord = (entry: Omit<RecordedAction, 'ts'>): void => {
    if (!ctx.taskId || !recordTo) return;
    if (!recorder.recordTo(ctx.taskId)) return;
    try {
      recorder.record(ctx.taskId, entry);
      logger.debug(`[BrowseWeb] recorded action type=${entry.type} taskId=${ctx.taskId}`);
    } catch (err) {
      logger.debug(`[BrowseWeb] recorder.record failed (ignored): ${(err as Error).message}`);
    }
  };

  // True while the recorder has a destination for this task. Used to skip the
  // extra page round-trips that only recording needs.
  const recordingActive = (): boolean => Boolean(ctx.taskId && recorder.recordTo(ctx.taskId));

  try {
    const page = await getJobPage(ctx, allowedHosts, actionTimeout);
    const results: string[] = [];

    // Pick the frame/selector an action operates on. Returns null only when a
    // ref had to be resolved and is not in the current snapshot table.
    const locateTarget = (
      action: BrowseWebAction,
    ): { frame: Frame; selector: string | undefined } | null => {
      const mainFrame = page.mainFrame();
      if (action.selector || !action.ref) {
        return { frame: mainFrame, selector: action.selector };
      }
      const hit = resolveRef(page, action.ref);
      return hit ? { frame: hit.frame, selector: hit.selector } : null;
    };

    // DOM-derived selector path for recording; falls back to the raw selector
    // when the element can no longer be inspected (e.g. it detached).
    const stableSelector = async (frame: Frame, selector: string): Promise<string> => {
      try {
        return await buildSelectorPath(frame.locator(selector).first());
      } catch (_e) {
        return selector;
      }
    };

    for (const action of actions) {
      switch (action.type) {
        case 'goto': {
          const gotoUrl = action.url;
          if (!gotoUrl) {
            results.push('[goto] error: url is required');
            break;
          }
          const destination = normalizeFileUrlForWorkspace(gotoUrl, ctx.workspacePath);
          if ('error' in destination) {
            results.push(`[goto] error: ${destination.error}`);
            break;
          }
          const ssrfError = await ssrfCheck(destination.url, allowedHosts, ctx.workspacePath);
          if (ssrfError) {
            results.push(`[goto] SSRF blocked: ${ssrfError}`);
            break;
          }
          await page.goto(destination.url, { waitUntil: 'load', timeout: actionTimeout });
          results.push(`[goto] navigated to ${gotoUrl}`);
          tryRecord({ type: 'goto', url: gotoUrl, frameChain: [] });

          const expiredReason = await checkAuthExpiry(page, ctx);
          if (expiredReason) {
            return {
              output: `AUTH_SESSION_EXPIRED: ${expiredReason}\n${results.join('\n')}`,
              isError: true,
            };
          }
          break;
        }

        case 'click': {
          const target = locateTarget(action);
          if (!target) {
            results.push(`[click] ref "${action.ref}" not found in current snapshot. Get a fresh snapshot with getText first.`);
            break;
          }
          const { frame, selector } = target;
          if (!selector) {
            results.push('[click] error: selector または ref が必要です');
            break;
          }
          await frame.click(selector, { timeout: actionTimeout });
          results.push(`[click] clicked ${action.ref ?? selector}`);
          // Resolving the selector path costs an evaluate round-trip, so it is
          // only done while recording.
          if (recordingActive()) {
            const resolvedSelector = await stableSelector(frame, selector);
            const frameChain = await captureFrameChain(frame);
            tryRecord({ type: 'click', selector: resolvedSelector, originalRef: action.ref, frameChain });
          }
          break;
        }

        case 'fill': {
          const target = locateTarget(action);
          if (!target) {
            results.push(`[fill] ref "${action.ref}" not found in current snapshot.`);
            break;
          }
          const { frame, selector } = target;
          if (!selector) {
            results.push('[fill] error: selector または ref が必要です');
            break;
          }
          await frame.fill(selector, action.value ?? '', { timeout: actionTimeout });
          results.push(`[fill] filled ${action.ref ?? selector}`);
          // Only resolve the DOM-based selector path when recording is active.
          if (recordingActive()) {
            const resolvedSelector = await stableSelector(frame, selector);
            const frameChain = await captureFrameChain(frame);
            tryRecord({ type: 'fill', selector: resolvedSelector, originalRef: action.ref, value: action.value, frameChain });
          }
          break;
        }

        case 'screenshot': {
          const filename = action.value ?? 'screenshot.png';
          let savePath: string;
          try {
            savePath = resolveOutputPathWithin(ctx.workspacePath, path.join('output', filename), ['output']);
          } catch (err) {
            results.push(`[screenshot] error: ${(err as Error).message}`);
            break;
          }
          const fsModule = await import('fs');
          fsModule.mkdirSync(path.dirname(savePath), { recursive: true });
          await page.screenshot({ path: savePath, fullPage: true });
          results.push(`[screenshot] saved to output/${filename}`);
          tryRecord({ type: 'screenshot', value: filename, frameChain: [] });
          break;
        }

        case 'getText': {
          if (!action.selector) {
            // No selector: ref-annotated snapshot of the whole page.
            const snapshot = await snapshotPage(page);
            const text = await saveBrowseText(ctx, page.url(), snapshot, 'snapshot');
            results.push(`[getText] ${text}`);
            tryRecord({ type: 'getText', frameChain: [] });
            break;
          }
          const el = await page.$(action.selector);
          if (!el) {
            results.push(`[getText] selector "${action.selector}" not found`);
            break;
          }
          const innerText = await el.innerText();
          const text = await saveBrowseText(ctx, page.url(), innerText, `selector:${action.selector}`);
          results.push(`[getText] ${text}`);
          tryRecord({ type: 'getText', selector: action.selector, frameChain: [] });
          break;
        }

        case 'wait': {
          const waitMs = Math.min(action.ms ?? 1000, 30000); // cap at 30s
          await page.waitForTimeout(waitMs);
          results.push(`[wait] waited ${waitMs}ms`);
          tryRecord({ type: 'wait', ms: waitMs, frameChain: [] });
          break;
        }

        case 'dumpHtml': {
          const target = locateTarget(action);
          if (!target) {
            results.push(`[dumpHtml] ref "${action.ref}" not found in current snapshot.`);
            break;
          }
          const { frame, selector } = target;
          const depth = Math.max(0, Math.min(action.depth ?? 3, 10));
          const fullHtml = await dumpElementHtml(frame, selector, depth);
          if (fullHtml === null) {
            results.push(`[dumpHtml] selector "${selector}" not found`);
            break;
          }
          const previewLimit = Math.max(500, Math.min(action.maxChars ?? BROWSE_TEXT_PREVIEW_CHARS, 50_000));
          const label = action.ref ?? selector ?? 'body';
          const text = await saveBrowseText(ctx, page.url(), fullHtml, `dumpHtml:${label}`, previewLimit);
          results.push(`[dumpHtml ${label}] ${text}`);
          if (recordingActive()) {
            const frameChain = await captureFrameChain(frame);
            tryRecord({ type: 'dumpHtml', selector: selector ?? undefined, originalRef: action.ref, frameChain });
          }
          break;
        }

        default:
          results.push(`[${action.type}] unknown action type`);
      }
    }

    // Report file downloads that happened while the actions ran.
    const dlSummary = formatDownloadLines(await drainDownloads(page));
    if (dlSummary) results.push(dlSummary);

    return { output: results.join('\n'), isError: false };
  } catch (err) {
    const msg = (err as Error).message ?? String(err);
    logger.warn(`[BrowseWeb] action error: ${msg}`);
    return { output: `BrowseWeb error: ${msg}`, isError: true };
  }
}
|
||
|
||
// --- InteractiveBrowse / BrowseWithSession tool definitions ---
|
||
|
||
/**
 * Tool definition for InteractiveBrowse: hands browser operation over to the
 * user through noVNC. `url` and `reason` are required; `sessionId` is
 * optional. The description is assembled from its three paragraphs joined by
 * newlines.
 */
const INTERACTIVEBROWSE_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'InteractiveBrowse',
    description: [
      'ブラウザ操作をユーザーに委譲する。指定URLでブラウザ画面を起動し、noVNC経由でユーザーがその画面を直接見て手動操作できる。完了するとユーザーが release ボタンを押し、ジョブが再開して agent が BrowseWithSession で続きを引き継げる(Cookie / DOM 状態保持)。',
      '使うべき場面: (1) ログイン/2FA/SSO 同意などパスワード入力を伴う認証、(2) CAPTCHA や bot 検証、(3) BrowseWeb の click が空振りし続ける複雑な UI(ドラッグ&ドロップ、canvas/WebGL、closed shadow DOM)、(4) 画面状態を目視確認したいとき。',
      '呼び出すとジョブは waiting_human に遷移し、ユーザー操作完了まで停止する。失敗 (noVNC 未構成) の場合はエラーが返るので BrowseWeb で続けるか諦めること。詳細は ReadToolDoc({ name: "InteractiveBrowse" }) で取得可能。',
    ].join('\n'),
    parameters: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: '開くURL',
        },
        reason: {
          type: 'string',
          description: 'ユーザーに操作を依頼する理由(例: ログインが必要です)',
        },
        sessionId: {
          type: 'string',
          description: '既存セッションID(省略時は新規作成)',
        },
      },
      required: ['url', 'reason'],
    },
  },
};
|
||
|
||
/**
 * Tool definition for BrowseWithSession: lets the agent continue in a session
 * the user just operated via InteractiveBrowse. `sessionId` and `url` are
 * required; `action` is one of getText / screenshot / click / fill. The
 * description is assembled from its two paragraphs joined by a newline.
 */
const BROWSEWITHSESSION_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'BrowseWithSession',
    description: [
      'InteractiveBrowse でユーザーが手動操作した直後のセッションを使って agent が後続操作を続けるためのツール。Cookie・ログイン状態・DOM がそのまま引き継がれる。',
      'sessionId は InteractiveBrowse の戻り値から取得する。actions(getText/screenshot/click/fill)と selector/value で操作できる。詳細は ReadToolDoc({ name: "BrowseWithSession" }) で取得可能。',
    ].join('\n'),
    parameters: {
      type: 'object',
      properties: {
        sessionId: {
          type: 'string',
          description: 'セッションID',
        },
        url: {
          type: 'string',
          description: '移動先URL',
        },
        action: {
          type: 'string',
          enum: ['getText', 'screenshot', 'click', 'fill'],
          description: '実行するアクション(省略時はgetText)',
        },
        selector: {
          type: 'string',
          description: 'CSSセレクタ(click/fill/getText で使用)',
        },
        value: {
          type: 'string',
          description: '入力値(fill で使用)',
        },
      },
      required: ['sessionId', 'url'],
    },
  },
};
|
||
|
||
// --- InteractiveBrowse implementation ---
|
||
|
||
/**
 * InteractiveBrowse implementation: open `url` in a browser session and hand
 * control to the user.
 *
 * Uses the session named by `sessionId` when given (error if unknown);
 * otherwise gets or creates the session for the current task, which requires
 * `ctx.taskId`. The session is touched, the URL is loaded in its first page
 * (or a new page) when the session has a browser context, and the session
 * state becomes `user_interactive`.
 *
 * @param input Raw tool arguments: `url`, `reason`, optional `sessionId`.
 * @param ctx Tool execution context.
 * @returns On success, a JSON string with `action: 'waiting_human'`,
 *   `waitReason`, `sessionId`, `novncPath` and `reason`; otherwise an
 *   `InteractiveBrowse error: …` result.
 */
async function executeInteractiveBrowse(
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult> {
  const url = input['url'] as string;
  const reason = input['reason'] as string;
  const sessionId = input['sessionId'] as string | undefined;

  const sm = getSessionManager();
  if (!sm) {
    return {
      output: 'InteractiveBrowse error: ブラウザセッション機能が利用できません(Xvfb, x11vnc, websockify が必要です)',
      isError: true,
    };
  }

  try {
    let session: BrowserSession;
    if (!sessionId) {
      // ToolContext.taskId is always filled in by worker → piece-runner, but
      // only for jobs started from a local task. When the subtask root is a
      // gitea issue or similar, no taskId exists and visibility-based
      // authorization is impossible, so the request is refused explicitly.
      if (!ctx.taskId) {
        return {
          output: 'InteractiveBrowse error: このジョブには taskId が紐付いていないため、新規セッションを作れません (ローカルタスク経由で実行してください)',
          isError: true,
        };
      }
      session = await sm.getOrCreateTaskSession(ctx.taskId, ctx.userId);
    } else {
      const known = sm.getSession(sessionId);
      if (!known) {
        return { output: `InteractiveBrowse error: セッション ${sessionId} が見つかりません`, isError: true };
      }
      session = known;
    }
    sm.touchSession(session.id);

    // Navigate to URL
    const browserContext = session.context;
    if (browserContext) {
      const [firstPage] = browserContext.pages();
      const page = firstPage ?? (await browserContext.newPage());
      const timeout = ctx.toolsConfig?.browserPageTimeout ?? 60000;
      await page.goto(url, { waitUntil: 'load', timeout });
    }

    session.state = 'user_interactive';

    const payload = JSON.stringify({
      action: 'waiting_human',
      waitReason: 'browser_login',
      sessionId: session.id,
      novncPath: buildNovncPath(session.id),
      reason,
    });
    return { output: payload, isError: false };
  } catch (err) {
    const msg = (err as Error).message ?? String(err);
    logger.warn(`[InteractiveBrowse] error: ${msg}`);
    return { output: `InteractiveBrowse error: ${msg}`, isError: true };
  }
}
|
||
|
||
// --- BrowseWithSession implementation ---
|
||
|
||
/**
 * Navigates an existing browser session to `url` and performs one action on the page.
 *
 * Supported actions:
 * - `getText` (default): with `selector`, stores the `innerText` of the first
 *   match via `saveBrowseText`; a missing element yields a non-error
 *   "(selector … not found)" message. Without `selector`, the whole page HTML is
 *   converted with `htmlToText` and stored the same way.
 * - `screenshot`: writes a full-page PNG to `<workspace>/output/screenshot-<ms>.png`.
 * - `click`: clicks `selector`, then appends any downloads reported by
 *   `drainDownloads` / `formatDownloadLines`.
 * - `fill`: fills `selector` with `value` (empty string when omitted).
 *
 * @param input Raw tool arguments: `sessionId`, `url`, optional `action`, `selector`, `value`.
 * @param ctx Tool context; reads `workspacePath` and the `toolsConfig` browser timeouts.
 * @returns The action's textual result, or an `isError` result. Errors raised by
 *   navigation or the action are caught and reported rather than thrown.
 */
async function executeBrowseWithSession(
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult> {
  const sessionId = input['sessionId'];
  const url = input['url'];
  const action = (input['action'] as string | undefined) ?? 'getText';
  const selector = input['selector'] as string | undefined;
  const value = input['value'] as string | undefined;

  const sm = getSessionManager();
  if (!sm) {
    return {
      output: 'BrowseWithSession error: ブラウザセッション機能が利用できません',
      isError: true,
    };
  }

  // Both arguments are declared `required` in the tool schema; fail with a
  // precise message instead of looking up / navigating to `undefined`.
  if (typeof sessionId !== 'string' || sessionId === '') {
    return { output: 'BrowseWithSession error: sessionId が必要です', isError: true };
  }
  if (typeof url !== 'string' || url.trim() === '') {
    return { output: 'BrowseWithSession error: url が必要です', isError: true };
  }

  const session = sm.getSession(sessionId);
  if (!session) {
    return { output: `BrowseWithSession error: セッション ${sessionId} が見つかりません`, isError: true };
  }

  if (!session.context) {
    return { output: 'BrowseWithSession error: セッションのコンテキストが無効です', isError: true };
  }

  sm.touchSession(session.id);

  try {
    // Reuse the first open page so the user's state is kept; open one only if none exists.
    const page = session.context.pages()[0] ?? (await session.context.newPage());
    const pageTimeout = ctx.toolsConfig?.browserPageTimeout ?? 60000;
    const actionTimeout = ctx.toolsConfig?.browserActionTimeout ?? 30000;
    setupDownloadHandler(page, ctx.workspacePath);

    await page.goto(url, { waitUntil: 'load', timeout: pageTimeout });

    switch (action) {
      case 'getText': {
        if (selector) {
          const el = await page.$(selector);
          if (el) {
            const fullText = await el.innerText();
            const out = await saveBrowseText(ctx, page.url(), fullText, `selector:${selector}`);
            return { output: out, isError: false };
          }
          return { output: `(selector "${selector}" not found)`, isError: false };
        }
        const html = await page.content();
        const fullText = htmlToText(html);
        const out = await saveBrowseText(ctx, page.url(), fullText, 'page');
        return { output: out, isError: false };
      }
      case 'screenshot': {
        const filename = `screenshot-${Date.now()}.png`;
        const savePath = path.join(ctx.workspacePath, 'output', filename);
        // Uses the module-level `mkdirSync` import; no dynamic `import('fs')` needed.
        mkdirSync(path.dirname(savePath), { recursive: true });
        await page.screenshot({ path: savePath, fullPage: true });
        return { output: `Screenshot saved to output/${filename}`, isError: false };
      }
      case 'click': {
        if (!selector) {
          return { output: 'BrowseWithSession error: click には selector が必要です', isError: true };
        }
        await page.click(selector, { timeout: actionTimeout });
        const downloads = await drainDownloads(page);
        const dlSummary = formatDownloadLines(downloads);
        const msg = dlSummary ? `Clicked ${selector}\n${dlSummary}` : `Clicked ${selector}`;
        return { output: msg, isError: false };
      }
      case 'fill': {
        if (!selector) {
          return { output: 'BrowseWithSession error: fill には selector が必要です', isError: true };
        }
        await page.fill(selector, value ?? '', { timeout: actionTimeout });
        return { output: `Filled ${selector}`, isError: false };
      }
      default:
        return { output: `BrowseWithSession error: unknown action "${action}"`, isError: true };
    }
  } catch (e: unknown) {
    // Narrow instead of casting so a non-Error throwable cannot break the handler.
    const msg = e instanceof Error ? e.message : String(e);
    logger.warn(`[BrowseWithSession] error: ${msg}`);
    return { output: `BrowseWithSession error: ${msg}`, isError: true };
  }
}
|
||
|
||
// --- buildSelectorPath ---

/**
 * Builds a CSS selector path for the element behind `locator`.
 *
 * The path is computed inside the page via `locator.evaluate`, walking from the
 * element towards its ancestors (at most 4 levels up) and stopping at the first
 * node that offers an anchor, in this priority order:
 *   1. `[data-testid="…"]` (simple alphanumeric/underscore/hyphen values only)
 *   2. `#id` when the id passes `isStableId`
 *   3. `tag[name="…"]`
 * Nodes without an anchor contribute `tag`, plus `[type="…"]` for inputs that
 * carry a `type` attribute, plus `:nth-of-type(n)` when the parent has more than
 * one child with the same tag. Segments are joined with ` > `.
 *
 * Used by the recorder to store resolved selectors (not LLM refs) in the action buffer.
 *
 * @param locator Playwright locator resolving to the target element.
 * @returns The selector string produced in the page.
 */
async function buildSelectorPath(locator: Locator): Promise<string> {
  // Everything the callback needs is declared inside it: the function is
  // evaluated in the page, so it cannot close over module-level helpers.
  return await locator.evaluate((el: Element) => {
    const SIMPLE_VALUE = /^[a-zA-Z0-9_-]+$/;
    // More permissive: dots, slashes, colons and brackets are common in
    // Django/Rails form names (user.email, items[0], user:email) and are valid
    // inside a quoted [name="..."] value.
    const NAME_VALUE = /^[a-zA-Z0-9_./:[\]-]+$/;
    // Identifier-shaped ids only (leading letter or underscore, e.g. _root, _app).
    // This rejects ids that would need CSS escaping, such as React's `:r1:`.
    const ID_VALUE = /^[a-zA-Z_][a-zA-Z0-9_-]*$/;

    function strictAttr(node: Element, attr: string): string | null {
      const raw = node.getAttribute(attr);
      return raw && SIMPLE_VALUE.test(raw) ? raw : null;
    }

    function nameAttr(node: Element): string | null {
      const raw = node.getAttribute('name');
      return raw && NAME_VALUE.test(raw) ? raw : null;
    }

    function isStableId(id: string): boolean {
      return ID_VALUE.test(id) && id.length < 60;
    }

    function nthOfTypeFragment(node: Element): string {
      const sameTag = Array.from(node.parentElement?.children ?? []).filter(
        sibling => sibling.tagName === node.tagName,
      );
      // A lone (or parentless) element is already unique under its parent.
      if (sameTag.length <= 1) return '';
      // Emit the index for the FIRST sibling too: a bare `tag` segment would
      // otherwise match every same-tag sibling and make the selector ambiguous.
      return `:nth-of-type(${sameTag.indexOf(node) + 1})`;
    }

    function pathFor(node: Element, depth: number): string {
      const testId = strictAttr(node, 'data-testid');
      if (testId) return `[data-testid="${testId}"]`;
      if (node.id && isStableId(node.id)) return `#${node.id}`;

      const tag = node.tagName.toLowerCase();
      const name = nameAttr(node);
      if (name) return `${tag}[name="${name}"]`;

      // Read the ATTRIBUTE, not the `.type` property: the property reports
      // "text" for an <input> without a type attribute (and for unknown types),
      // but `[type="text"]` only matches when the attribute is actually present.
      const typeAttr = tag === 'input' ? strictAttr(node, 'type') : null;
      const typeFragment = typeAttr ? `[type="${typeAttr}"]` : '';

      const local = `${tag}${typeFragment}${nthOfTypeFragment(node)}`;
      if (depth === 0 || !node.parentElement) return local;
      return `${pathFor(node.parentElement, depth - 1)} > ${local}`;
    }

    return pathFor(el, 4);
  });
}
|
||
|
||
// --- Exports ---
|
||
|
||
// BrowseWeb is always registered.
const baseDefs: Record<string, ToolDef> = { BrowseWeb: BROWSEWEB_DEF };

// The session-backed tools are registered only when SessionManager reports
// that it is available at module load time.
if (SessionManager.isAvailable()) {
  Object.assign(baseDefs, {
    InteractiveBrowse: INTERACTIVEBROWSE_DEF,
    BrowseWithSession: BROWSEWITHSESSION_DEF,
  });
}

/** Tool definitions exported by this module, keyed by tool name. */
export const TOOL_DEFS: Record<string, ToolDef> = baseDefs;
|
||
|
||
/**
 * Dispatches a tool call to the matching implementation in this module.
 *
 * @param name Tool name as requested by the caller.
 * @param input Raw tool arguments, forwarded unchanged.
 * @param ctx Tool context, forwarded unchanged.
 * @returns The implementation's result, or `null` when `name` is not one of
 *   `BrowseWeb`, `InteractiveBrowse` or `BrowseWithSession`.
 */
export async function executeTool(
  name: string,
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult | null> {
  if (name === 'BrowseWeb') {
    return executeBrowseWeb(input, ctx);
  }
  if (name === 'InteractiveBrowse') {
    return executeInteractiveBrowse(input, ctx);
  }
  if (name === 'BrowseWithSession') {
    return executeBrowseWithSession(input, ctx);
  }
  // Unknown name: not handled by this module.
  return null;
}
|