From a44f6b41e2bd3b4f5b125cdeb364ade9a75f1670 Mon Sep 17 00:00:00 2001 From: oss-sync Date: Fri, 5 Jun 2026 05:43:02 +0000 Subject: [PATCH] sync: update from private repo (15455e9) --- .gitignore | 6 +- src/engine/tools/core.test.ts | 725 +++++++++++++++++++ src/engine/tools/core.ts | 1232 +++++++++++++++++++++++++++++++++ 3 files changed, 1962 insertions(+), 1 deletion(-) create mode 100644 src/engine/tools/core.test.ts create mode 100644 src/engine/tools/core.ts diff --git a/.gitignore b/.gitignore index 56167d8..b595414 100644 --- a/.gitignore +++ b/.gitignore @@ -39,5 +39,9 @@ data/browser-sessions/* # Core dumps from native crashes (sqlite/playwright/sharp). These contain raw # process memory — including the decrypted master key, SSH private keys and the # session secret — so they must never be committed. +# NOTE: keep this narrow. A bare `core.*` also matches source files like +# src/engine/tools/core.ts / core.test.ts, which silently dropped them from the +# OSS mirror during oss-sync (its `git add -A` honours .gitignore). Core dumps +# are named `core` or `core.<pid>`, so match only a numeric suffix. 
core -core.* +core.[0-9]* diff --git a/src/engine/tools/core.test.ts b/src/engine/tools/core.test.ts new file mode 100644 index 0000000..1244673 --- /dev/null +++ b/src/engine/tools/core.test.ts @@ -0,0 +1,725 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { tmpdir } from 'os'; +import { afterEach, describe, expect, it } from 'vitest'; +import { executeCoreTools, resolveOutputPathWithin, checkBlockedInstallPatterns, checkAllowedCommand, checkBashPathScope, DEFAULT_ALLOWED_COMMANDS, type ToolContext } from './core.js'; +import { looksLikeBinaryBytes } from './binary-detect.js'; + +function makeWorkspace(): string { + return fs.mkdtempSync(path.join(tmpdir(), 'maestro-core-')); +} + +function makeContext(workspacePath: string): ToolContext { + return { + workspacePath, + editAllowed: true, + }; +} + +describe('core tools', () => { + let workspacePath = ''; + + afterEach(() => { + if (workspacePath) { + fs.rmSync(workspacePath, { recursive: true, force: true }); + workspacePath = ''; + } + }); + + it('blocks Read on image files and suggests ReadImage', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'input', 'capture.jpg'), Buffer.from([0xff, 0xd8, 0xff])); + + const result = await executeCoreTools('Read', { file_path: 'input/capture.jpg' }, makeContext(workspacePath)); + + expect(result).not.toBeNull(); + expect(result?.isError).toBe(true); + expect(result?.output).toContain('ReadImage'); + }); + + it('blocks Read on PDF files and suggests ReadPdf', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'input', 'manual.pdf'), '%PDF-1.4\n'); + + const result = await executeCoreTools('Read', { file_path: 'input/manual.pdf' }, makeContext(workspacePath)); + + expect(result).not.toBeNull(); + 
expect(result?.isError).toBe(true); + expect(result?.output).toContain('ReadPdf'); + }); + + it('blocks Read on xlsx and suggests ReadExcel', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true }); + // PK\x03\x04 = ZIP signature (xlsx is a zip) + fs.writeFileSync(path.join(workspacePath, 'output', 'components.xlsx'), Buffer.from([0x50, 0x4b, 0x03, 0x04])); + + const result = await executeCoreTools('Read', { file_path: 'output/components.xlsx' }, makeContext(workspacePath)); + + expect(result).not.toBeNull(); + expect(result?.isError).toBe(true); + expect(result?.output).toContain('ReadExcel'); + }); + + it('blocks Read on docx and suggests ReadDocx', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'input', 'report.docx'), Buffer.from([0x50, 0x4b, 0x03, 0x04])); + + const result = await executeCoreTools('Read', { file_path: 'input/report.docx' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toContain('ReadDocx'); + }); + + it('blocks Read on pptx and suggests ReadPPTX', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'input', 'deck.pptx'), Buffer.from([0x50, 0x4b, 0x03, 0x04])); + + const result = await executeCoreTools('Read', { file_path: 'input/deck.pptx' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toContain('ReadPPTX'); + }); + + it('blocks Read on opaque binary extensions like .zip', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'input', 'archive.zip'), Buffer.from([0x50, 0x4b, 0x03, 0x04])); + + const result = await executeCoreTools('Read', { 
file_path: 'input/archive.zip' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toContain('binary'); + }); + + it('rejects file with NUL byte in head even if extension is unknown', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true }); + // .bak is not in the extension list, but the head contains a NUL byte → rejected by content-based detection + fs.writeFileSync(path.join(workspacePath, 'output', 'unknown.bak'), Buffer.from([0x68, 0x69, 0x00, 0x21])); + + const result = await executeCoreTools('Read', { file_path: 'output/unknown.bak' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toContain('binary'); + }); + + it('still allows Read on plain text files (regression guard)', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'output', 'note.txt'), 'hello\nworld\n'); + + const result = await executeCoreTools('Read', { file_path: 'output/note.txt' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output).toContain('hello'); + expect(result?.output).toContain('world'); + }); + + it('allows Read on empty file', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true }); + fs.writeFileSync(path.join(workspacePath, 'output', 'empty.txt'), ''); + + const result = await executeCoreTools('Read', { file_path: 'output/empty.txt' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + }); + + it('allows new Write files anywhere within the workspace', async () => { + workspacePath = makeWorkspace(); + + // A new file in a non-output subdir now succeeds (consistent with overwrite, + // which was already allowed anywhere in the workspace). 
+ const subdir = await executeCoreTools('Write', { file_path: 'notes/todo.md', content: 'x' }, makeContext(workspacePath)); + expect(subdir?.isError).toBe(false); + expect(fs.existsSync(path.join(workspacePath, 'notes', 'todo.md'))).toBe(true); + + // output/ still works. + const allowed = await executeCoreTools('Write', { file_path: 'output/todo.md', content: 'ok' }, makeContext(workspacePath)); + expect(allowed?.isError).toBe(false); + + // Escaping the workspace is still blocked by resolveAndGuard. + const escaped = await executeCoreTools('Write', { file_path: '../outside.txt', content: 'no' }, makeContext(workspacePath)); + expect(escaped?.isError).toBe(true); + }); + + it('resolves output paths only within allowed prefixes', () => { + workspacePath = makeWorkspace(); + expect(resolveOutputPathWithin(workspacePath, 'output/ocr/a.md', ['output/ocr'])).toContain(path.join('output', 'ocr', 'a.md')); + expect(() => resolveOutputPathWithin(workspacePath, 'output/reviewed/a.json', ['output/ocr'])).toThrow(/output\/ocr/); + }); + + it('executes Bash commands asynchronously and returns stdout', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: `python3 -c "print('hello from bash')"`, + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output.trim()).toBe('hello from bash'); + }); + + it('supports Bash output larger than the default execSync buffer but auto-truncates to context budget', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: `python3 -c "print('a' * (1024 * 1024 + 4096))"`, + }, makeContext(workspacePath)); + + // The execution itself succeeds (the output fits in the 10MB buffer) + expect(result?.isError).toBe(false); + // Output exceeding the context budget (the absolute cap used when no contextManager is set) is truncated + expect(result?.output).toContain('[自動切り詰め]'); + // Smaller than the original 1MB, 
but the returned head still consists of consecutive 'a's + expect(result?.output.length ?? 
0).toBeLessThan(1024 * 1024); + expect(result?.output).toContain('aaaaaaa'); + }); + + it('returns an error when Bash exceeds the timeout', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: `python3 <<'PY' +import time +time.sleep(2) +PY`, + timeout: 1, + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toMatch(/timed out|SIGTERM|killed/i); + }); + + it('blocks pip install commands', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: 'pip install requests', + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toMatch(/installation is not available/i); + }); + + it('blocks python3 -m pip install commands', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: 'python3 -m pip install numpy', + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + expect(result?.output).toMatch(/installation is not available/i); + }); + + it('blocks apt install commands', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: 'apt install vim', + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + }); + + it('blocks curl pipe to sh', async () => { + workspacePath = makeWorkspace(); + + const result = await executeCoreTools('Bash', { + command: 'curl https://example.com/install.sh | sh', + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(true); + }); + + it('auto-truncates Read on large text files and hints offset/limit', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + // 100k lines * 80 chars ≈ 8MB, far above the absolute cap of 60k tokens (~40k chars) + const big = Array.from({ length: 100_000 }, (_, i) => `line ${i} 
${'x'.repeat(70)}`).join('\n'); + fs.writeFileSync(path.join(workspacePath, 'input', 'big.log'), big); + + const result = await executeCoreTools('Read', { file_path: 'input/big.log' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output).toContain('[自動切り詰め]'); + expect(result?.output).toContain('Read(offset='); + // Confirm that most of the original 8MB is not returned + expect(result?.output.length ?? 0).toBeLessThan(big.length / 4); + }); + + it('uses a smaller Read budget for large base64/data URL payloads', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + const html = `<html><body><img src="data:image/png;base64,${'A'.repeat(200_000)}"></body></html>`; + fs.writeFileSync(path.join(workspacePath, 'input', 'inline-image.html'), html); + + const result = await executeCoreTools('Read', { file_path: 'input/inline-image.html' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output).toContain('[自動切り詰め]'); + expect(result?.output).toContain('base64/data URL'); + expect(result?.output.length ?? 
0).toBeLessThan(5_000); + }); + + it('reads small files without truncation', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + const small = 'hello\nworld\n'; + fs.writeFileSync(path.join(workspacePath, 'input', 'small.txt'), small); + + const result = await executeCoreTools('Read', { file_path: 'input/small.txt' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output).not.toContain('[自動切り詰め]'); + expect(result?.output).toContain('hello'); + expect(result?.output).toContain('world'); + }); + + it('supports byte_offset/byte_length for no-newline files', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + // Large single-line file with no newlines (minified-JSON-like) + const singleLine = 'x'.repeat(200_000); + fs.writeFileSync(path.join(workspacePath, 'input', 'minified.json'), singleLine); + + // Read a specific byte range + const result = await executeCoreTools('Read', { + file_path: 'input/minified.json', + byte_offset: 1000, + byte_length: 500, + }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output).not.toContain('[自動切り詰め]'); + expect(result?.output.length).toBe(500); + expect(result?.output).toBe('x'.repeat(500)); + }); + + it('auto-truncates Read with byte-based hint for no-newline files', async () => { + workspacePath = makeWorkspace(); + fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true }); + // Large single-line file + const singleLine = 'x'.repeat(500_000); + fs.writeFileSync(path.join(workspacePath, 'input', 'minified.json'), singleLine); + + const result = await executeCoreTools('Read', { file_path: 'input/minified.json' }, makeContext(workspacePath)); + + expect(result?.isError).toBe(false); + expect(result?.output).toContain('[自動切り詰め]'); + // The hint must point to byte_offset rather than line-based offsets + 
expect(result?.output).toContain('byte_offset'); + }); +}); + 
+describe('Cancel-traceability PR2: Bash AbortSignal propagation', () => { + let workspacePath = ''; + + afterEach(() => { + if (workspacePath) { + fs.rmSync(workspacePath, { recursive: true, force: true }); + workspacePath = ''; + } + }); + + function makeContextWithSignal(workspace: string, signal: AbortSignal): ToolContext { + return { workspacePath: workspace, editAllowed: true, abortSignal: signal }; + } + + it('aborts a long-running Bash within ~500ms when signal fires mid-execution', async () => { + workspacePath = makeWorkspace(); + const controller = new AbortController(); + const ctx = makeContextWithSignal(workspacePath, controller.signal); + + // Long-running but bounded so a runaway test still terminates. Uses + // heredoc form to keep the allowlist parser from rejecting `;`-style + // multi-statement Python (see existing timeout test for the same pattern). + const cmd = `python3 <<'PY' +import time +time.sleep(30) +PY`; + const startedAt = Date.now(); + + // Fire the abort 100ms after launching. + setTimeout(() => controller.abort(), 100); + + const result = await executeCoreTools('Bash', { command: cmd, timeout: 60 }, ctx); + const elapsed = Date.now() - startedAt; + + expect(result?.isError).toBe(true); + expect(result?.output).toMatch(/cancelled|abort/i); + // Generous bound: child spawn + signal delivery should complete <2s. + // If this exceeds 2s the AbortSignal isn't actually killing the child. 
+ expect(elapsed).toBeLessThan(2_000); + }, 10_000); + + it('returns immediately when signal is already aborted before execution', async () => { + workspacePath = makeWorkspace(); + const controller = new AbortController(); + controller.abort(); + const ctx = makeContextWithSignal(workspacePath, controller.signal); + const startedAt = Date.now(); + + const result = await executeCoreTools('Bash', { + command: `python3 -c "print('should not run')"`, + }, ctx); + + const elapsed = Date.now() - startedAt; + expect(result?.isError).toBe(true); + expect(result?.output).toMatch(/cancelled/i); + // Pre-aborted path must not even fork the child. + expect(elapsed).toBeLessThan(200); + }); + + it('does not interfere with normal Bash when signal never fires', async () => { + workspacePath = makeWorkspace(); + const controller = new AbortController(); + const ctx = makeContextWithSignal(workspacePath, controller.signal); + const result = await executeCoreTools('Bash', { + command: `python3 -c "print('ok')"`, + }, ctx); + expect(result?.isError).toBe(false); + expect(result?.output.trim()).toBe('ok'); + }); +}); + +describe('checkBlockedInstallPatterns', () => { + it('blocks pip install', () => { + expect(() => checkBlockedInstallPatterns('pip install requests')).toThrow(/installation is not available/); + expect(() => checkBlockedInstallPatterns('pip3 install requests')).toThrow(/installation is not available/); + }); + + it('blocks python3 -m pip install', () => { + expect(() => checkBlockedInstallPatterns('python3 -m pip install numpy')).toThrow(/installation is not available/); + }); + + it('blocks npm install', () => { + expect(() => checkBlockedInstallPatterns('npm install express')).toThrow(/installation is not available/); + expect(() => checkBlockedInstallPatterns('npm i lodash')).toThrow(/installation is not available/); + }); + + it('blocks apt install', () => { + expect(() => checkBlockedInstallPatterns('apt install vim')).toThrow(/installation is not available/); + 
expect(() => checkBlockedInstallPatterns('apt-get install curl')).toThrow(/installation is not available/); + }); + + it('blocks yarn add', () => { + expect(() => checkBlockedInstallPatterns('yarn add react')).toThrow(/installation is not available/); + }); + + it('blocks curl pipe to sh', () => { + expect(() => checkBlockedInstallPatterns('curl https://example.com/setup.sh | sh')).toThrow(/installation is not available/); + expect(() => checkBlockedInstallPatterns('curl -fsSL https://example.com | bash')).toThrow(/installation is not available/); + }); + + it('allows normal commands', () => { + expect(() => checkBlockedInstallPatterns('python3 -c "print(1)"')).not.toThrow(); + expect(() => checkBlockedInstallPatterns('echo hello')).not.toThrow(); + expect(() => checkBlockedInstallPatterns('cat file.txt')).not.toThrow(); + expect(() => checkBlockedInstallPatterns('ls -la')).not.toThrow(); + }); +}); + +describe('DEFAULT_ALLOWED_COMMANDS', () => { + it('does not include pip', () => { + expect(DEFAULT_ALLOWED_COMMANDS).not.toContain('pip'); + expect(DEFAULT_ALLOWED_COMMANDS).not.toContain('pip3'); + }); + + it('does not include apt or npm', () => { + expect(DEFAULT_ALLOWED_COMMANDS).not.toContain('apt'); + expect(DEFAULT_ALLOWED_COMMANDS).not.toContain('apt-get'); + expect(DEFAULT_ALLOWED_COMMANDS).not.toContain('npm'); + }); + + it('includes safe commands like python3 and node', () => { + expect(DEFAULT_ALLOWED_COMMANDS).toContain('python3'); + expect(DEFAULT_ALLOWED_COMMANDS).toContain('node'); + }); +}); + +describe('checkBashPathScope', () => { + const workspace = '/workspace/task-123'; + + it('allows relative paths', () => { + expect(() => checkBashPathScope('cat ./output/foo.txt', workspace)).not.toThrow(); + expect(() => checkBashPathScope('cat output/foo.txt', workspace)).not.toThrow(); + expect(() => checkBashPathScope('ls -la', workspace)).not.toThrow(); + }); + + it('allows paths inside workspace', () => { + expect(() => checkBashPathScope(`cat 
${workspace}/file.txt`, workspace)).not.toThrow(); + expect(() => checkBashPathScope(`ls ${workspace}/output/`, workspace)).not.toThrow(); + }); + + it('allows /tmp and its subdirectories', () => { + expect(() => checkBashPathScope('cat /tmp/foo', workspace)).not.toThrow(); + expect(() => checkBashPathScope('ls /tmp/', workspace)).not.toThrow(); + expect(() => checkBashPathScope('echo hello > /tmp/out.txt', workspace)).not.toThrow(); + }); + + it('allows /dev/null and standard streams', () => { + expect(() => checkBashPathScope('cat /dev/null', workspace)).not.toThrow(); + expect(() => checkBashPathScope('echo x > /dev/null', workspace)).not.toThrow(); + expect(() => checkBashPathScope('cat /dev/stdin', workspace)).not.toThrow(); + }); + + it('allows /usr/bin and other binary directories', () => { + expect(() => checkBashPathScope('ls /usr/bin/grep', workspace)).not.toThrow(); + expect(() => checkBashPathScope('/usr/local/bin/node --version', workspace)).not.toThrow(); + expect(() => checkBashPathScope('ls /bin/bash', workspace)).not.toThrow(); + }); + + it('rejects /root/', () => { + expect(() => checkBashPathScope('cat /root/.bashrc', workspace)).toThrow(/workspace 外/); + }); + + it('rejects /etc/', () => { + expect(() => checkBashPathScope('cat /etc/passwd', workspace)).toThrow(/workspace 外/); + }); + + it('rejects bare / (root directory scan)', () => { + expect(() => checkBashPathScope('find / -name x', workspace)).toThrow(/workspace 外/); + }); + + it('rejects /root in compound commands', () => { + expect(() => checkBashPathScope('cd /root && ls', workspace)).toThrow(/workspace 外/); + }); + + it('rejects /etc/ on right-hand side of redirect', () => { + expect(() => checkBashPathScope('echo /etc/passwd > /tmp/x', workspace)).toThrow(/workspace 外/); + }); + + it('rejects /root/ in compound with semicolons', () => { + expect(() => checkBashPathScope('cat ./input/foo.txt; ls /root/', workspace)).toThrow(/workspace 外/); + }); + + it('rejects paths in single-quoted 
globs', () => { + expect(() => checkBashPathScope("ls '/etc/*'", workspace)).toThrow(/workspace 外/); + }); + + it('rejects /home/other-user', () => { + expect(() => checkBashPathScope('cat /home/otheruser/.ssh/id_rsa', workspace)).toThrow(/workspace 外/); + }); + + it('allows executeCoreTools Bash with workspace-relative path', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'scope-test-')); + try { + fs.writeFileSync(path.join(tmpWs, 'hello.txt'), 'hi'); + const result = await executeCoreTools('Bash', { command: 'cat hello.txt' }, { workspacePath: tmpWs, editAllowed: false }); + expect(result?.isError).toBe(false); + expect(result?.output).toContain('hi'); + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); + + it('rejects executeCoreTools Bash with /root/ path', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'scope-test-')); + try { + const result = await executeCoreTools('Bash', { command: 'cat /root/.bashrc' }, { workspacePath: tmpWs, editAllowed: false }); + expect(result?.isError).toBe(true); + expect(result?.output).toContain('workspace 外'); + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); +}); + +describe('checkBashPathScope relative containment', () => { + const ws = '/work/local/42'; + it('rejects relative parent traversal', () => { + expect(() => checkBashPathScope('cat ../../config.yaml', ws)).toThrow(/workspace/); + expect(() => checkBashPathScope('cat ../../../etc/passwd', ws)).toThrow(/workspace/); + }); + it('allows in-workspace relative paths', () => { + expect(() => checkBashPathScope('cat ./output/result.txt', ws)).not.toThrow(); + expect(() => checkBashPathScope('cat output/result.txt', ws)).not.toThrow(); + }); + it('still allows safe absolute prefixes', () => { + expect(() => checkBashPathScope('cat /usr/bin/env', ws)).not.toThrow(); + expect(() => checkBashPathScope('echo hi > /tmp/x', ws)).not.toThrow(); + }); + it('rejects descend-then-escape relative 
paths', () => { + expect(() => checkBashPathScope('cat output/../../config.yaml', ws)).toThrow(/workspace|外/); + expect(() => checkBashPathScope('cat sub/dir/../../../../etc/passwd', ws)).toThrow(/workspace|外/); + }); + it('rejects traversal hidden behind a flag= or VAR= prefix', () => { + expect(() => checkBashPathScope('python3 run.py --file=../../config.yaml', ws)).toThrow(/workspace|外/); + expect(() => checkBashPathScope('PYTHONPATH=../../lib python3 x.py', ws)).toThrow(/workspace|外/); + }); + it('does not false-positive on in-workspace slashed args, regex patterns, or urls', () => { + expect(() => checkBashPathScope('grep -rn "foo/bar" .', ws)).not.toThrow(); + expect(() => checkBashPathScope("sed 's/a/b/' output/x.txt", ws)).not.toThrow(); + expect(() => checkBashPathScope('echo https://example.com/a/b', ws)).not.toThrow(); + expect(() => checkBashPathScope('find . -name "*.ts"', ws)).not.toThrow(); + }); +}); + +describe('bashUnrestricted mode', () => { + it('skips command whitelist when bashUnrestricted is true (delegates to sandbox)', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'unrestricted-')); + try { + fs.writeFileSync(path.join(tmpWs, 'data.txt'), 'hello'); + // 'curl' is not in DEFAULT_ALLOWED_COMMANDS — in restricted mode it would fail. + // In unrestricted mode it delegates to bwrap, which may fail (no user namespace) + // but the error should NOT be "Command not allowed". + const result = await executeCoreTools( + 'Bash', + { command: 'cat data.txt' }, + { workspacePath: tmpWs, editAllowed: false, bashUnrestricted: true }, + ); + // If bwrap is available, it succeeds. If not, the error is from bwrap, not whitelist. 
+ if (result?.isError) { + expect(result.output).not.toContain('not in the allowed commands list'); + } else { + expect(result?.output).toContain('hello'); + } + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); + + it('skips path scope check in sandboxed mode (bwrap handles isolation)', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'unrestricted-')); + try { + // /root/ would normally be blocked by checkBashPathScope. + // In sandboxed mode the host path scope check is skipped (bwrap handles + // isolation). bashSandbox: 'always' forces the sandboxed path regardless + // of whether bwrap can actually launch in this env. + const result = await executeCoreTools( + 'Bash', + { command: 'cat /root/.bashrc' }, + { workspacePath: tmpWs, editAllowed: false, bashUnrestricted: true, bashSandbox: 'always' }, + ); + // Should NOT contain the path scope error message (it is never invoked). + if (result?.isError) { + expect(result.output).not.toContain('workspace 外'); + } + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); +}); + +describe('Bash history logging', () => { + it('appends a JSONL entry to logs/bash-history.jsonl on successful execution', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'bash-log-')); + try { + await executeCoreTools('Bash', { command: 'echo hello' }, { workspacePath: tmpWs, editAllowed: false }); + const logPath = path.join(tmpWs, 'logs', 'bash-history.jsonl'); + expect(fs.existsSync(logPath)).toBe(true); + const lines = fs.readFileSync(logPath, 'utf-8').trim().split('\n'); + expect(lines.length).toBe(1); + const entry = JSON.parse(lines[0]); + expect(entry.command).toBe('echo hello'); + expect(entry.isError).toBe(false); + expect(entry).toHaveProperty('timestamp'); + expect(entry).toHaveProperty('durationMs'); + expect(typeof entry.durationMs).toBe('number'); + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); + + it('logs failed 
commands with isError=true and exitCode', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'bash-log-')); + try { + await executeCoreTools('Bash', { command: 'false' }, { workspacePath: tmpWs, editAllowed: false }); + const logPath = path.join(tmpWs, 'logs', 'bash-history.jsonl'); + const lines = fs.readFileSync(logPath, 'utf-8').trim().split('\n'); + const entry = JSON.parse(lines[0]); + expect(entry.command).toBe('false'); + expect(entry.isError).toBe(true); + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); + + it('logs blocked commands (whitelist rejection) with isError=true', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'bash-log-')); + try { + await executeCoreTools('Bash', { command: 'apt install foo' }, { workspacePath: tmpWs, editAllowed: false }); + const logPath = path.join(tmpWs, 'logs', 'bash-history.jsonl'); + const lines = fs.readFileSync(logPath, 'utf-8').trim().split('\n'); + const entry = JSON.parse(lines[0]); + expect(entry.command).toBe('apt install foo'); + expect(entry.isError).toBe(true); + expect(entry.blocked).toBe(true); + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); + + it('accumulates multiple entries across calls', async () => { + const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'bash-log-')); + try { + await executeCoreTools('Bash', { command: 'echo one' }, { workspacePath: tmpWs, editAllowed: false }); + await executeCoreTools('Bash', { command: 'echo two' }, { workspacePath: tmpWs, editAllowed: false }); + const logPath = path.join(tmpWs, 'logs', 'bash-history.jsonl'); + const lines = fs.readFileSync(logPath, 'utf-8').trim().split('\n'); + expect(lines.length).toBe(2); + expect(JSON.parse(lines[0]).command).toBe('echo one'); + expect(JSON.parse(lines[1]).command).toBe('echo two'); + } finally { + fs.rmSync(tmpWs, { recursive: true, force: true }); + } + }); +}); + +describe('Bash install rejection (all modes)', () => { + function 
installCtx(over: Partial = {}): ToolContext { + return { workspacePath: process.cwd(), editAllowed: false, ...over }; + } + + it('blocks pip install even when bashUnrestricted', async () => { + const r = await executeCoreTools('Bash', { command: 'pip install pypdf' }, installCtx({ bashUnrestricted: true })); + expect(r?.isError).toBe(true); + expect(r?.output).toMatch(/installation is not available|not allowed/i); + expect(r?.output).toMatch(/preinstalled|pre-installed|プリインストール/i); + }); + + it('blocks npm install', async () => { + const r = await executeCoreTools('Bash', { command: 'npm install left-pad' }, installCtx()); + expect(r?.isError).toBe(true); + }); +}); + +describe('resolveBashMode + env scrub (hardened fallback)', () => { + function installCtx(over: Partial = {}): ToolContext { + return { workspacePath: process.cwd(), editAllowed: false, bashSandbox: 'off', ...over }; + } + + it('off mode runs plain exec and respects whitelist', async () => { + const r = await executeCoreTools('Bash', { command: 'echo hello' }, + installCtx({ bashSandbox: 'off' })); + expect(r?.isError).toBe(false); + expect(r?.output).toContain('hello'); + }); + + it('hardened fallback scrubs secrets from env', async () => { + process.env.MCP_ENCRYPTION_KEY = 'topsecret'; + const r = await executeCoreTools('Bash', + { command: 'node -e "process.stdout.write(String(process.env.MCP_ENCRYPTION_KEY))"' }, + installCtx({ bashSandbox: 'off', bashUnrestricted: true })); + delete process.env.MCP_ENCRYPTION_KEY; + expect(r?.output).not.toContain('topsecret'); + expect(r?.output).toMatch(/undefined/); + }); +}); + +describe('Read delegates to shared binary detector', () => { + it('flags a magic-byte binary with no NUL in head', () => { + const head = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0x41, 0x42]); + expect(looksLikeBinaryBytes(head).binary).toBe(true); + }); + it('keeps plain source text readable', () => { + expect(looksLikeBinaryBytes(Buffer.from('export const x = 
/**
 * Optional per-tool settings. Every field is optional; the defaults quoted in
 * the comments are the documented ones (the code that applies them lives
 * outside this interface).
 */
export interface ToolsConfig {
  searxngUrl?: string;              // default: http://searxng:8080
  visionModel?: string;             // default: qwen2-vl:8b-instruct
  visionBaseUrl?: string;           // default: same as provider.baseUrl
  visionTimeout?: number;           // seconds, default: 60
  visionMaxTokens?: number;         // default: 1024
  webfetchTimeout?: number;         // seconds, default: 30
  websearchTimeout?: number;        // seconds, default: 15
  webfetchAllowedHosts?: string[];  // private IPs to explicitly allow
  xCliCommand?: string[] | string;
  xTimeout?: number;
  xAuthToken?: string;
  xCt0?: string;
  xProxy?: string;
  xChromeProfile?: string;
  /** Automatic download mode for images/videos attached to X posts. Default 'auto' (download whenever media[] is non-empty). */
  xDownloadMedia?: 'auto' | 'never';
  /** How videos are handled. Default 'thumbnail' (poster image only). 'full' also downloads the highest-quality mp4 among the variants; 'never' skips videos. */
  xDownloadVideo?: 'thumbnail' | 'full' | 'never';
  /** Per-media download size cap in MB (default 25). */
  xMediaMaxMb?: number;
  /** Hard fetch timeout per media item, in seconds (default 15). Prevents hanging forever when the CDN stops responding. */
  xMediaFetchTimeoutSeconds?: number;
  ocrModel?: string;                // GLM-OCR model name (default: glm-ocr)
  /** @deprecated Use AppConfig.searchFilter instead */
  searchFilter?: SearchFilterConfig;
  browserPageTimeout?: number;      // ms, default: 60000
  browserActionTimeout?: number;    // ms, default: 30000
  googleMapsApiKey?: string;        // Google Maps API key (Nominatim/OSRM is used when omitted)
  mapsTimeout?: number;             // maps API timeout in seconds (default: 30)
  amazonAffiliateTag?: string;      // Amazon Associates tag
  keepaApiKey?: string;             // Keepa API key (when omitted, only graph image links are provided)
  speechServerUrl?: string;         // speech-recognition server URL (e.g. http://localhost:8000/v1)
  speechTimeout?: number;           // timeout in seconds (default: 300)
  speechLanguage?: string;          // default language (default: ja)
  knowledgeServiceUrl?: string;     // DKS server URL (e.g. http://dks-server:8100)
  // NOTE(review): the type arguments of `Record` appear to be missing in this
  // copy of the file — confirm against the original source.
  knowledgeNamespaces?: Record;     // API key per namespace
  officeExcelMaxSizeMb?: number;    // max file size for ReadExcel (default: 10)
  officeDocxMaxSizeMb?: number;     // max file size for ReadDocx (default: 10)
  officePdfMaxSizeMb?: number;      // max file size for ReadPdf (default: 10)
  officePptxMaxSizeMb?: number;     // max file size for ReadPPTX (default: 50)
  officePptxMaxUncompressedMb?: number; // cap on the uncompressed ZIP size for ReadPPTX (default: 200)
  webfetchScreenshot?: boolean;     // whether WebFetch attaches a screenshot when vlmEnabled (default: true)
  webfetchScreenshotTimeoutMs?: number; // screenshot capture timeout (default: 15000)
}
実行のときは親の + * localTaskId が入る (worker 側で resolve)。subtask root が gitea issue + * など local_task に紐づかないジョブのときは undefined になる。 + */ + taskId?: string; + /** + * タスク owner の user.id。noVNC upgrade の visibility チェック以外には + * 直接参照されないが、BrowseWeb が新しい task session を生成するときに + * 渡してログ / 認可で使う。 + */ + userId?: string; + /** + * Job owner user.id. Required by the MCP aggregator to look up the user's + * stored OAuth tokens. Distinct from `userId` (which is also the owner's id) + * — kept as a separate field name to make MCP-specific code paths explicit. + * In practice ownerId === userId today. + */ + ownerId?: string | null; + /** Job id. Used by MCP binary saver for output file paths under workspace/output/. */ + jobId?: string | null; + /** Runtime config for MCP subsystem (timeouts + quotas). Worker reads from AppConfig.mcp. */ + mcpConfig?: import('../../mcp/config.js').McpRuntimeConfig; + /** Per-movement quota accumulator for MCP binary saves. Reset at movement entry. */ + mcpQuotaState?: { files: number; bytes: number }; + runIsolatedLlm?: (messages: Message[]) => Promise; + spawnSubTask?: (params: { title: string; instruction: string; piece?: string }) => Promise<{ jobId: string; subtaskIndex: number; workspacePath: string }>; + contextManager?: ContextManager; // Read 系 / Bash の出力サイズをコンテキスト残量に合わせて切り詰めるために使用 + /** + * Traceability T-1: structured event log writer. Optional at the + * type boundary so existing tests that construct minimal ToolContexts + * stay green; the engine (agent-loop / piece-runner) wraps with a + * NoopEventLogger when undefined so internal emission code never has + * to deal with the unset case. Production callers (worker → piece- + * runner) always pass a real FileEventLogger. + */ + eventLogger?: import('../../progress/event-log.js').EventLogger; + /** + * Cancel-traceability PR2: cancellation signal threaded from + * piece-runner → agent-loop. 
Tools that spawn child processes / make + * network calls should pass it to the underlying API so user-initiated + * cancel actually kills in-flight work instead of letting it run to + * completion in the background. + */ + abortSignal?: AbortSignal; + /** + * Mission Brief IO. Provided by piece-runner when a local_task ID is + * known so the MissionUpdate tool and the system-prompt injector + * can read/write the per-task pinned memo. Subtask contexts that + * aren't bound to a local_task leave this unset. + */ + missionBrief?: MissionBriefIO; + /** Decrypted Playwright storageState for the browser session profile bound to this job. */ + browserSessionState?: object; + /** Profile id for audit/expiry callbacks. */ + browserSessionProfileId?: number; + /** Profile metadata used by BrowseWeb's expiry check (Task 9 will use this). */ + browserSessionProfile?: { loggedInSelector: string | null; loginUrlPatterns: string[] }; + /** Worker-provided callback invoked when BrowseWeb detects auth expiry. */ + onAuthExpired?: (profileId: number, reason: string) => void; + /** + * Phase 4: per-movement SSH connection allowlist forwarded from piece YAML + * `allowed_ssh_connections`. UUID list, or `['*']` for "any registered + * connection". undefined = SSH tools (Phase 7) will reject with + * `no_allowed_connections_declared` before any other check (so we don't + * leak the existence of connections the piece can't use). + */ + allowedSshConnections?: string[]; + /** + * Phase 7: name of the piece running this tool. Used by SSH tools to evaluate + * per-piece grants (a grant may apply to a specific piece or to all pieces). + * Plumbed by piece-runner from `piece.name`. + */ + pieceName?: string; + /** Per-task option: when true, MCP tools are not loaded/dispatched. */ + mcpDisabled?: boolean; + /** Per-task option: when true, skill index is not injected into the system prompt. */ + skillsDisabled?: boolean; + /** + * Shared-knowledge notes service. 
/**
 * Value returned by a tool invocation. The tool implementations in this file
 * report failures through `isError: true` plus a message in `output` rather
 * than by throwing.
 */
export interface ToolResult {
  /** Text result of the tool, or the error message when `isError` is true. */
  output: string;
  /** True when the tool call failed or was rejected. */
  isError: boolean;
  // Optional structured payload; the core tools in this file never set it.
  // NOTE(review): presumably produced by other tool modules — confirm.
  structuredBlocks?: import('./structured-blocks.js').StructuredBlock[];
  // Optional images as data URLs with an optional label; the core tools in
  // this file never set it. NOTE(review): consumer not visible here — confirm.
  images?: Array<{ dataUrl: string; label?: string }>;
}
/**
 * Reads at most `maxBytes` bytes from the start of a file.
 *
 * @param filePath Path of the file to sample.
 * @param maxBytes Upper bound on the number of bytes returned.
 * @returns The bytes actually read (possibly empty), or `null` when the file
 *          cannot be opened, stat'ed or read.
 */
function readHeadBytes(filePath: string, maxBytes: number): Buffer | null {
  try {
    const fd = fs.openSync(filePath, 'r');
    try {
      const wanted = Math.min(fs.fstatSync(fd).size, maxBytes);
      const buf = Buffer.alloc(wanted);
      // readSync may return fewer bytes than requested (or hit EOF early if
      // the file shrank after fstat). Keep reading until the buffer is full
      // or EOF, then trim: a zero-filled tail would otherwise look like NUL
      // bytes to the binary sniffer and misclassify a text file.
      let filled = 0;
      while (filled < wanted) {
        const got = fs.readSync(fd, buf, filled, wanted - filled, filled);
        if (got === 0) break;
        filled += got;
      }
      return filled === wanted ? buf : buf.subarray(0, filled);
    } finally {
      fs.closeSync(fd);
    }
  } catch {
    // Best-effort sniffing: any I/O failure is reported as "no data".
    return null;
  }
}
/**
 * Decides whether `content` has enough newline structure to be truncated on
 * line boundaries.
 * - Inputs shorter than 500 chars count as line-oriented as soon as they
 *   contain a single newline.
 * - Longer inputs need at least one newline per 1024 characters on average.
 */
function isLineOriented(content: string): boolean {
  const size = content.length;
  if (size < 500) {
    return content.indexOf('\n') !== -1;
  }
  let breaks = 0;
  for (let at = content.indexOf('\n'); at !== -1; at = content.indexOf('\n', at + 1)) {
    breaks += 1;
  }
  return breaks >= size / 1024;
}
isLineOriented(content); + let cutIndex = Math.min(safeBudgetChars, content.length); + if (lineOriented) { + const lastNewline = content.lastIndexOf('\n', cutIndex); + if (lastNewline > safeBudgetChars * 0.8) { + cutIndex = lastNewline; + } + } + const truncatedText = content.slice(0, cutIndex); + const returnedTokens = estimateTokensFromChars(truncatedText.length); + const returnedLines = truncatedText.split('\n').length; + const returnedBytes = Buffer.byteLength(truncatedText, 'utf-8'); + + const sizeParts: string[] = []; + sizeParts.push(`推定 ${totalTokens.toLocaleString()} tokens`); + if (options.totalLines !== undefined) sizeParts.push(`${options.totalLines.toLocaleString()} 行`); + if (options.totalBytes !== undefined) sizeParts.push(`${options.totalBytes.toLocaleString()} bytes`); + + const defaultHint = lineOriented + ? `続きを読むには Read(offset=${returnedLines}, limit=...) で次の範囲を指定するか、Grep/Bash(head/tail/grep) で必要箇所だけ抽出してください` + : `このファイルは改行が少なく行指定が効きません。続きは Read(byte_offset=${returnedBytes}, byte_length=...) で指定するか、Grep で必要箇所を抽出してください`; + const hint = options.continuationHint ?? defaultHint; + + const returnedDescr = lineOriented + ? 
/**
 * Resolves `filePath` against the workspace and rejects anything that lands
 * outside it.
 *
 * The check is purely lexical (`path.resolve` / `path.relative`); symlinks are
 * not followed.
 *
 * @param workspacePath Root directory the result must stay within.
 * @param filePath Workspace-relative or absolute path supplied by the caller.
 * @returns The absolute, normalized path.
 * @throws Error when the resolved path is outside the workspace.
 */
export function resolveAndGuard(workspacePath: string, filePath: string): string {
  const root = path.resolve(workspacePath);
  const resolved = path.resolve(root, filePath);
  // Compare via the relative path instead of a `root + sep` prefix test: when
  // the workspace is the filesystem root, `root + sep` becomes "//" and the
  // prefix test would reject every path underneath it.
  const rel = path.relative(root, resolved);
  const escapes =
    rel === '..' ||
    rel.startsWith('..' + path.sep) ||
    path.isAbsolute(rel); // e.g. a different drive on Windows
  if (escapes) {
    throw new Error(`Path traversal detected: "${filePath}" is outside workspace`);
  }
  return resolved;
}
'tail', 'wc', 'sort', 'uniq', 'awk', 'sed', 'grep', + 'jq', 'ls', 'find', 'echo', 'date', 'diff', + 'tr', 'cut', 'paste', 'tee', 'mkdir', 'cp', 'mv', 'touch', + 'basename', 'dirname', 'realpath', 'stat', 'file', 'which', + 'yes', 'true', 'false', 'test', 'expr', + 'python3', 'node', +]; + +/** + * コマンド文字列からサブコマンドの先頭トークンを抽出する。 + * パイプ (|)、セミコロン (;)、&&、||、改行 (\n) によって連結された各サブコマンドを分割し、 + * さらにバッククォートや $() によるコマンド置換内のコマンドも抽出する。 + * プロセス置換 <(...) や >(...) も検出する。 + * ここ文字列 <<< も検出する。 + */ +export function extractCommandTokens(command: string): string[] { + const tokens: string[] = []; + + // プロセス置換 <(...) や >(...) を検出して内部を再帰抽出 + const processSubstPattern = /[<>]\(([^)]*(?:\([^)]*\)[^)]*)*)\)/g; + let psMatch: RegExpExecArray | null; + while ((psMatch = processSubstPattern.exec(command)) !== null) { + const inner = psMatch[1] ?? ''; + tokens.push(...extractCommandTokens(inner)); + } + + // $(...) のネストを適切に処理: 深さを追跡しながら抽出 + function extractDollarParens(cmd: string): string[] { + const inner: string[] = []; + let i = 0; + while (i < cmd.length) { + if (cmd[i] === '$' && cmd[i + 1] === '(') { + let depth = 1; + let start = i + 2; + i = start; + while (i < cmd.length && depth > 0) { + if (cmd[i] === '(') depth++; + else if (cmd[i] === ')') depth--; + i++; + } + const content = cmd.slice(start, i - 1); + inner.push(content); + // 再帰的にネストされた $() を処理 + inner.push(...extractDollarParens(content)); + } else { + i++; + } + } + return inner; + } + + const dollarParenContents = extractDollarParens(command); + for (const inner of dollarParenContents) { + tokens.push(...extractCommandTokens(inner)); + } + + // バッククォート内を抽出して再帰 + const backtickPattern = /`([^`]*)`/g; + let btMatch: RegExpExecArray | null; + while ((btMatch = backtickPattern.exec(command)) !== null) { + const inner = btMatch[1] ?? ''; + tokens.push(...extractCommandTokens(inner)); + } + + // $(...) 
/**
 * Patterns that identify package-installation commands so they can be
 * blocked. Prevents installing packages through apt / pip / npm and similar
 * tools. Each pattern is tested against the whole command string, so a match
 * anywhere in a pipeline or chained command counts.
 */
const BLOCKED_INSTALL_PATTERNS: RegExp[] = [
  /\bpython3?\s+-m\s+pip\s+install\b/, // python -m pip install / python3 -m pip install
  /\bpip3?\s+install\b/,               // pip install / pip3 install
  /\bapt(?:-get)?\s+install\b/,        // apt install / apt-get install
  /\bnpm\s+install\b/,                 // npm install
  /\bnpm\s+i\b/,                       // npm i (short alias); `\b` keeps e.g. "npm init" from matching
  /\byarn\s+add\b/,                    // yarn add
  /\bcurl\s+.*\|\s*(?:ba)?sh\b/,       // curl ... | sh  /  curl ... | bash
  /\bwget\s+.*\|\s*(?:ba)?sh\b/,       // wget ... | sh  /  wget ... | bash
];
/** Path prefixes outside the workspace that bash commands may still reference. */
const SAFE_ABS_PATH_PREFIXES = [
  '/tmp/',
  '/dev/null',
  '/dev/stdin',
  '/dev/stdout',
  '/dev/stderr',
  '/usr/bin/',
  '/usr/local/bin/',
  '/usr/lib/',
  '/usr/share/',
  '/usr/include/',
  '/bin/',
  '/sbin/',
  '/lib/',
  '/lib64/',
  '/proc/self/',
];

/** Exact paths outside the workspace that are always acceptable. */
const SAFE_EXACT_ABS_PATHS = new Set([
  '/tmp',
  '/dev/null',
  '/dev/stdin',
  '/dev/stdout',
  '/dev/stderr',
]);

/**
 * Decides whether a path-like token from a bash command stays inside the
 * permitted area: the workspace itself, or one of the safe system locations
 * listed above.
 *
 * @param token Path-like word taken from the command (absolute or relative).
 * @param workspacePath Workspace root; relative tokens are resolved against it
 *                      because it is the bash cwd.
 * @returns `true` when the token may be used, `false` when it escapes.
 */
function isAbsPathAllowed(token: string, workspacePath: string): boolean {
  const resolvedWorkspace = path.resolve(workspacePath);
  // Resolve relative tokens against the workspace (the bash cwd).
  const resolved = path.isAbsolute(token)
    ? path.resolve(token)
    : path.resolve(resolvedWorkspace, token);
  if (resolved === resolvedWorkspace || resolved.startsWith(resolvedWorkspace + path.sep)) {
    return true;
  }
  // The raw token must never be matched against the allowlist as-is:
  // "/tmp/../etc/passwd" starts with "/tmp/" but points at /etc/passwd.
  // Collapse "." / ".." segments first so only the real target is compared.
  const normalizedToken = token.startsWith('/') ? path.posix.normalize(token) : token;
  if (SAFE_EXACT_ABS_PATHS.has(normalizedToken) || SAFE_EXACT_ABS_PATHS.has(resolved)) {
    return true;
  }
  return SAFE_ABS_PATH_PREFIXES.some((prefix) =>
    resolved.startsWith(prefix) || normalizedToken.startsWith(prefix)
  );
}
/**
 * Validates a shell command against the command whitelist.
 *
 * Install patterns are rejected first; then every command word extracted from
 * the command line is reduced to its base name (so `/usr/bin/grep` is compared
 * as `grep`) and must appear in `allowedCommands`.
 *
 * @param command Full shell command line.
 * @param allowedCommands Base names of the commands that may be executed.
 * @throws Error for an install command or for the first command word that is
 *         not whitelisted.
 */
export function checkAllowedCommand(command: string, allowedCommands: string[]): void {
  // Install patterns take precedence over the whitelist.
  checkBlockedInstallPatterns(command);

  const rejected = extractCommandTokens(command)
    .map((word) => path.basename(word))
    .find((name) => !allowedCommands.includes(name));

  if (rejected !== undefined) {
    throw new Error(`Command not allowed: "${rejected}" is not in the allowed commands list`);
  }
}
/**
 * Tool schema for `Bash`. The model-facing description (Japanese) states that
 * the cwd is the workspace root, that absolute paths outside the workspace
 * are rejected apart from a few system locations, and that package
 * installation is forbidden. The advertised timeout (default 30 s, max 300 s)
 * matches the clamp applied in executeBash.
 */
const BASH_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'Bash',
    description: 'シェルコマンドを実行する。cwd は workspace ルート。workspace 外への絶対パス (/root/, /etc/ 等) は拒否される (例外: /tmp, /usr/bin 等)。パッケージインストール(apt/pip/npm 等)は禁止。詳細は ReadToolDoc({ name: "Bash" })。',
    parameters: {
      type: 'object',
      properties: {
        // Shell command line to run.
        command: { type: 'string', description: '実行するシェルコマンド' },
        // Timeout in seconds.
        timeout: { type: 'number', description: 'タイムアウト秒数 (デフォルト: 30, 最大: 300)' },
      },
      required: ['command'],
    },
  },
};
/**
 * Returns the tool definitions for the requested tool names.
 *
 * @param allowedTools Tool names to expose; unknown names are ignored.
 * @param editAllowed When false, `Write` and `Edit` are dropped even if listed.
 * @returns The matching definitions, in the order given by `allowedTools`.
 */
export function getToolDefs(allowedTools: string[], editAllowed: boolean): ToolDef[] {
  return allowedTools
    .filter((name) => {
      if (!editAllowed && (name === 'Write' || name === 'Edit')) return false;
      // Own-property check: the `in` operator also walks the prototype chain,
      // so names such as "toString" or "constructor" would be accepted and
      // yield a value that is not a ToolDef.
      return Object.prototype.hasOwnProperty.call(ALL_TOOL_DEFS, name);
    })
    .map((name) => ALL_TOOL_DEFS[name]!);
}
バイナリデータを読み込むと LLM コンテキストが壊れるので拒否しました。Bash で \`file\` や \`head -c 200 ${filePath} | xxd\` を使って必要部分だけ確認してください。`, + isError: true, + }; + } + + // 1. ファイルサイズ取得 + let totalBytes = 0; + try { + totalBytes = fs.statSync(resolved).size; + } catch (e) { + return { output: `Read error: ${(e as Error).message}`, isError: true }; + } + + // 拡張子フォールバック: 先頭 8KB を sniff し binary(magic byte / NUL / 不正 UTF-8 / + // 制御文字比率)と判定したら拒否する。拡張子のホワイト/ブラックリストで漏れた未知のバイナリを止める。 + if (totalBytes > 0 && looksLikeBinaryByContent(resolved)) { + return { + output: `Read cannot open "${filePath}" (binary content detected in head ${SNIFF_HEAD_BYTES} bytes). バイナリを読み込むと LLM コンテキストが壊れます。Bash で \`file\` や \`head -c 200 ${filePath} | xxd\` を使って必要部分だけ確認してください。`, + isError: true, + }; + } + + const budgetTokens = getToolOutputBudgetTokens(ctx); + const budgetBytes = estimateCharsForTokenBudget(budgetTokens); + + // 2. バイト範囲指定が優先(改行のないファイル向け) + if (byteOffset !== undefined || byteLength !== undefined) { + const start = Math.max(0, byteOffset ?? 0); + if (start >= totalBytes) { + return { output: `Read: byte_offset ${start} is beyond file size ${totalBytes}`, isError: false }; + } + // byte_length が未指定または予算より大きければ budget にクリップ + const requested = byteLength ?? 
(totalBytes - start); + const effectiveLength = Math.min(requested, budgetBytes, totalBytes - start); + const buf = Buffer.alloc(effectiveLength); + try { + const fd = fs.openSync(resolved, 'r'); + try { + fs.readSync(fd, buf, 0, effectiveLength, start); + } finally { + fs.closeSync(fd); + } + } catch (e) { + return { output: `Read error: ${(e as Error).message}`, isError: true }; + } + const text = buf.toString('utf-8'); + const capped = effectiveLength < requested; + if (capped) { + const nextOffset = start + effectiveLength; + const notice = + `[自動切り詰め] ${filePath} からバイト範囲 [${start}, ${start + requested}) が要求されましたが、` + + `残コンテキスト予算 ${budgetTokens.toLocaleString()} tokens に収まる [${start}, ${nextOffset}) (${effectiveLength.toLocaleString()} bytes) のみ返却しました。\n` + + `続きは Read(byte_offset=${nextOffset}, byte_length=...) で指定してください\n` + + `--- 以下、要求範囲の先頭 ${effectiveLength.toLocaleString()} bytes ---\n`; + return { output: notice + text, isError: false }; + } + return { output: text, isError: false }; + } + + // 3. 行単位読み込み(既存挙動)+ 事前切り詰め + try { + const content = fs.readFileSync(resolved, 'utf-8'); + const lines = content.split('\n'); + const sliced = limit !== undefined ? lines.slice(offset, offset + limit) : lines.slice(offset); + const slicedText = sliced.join('\n'); + + const effectiveBudgetTokens = capBudgetForEncodedPayload(slicedText, budgetTokens); + const { text, truncated } = truncateToBudget(slicedText, effectiveBudgetTokens, { + sourceLabel: filePath, + totalLines: lines.length, + totalBytes, + continuationHint: looksLikeLargeEncodedPayload(slicedText) + ? 
/**
 * Implements the `Write` tool: writes `content` to `file_path` inside the
 * workspace, creating parent directories as needed.
 *
 * @param input Tool arguments; reads `file_path` and `content`.
 * @param ctx Tool context; `editAllowed` must be true and `workspacePath` is
 *            the confinement root.
 * @returns A success message with the number of bytes written, or an error
 *          result (never throws for I/O or path errors).
 */
function executeWrite(input: Record<string, unknown>, ctx: ToolContext): ToolResult {
  if (!ctx.editAllowed) {
    return { output: 'Write is not allowed: edit flag is false', isError: true };
  }
  const filePath = input['file_path'] as string;
  const content = input['content'] as string;

  let resolved: string;
  try {
    // The workspace is the only boundary: resolveAndGuard rejects anything
    // that escapes it, and both new files and overwrites are permitted
    // anywhere inside. (New files used to be limited to output/, which was
    // inconsistent with overwrites and confusing for legitimate writes.)
    resolved = resolveAndGuard(ctx.workspacePath, filePath);
  } catch (e) {
    return { output: (e as Error).message, isError: true };
  }

  try {
    fs.mkdirSync(path.dirname(resolved), { recursive: true });
    fs.writeFileSync(resolved, content, 'utf-8');
    // Report the encoded size: `content.length` counts UTF-16 code units and
    // under-reports any non-ASCII text (e.g. Japanese) written as UTF-8.
    const writtenBytes = Buffer.byteLength(content, 'utf-8');
    return { output: `Written ${writtenBytes} bytes to ${filePath}`, isError: false };
  } catch (e) {
    return { output: `Write error: ${(e as Error).message}`, isError: true };
  }
}
'utf-8', + ); + } catch { + // best-effort logging + } +} + +type BashMode = 'sandboxed' | 'hardened'; + +async function resolveBashMode(ctx: ToolContext): Promise { + const setting = ctx.bashSandbox ?? 'auto'; + if (setting === 'off') return 'hardened'; // plain exec path, still env-scrubbed + if (setting === 'always') return 'sandboxed'; // bwrap verified at boot + return (await isBwrapAvailable()) ? 'sandboxed' : 'hardened'; // auto +} + +async function executeBash(input: Record, ctx: ToolContext): Promise { + const command = input['command'] as string; + let timeoutSec = typeof input['timeout'] === 'number' ? input['timeout'] : 30; + timeoutSec = Math.min(timeoutSec, 300); + const startedAt = Date.now(); + + const budgetTokens = getToolOutputBudgetTokens(ctx); + const capOutput = (raw: string, label: string): string => { + const { text, truncated } = truncateToBudget(raw, budgetTokens, { + sourceLabel: `Bash ${label}`, + continuationHint: + 'コマンド出力が大きすぎます。cat/less で全文を取得する代わりに、head/tail/grep/awk/sed で必要箇所だけに絞ってください', + }); + if (truncated) { + logger.info(`[bash] truncated ${label} original_chars=${raw.length} budget_tokens=${budgetTokens}`); + } + return text; + }; + + // Install commands are never supported (packages are pre-baked). Reject in + // EVERY mode — including bashUnrestricted, which previously skipped this. + try { + checkBlockedInstallPatterns(command); + } catch (e) { + logBashHistory(ctx.workspacePath, command, true, Date.now() - startedAt, { blocked: true }); + return { output: (e as Error).message, isError: true }; + } + + const mode = await resolveBashMode(ctx); + const applyWhitelist = !ctx.bashUnrestricted; + + if (mode === 'sandboxed') { + const skillBinds = ctx.skillCatalog?.getSkillBinds?.(ctx.userId ?? 'local') ?? []; + if (applyWhitelist) { + try { + checkAllowedCommand(command, ctx.allowedCommands ?? 
DEFAULT_ALLOWED_COMMANDS); + } catch (e) { + logBashHistory(ctx.workspacePath, command, true, Date.now() - startedAt, { blocked: true }); + return { output: (e as Error).message, isError: true }; + } + } + const result: SandboxedBashResult = await executeSandboxedBash( + command, ctx.workspacePath, timeoutSec, BASH_MAX_BUFFER_BYTES, ctx.abortSignal, skillBinds, + ); + const out = result.isError ? result.output : capOutput(result.output, 'stdout'); + logBashHistory(ctx.workspacePath, command, result.isError, Date.now() - startedAt, { + outputBytes: Buffer.byteLength(out, 'utf-8'), + }); + return { output: out, isError: result.isError }; + } + + // mode === 'hardened': no bwrap. Whitelist (unless unrestricted) + path scope + env scrub. + try { + if (applyWhitelist) checkAllowedCommand(command, ctx.allowedCommands ?? DEFAULT_ALLOWED_COMMANDS); + checkBashPathScope(command, ctx.workspacePath); + } catch (e) { + logBashHistory(ctx.workspacePath, command, true, Date.now() - startedAt, { blocked: true }); + return { output: (e as Error).message, isError: true }; + } + + return await new Promise((resolve) => { + if (ctx.abortSignal?.aborted) { + logBashHistory(ctx.workspacePath, command, true, Date.now() - startedAt); + resolve({ output: 'Cancelled before bash launch', isError: true }); + return; + } + child_process.exec( + command, + { + cwd: ctx.workspacePath, + timeout: timeoutSec * 1000, + encoding: 'utf-8', + maxBuffer: BASH_MAX_BUFFER_BYTES, + signal: ctx.abortSignal, + env: buildSandboxEnv(process.env, ctx.workspacePath), // scrub secrets + }, + (error, stdout, stderr) => { + if (!error) { + const out = capOutput(stdout, 'stdout'); + logBashHistory(ctx.workspacePath, command, false, Date.now() - startedAt, { + outputBytes: Buffer.byteLength(out, 'utf-8'), + }); + resolve({ output: out, isError: false }); + return; + } + + const msg = error.message ?? 
String(error); + const execError = error as Error & { signal?: NodeJS.Signals | null; killed?: boolean; code?: string }; + const cappedStdout = stdout ? capOutput(stdout, 'stdout') : ''; + const cappedStderr = stderr ? capOutput(stderr, 'stderr') : ''; + const details: string[] = [cappedStdout, cappedStderr]; + if (execError.code === 'ABORT_ERR' || ctx.abortSignal?.aborted) { + details.push('Cancelled by user request'); + } else if (execError.killed) { + details.push(`Command timed out after ${timeoutSec}s`); + } + if (execError.signal) { + details.push(`Signal: ${execError.signal}`); + } + const output = [...details, msg].filter(Boolean).join('\n'); + logBashHistory(ctx.workspacePath, command, true, Date.now() - startedAt, { + outputBytes: Buffer.byteLength(output, 'utf-8'), + }); + resolve({ output, isError: true }); + }, + ); + }); +} + +// 簡易 glob: minimatch 相当の実装 (** と * のみサポート) +function expandBraces(pattern: string): string[] { + const match = pattern.match(/^(.*?)\{([^}]+)\}(.*)$/); + if (!match) return [pattern]; + const [, prefix, alternatives, suffix] = match; + const results: string[] = []; + for (const alt of alternatives.split(',')) { + results.push(...expandBraces(`${prefix}${alt}${suffix}`)); + } + return results; +} + +function globMatch(pattern: string, filePath: string): boolean { + // ブレース展開 {a,b,c} をサポート + const expanded = expandBraces(pattern); + return expanded.some((pat) => { + // **/ → 0個以上のディレクトリ(ルート直下にもマッチ) + // ** → 任意のパスセグメント列 + // * → 単一セグメント内の任意文字列 + const regexStr = pat + .replace(/\*\*\//g, '%%GLOBSTAR_SLASH%%') + .replace(/\*\*/g, '%%GLOBSTAR%%') + .split('%%GLOBSTAR_SLASH%%') + .map((segment) => + segment + .split('%%GLOBSTAR%%') + .map((part) => + part + .split('*') + .map((s) => s.replace(/[.+^${}()|[\]\\]/g, '\\$&')) + .join('[^/]*'), + ) + .join('.*'), + ) + .join('(.+/)?'); + const regex = new RegExp(`^${regexStr}$`); + return regex.test(filePath); + }); +} + +function collectFiles(dir: string, base: string): string[] { + 
const results: string[] = []; + let entries: fs.Dirent[]; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return results; + } + for (const entry of entries) { + const full = path.join(dir, entry.name); + const rel = path.relative(base, full); + if (entry.isDirectory()) { + results.push(...collectFiles(full, base)); + } else { + results.push(rel); + } + } + return results; +} + +function executeGlob(input: Record, ctx: ToolContext): ToolResult { + const pattern = input['pattern'] as string; + const searchPath = input['path'] as string | undefined; + + let baseDir: string; + try { + baseDir = searchPath + ? resolveAndGuard(ctx.workspacePath, searchPath) + : path.resolve(ctx.workspacePath); + } catch (e) { + return { output: (e as Error).message, isError: true }; + } + + const allFiles = collectFiles(baseDir, baseDir); + const matched = allFiles.filter((f) => globMatch(pattern, f)); + + if (matched.length === 0) { + return { output: '(no files matched)', isError: false }; + } + return { output: matched.join('\n'), isError: false }; +} + +function executeGrep(input: Record, ctx: ToolContext): ToolResult { + const pattern = input['pattern'] as string; + const searchPath = input['path'] as string | undefined; + const globFilter = input['glob'] as string | undefined; + + let baseDir: string; + try { + baseDir = searchPath + ? resolveAndGuard(ctx.workspacePath, searchPath) + : path.resolve(ctx.workspacePath); + } catch (e) { + return { output: (e as Error).message, isError: true }; + } + + let regex: RegExp; + try { + regex = new RegExp(pattern); + } catch (e) { + return { output: `Invalid regex: ${(e as Error).message}`, isError: true }; + } + + const allFiles = collectFiles(baseDir, baseDir); + const filteredFiles = globFilter + ? 
allFiles.filter((f) => globMatch(globFilter, path.basename(f))) + : allFiles; + + const results: string[] = []; + for (const rel of filteredFiles) { + const full = path.join(baseDir, rel); + let content: string; + try { + content = fs.readFileSync(full, 'utf-8'); + } catch { + continue; + } + const lines = content.split('\n'); + for (let i = 0; i < lines.length; i++) { + if (regex.test(lines[i]!)) { + results.push(`${rel}:${i + 1}: ${lines[i]}`); + } + } + } + + if (results.length === 0) { + return { output: '(no matches)', isError: false }; + } + return { output: results.join('\n'), isError: false }; +} + +// コアツールを実行 (Read, Write, Edit, Bash, Glob, Grep) +// null を返す場合はこのモジュールでは処理しないことを意味する +export async function executeCoreTools( + name: string, + input: Record, + ctx: ToolContext, +): Promise { + logger.debug(`[tools/core] executing ${name}`); + + switch (name) { + case 'Read': + return executRead(input, ctx); + case 'Write': + return executeWrite(input, ctx); + case 'Edit': + return executeEdit(input, ctx); + case 'Bash': + return await executeBash(input, ctx); + case 'Glob': + return executeGlob(input, ctx); + case 'Grep': + return executeGrep(input, ctx); + default: + return null; + } +}