sync: update from private repo (15455e9)
Some checks failed
CI / build-and-test (push) Has been cancelled

This commit is contained in:
oss-sync 2026-06-05 05:43:02 +00:00
parent 02c7dfdd83
commit a44f6b41e2
3 changed files with 1962 additions and 1 deletions

6
.gitignore vendored
View File

@@ -39,5 +39,9 @@ data/browser-sessions/*
# Core dumps from native crashes (sqlite/playwright/sharp). These contain raw
# process memory — including the decrypted master key, SSH private keys and the
# session secret — so they must never be committed.
# NOTE: keep this narrow. A bare `core.*` also matches source files like
# src/engine/tools/core.ts / core.test.ts, which silently dropped them from the
# OSS mirror during oss-sync (its `git add -A` honours .gitignore). Core dumps
# are named `core` or `core.<pid>`, so match only a suffix that begins with a digit.
core
core.*
core.[0-9]*

View File

@@ -0,0 +1,725 @@
import * as fs from 'fs';
import * as path from 'path';
import { tmpdir } from 'os';
import { afterEach, describe, expect, it } from 'vitest';
import { executeCoreTools, resolveOutputPathWithin, checkBlockedInstallPatterns, checkAllowedCommand, checkBashPathScope, DEFAULT_ALLOWED_COMMANDS, type ToolContext } from './core.js';
import { looksLikeBinaryBytes } from './binary-detect.js';
/**
 * Creates a fresh, uniquely named scratch directory under the OS temp dir.
 *
 * @returns The path of the new directory (its name starts with `maestro-core-`).
 */
function makeWorkspace(): string {
  const prefix = path.join(tmpdir(), 'maestro-core-');
  return fs.mkdtempSync(prefix);
}
/**
 * Builds the tool context used by most tests in this file.
 *
 * @param workspacePath Directory the tools should treat as the workspace.
 * @returns A context for that workspace with `editAllowed` set to `true`.
 */
function makeContext(workspacePath: string): ToolContext {
  const context: ToolContext = { workspacePath, editAllowed: true };
  return context;
}
/**
 * Behavioural tests for `executeCoreTools` covering the Read, Write and Bash
 * tools, plus `resolveOutputPathWithin`. Each test creates its own scratch
 * workspace, which `afterEach` removes.
 */
describe('core tools', () => {
  type ToolInput = Parameters<typeof executeCoreTools>[1];

  let workspacePath = '';

  afterEach(() => {
    if (!workspacePath) return;
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  /** Writes `data` to `<workspace>/<dir>/<name>`, creating `<dir>` first. */
  const seed = (dir: string, name: string, data: string | Buffer): void => {
    const target = path.join(workspacePath, dir);
    fs.mkdirSync(target, { recursive: true });
    fs.writeFileSync(path.join(target, name), data);
  };

  /** First four bytes of a ZIP container: PK\x03\x04 (xlsx/docx/pptx are zips). */
  const zipHeader = (): Buffer => Buffer.from([0x50, 0x4b, 0x03, 0x04]);

  /** Runs the Read tool against the current workspace. */
  const read = (input: ToolInput) => executeCoreTools('Read', input, makeContext(workspacePath));

  /** Runs the Bash tool against the current workspace. */
  const bash = (input: ToolInput) => executeCoreTools('Bash', input, makeContext(workspacePath));

  /** Runs the Write tool against the current workspace. */
  const write = (input: ToolInput) => executeCoreTools('Write', input, makeContext(workspacePath));

  it('blocks Read on image files and suggests ReadImage', async () => {
    workspacePath = makeWorkspace();
    seed('input', 'capture.jpg', Buffer.from([0xff, 0xd8, 0xff]));

    const result = await read({ file_path: 'input/capture.jpg' });

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('ReadImage');
  });

  it('blocks Read on PDF files and suggests ReadPdf', async () => {
    workspacePath = makeWorkspace();
    seed('input', 'manual.pdf', '%PDF-1.4\n');

    const result = await read({ file_path: 'input/manual.pdf' });

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('ReadPdf');
  });

  it('blocks Read on xlsx and suggests ReadExcel', async () => {
    workspacePath = makeWorkspace();
    seed('output', 'components.xlsx', zipHeader());

    const result = await read({ file_path: 'output/components.xlsx' });

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('ReadExcel');
  });

  it('blocks Read on docx and suggests ReadDocx', async () => {
    workspacePath = makeWorkspace();
    seed('input', 'report.docx', zipHeader());

    const result = await read({ file_path: 'input/report.docx' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('ReadDocx');
  });

  it('blocks Read on pptx and suggests ReadPPTX', async () => {
    workspacePath = makeWorkspace();
    seed('input', 'deck.pptx', zipHeader());

    const result = await read({ file_path: 'input/deck.pptx' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('ReadPPTX');
  });

  it('blocks Read on opaque binary extensions like .zip', async () => {
    workspacePath = makeWorkspace();
    seed('input', 'archive.zip', zipHeader());

    const result = await read({ file_path: 'input/archive.zip' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('binary');
  });

  it('rejects file with NUL byte in head even if extension is unknown', async () => {
    workspacePath = makeWorkspace();
    // `.bak` is not a listed extension, but the head contains a NUL byte, so
    // content-based detection is expected to reject it.
    seed('output', 'unknown.bak', Buffer.from([0x68, 0x69, 0x00, 0x21]));

    const result = await read({ file_path: 'output/unknown.bak' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('binary');
  });

  it('still allows Read on plain text files (regression guard)', async () => {
    workspacePath = makeWorkspace();
    seed('output', 'note.txt', 'hello\nworld\n');

    const result = await read({ file_path: 'output/note.txt' });

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('hello');
    expect(result?.output).toContain('world');
  });

  it('allows Read on empty file', async () => {
    workspacePath = makeWorkspace();
    seed('output', 'empty.txt', '');

    const result = await read({ file_path: 'output/empty.txt' });

    expect(result?.isError).toBe(false);
  });

  it('allows new Write files anywhere within the workspace', async () => {
    workspacePath = makeWorkspace();

    // Creating a file in a non-output subdirectory succeeds, matching the
    // overwrite case which was already permitted anywhere in the workspace.
    const subdir = await write({ file_path: 'notes/todo.md', content: 'x' });
    expect(subdir?.isError).toBe(false);
    expect(fs.existsSync(path.join(workspacePath, 'notes', 'todo.md'))).toBe(true);

    // Writing under output/ keeps working.
    const allowed = await write({ file_path: 'output/todo.md', content: 'ok' });
    expect(allowed?.isError).toBe(false);

    // Paths that climb out of the workspace are still refused.
    const escaped = await write({ file_path: '../outside.txt', content: 'no' });
    expect(escaped?.isError).toBe(true);
  });

  it('resolves output paths only within allowed prefixes', () => {
    workspacePath = makeWorkspace();
    const allowedPrefixes = ['output/ocr'];

    const resolved = resolveOutputPathWithin(workspacePath, 'output/ocr/a.md', allowedPrefixes);
    expect(resolved).toContain(path.join('output', 'ocr', 'a.md'));

    const outsidePrefix = () => resolveOutputPathWithin(workspacePath, 'output/reviewed/a.json', ['output/ocr']);
    expect(outsidePrefix).toThrow(/output\/ocr/);
  });

  it('executes Bash commands asynchronously and returns stdout', async () => {
    workspacePath = makeWorkspace();

    const result = await bash({ command: `python3 -c "print('hello from bash')"` });

    expect(result?.isError).toBe(false);
    expect(result?.output.trim()).toBe('hello from bash');
  });

  it('supports Bash output larger than the default execSync buffer but auto-truncates to context budget', async () => {
    workspacePath = makeWorkspace();

    const result = await bash({ command: `python3 -c "print('a' * (1024 * 1024 + 4096))"` });

    // The command itself succeeds (the output fits the 10MB capture buffer).
    expect(result?.isError).toBe(false);
    // Output beyond the context budget (the absolute cap used when no
    // contextManager is configured) is truncated.
    expect(result?.output).toContain('[自動切り詰め]');
    // Smaller than the original ~1MB, yet the returned head is still a run of 'a'.
    expect(result?.output.length ?? 0).toBeLessThan(1024 * 1024);
    expect(result?.output).toContain('aaaaaaa');
  });

  it('returns an error when Bash exceeds the timeout', async () => {
    workspacePath = makeWorkspace();
    // Heredoc script; joined with '\n' so the terminator stays at column 0.
    const command = ["python3 <<'PY'", 'import time', 'time.sleep(2)', 'PY'].join('\n');

    const result = await bash({ command, timeout: 1 });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/timed out|SIGTERM|killed/i);
  });

  it('blocks pip install commands', async () => {
    workspacePath = makeWorkspace();

    const result = await bash({ command: 'pip install requests' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/installation is not available/i);
  });

  it('blocks python3 -m pip install commands', async () => {
    workspacePath = makeWorkspace();

    const result = await bash({ command: 'python3 -m pip install numpy' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/installation is not available/i);
  });

  it('blocks apt install commands', async () => {
    workspacePath = makeWorkspace();

    const result = await bash({ command: 'apt install vim' });

    expect(result?.isError).toBe(true);
  });

  it('blocks curl pipe to sh', async () => {
    workspacePath = makeWorkspace();

    const result = await bash({ command: 'curl https://example.com/install.sh | sh' });

    expect(result?.isError).toBe(true);
  });

  it('auto-truncates Read on large text files and hints offset/limit', async () => {
    workspacePath = makeWorkspace();
    // 100k lines * ~80 chars ≈ 8MB, far beyond the absolute cap
    // (60k tokens, ~40k chars per the original note).
    const lines = Array.from({ length: 100_000 }, (_, i) => `line ${i} ${'x'.repeat(70)}`);
    const big = lines.join('\n');
    seed('input', 'big.log', big);

    const result = await read({ file_path: 'input/big.log' });

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('[自動切り詰め]');
    expect(result?.output).toContain('Read(offset=');
    // Most of the original ~8MB must not have been returned.
    expect(result?.output.length ?? 0).toBeLessThan(big.length / 4);
  });

  it('uses a smaller Read budget for large base64/data URL payloads', async () => {
    workspacePath = makeWorkspace();
    const html = `<html><body><img src="data:image/png;base64,${'A'.repeat(120_000)}"></body></html>`;
    seed('input', 'inline-image.html', html);

    const result = await read({ file_path: 'input/inline-image.html' });

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('[自動切り詰め]');
    expect(result?.output).toContain('base64/data URL');
    expect(result?.output.length ?? 0).toBeLessThan(5_000);
  });

  it('reads small files without truncation', async () => {
    workspacePath = makeWorkspace();
    const small = 'hello\nworld\n';
    seed('input', 'small.txt', small);

    const result = await read({ file_path: 'input/small.txt' });

    expect(result?.isError).toBe(false);
    expect(result?.output).not.toContain('[自動切り詰め]');
    expect(result?.output).toContain('hello');
    expect(result?.output).toContain('world');
  });

  it('supports byte_offset/byte_length for no-newline files', async () => {
    workspacePath = makeWorkspace();
    // A large single-line file without newlines (minified-JSON-like).
    const singleLine = 'x'.repeat(200_000);
    seed('input', 'minified.json', singleLine);

    // Read an explicit byte range.
    const result = await read({
      file_path: 'input/minified.json',
      byte_offset: 1000,
      byte_length: 500,
    });

    expect(result?.isError).toBe(false);
    expect(result?.output).not.toContain('[自動切り詰め]');
    expect(result?.output.length).toBe(500);
    expect(result?.output).toBe('x'.repeat(500));
  });

  it('auto-truncates Read with byte-based hint for no-newline files', async () => {
    workspacePath = makeWorkspace();
    // A large single-line file.
    const singleLine = 'x'.repeat(500_000);
    seed('input', 'minified.json', singleLine);

    const result = await read({ file_path: 'input/minified.json' });

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('[自動切り詰め]');
    // The hint must point at byte_offset rather than line-based addressing.
    expect(result?.output).toContain('byte_offset');
  });
});
/**
 * Verifies that an `AbortSignal` supplied through the tool context cancels
 * Bash execution, and that an idle signal leaves normal execution untouched.
 */
describe('Cancel-traceability PR2: Bash AbortSignal propagation', () => {
  let workspacePath = '';

  afterEach(() => {
    if (!workspacePath) return;
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  /** Context for `workspace` that carries `signal` as its abort signal. */
  const makeContextWithSignal = (workspace: string, signal: AbortSignal): ToolContext => ({
    workspacePath: workspace,
    editAllowed: true,
    abortSignal: signal,
  });

  it('aborts a long-running Bash within ~500ms when signal fires mid-execution', async () => {
    workspacePath = makeWorkspace();
    const controller = new AbortController();
    const ctx = makeContextWithSignal(workspacePath, controller.signal);
    // Long but bounded sleep, so a runaway test still ends on its own. The
    // heredoc form avoids `;`-separated Python, which the allowlist parser
    // would reject. Joined with '\n' so the terminator stays at column 0.
    const cmd = ["python3 <<'PY'", 'import time', 'time.sleep(30)', 'PY'].join('\n');

    const startedAt = Date.now();
    // Trigger the abort 100ms after launch.
    setTimeout(() => controller.abort(), 100);
    const result = await executeCoreTools('Bash', { command: cmd, timeout: 60 }, ctx);
    const elapsed = Date.now() - startedAt;

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/cancelled|abort/i);
    // Generous bound: spawning the child plus delivering the signal should
    // take well under 2s. Anything longer means the signal is not killing it.
    expect(elapsed).toBeLessThan(2_000);
  }, 10_000);

  it('returns immediately when signal is already aborted before execution', async () => {
    workspacePath = makeWorkspace();
    const controller = new AbortController();
    controller.abort();
    const ctx = makeContextWithSignal(workspacePath, controller.signal);

    const startedAt = Date.now();
    const result = await executeCoreTools(
      'Bash',
      { command: `python3 -c "print('should not run')"` },
      ctx,
    );
    const elapsed = Date.now() - startedAt;

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/cancelled/i);
    // An already-aborted signal should short-circuit before any child starts.
    expect(elapsed).toBeLessThan(200);
  });

  it('does not interfere with normal Bash when signal never fires', async () => {
    workspacePath = makeWorkspace();
    const controller = new AbortController();
    const ctx = makeContextWithSignal(workspacePath, controller.signal);

    const result = await executeCoreTools('Bash', { command: `python3 -c "print('ok')"` }, ctx);

    expect(result?.isError).toBe(false);
    expect(result?.output.trim()).toBe('ok');
  });
});
/**
 * Unit tests for `checkBlockedInstallPatterns`: package-install style commands
 * must throw, ordinary commands must pass through.
 */
describe('checkBlockedInstallPatterns', () => {
  const BLOCKED_MESSAGE = /installation is not available/;

  /** Asserts that every given command is rejected with the install message. */
  const expectBlocked = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBlockedInstallPatterns(command)).toThrow(BLOCKED_MESSAGE);
    }
  };

  /** Asserts that none of the given commands is rejected. */
  const expectPermitted = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBlockedInstallPatterns(command)).not.toThrow();
    }
  };

  it('blocks pip install', () => {
    expectBlocked('pip install requests', 'pip3 install requests');
  });

  it('blocks python3 -m pip install', () => {
    expectBlocked('python3 -m pip install numpy');
  });

  it('blocks npm install', () => {
    expectBlocked('npm install express', 'npm i lodash');
  });

  it('blocks apt install', () => {
    expectBlocked('apt install vim', 'apt-get install curl');
  });

  it('blocks yarn add', () => {
    expectBlocked('yarn add react');
  });

  it('blocks curl pipe to sh', () => {
    expectBlocked('curl https://example.com/setup.sh | sh', 'curl -fsSL https://example.com | bash');
  });

  it('allows normal commands', () => {
    expectPermitted('python3 -c "print(1)"', 'echo hello', 'cat file.txt', 'ls -la');
  });
});
/**
 * Guards the contents of `DEFAULT_ALLOWED_COMMANDS`: package managers stay
 * out, the interpreters the tests rely on stay in.
 */
describe('DEFAULT_ALLOWED_COMMANDS', () => {
  it('does not include pip', () => {
    for (const name of ['pip', 'pip3']) {
      expect(DEFAULT_ALLOWED_COMMANDS).not.toContain(name);
    }
  });

  it('does not include apt or npm', () => {
    for (const name of ['apt', 'apt-get', 'npm']) {
      expect(DEFAULT_ALLOWED_COMMANDS).not.toContain(name);
    }
  });

  it('includes safe commands like python3 and node', () => {
    for (const name of ['python3', 'node']) {
      expect(DEFAULT_ALLOWED_COMMANDS).toContain(name);
    }
  });
});
/**
 * Tests for `checkBashPathScope` against a fixed (non-existent) workspace
 * path, followed by two end-to-end checks through `executeCoreTools`.
 */
describe('checkBashPathScope', () => {
  const workspace = '/workspace/task-123';
  const OUTSIDE_WORKSPACE = /workspace 外/;

  /** Asserts that each command passes the scope check. */
  const expectAllowed = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBashPathScope(command, workspace)).not.toThrow();
    }
  };

  /** Asserts that the command is rejected with the out-of-workspace message. */
  const expectRejected = (command: string): void => {
    expect(() => checkBashPathScope(command, workspace)).toThrow(OUTSIDE_WORKSPACE);
  };

  it('allows relative paths', () => {
    expectAllowed('cat ./output/foo.txt', 'cat output/foo.txt', 'ls -la');
  });

  it('allows paths inside workspace', () => {
    expectAllowed(`cat ${workspace}/file.txt`, `ls ${workspace}/output/`);
  });

  it('allows /tmp and its subdirectories', () => {
    expectAllowed('cat /tmp/foo', 'ls /tmp/', 'echo hello > /tmp/out.txt');
  });

  it('allows /dev/null and standard streams', () => {
    expectAllowed('cat /dev/null', 'echo x > /dev/null', 'cat /dev/stdin');
  });

  it('allows /usr/bin and other binary directories', () => {
    expectAllowed('ls /usr/bin/grep', '/usr/local/bin/node --version', 'ls /bin/bash');
  });

  it('rejects /root/', () => {
    expectRejected('cat /root/.bashrc');
  });

  it('rejects /etc/', () => {
    expectRejected('cat /etc/passwd');
  });

  it('rejects bare / (root directory scan)', () => {
    expectRejected('find / -name x');
  });

  it('rejects /root in compound commands', () => {
    expectRejected('cd /root && ls');
  });

  it('rejects /etc/ on right-hand side of redirect', () => {
    expectRejected('echo /etc/passwd > /tmp/x');
  });

  it('rejects /root/ in compound with semicolons', () => {
    expectRejected('cat ./input/foo.txt; ls /root/');
  });

  it('rejects paths in single-quoted globs', () => {
    expectRejected("ls '/etc/*'");
  });

  it('rejects /home/other-user', () => {
    expectRejected('cat /home/otheruser/.ssh/id_rsa');
  });

  it('allows executeCoreTools Bash with workspace-relative path', async () => {
    const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'scope-test-'));
    try {
      fs.writeFileSync(path.join(tmpWs, 'hello.txt'), 'hi');
      const context = { workspacePath: tmpWs, editAllowed: false };
      const result = await executeCoreTools('Bash', { command: 'cat hello.txt' }, context);
      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('hi');
    } finally {
      fs.rmSync(tmpWs, { recursive: true, force: true });
    }
  });

  it('rejects executeCoreTools Bash with /root/ path', async () => {
    const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'scope-test-'));
    try {
      const context = { workspacePath: tmpWs, editAllowed: false };
      const result = await executeCoreTools('Bash', { command: 'cat /root/.bashrc' }, context);
      expect(result?.isError).toBe(true);
      expect(result?.output).toContain('workspace 外');
    } finally {
      fs.rmSync(tmpWs, { recursive: true, force: true });
    }
  });
});
/**
 * Tests that `checkBashPathScope` catches relative-path escapes (`..`
 * traversal, including behind `flag=` / `VAR=` prefixes) without flagging
 * slashed arguments that are not out-of-workspace paths.
 */
describe('checkBashPathScope relative containment', () => {
  const ws = '/work/local/42';

  /** Wraps a scope check of `command` in a thunk suitable for `expect(...)`. */
  const scoped = (command: string) => (): void => {
    checkBashPathScope(command, ws);
  };

  it('rejects relative parent traversal', () => {
    expect(scoped('cat ../../config.yaml')).toThrow(/workspace/);
    expect(scoped('cat ../../../etc/passwd')).toThrow(/workspace/);
  });

  it('allows in-workspace relative paths', () => {
    expect(scoped('cat ./output/result.txt')).not.toThrow();
    expect(scoped('cat output/result.txt')).not.toThrow();
  });

  it('still allows safe absolute prefixes', () => {
    expect(scoped('cat /usr/bin/env')).not.toThrow();
    expect(scoped('echo hi > /tmp/x')).not.toThrow();
  });

  it('rejects descend-then-escape relative paths', () => {
    expect(scoped('cat output/../../config.yaml')).toThrow(/workspace|外/);
    expect(scoped('cat sub/dir/../../../../etc/passwd')).toThrow(/workspace|外/);
  });

  it('rejects traversal hidden behind a flag= or VAR= prefix', () => {
    expect(scoped('python3 run.py --file=../../config.yaml')).toThrow(/workspace|外/);
    expect(scoped('PYTHONPATH=../../lib python3 x.py')).toThrow(/workspace|外/);
  });

  it('does not false-positive on in-workspace slashed args, regex patterns, or urls', () => {
    const harmless = [
      'grep -rn "foo/bar" .',
      "sed 's/a/b/' output/x.txt",
      'echo https://example.com/a/b',
      'find . -name "*.ts"',
    ];
    for (const command of harmless) {
      expect(scoped(command)).not.toThrow();
    }
  });
});
// Tests for running Bash with `bashUnrestricted: true` in the tool context.
// Both tests are deliberately tolerant: they only assert on the *kind* of
// error (or on success output), because per the original notes the sandbox
// (bwrap) may be unable to start in the test environment.
describe('bashUnrestricted mode', () => {
it('skips command whitelist when bashUnrestricted is true (delegates to sandbox)', async () => {
const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'unrestricted-'));
try {
fs.writeFileSync(path.join(tmpWs, 'data.txt'), 'hello');
// Intent (per the original note): a command that is NOT in
// DEFAULT_ALLOWED_COMMANDS, e.g. 'curl', fails in restricted mode, while in
// unrestricted mode it is delegated to bwrap, which may fail (no user
// namespace) but must not report a whitelist error.
// NOTE(review): the command actually executed below is `cat data.txt`, not
// `curl`. Another suite in this file runs `cat hello.txt` successfully in
// restricted mode, so `cat` appears to pass the whitelist anyway and this
// test may not exercise the whitelist skip at all — confirm and consider
// switching to a command that is known to be off the list.
const result = await executeCoreTools(
'Bash',
{ command: 'cat data.txt' },
{ workspacePath: tmpWs, editAllowed: false, bashUnrestricted: true },
);
// Success path: the file content is echoed back. Error path: whatever failed,
// it must not be the whitelist rejection message.
if (result?.isError) {
expect(result.output).not.toContain('not in the allowed commands list');
} else {
expect(result?.output).toContain('hello');
}
} finally {
// Always remove the scratch workspace, even when an assertion fails.
fs.rmSync(tmpWs, { recursive: true, force: true });
}
});
it('skips path scope check in sandboxed mode (bwrap handles isolation)', async () => {
const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'unrestricted-'));
try {
// /root/ would normally be blocked by checkBashPathScope.
// In sandboxed mode the host path scope check is skipped (bwrap handles
// isolation). bashSandbox: 'always' forces the sandboxed path regardless
// of whether bwrap can actually launch in this env.
const result = await executeCoreTools(
'Bash',
{ command: 'cat /root/.bashrc' },
{ workspacePath: tmpWs, editAllowed: false, bashUnrestricted: true, bashSandbox: 'always' },
);
// Should NOT contain the path scope error message (it is never invoked).
// Note: when the call does not error, nothing is asserted here.
if (result?.isError) {
expect(result.output).not.toContain('workspace 外');
}
} finally {
// Always remove the scratch workspace, even when an assertion fails.
fs.rmSync(tmpWs, { recursive: true, force: true });
}
});
});
/**
 * Tests that every Bash invocation appends one JSON line to
 * `<workspace>/logs/bash-history.jsonl`, including failed and blocked commands.
 */
describe('Bash history logging', () => {
  /** Runs `body` with a fresh scratch workspace and always removes it afterwards. */
  const withScratchWorkspace = async (body: (tmpWs: string) => Promise<void>): Promise<void> => {
    const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'bash-log-'));
    try {
      await body(tmpWs);
    } finally {
      fs.rmSync(tmpWs, { recursive: true, force: true });
    }
  };

  /** Location of the history log inside a workspace. */
  const historyPath = (tmpWs: string): string => path.join(tmpWs, 'logs', 'bash-history.jsonl');

  /** Raw (unparsed) lines of the history log. */
  const historyLines = (tmpWs: string): string[] =>
    fs.readFileSync(historyPath(tmpWs), 'utf-8').trim().split('\n');

  /** Executes `command` through the Bash tool with editing disabled. */
  const runBash = (tmpWs: string, command: string) =>
    executeCoreTools('Bash', { command }, { workspacePath: tmpWs, editAllowed: false });

  it('appends a JSONL entry to logs/bash-history.jsonl on successful execution', () =>
    withScratchWorkspace(async (tmpWs) => {
      await runBash(tmpWs, 'echo hello');

      expect(fs.existsSync(historyPath(tmpWs))).toBe(true);
      const lines = historyLines(tmpWs);
      expect(lines.length).toBe(1);

      const entry = JSON.parse(lines[0]);
      expect(entry.command).toBe('echo hello');
      expect(entry.isError).toBe(false);
      expect(entry).toHaveProperty('timestamp');
      expect(entry).toHaveProperty('durationMs');
      expect(typeof entry.durationMs).toBe('number');
    }));

  it('logs failed commands with isError=true and exitCode', () =>
    withScratchWorkspace(async (tmpWs) => {
      await runBash(tmpWs, 'false');

      const entry = JSON.parse(historyLines(tmpWs)[0]);
      expect(entry.command).toBe('false');
      expect(entry.isError).toBe(true);
    }));

  it('logs blocked commands (whitelist rejection) with isError=true', () =>
    withScratchWorkspace(async (tmpWs) => {
      await runBash(tmpWs, 'apt install foo');

      const entry = JSON.parse(historyLines(tmpWs)[0]);
      expect(entry.command).toBe('apt install foo');
      expect(entry.isError).toBe(true);
      expect(entry.blocked).toBe(true);
    }));

  it('accumulates multiple entries across calls', () =>
    withScratchWorkspace(async (tmpWs) => {
      await runBash(tmpWs, 'echo one');
      await runBash(tmpWs, 'echo two');

      const lines = historyLines(tmpWs);
      expect(lines.length).toBe(2);
      expect(JSON.parse(lines[0]).command).toBe('echo one');
      expect(JSON.parse(lines[1]).command).toBe('echo two');
    }));
});
/**
 * Package-install commands must be rejected by the Bash tool regardless of
 * mode (restricted or `bashUnrestricted`).
 *
 * The tests run against a throwaway workspace rather than `process.cwd()`:
 * the Bash tool appends to `<workspace>/logs/bash-history.jsonl` even for
 * blocked commands, so using the current directory would leave log files in
 * the repository checkout.
 */
describe('Bash install rejection (all modes)', () => {
  let scratchWorkspace = '';

  afterEach(() => {
    if (!scratchWorkspace) return;
    fs.rmSync(scratchWorkspace, { recursive: true, force: true });
    scratchWorkspace = '';
  });

  /**
   * Builds a read-only context rooted at a per-test scratch workspace
   * (created lazily on first use), with `over` applied on top.
   */
  function installCtx(over: Partial<ToolContext> = {}): ToolContext {
    if (!scratchWorkspace) {
      scratchWorkspace = fs.mkdtempSync(path.join(tmpdir(), 'install-reject-'));
    }
    return { workspacePath: scratchWorkspace, editAllowed: false, ...over };
  }

  it('blocks pip install even when bashUnrestricted', async () => {
    const r = await executeCoreTools('Bash', { command: 'pip install pypdf' }, installCtx({ bashUnrestricted: true }));
    expect(r?.isError).toBe(true);
    expect(r?.output).toMatch(/installation is not available|not allowed/i);
    // The message is expected to point the user at preinstalled packages.
    expect(r?.output).toMatch(/preinstalled|pre-installed|プリインストール/i);
  });

  it('blocks npm install', async () => {
    const r = await executeCoreTools('Bash', { command: 'npm install left-pad' }, installCtx());
    expect(r?.isError).toBe(true);
  });
});
/**
 * Tests for Bash execution with `bashSandbox: 'off'`: plain execution works,
 * and with `bashUnrestricted` the child process does not see secrets from the
 * parent environment.
 *
 * A throwaway workspace is used instead of `process.cwd()` because the Bash
 * tool writes `<workspace>/logs/bash-history.jsonl`, which would otherwise
 * land in the repository checkout.
 */
describe('resolveBashMode + env scrub (hardened fallback)', () => {
  let scratchWorkspace = '';

  afterEach(() => {
    if (!scratchWorkspace) return;
    fs.rmSync(scratchWorkspace, { recursive: true, force: true });
    scratchWorkspace = '';
  });

  /**
   * Builds a read-only, sandbox-off context rooted at a per-test scratch
   * workspace (created lazily on first use), with `over` applied on top.
   */
  function installCtx(over: Partial<ToolContext> = {}): ToolContext {
    if (!scratchWorkspace) {
      scratchWorkspace = fs.mkdtempSync(path.join(tmpdir(), 'bash-mode-'));
    }
    return { workspacePath: scratchWorkspace, editAllowed: false, bashSandbox: 'off', ...over };
  }

  it('off mode runs plain exec and respects whitelist', async () => {
    const r = await executeCoreTools('Bash', { command: 'echo hello' },
      installCtx({ bashSandbox: 'off' }));
    expect(r?.isError).toBe(false);
    expect(r?.output).toContain('hello');
  });

  it('hardened fallback scrubs secrets from env', async () => {
    // Remember any pre-existing value so the test leaves the process
    // environment exactly as it found it, even if the call below throws.
    const previous = process.env.MCP_ENCRYPTION_KEY;
    process.env.MCP_ENCRYPTION_KEY = 'topsecret';
    try {
      const r = await executeCoreTools('Bash',
        { command: 'node -e "process.stdout.write(String(process.env.MCP_ENCRYPTION_KEY))"' },
        installCtx({ bashSandbox: 'off', bashUnrestricted: true }));
      expect(r?.output).not.toContain('topsecret');
      // The child prints String(undefined) when the variable was removed.
      expect(r?.output).toMatch(/undefined/);
    } finally {
      if (previous === undefined) {
        delete process.env.MCP_ENCRYPTION_KEY;
      } else {
        process.env.MCP_ENCRYPTION_KEY = previous;
      }
    }
  });
});
/**
 * Sanity checks on `looksLikeBinaryBytes`, the detector imported from
 * `./binary-detect.js`.
 */
describe('Read delegates to shared binary detector', () => {
  it('flags a magic-byte binary with no NUL in head', () => {
    // Eight signature bytes followed by 'A', 'B'; no 0x00 anywhere.
    const signature = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
    const head = Buffer.from([...signature, 0x41, 0x42]);

    const verdict = looksLikeBinaryBytes(head);

    expect(verdict.binary).toBe(true);
  });

  it('keeps plain source text readable', () => {
    const source = Buffer.from('export const x = 1;\n', 'utf-8');

    const verdict = looksLikeBinaryBytes(source);

    expect(verdict.binary).toBe(false);
  });
});

1232
src/engine/tools/core.ts Normal file

File diff suppressed because it is too large Load Diff