sync: update from private repo (15455e9)
Some checks failed
CI / build-and-test (push) Has been cancelled
Some checks failed
CI / build-and-test (push) Has been cancelled
This commit is contained in:
parent
02c7dfdd83
commit
a44f6b41e2
6
.gitignore
vendored
6
.gitignore
vendored
@ -39,5 +39,9 @@ data/browser-sessions/*
|
|||||||
# Core dumps from native crashes (sqlite/playwright/sharp). These contain raw
|
# Core dumps from native crashes (sqlite/playwright/sharp). These contain raw
|
||||||
# process memory — including the decrypted master key, SSH private keys and the
|
# process memory — including the decrypted master key, SSH private keys and the
|
||||||
# session secret — so they must never be committed.
|
# session secret — so they must never be committed.
|
||||||
|
# NOTE: keep this narrow. A bare `core.*` also matches source files like
|
||||||
|
# src/engine/tools/core.ts / core.test.ts, which silently dropped them from the
|
||||||
|
# OSS mirror during oss-sync (its `git add -A` honours .gitignore). Core dumps
|
||||||
|
# are named `core` or `core.<pid>`, so match only suffixes that start with a digit.
|
||||||
core
|
core
|
||||||
core.*
|
core.[0-9]*
|
||||||
|
|||||||
725
src/engine/tools/core.test.ts
Normal file
725
src/engine/tools/core.test.ts
Normal file
@ -0,0 +1,725 @@
|
|||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import { tmpdir } from 'os';
|
||||||
|
import { afterEach, describe, expect, it } from 'vitest';
|
||||||
|
import { executeCoreTools, resolveOutputPathWithin, checkBlockedInstallPatterns, checkAllowedCommand, checkBashPathScope, DEFAULT_ALLOWED_COMMANDS, type ToolContext } from './core.js';
|
||||||
|
import { looksLikeBinaryBytes } from './binary-detect.js';
|
||||||
|
|
||||||
|
/**
 * Creates a fresh, uniquely named scratch directory under the OS temp
 * directory (name prefix `maestro-core-`) and returns its path.
 */
function makeWorkspace(): string {
  const prefix = path.join(tmpdir(), 'maestro-core-');
  const createdDir = fs.mkdtempSync(prefix);
  return createdDir;
}
|
||||||
|
|
||||||
|
/**
 * Builds the minimal tool context used by these tests: rooted at the given
 * workspace, with editing enabled and no other fields set.
 */
function makeContext(workspacePath: string): ToolContext {
  const context: ToolContext = { workspacePath, editAllowed: true };
  return context;
}
|
||||||
|
|
||||||
|
describe('core tools', () => {
  // Scratch workspace of the test currently running; '' means nothing to clean up.
  let workspacePath = '';

  afterEach(() => {
    if (!workspacePath) {
      return;
    }
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  /** Ensures `<workspace>/<dir>` exists, then writes `<workspace>/<dir>/<name>`. */
  const seedFile = (dir: string, name: string, content: string | Buffer): void => {
    fs.mkdirSync(path.join(workspacePath, dir), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, dir, name), content);
  };

  /** Invokes the Read tool on a workspace-relative path with a default context. */
  const readTool = (filePath: string) =>
    executeCoreTools('Read', { file_path: filePath }, makeContext(workspacePath));

  /** `PK\x03\x04`: the ZIP signature (xlsx/docx/pptx are zip containers). */
  const zipHeader = (): Buffer => Buffer.from([0x50, 0x4b, 0x03, 0x04]);

  // ---------------------------------------------------------------- Read: binary guards

  it('blocks Read on image files and suggests ReadImage', async () => {
    workspacePath = makeWorkspace();
    seedFile('input', 'capture.jpg', Buffer.from([0xff, 0xd8, 0xff]));

    const outcome = await readTool('input/capture.jpg');

    expect(outcome).not.toBeNull();
    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('ReadImage');
  });

  it('blocks Read on PDF files and suggests ReadPdf', async () => {
    workspacePath = makeWorkspace();
    seedFile('input', 'manual.pdf', '%PDF-1.4\n');

    const outcome = await readTool('input/manual.pdf');

    expect(outcome).not.toBeNull();
    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('ReadPdf');
  });

  it('blocks Read on xlsx and suggests ReadExcel', async () => {
    workspacePath = makeWorkspace();
    seedFile('output', 'components.xlsx', zipHeader());

    const outcome = await readTool('output/components.xlsx');

    expect(outcome).not.toBeNull();
    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('ReadExcel');
  });

  it('blocks Read on docx and suggests ReadDocx', async () => {
    workspacePath = makeWorkspace();
    seedFile('input', 'report.docx', zipHeader());

    const outcome = await readTool('input/report.docx');

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('ReadDocx');
  });

  it('blocks Read on pptx and suggests ReadPPTX', async () => {
    workspacePath = makeWorkspace();
    seedFile('input', 'deck.pptx', zipHeader());

    const outcome = await readTool('input/deck.pptx');

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('ReadPPTX');
  });

  it('blocks Read on opaque binary extensions like .zip', async () => {
    workspacePath = makeWorkspace();
    seedFile('input', 'archive.zip', zipHeader());

    const outcome = await readTool('input/archive.zip');

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('binary');
  });

  it('rejects file with NUL byte in head even if extension is unknown', async () => {
    workspacePath = makeWorkspace();
    // `.bak` is not on the extension list, but the head contains a NUL byte,
    // so the file is expected to be rejected by content-based detection.
    seedFile('output', 'unknown.bak', Buffer.from([0x68, 0x69, 0x00, 0x21]));

    const outcome = await readTool('output/unknown.bak');

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toContain('binary');
  });

  // ---------------------------------------------------------------- Read: text still works

  it('still allows Read on plain text files (regression guard)', async () => {
    workspacePath = makeWorkspace();
    seedFile('output', 'note.txt', 'hello\nworld\n');

    const outcome = await readTool('output/note.txt');

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output).toContain('hello');
    expect(outcome?.output).toContain('world');
  });

  it('allows Read on empty file', async () => {
    workspacePath = makeWorkspace();
    seedFile('output', 'empty.txt', '');

    const outcome = await readTool('output/empty.txt');

    expect(outcome?.isError).toBe(false);
  });

  // ---------------------------------------------------------------- Write / output paths

  it('allows new Write files anywhere within the workspace', async () => {
    workspacePath = makeWorkspace();
    const writeTool = (file_path: string, content: string) =>
      executeCoreTools('Write', { file_path, content }, makeContext(workspacePath));

    // Creating a new file in a non-output subdirectory succeeds, matching the
    // overwrite case which was already permitted anywhere in the workspace.
    const inSubdir = await writeTool('notes/todo.md', 'x');
    expect(inSubdir?.isError).toBe(false);
    expect(fs.existsSync(path.join(workspacePath, 'notes', 'todo.md'))).toBe(true);

    // Writing under output/ keeps working.
    const inOutput = await writeTool('output/todo.md', 'ok');
    expect(inOutput?.isError).toBe(false);

    // Leaving the workspace via `..` must still be refused (resolveAndGuard).
    const outside = await writeTool('../outside.txt', 'no');
    expect(outside?.isError).toBe(true);
  });

  it('resolves output paths only within allowed prefixes', () => {
    workspacePath = makeWorkspace();

    const resolved = resolveOutputPathWithin(workspacePath, 'output/ocr/a.md', ['output/ocr']);
    expect(resolved).toContain(path.join('output', 'ocr', 'a.md'));

    const resolveOutsidePrefix = (): unknown =>
      resolveOutputPathWithin(workspacePath, 'output/reviewed/a.json', ['output/ocr']);
    expect(resolveOutsidePrefix).toThrow(/output\/ocr/);
  });

  // ---------------------------------------------------------------- Bash: execution

  it('executes Bash commands asynchronously and returns stdout', async () => {
    workspacePath = makeWorkspace();

    const outcome = await executeCoreTools(
      'Bash',
      { command: `python3 -c "print('hello from bash')"` },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output.trim()).toBe('hello from bash');
  });

  it('supports Bash output larger than the default execSync buffer but auto-truncates to context budget', async () => {
    workspacePath = makeWorkspace();

    const outcome = await executeCoreTools(
      'Bash',
      { command: `python3 -c "print('a' * (1024 * 1024 + 4096))"` },
      makeContext(workspacePath),
    );

    // The run itself succeeds (per the original note: captured via a 10MB buffer).
    expect(outcome?.isError).toBe(false);
    // Output beyond the context budget (the absolute cap when no contextManager
    // is configured) is expected to be truncated.
    expect(outcome?.output).toContain('[自動切り詰め]');
    // Smaller than the original ~1MB, yet the returned head is still a run of 'a'.
    expect(outcome?.output.length ?? 0).toBeLessThan(1024 * 1024);
    expect(outcome?.output).toContain('aaaaaaa');
  });

  it('returns an error when Bash exceeds the timeout', async () => {
    workspacePath = makeWorkspace();

    // Heredoc body must stay at column 0 so the command string is unchanged.
    const sleepLongerThanTimeout = `python3 <<'PY'
import time
time.sleep(2)
PY`;

    const outcome = await executeCoreTools(
      'Bash',
      { command: sleepLongerThanTimeout, timeout: 1 },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toMatch(/timed out|SIGTERM|killed/i);
  });

  // ---------------------------------------------------------------- Bash: install blocking

  it('blocks pip install commands', async () => {
    workspacePath = makeWorkspace();

    const outcome = await executeCoreTools(
      'Bash',
      { command: 'pip install requests' },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toMatch(/installation is not available/i);
  });

  it('blocks python3 -m pip install commands', async () => {
    workspacePath = makeWorkspace();

    const outcome = await executeCoreTools(
      'Bash',
      { command: 'python3 -m pip install numpy' },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toMatch(/installation is not available/i);
  });

  it('blocks apt install commands', async () => {
    workspacePath = makeWorkspace();

    const outcome = await executeCoreTools(
      'Bash',
      { command: 'apt install vim' },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(true);
  });

  it('blocks curl pipe to sh', async () => {
    workspacePath = makeWorkspace();

    const outcome = await executeCoreTools(
      'Bash',
      { command: 'curl https://example.com/install.sh | sh' },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(true);
  });

  // ---------------------------------------------------------------- Read: truncation

  it('auto-truncates Read on large text files and hints offset/limit', async () => {
    workspacePath = makeWorkspace();
    // 100k lines of roughly 80 chars (about 8MB) — per the original note this is
    // far above the absolute cap of 60k tokens (~40k chars).
    const filler = 'x'.repeat(70);
    const big = Array.from({ length: 100_000 }, (_, i) => `line ${i} ${filler}`).join('\n');
    seedFile('input', 'big.log', big);

    const outcome = await readTool('input/big.log');

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output).toContain('[自動切り詰め]');
    expect(outcome?.output).toContain('Read(offset=');
    // Most of the original payload must not have been returned.
    expect(outcome?.output.length ?? 0).toBeLessThan(big.length / 4);
  });

  it('uses a smaller Read budget for large base64/data URL payloads', async () => {
    workspacePath = makeWorkspace();
    const html = `<html><body><img src="data:image/png;base64,${'A'.repeat(120_000)}"></body></html>`;
    seedFile('input', 'inline-image.html', html);

    const outcome = await readTool('input/inline-image.html');

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output).toContain('[自動切り詰め]');
    expect(outcome?.output).toContain('base64/data URL');
    expect(outcome?.output.length ?? 0).toBeLessThan(5_000);
  });

  it('reads small files without truncation', async () => {
    workspacePath = makeWorkspace();
    const small = 'hello\nworld\n';
    seedFile('input', 'small.txt', small);

    const outcome = await readTool('input/small.txt');

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output).not.toContain('[自動切り詰め]');
    expect(outcome?.output).toContain('hello');
    expect(outcome?.output).toContain('world');
  });

  it('supports byte_offset/byte_length for no-newline files', async () => {
    workspacePath = makeWorkspace();
    // A large single-line file without any newline (minified-JSON-like).
    const singleLine = 'x'.repeat(200_000);
    seedFile('input', 'minified.json', singleLine);

    // Read an explicit byte range instead of lines.
    const outcome = await executeCoreTools(
      'Read',
      { file_path: 'input/minified.json', byte_offset: 1000, byte_length: 500 },
      makeContext(workspacePath),
    );

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output).not.toContain('[自動切り詰め]');
    expect(outcome?.output.length).toBe(500);
    expect(outcome?.output).toBe('x'.repeat(500));
  });

  it('auto-truncates Read with byte-based hint for no-newline files', async () => {
    workspacePath = makeWorkspace();
    // Large single-line file.
    const singleLine = 'x'.repeat(500_000);
    seedFile('input', 'minified.json', singleLine);

    const outcome = await readTool('input/minified.json');

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output).toContain('[自動切り詰め]');
    // The hint should point at byte_offset rather than a line-based offset.
    expect(outcome?.output).toContain('byte_offset');
  });
});
|
||||||
|
|
||||||
|
describe('Cancel-traceability PR2: Bash AbortSignal propagation', () => {
  // Scratch workspace of the test currently running; '' means nothing to clean up.
  let workspacePath = '';

  afterEach(() => {
    if (!workspacePath) {
      return;
    }
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  /** Edit-enabled context that additionally carries the given abort signal. */
  function makeContextWithSignal(workspace: string, signal: AbortSignal): ToolContext {
    const context: ToolContext = {
      workspacePath: workspace,
      editAllowed: true,
      abortSignal: signal,
    };
    return context;
  }

  it('aborts a long-running Bash within ~500ms when signal fires mid-execution', async () => {
    workspacePath = makeWorkspace();
    const aborter = new AbortController();
    const context = makeContextWithSignal(workspacePath, aborter.signal);

    // The sleep is long but finite, so even a runaway test ends eventually.
    // A heredoc is used (as in the timeout test) so the allowlist parser does
    // not reject `;`-separated multi-statement Python. Heredoc lines must stay
    // at column 0 to keep the command string unchanged.
    const longRunning = `python3 <<'PY'
import time
time.sleep(30)
PY`;
    const t0 = Date.now();

    // Trigger the abort 100ms after the command is launched.
    setTimeout(() => {
      aborter.abort();
    }, 100);

    const outcome = await executeCoreTools('Bash', { command: longRunning, timeout: 60 }, context);
    const tookMs = Date.now() - t0;

    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toMatch(/cancelled|abort/i);
    // Deliberately loose bound: spawning the child plus delivering the signal
    // should finish in under 2s; taking longer means the AbortSignal did not
    // actually kill the child.
    expect(tookMs).toBeLessThan(2_000);
  }, 10_000);

  it('returns immediately when signal is already aborted before execution', async () => {
    workspacePath = makeWorkspace();
    const aborter = new AbortController();
    aborter.abort();
    const context = makeContextWithSignal(workspacePath, aborter.signal);
    const t0 = Date.now();

    const outcome = await executeCoreTools(
      'Bash',
      { command: `python3 -c "print('should not run')"` },
      context,
    );

    const tookMs = Date.now() - t0;
    expect(outcome?.isError).toBe(true);
    expect(outcome?.output).toMatch(/cancelled/i);
    // With a pre-aborted signal the child should never even be forked.
    expect(tookMs).toBeLessThan(200);
  });

  it('does not interfere with normal Bash when signal never fires', async () => {
    workspacePath = makeWorkspace();
    const aborter = new AbortController();
    const context = makeContextWithSignal(workspacePath, aborter.signal);

    const outcome = await executeCoreTools(
      'Bash',
      { command: `python3 -c "print('ok')"` },
      context,
    );

    expect(outcome?.isError).toBe(false);
    expect(outcome?.output.trim()).toBe('ok');
  });
});
|
||||||
|
|
||||||
|
describe('checkBlockedInstallPatterns', () => {
  /** Asserts that every given command is refused with the "installation" message. */
  const expectBlocked = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBlockedInstallPatterns(command)).toThrow(/installation is not available/);
    }
  };

  /** Asserts that every given command passes the check without throwing. */
  const expectPermitted = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBlockedInstallPatterns(command)).not.toThrow();
    }
  };

  it('blocks pip install', () => {
    expectBlocked('pip install requests', 'pip3 install requests');
  });

  it('blocks python3 -m pip install', () => {
    expectBlocked('python3 -m pip install numpy');
  });

  it('blocks npm install', () => {
    expectBlocked('npm install express', 'npm i lodash');
  });

  it('blocks apt install', () => {
    expectBlocked('apt install vim', 'apt-get install curl');
  });

  it('blocks yarn add', () => {
    expectBlocked('yarn add react');
  });

  it('blocks curl pipe to sh', () => {
    expectBlocked(
      'curl https://example.com/setup.sh | sh',
      'curl -fsSL https://example.com | bash',
    );
  });

  it('allows normal commands', () => {
    expectPermitted('python3 -c "print(1)"', 'echo hello', 'cat file.txt', 'ls -la');
  });
});
|
||||||
|
|
||||||
|
describe('DEFAULT_ALLOWED_COMMANDS', () => {
  it('does not include pip', () => {
    for (const installer of ['pip', 'pip3']) {
      expect(DEFAULT_ALLOWED_COMMANDS).not.toContain(installer);
    }
  });

  it('does not include apt or npm', () => {
    for (const installer of ['apt', 'apt-get', 'npm']) {
      expect(DEFAULT_ALLOWED_COMMANDS).not.toContain(installer);
    }
  });

  it('includes safe commands like python3 and node', () => {
    for (const runtime of ['python3', 'node']) {
      expect(DEFAULT_ALLOWED_COMMANDS).toContain(runtime);
    }
  });
});
|
||||||
|
|
||||||
|
describe('checkBashPathScope', () => {
  const workspace = '/workspace/task-123';

  /** Asserts that each command passes the path-scope check for `workspace`. */
  const expectInScope = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBashPathScope(command, workspace)).not.toThrow();
    }
  };

  /** Asserts that each command is rejected with the out-of-workspace message. */
  const expectOutOfScope = (...commands: string[]): void => {
    for (const command of commands) {
      expect(() => checkBashPathScope(command, workspace)).toThrow(/workspace 外/);
    }
  };

  /** Runs `body` against a throwaway `scope-test-` workspace and always removes it. */
  const withScopeWorkspace = async (body: (ws: string) => Promise<void>): Promise<void> => {
    const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'scope-test-'));
    try {
      await body(tmpWs);
    } finally {
      fs.rmSync(tmpWs, { recursive: true, force: true });
    }
  };

  it('allows relative paths', () => {
    expectInScope('cat ./output/foo.txt', 'cat output/foo.txt', 'ls -la');
  });

  it('allows paths inside workspace', () => {
    expectInScope(`cat ${workspace}/file.txt`, `ls ${workspace}/output/`);
  });

  it('allows /tmp and its subdirectories', () => {
    expectInScope('cat /tmp/foo', 'ls /tmp/', 'echo hello > /tmp/out.txt');
  });

  it('allows /dev/null and standard streams', () => {
    expectInScope('cat /dev/null', 'echo x > /dev/null', 'cat /dev/stdin');
  });

  it('allows /usr/bin and other binary directories', () => {
    expectInScope('ls /usr/bin/grep', '/usr/local/bin/node --version', 'ls /bin/bash');
  });

  it('rejects /root/', () => {
    expectOutOfScope('cat /root/.bashrc');
  });

  it('rejects /etc/', () => {
    expectOutOfScope('cat /etc/passwd');
  });

  it('rejects bare / (root directory scan)', () => {
    expectOutOfScope('find / -name x');
  });

  it('rejects /root in compound commands', () => {
    expectOutOfScope('cd /root && ls');
  });

  it('rejects /etc/ on right-hand side of redirect', () => {
    expectOutOfScope('echo /etc/passwd > /tmp/x');
  });

  it('rejects /root/ in compound with semicolons', () => {
    expectOutOfScope('cat ./input/foo.txt; ls /root/');
  });

  it('rejects paths in single-quoted globs', () => {
    expectOutOfScope("ls '/etc/*'");
  });

  it('rejects /home/other-user', () => {
    expectOutOfScope('cat /home/otheruser/.ssh/id_rsa');
  });

  it('allows executeCoreTools Bash with workspace-relative path', async () => {
    await withScopeWorkspace(async (ws) => {
      fs.writeFileSync(path.join(ws, 'hello.txt'), 'hi');

      const outcome = await executeCoreTools(
        'Bash',
        { command: 'cat hello.txt' },
        { workspacePath: ws, editAllowed: false },
      );

      expect(outcome?.isError).toBe(false);
      expect(outcome?.output).toContain('hi');
    });
  });

  it('rejects executeCoreTools Bash with /root/ path', async () => {
    await withScopeWorkspace(async (ws) => {
      const outcome = await executeCoreTools(
        'Bash',
        { command: 'cat /root/.bashrc' },
        { workspacePath: ws, editAllowed: false },
      );

      expect(outcome?.isError).toBe(true);
      expect(outcome?.output).toContain('workspace 外');
    });
  });
});
|
||||||
|
|
||||||
|
describe('checkBashPathScope relative containment', () => {
  const ws = '/work/local/42';

  /** Wraps a command in a thunk so `expect(...).toThrow` can invoke the check. */
  const scoped = (command: string) => (): void => {
    checkBashPathScope(command, ws);
  };

  it('rejects relative parent traversal', () => {
    for (const command of ['cat ../../config.yaml', 'cat ../../../etc/passwd']) {
      expect(scoped(command)).toThrow(/workspace/);
    }
  });

  it('allows in-workspace relative paths', () => {
    for (const command of ['cat ./output/result.txt', 'cat output/result.txt']) {
      expect(scoped(command)).not.toThrow();
    }
  });

  it('still allows safe absolute prefixes', () => {
    for (const command of ['cat /usr/bin/env', 'echo hi > /tmp/x']) {
      expect(scoped(command)).not.toThrow();
    }
  });

  it('rejects descend-then-escape relative paths', () => {
    const escapes = ['cat output/../../config.yaml', 'cat sub/dir/../../../../etc/passwd'];
    for (const command of escapes) {
      expect(scoped(command)).toThrow(/workspace|外/);
    }
  });

  it('rejects traversal hidden behind a flag= or VAR= prefix', () => {
    const hidden = [
      'python3 run.py --file=../../config.yaml',
      'PYTHONPATH=../../lib python3 x.py',
    ];
    for (const command of hidden) {
      expect(scoped(command)).toThrow(/workspace|外/);
    }
  });

  it('does not false-positive on in-workspace slashed args, regex patterns, or urls', () => {
    const harmless = [
      'grep -rn "foo/bar" .',
      "sed 's/a/b/' output/x.txt",
      'echo https://example.com/a/b',
      'find . -name "*.ts"',
    ];
    for (const command of harmless) {
      expect(scoped(command)).not.toThrow();
    }
  });
});
|
||||||
|
|
||||||
|
describe('bashUnrestricted mode', () => {
  /** Runs `body` against a throwaway `unrestricted-` workspace and always removes it. */
  const withUnrestrictedWorkspace = async (body: (ws: string) => Promise<void>): Promise<void> => {
    const tmpWs = fs.mkdtempSync(path.join(tmpdir(), 'unrestricted-'));
    try {
      await body(tmpWs);
    } finally {
      fs.rmSync(tmpWs, { recursive: true, force: true });
    }
  };

  it('skips command whitelist when bashUnrestricted is true (delegates to sandbox)', async () => {
    await withUnrestrictedWorkspace(async (ws) => {
      fs.writeFileSync(path.join(ws, 'data.txt'), 'hello');

      // In unrestricted mode execution is delegated to bwrap, which may itself
      // fail (e.g. no user namespace); whatever happens, the failure must not
      // be the allowlist rejection.
      // NOTE(review): the original comment described `curl` (not in
      // DEFAULT_ALLOWED_COMMANDS), but the command actually run is `cat`, which
      // other tests in this file also run successfully in restricted mode —
      // confirm whether this test really exercises the whitelist bypass.
      const outcome = await executeCoreTools(
        'Bash',
        { command: 'cat data.txt' },
        { workspacePath: ws, editAllowed: false, bashUnrestricted: true },
      );

      // bwrap available -> success with the file content; otherwise the error
      // must originate from bwrap rather than from the whitelist.
      if (!outcome?.isError) {
        expect(outcome?.output).toContain('hello');
      } else {
        expect(outcome.output).not.toContain('not in the allowed commands list');
      }
    });
  });

  it('skips path scope check in sandboxed mode (bwrap handles isolation)', async () => {
    await withUnrestrictedWorkspace(async (ws) => {
      // checkBashPathScope would normally reject /root/. In sandboxed mode the
      // host-side scope check is skipped because bwrap provides the isolation;
      // bashSandbox: 'always' forces the sandboxed path whether or not bwrap
      // can actually start in this environment.
      const outcome = await executeCoreTools(
        'Bash',
        { command: 'cat /root/.bashrc' },
        { workspacePath: ws, editAllowed: false, bashUnrestricted: true, bashSandbox: 'always' },
      );

      // The path-scope error text must be absent since that check never runs.
      if (outcome?.isError) {
        expect(outcome.output).not.toContain('workspace 外');
      }
    });
  });
});
|
||||||
|
|
||||||
|
// Covers the JSONL audit trail the Bash tool writes to
// <workspace>/logs/bash-history.jsonl for successful, failed and blocked commands.
describe('Bash history logging', () => {
  // Runs every command (in order) through the Bash tool inside a throwaway
  // workspace, passes the history log path to `inspect`, and always removes
  // the workspace afterwards.
  async function withLoggedCommands(
    commands: string[],
    inspect: (historyFile: string) => void,
  ): Promise<void> {
    const workspace = fs.mkdtempSync(path.join(tmpdir(), 'bash-log-'));
    try {
      // Sequential on purpose: the accumulation test depends on entry order.
      for (const command of commands) {
        await executeCoreTools('Bash', { command }, { workspacePath: workspace, editAllowed: false });
      }
      inspect(path.join(workspace, 'logs', 'bash-history.jsonl'));
    } finally {
      fs.rmSync(workspace, { recursive: true, force: true });
    }
  }

  // Returns the raw (still unparsed) JSONL rows of the history log.
  const readRows = (historyFile: string): string[] =>
    fs.readFileSync(historyFile, 'utf-8').trim().split('\n');

  it('appends a JSONL entry to logs/bash-history.jsonl on successful execution', async () => {
    await withLoggedCommands(['echo hello'], (historyFile) => {
      expect(fs.existsSync(historyFile)).toBe(true);

      const rows = readRows(historyFile);
      expect(rows.length).toBe(1);

      const record = JSON.parse(rows[0]);
      expect(record.command).toBe('echo hello');
      expect(record.isError).toBe(false);
      expect(record).toHaveProperty('timestamp');
      expect(record).toHaveProperty('durationMs');
      expect(typeof record.durationMs).toBe('number');
    });
  });

  // NOTE(review): the title mentions exitCode, but no exitCode assertion exists
  // here — confirm the logged field name and add one if it is part of the contract.
  it('logs failed commands with isError=true and exitCode', async () => {
    await withLoggedCommands(['false'], (historyFile) => {
      const record = JSON.parse(readRows(historyFile)[0]);
      expect(record.command).toBe('false');
      expect(record.isError).toBe(true);
    });
  });

  it('logs blocked commands (whitelist rejection) with isError=true', async () => {
    await withLoggedCommands(['apt install foo'], (historyFile) => {
      const record = JSON.parse(readRows(historyFile)[0]);
      expect(record.command).toBe('apt install foo');
      expect(record.isError).toBe(true);
      expect(record.blocked).toBe(true);
    });
  });

  it('accumulates multiple entries across calls', async () => {
    await withLoggedCommands(['echo one', 'echo two'], (historyFile) => {
      const rows = readRows(historyFile);
      expect(rows.length).toBe(2);
      expect(JSON.parse(rows[0]).command).toBe('echo one');
      expect(JSON.parse(rows[1]).command).toBe('echo two');
    });
  });
});
|
||||||
|
|
||||||
|
// Package-install commands must be rejected in every Bash mode, including
// when the command whitelist is lifted via bashUnrestricted.
describe('Bash install rejection (all modes)', () => {
  // Throwaway workspaces created by installCtx(); removed after every test.
  const tempWorkspaces: string[] = [];

  // Builds a ToolContext rooted in a fresh temp directory. The Bash tool writes
  // logs/bash-history.jsonl under the workspace (blocked commands included), so
  // using process.cwd() here would leave log files behind in the repository.
  // `over` is spread last so callers can still override any field.
  function installCtx(over: Partial<ToolContext> = {}): ToolContext {
    const workspacePath = fs.mkdtempSync(path.join(tmpdir(), 'bash-install-'));
    tempWorkspaces.push(workspacePath);
    return { workspacePath, editAllowed: false, ...over };
  }

  afterEach(() => {
    // splice(0) empties the list so a directory is never removed twice.
    for (const dir of tempWorkspaces.splice(0)) {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  });

  it('blocks pip install even when bashUnrestricted', async () => {
    const r = await executeCoreTools('Bash', { command: 'pip install pypdf' }, installCtx({ bashUnrestricted: true }));
    expect(r?.isError).toBe(true);
    expect(r?.output).toMatch(/installation is not available|not allowed/i);
    // The rejection message should point the user at the preinstalled packages.
    expect(r?.output).toMatch(/preinstalled|pre-installed|プリインストール/i);
  });

  it('blocks npm install', async () => {
    const r = await executeCoreTools('Bash', { command: 'npm install left-pad' }, installCtx());
    expect(r?.isError).toBe(true);
  });
});
|
||||||
|
|
||||||
|
// Exercises the non-sandboxed ("off") Bash path: plain exec with the whitelist
// still applied, and secret environment variables scrubbed from the child.
describe('resolveBashMode + env scrub (hardened fallback)', () => {
  // Throwaway workspaces created by installCtx(); removed after every test.
  const tempWorkspaces: string[] = [];

  // Builds a ToolContext with the sandbox disabled, rooted in a fresh temp
  // directory so the Bash history log is not written into process.cwd().
  // `over` is spread last so callers can still override any field.
  function installCtx(over: Partial<ToolContext> = {}): ToolContext {
    const workspacePath = fs.mkdtempSync(path.join(tmpdir(), 'bash-mode-'));
    tempWorkspaces.push(workspacePath);
    return { workspacePath, editAllowed: false, bashSandbox: 'off', ...over };
  }

  afterEach(() => {
    // splice(0) empties the list so a directory is never removed twice.
    for (const dir of tempWorkspaces.splice(0)) {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  });

  it('off mode runs plain exec and respects whitelist', async () => {
    const r = await executeCoreTools('Bash', { command: 'echo hello' },
      installCtx({ bashSandbox: 'off' }));
    expect(r?.isError).toBe(false);
    expect(r?.output).toContain('hello');
  });

  it('hardened fallback scrubs secrets from env', async () => {
    // Remember any pre-existing value so the test leaves the process
    // environment exactly as it found it.
    const previousKey = process.env.MCP_ENCRYPTION_KEY;
    process.env.MCP_ENCRYPTION_KEY = 'topsecret';
    try {
      const r = await executeCoreTools('Bash',
        { command: 'node -e "process.stdout.write(String(process.env.MCP_ENCRYPTION_KEY))"' },
        installCtx({ bashSandbox: 'off', bashUnrestricted: true }));
      expect(r?.output).not.toContain('topsecret');
      // String(undefined) === 'undefined': the variable was absent in the child.
      expect(r?.output).toMatch(/undefined/);
    } finally {
      // Restore even when the call throws or an assertion fails, so the fake
      // secret never leaks into later tests.
      if (previousKey === undefined) {
        delete process.env.MCP_ENCRYPTION_KEY;
      } else {
        process.env.MCP_ENCRYPTION_KEY = previousKey;
      }
    }
  });
});
|
||||||
|
|
||||||
|
// Sanity checks for the shared byte-level binary detector (looksLikeBinaryBytes).
describe('Read delegates to shared binary detector', () => {
  it('flags a magic-byte binary with no NUL in head', () => {
    // A ten-byte head containing no 0x00 byte; it starts with the
    // D0 CF 11 E0 A1 B1 1A E1 signature and is followed by "AB".
    const magicHead = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0x41, 0x42]);
    const verdict = looksLikeBinaryBytes(magicHead);
    expect(verdict.binary).toBe(true);
  });

  it('keeps plain source text readable', () => {
    const sourceText = Buffer.from('export const x = 1;\n', 'utf-8');
    const verdict = looksLikeBinaryBytes(sourceText);
    expect(verdict.binary).toBe(false);
  });
});
|
||||||
1232
src/engine/tools/core.ts
Normal file
1232
src/engine/tools/core.ts
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user