// maestro/src/engine/tools/office.test.ts
// 2026-06-04 03:03:12 +00:00
//
// 385 lines
// 14 KiB
// TypeScript

import * as fs from 'fs';
import * as path from 'path';
import { tmpdir } from 'os';
import { execSync } from 'child_process';
import { afterEach, describe, expect, it } from 'vitest';
import { executeTool } from './office.js';
import type { ToolContext } from './core.js';
/**
 * Creates a new, uniquely named scratch directory under the OS temp
 * directory and returns its path.
 */
function makeWorkspace(): string {
  const prefix = path.join(tmpdir(), 'maestro-office-');
  const workspaceDir = fs.mkdtempSync(prefix);
  return workspaceDir;
}
/**
 * Builds a tool context rooted at the given workspace with editing enabled.
 *
 * @param workspacePath Workspace directory the context points at.
 * @returns A context whose `editAllowed` flag is `true`.
 */
function makeContext(workspacePath: string): ToolContext {
  const context = { workspacePath, editAllowed: true };
  return context;
}
/**
 * Writes a minimal single-page PDF 1.4 file whose only content is `text`,
 * shown with the Helvetica font at size 24.
 *
 * Nothing about the layout is hard-coded to a particular text length:
 * the content stream is built first so `/Length` matches it, and every
 * cross-reference offset plus `startxref` is derived from the real byte
 * position of the object it points at. (With fixed offsets the table is
 * only correct for one specific text length, and readers have to fall
 * back to xref repair for everything else.)
 *
 * @param filePath Destination path; the file is created or overwritten.
 * @param text Text to place on the page. Backslashes and parentheses are
 *   escaped so they cannot terminate or corrupt the PDF literal string.
 */
function writeMinimalPdf(filePath: string, text: string): void {
  // `\`, `(` and `)` are the characters with special meaning inside a
  // PDF literal string; prefix each with a backslash.
  const escapedText = text.replace(/[\\()]/g, (ch) => `\\${ch}`);

  const stream = `BT\n/F1 24 Tf\n100 100 Td\n(${escapedText}) Tj\nET\n`;
  const streamLen = Buffer.byteLength(stream, 'utf-8');

  // Bodies of objects 1..5, in object-number order.
  const objectBodies = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>',
    `<< /Length ${streamLen} >>\nstream\n${stream}endstream`,
    '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>',
  ];

  const header = '%PDF-1.4\n';
  const chunks: string[] = [header];
  const offsets: number[] = [];
  // Running byte position (not character count) of the next chunk.
  let position = Buffer.byteLength(header, 'utf-8');
  objectBodies.forEach((body, index) => {
    const chunk = `${index + 1} 0 obj\n${body}\nendobj\n`;
    offsets.push(position);
    chunks.push(chunk);
    position += Buffer.byteLength(chunk, 'utf-8');
  });

  // After the loop `position` is where the xref section starts.
  const entryCount = objectBodies.length + 1;
  // Each xref entry is exactly 20 bytes: 10-digit offset, 5-digit
  // generation, type keyword, then a two-byte end-of-line (" \n").
  const xrefEntries = offsets.map(
    (offset) => `${String(offset).padStart(10, '0')} 00000 n \n`,
  );
  chunks.push(
    `xref\n0 ${entryCount}\n`,
    '0000000000 65535 f \n',
    ...xrefEntries,
    `trailer\n<< /Root 1 0 R /Size ${entryCount} >>\n`,
    `startxref\n${position}\n%%EOF\n`,
  );

  fs.writeFileSync(filePath, chunks.join(''), 'utf-8');
}
/**
 * Checks whether pymupdf is usable by asking `python3` to import `fitz`.
 *
 * @returns `true` when the command succeeds, `false` when it throws.
 */
function hasPymupdf(): boolean {
  let available = true;
  try {
    execSync('python3 -c "import fitz"', { stdio: 'ignore' });
  } catch {
    available = false;
  }
  return available;
}
// ReadPdf coverage: plain text extraction plus the query / search mode.
describe('office tools', () => {
  let workspacePath = '';

  // Starts a fresh workspace containing input/<fileName> with the given
  // text, and records it in `workspacePath` so afterEach can remove it.
  const stagePdf = (fileName: string, text: string): void => {
    workspacePath = makeWorkspace();
    const inputDir = path.join(workspacePath, 'input');
    fs.mkdirSync(inputDir, { recursive: true });
    writeMinimalPdf(path.join(inputDir, fileName), text);
  };

  // Invokes ReadPdf against the workspace staged for the current test.
  const readPdf = (args: Parameters<typeof executeTool>[1]) =>
    executeTool('ReadPdf', args, makeContext(workspacePath));

  afterEach(() => {
    if (!workspacePath) {
      return;
    }
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  it('reads PDF text with ReadPdf', async () => {
    stagePdf('sample.pdf', 'Hello PDF');

    const result = await readPdf({ path: 'input/sample.pdf' });

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(false);
    for (const fragment of ['# sample.pdf', 'Total pages: 1', 'Hello PDF']) {
      expect(result?.output).toContain(fragment);
    }
  });

  // query=... is the grep-style search mode added 2026-05-21.
  describe('ReadPdf — query / search mode', () => {
    it('returns grep-style snippet for matching pages and skips the rest', async () => {
      const pageText = ['intro line', 'KEYWORD shows up here', 'trailing line'].join(' ');
      stagePdf('doc.pdf', pageText);

      const result = await readPdf({ path: 'input/doc.pdf', query: 'KEYWORD' });

      expect(result?.isError).toBe(false);
      for (const fragment of ['query: "KEYWORD"', '### Matches', 'Pages with match: 1']) {
        expect(result?.output).toContain(fragment);
      }
      expect(result?.output).toMatch(/>\s*\d+:.*KEYWORD/);
    });

    it('returns "no matches" when query is absent from every page', async () => {
      stagePdf('doc.pdf', 'just some text');

      const result = await readPdf({ path: 'input/doc.pdf', query: 'WILL-NOT-FIND' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 0');
      expect(result?.output).toContain('(no matches for "WILL-NOT-FIND")');
    });

    it('is case-insensitive in default substring mode', async () => {
      stagePdf('doc.pdf', 'Mixed Case Keyword');

      const result = await readPdf({ path: 'input/doc.pdf', query: 'keyword' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 1');
    });

    it('errors out gracefully on an invalid regex pattern', async () => {
      stagePdf('doc.pdf', 'anything');

      const result = await readPdf({
        path: 'input/doc.pdf',
        query: '(unbalanced',
        query_mode: 'regex',
      });

      expect(result?.isError).toBe(true);
      expect(result?.output).toContain('query error');
      expect(result?.output).toContain('invalid regex');
    });

    it('ignores empty / whitespace-only query and falls back to full-text mode', async () => {
      stagePdf('doc.pdf', 'whole document text');

      const result = await readPdf({ path: 'input/doc.pdf', query: ' ' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('### Content');
      expect(result?.output).not.toContain('### Matches');
      expect(result?.output).toContain('whole document text');
    });
  });
});
// PdfToImages coverage: permission / input validation errors, plus the
// actual conversion when pymupdf is available on this machine.
describe('PdfToImages', () => {
  // Initialised to '' (like the sibling suites) so the afterEach guard never
  // reads an unassigned variable.
  let workspaceDir = '';

  afterEach(() => {
    if (workspaceDir) {
      fs.rmSync(workspaceDir, { recursive: true, force: true });
      workspaceDir = '';
    }
  });

  // NOTE: results are read through `?.` as in the other suites in this file,
  // so a null result fails the assertion instead of throwing a TypeError.

  it('returns error when edit is not allowed', async () => {
    workspaceDir = makeWorkspace();
    const ctx = { ...makeContext(workspaceDir), editAllowed: false };
    const result = await executeTool('PdfToImages', { path: 'input/any.pdf' }, ctx);
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('not allowed');
  });

  it('returns error for missing file', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    const result = await executeTool('PdfToImages', { path: 'input/notfound.pdf' }, ctx);
    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/not found/i);
  });

  it('returns error for invalid page_range', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'sample.pdf'), 'test');
    const result = await executeTool('PdfToImages', {
      path: 'input/sample.pdf',
      page_range: 'invalid',
    }, ctx);
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Invalid page_range');
  });

  // Tests that need pymupdf depend on the environment, so they only run when
  // the import check succeeds and are skipped otherwise.
  const itWithPymupdf = hasPymupdf() ? it : it.skip;

  itWithPymupdf('converts PDF to PNG images in output/ReadPdf/', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    const pdfPath = path.join(workspaceDir, 'input', 'sample.pdf');
    writeMinimalPdf(pdfPath, 'Hello OCR');
    const result = await executeTool('PdfToImages', { path: 'input/sample.pdf' }, ctx);
    expect(result?.isError).toBe(false);
    const outDir = path.join(workspaceDir, 'output', 'ReadPdf', 'sample');
    expect(fs.existsSync(outDir)).toBe(true);
    const files = fs.readdirSync(outDir);
    expect(files.some((f) => f.startsWith('page-') && f.endsWith('.png'))).toBe(true);
    expect(result?.output).toContain('page-0001.png');
    expect(result?.output).toContain('ReadImage');
  });

  itWithPymupdf('respects page_range parameter', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'multi.pdf'), 'page1');
    const result = await executeTool('PdfToImages', {
      path: 'input/multi.pdf',
      page_range: '1-1',
    }, ctx);
    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('page-0001.png');
  });
});
// Issue #246: ReadExcel/ReadPdf/ReadDocx/ReadPPTX used to get the agent stuck
// in a loop on cryptic JSZip / pdf-parse errors when handed a file in the
// wrong format. These tests confirm that the validateFileFormat helper
// early-rejects on extension + magic bytes and returns an agent-actionable
// error.
describe('Read* tools — format mismatch rejection (issue #246)', () => {
  let workspacePath = '';

  // Starts a fresh workspace, creates <workspace>/<subdir>, and returns that
  // directory. The workspace is recorded so afterEach can remove it.
  const freshDir = (subdir: string): string => {
    workspacePath = makeWorkspace();
    const dir = path.join(workspacePath, subdir);
    fs.mkdirSync(dir, { recursive: true });
    return dir;
  };

  // Runs a tool against the workspace staged for the current test.
  const run = (
    tool: Parameters<typeof executeTool>[0],
    args: Parameters<typeof executeTool>[1],
  ) => executeTool(tool, args, makeContext(workspacePath));

  afterEach(() => {
    if (!workspacePath) {
      return;
    }
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  it('ReadPdf rejects .md path with actionable error pointing to Read', async () => {
    const outputDir = freshDir('output');
    fs.writeFileSync(path.join(outputDir, 'report.md'), '# Hello');

    const result = await run('ReadPdf', { path: 'output/report.md' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('.md');
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects .md path with actionable error', async () => {
    const outputDir = freshDir('output');
    fs.writeFileSync(path.join(outputDir, 'data.md'), 'col1,col2\n1,2');

    const result = await run('ReadExcel', { path: 'output/data.md' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects CFB (old .xls) wearing a .xlsx extension', async () => {
    const inputDir = freshDir('input');
    // CFB magic header
    const cfb = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0, 0, 0, 0, 0, 0, 0, 0]);
    fs.writeFileSync(path.join(inputDir, 'old.xlsx'), cfb);

    const result = await run('ReadExcel', { path: 'input/old.xlsx' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/旧バイナリ|CFB|\.xls/);
    // The cryptic JSZip error must not leak through.
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects HTML disguised as .xlsx', async () => {
    const inputDir = freshDir('input');
    fs.writeFileSync(
      path.join(inputDir, 'report.xlsx'),
      '<!DOCTYPE html><html><body>Table</body></html>',
    );

    const result = await run('ReadExcel', { path: 'input/report.xlsx' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/HTML/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects CSV disguised as .xlsx', async () => {
    const inputDir = freshDir('input');
    fs.writeFileSync(path.join(inputDir, 'data.xlsx'), 'col1,col2,col3\n1,2,3\n4,5,6\n');

    const result = await run('ReadExcel', { path: 'input/data.xlsx' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/テキスト|CSV/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadPdf rejects OOXML mistakenly named .pdf', async () => {
    const inputDir = freshDir('input');
    // ZIP signature
    const zip = Buffer.from([0x50, 0x4B, 0x03, 0x04, 0, 0, 0, 0]);
    fs.writeFileSync(path.join(inputDir, 'fake.pdf'), zip);

    const result = await run('ReadPdf', { path: 'input/fake.pdf' });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/OOXML|ReadExcel|ReadDocx|ReadPPTX/);
  });

  it('ReadExcel still accepts a real .xlsx without warning', async () => {
    const inputDir = freshDir('input');
    // Real OOXML built via exceljs
    const ExcelJS = (await import('exceljs')).default;
    const workbook = new ExcelJS.Workbook();
    const sheet = workbook.addWorksheet('Sheet1');
    sheet.addRow(['a', 'b', 'c']);
    await workbook.xlsx.writeFile(path.join(inputDir, 'ok.xlsx'));

    const result = await run('ReadExcel', { path: 'input/ok.xlsx' });

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('Sheet1');
  });

  it('ReadExcel includes a Styles section only when include_styles=true', async () => {
    const inputDir = freshDir('input');
    const ExcelJS = (await import('exceljs')).default;
    const workbook = new ExcelJS.Workbook();
    const sheet = workbook.addWorksheet('Sheet1');
    sheet.getCell('A1').value = 'Header';
    sheet.getCell('A1').fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: 'FFFFF2CC' } };
    sheet.getCell('A1').font = { bold: true };
    await workbook.xlsx.writeFile(path.join(inputDir, 'styled.xlsx'));

    // Without include_styles: no Styles section (backward compat)
    const plain = await run('ReadExcel', { path: 'input/styled.xlsx' });
    expect(plain!.output).not.toContain('### Styles');

    // With include_styles: Styles section present with fill color and font bold
    const styled = await run('ReadExcel', { path: 'input/styled.xlsx', include_styles: true });
    expect(styled!.output).toContain('### Styles');
    expect(styled!.output).toContain('#FFF2CC');
    expect(styled!.output).toMatch(/bold/);
  });
});