385 lines
14 KiB
TypeScript
385 lines
14 KiB
TypeScript
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
import { tmpdir } from 'os';
|
|
import { execSync } from 'child_process';
|
|
import { afterEach, describe, expect, it } from 'vitest';
|
|
import { executeTool } from './office.js';
|
|
import type { ToolContext } from './core.js';
|
|
|
|
/**
 * Creates a fresh, uniquely named scratch directory under the OS temp dir.
 *
 * @returns Path of the newly created directory (its name starts with
 *   `maestro-office-`). The caller is responsible for removing it.
 */
function makeWorkspace(): string {
  const prefix = path.join(tmpdir(), 'maestro-office-');
  return fs.mkdtempSync(prefix);
}
|
|
|
|
/**
 * Builds the tool context used by the tests for a given workspace.
 *
 * @param workspacePath Root directory the tool is run against.
 * @returns A context with editing enabled; callers that need a read-only
 *   context spread the result and override `editAllowed`.
 */
function makeContext(workspacePath: string): ToolContext {
  const context: ToolContext = {
    workspacePath,
    editAllowed: true,
  };
  return context;
}
|
|
|
|
/**
 * Writes a minimal single-page PDF 1.4 file whose only content is `text`
 * drawn with the Helvetica Type1 font.
 *
 * Everything that depends on the text is derived from the generated bytes:
 * the content stream's /Length, the byte offset of every object in the
 * cross-reference table, and the `startxref` pointer. Hard-coding any of
 * these makes the file structurally invalid as soon as the text length
 * changes (a hard-coded /Length previously truncated multi-word strings such
 * as "find KEYWORD here" needed by the query / search-mode tests).
 *
 * @param filePath Destination path; the file is created or overwritten.
 * @param text Text to place on the page. Backslashes and parentheses are
 *   escaped so they cannot terminate the PDF literal string early.
 */
function writeMinimalPdf(filePath: string, text: string): void {
  // `\`, `(` and `)` are the delimiter/escape characters of a PDF literal string.
  const escapedText = text.replace(/[\\()]/g, '\\$&');

  // Build the content stream first so /Length is accurate.
  const stream = `BT\n/F1 24 Tf\n100 100 Td\n(${escapedText}) Tj\nET\n`;
  const streamLen = Buffer.byteLength(stream, 'utf-8');

  // Object bodies in object-number order (object N is objectBodies[N - 1]).
  const objectBodies = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>',
    `<< /Length ${streamLen} >>\nstream\n${stream}endstream`,
    '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>',
  ];

  let pdf = '%PDF-1.4\n';
  // Track the running size in bytes (not UTF-16 code units) so the offsets
  // match what is actually written to disk.
  let byteOffset = Buffer.byteLength(pdf, 'utf-8');
  const objectOffsets: number[] = [];
  objectBodies.forEach((body, index) => {
    const serialized = `${index + 1} 0 obj\n${body}\nendobj\n`;
    objectOffsets.push(byteOffset);
    pdf += serialized;
    byteOffset += Buffer.byteLength(serialized, 'utf-8');
  });

  const entryCount = objectBodies.length + 1; // +1 for the free-list head (object 0)
  const xrefOffset = byteOffset;
  // Each xref entry is exactly 20 bytes: 10-digit offset, space, 5-digit
  // generation, space, type letter, then a two-byte end-of-line (" \n").
  const xrefEntries = objectOffsets
    .map((offset) => `${String(offset).padStart(10, '0')} 00000 n \n`)
    .join('');

  pdf += `xref\n0 ${entryCount}\n0000000000 65535 f \n${xrefEntries}`;
  pdf += `trailer\n<< /Root 1 0 R /Size ${entryCount} >>\nstartxref\n${xrefOffset}\n%%EOF\n`;

  fs.writeFileSync(filePath, pdf, 'utf-8');
}
|
|
|
|
/**
 * Checks whether pymupdf is usable by asking `python3` to import `fitz`.
 *
 * @returns `true` when the import command completes without throwing,
 *   `false` when `execSync` throws for any reason.
 */
function hasPymupdf(): boolean {
  let available = true;
  try {
    // Output is discarded; only success or failure of the command matters.
    execSync('python3 -c "import fitz"', { stdio: 'ignore' });
  } catch {
    available = false;
  }
  return available;
}
|
|
|
|
describe('office tools', () => {
  // Workspace owned by the currently running test; '' means nothing to clean up.
  let workspacePath = '';

  /** Creates a fresh workspace containing `input/<fileName>` with the given page text. */
  const seedPdf = (fileName: string, text: string): void => {
    workspacePath = makeWorkspace();
    const inputDir = path.join(workspacePath, 'input');
    fs.mkdirSync(inputDir, { recursive: true });
    writeMinimalPdf(path.join(inputDir, fileName), text);
  };

  /** Invokes the ReadPdf tool against the current workspace. */
  const readPdf = (args: Parameters<typeof executeTool>[1]) =>
    executeTool('ReadPdf', args, makeContext(workspacePath));

  afterEach(() => {
    if (!workspacePath) {
      return;
    }
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  it('reads PDF text with ReadPdf', async () => {
    seedPdf('sample.pdf', 'Hello PDF');

    const result = await readPdf({ path: 'input/sample.pdf' });

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(false);
    for (const fragment of ['# sample.pdf', 'Total pages: 1', 'Hello PDF']) {
      expect(result?.output).toContain(fragment);
    }
  });

  // query=... is the grep-style search mode added 2026-05-21.
  describe('ReadPdf — query / search mode', () => {
    it('returns grep-style snippet for matching pages and skips the rest', async () => {
      const pageText = ['intro line', 'KEYWORD shows up here', 'trailing line'].join(' ');
      seedPdf('doc.pdf', pageText);

      const result = await readPdf({ path: 'input/doc.pdf', query: 'KEYWORD' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('query: "KEYWORD"');
      expect(result?.output).toContain('### Matches');
      expect(result?.output).toContain('Pages with match: 1');
      expect(result?.output).toMatch(/>\s*\d+:.*KEYWORD/);
    });

    it('returns "no matches" when query is absent from every page', async () => {
      seedPdf('doc.pdf', 'just some text');

      const result = await readPdf({ path: 'input/doc.pdf', query: 'WILL-NOT-FIND' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 0');
      expect(result?.output).toContain('(no matches for "WILL-NOT-FIND")');
    });

    it('is case-insensitive in default substring mode', async () => {
      seedPdf('doc.pdf', 'Mixed Case Keyword');

      const result = await readPdf({ path: 'input/doc.pdf', query: 'keyword' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 1');
    });

    it('errors out gracefully on an invalid regex pattern', async () => {
      seedPdf('doc.pdf', 'anything');

      const result = await readPdf({
        path: 'input/doc.pdf',
        query: '(unbalanced',
        query_mode: 'regex',
      });

      expect(result?.isError).toBe(true);
      expect(result?.output).toContain('query error');
      expect(result?.output).toContain('invalid regex');
    });

    it('ignores empty / whitespace-only query and falls back to full-text mode', async () => {
      seedPdf('doc.pdf', 'whole document text');

      const result = await readPdf({ path: 'input/doc.pdf', query: ' ' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('### Content');
      expect(result?.output).not.toContain('### Matches');
      expect(result?.output).toContain('whole document text');
    });
  });
});
|
|
|
|
describe('PdfToImages', () => {
  // Workspace owned by the currently running test; '' means nothing to clean up.
  // Initialised explicitly so afterEach never reads an unassigned variable.
  let workspaceDir = '';

  afterEach(() => {
    if (workspaceDir) {
      fs.rmSync(workspaceDir, { recursive: true, force: true });
      workspaceDir = '';
    }
  });

  it('returns error when edit is not allowed', async () => {
    workspaceDir = makeWorkspace();
    const ctx = { ...makeContext(workspaceDir), editAllowed: false };

    const result = await executeTool('PdfToImages', { path: 'input/any.pdf' }, ctx);

    // Optional chaining: a null result fails the assertion instead of
    // throwing a TypeError, matching the other suites in this file.
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('not allowed');
  });

  it('returns error for missing file', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    const result = await executeTool('PdfToImages', { path: 'input/notfound.pdf' }, ctx);

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/not found/i);
  });

  it('returns error for invalid page_range', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'sample.pdf'), 'test');

    const result = await executeTool('PdfToImages', {
      path: 'input/sample.pdf',
      page_range: 'invalid',
    }, ctx);

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Invalid page_range');
  });

  // Tests that need pymupdf depend on the environment, so they only run
  // when the probe succeeds and are reported as skipped otherwise.
  const itWithPymupdf = hasPymupdf() ? it : it.skip;

  itWithPymupdf('converts PDF to PNG images in output/ReadPdf/', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    const pdfPath = path.join(workspaceDir, 'input', 'sample.pdf');
    writeMinimalPdf(pdfPath, 'Hello OCR');

    const result = await executeTool('PdfToImages', { path: 'input/sample.pdf' }, ctx);

    expect(result?.isError).toBe(false);
    const outDir = path.join(workspaceDir, 'output', 'ReadPdf', 'sample');
    expect(fs.existsSync(outDir)).toBe(true);
    const files = fs.readdirSync(outDir);
    expect(files.some((f) => f.startsWith('page-') && f.endsWith('.png'))).toBe(true);
    expect(result?.output).toContain('page-0001.png');
    expect(result?.output).toContain('ReadImage');
  });

  itWithPymupdf('respects page_range parameter', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'multi.pdf'), 'page1');

    const result = await executeTool('PdfToImages', {
      path: 'input/multi.pdf',
      page_range: '1-1',
    }, ctx);

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('page-0001.png');
  });
});
|
|
|
|
// Issue #246: when handed a file in the wrong format, ReadExcel/ReadPdf/
// ReadDocx/ReadPPTX used to surface cryptic JSZip / pdf-parse errors that sent
// the agent into a loop. These tests confirm that the validateFileFormat helper
// rejects early based on extension + magic bytes and returns an
// agent-actionable error.
describe('Read* tools — format mismatch rejection (issue #246)', () => {
  // Workspace owned by the currently running test; '' means nothing to clean up.
  let workspacePath = '';

  afterEach(() => {
    if (workspacePath) {
      fs.rmSync(workspacePath, { recursive: true, force: true });
      workspacePath = '';
    }
  });

  it('ReadPdf rejects .md path with actionable error pointing to Read', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'output', 'report.md'), '# Hello');

    const result = await executeTool('ReadPdf', { path: 'output/report.md' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('.md');
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects .md path with actionable error', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'output', 'data.md'), 'col1,col2\n1,2');

    const result = await executeTool('ReadExcel', { path: 'output/data.md' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects CFB (old .xls) wearing a .xlsx extension', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // CFB magic header
    const cfb = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0, 0, 0, 0, 0, 0, 0, 0]);
    fs.writeFileSync(path.join(workspacePath, 'input', 'old.xlsx'), cfb);

    const result = await executeTool('ReadExcel', { path: 'input/old.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/旧バイナリ|CFB|\.xls/);
    // The cryptic JSZip error must not leak through.
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects HTML disguised as .xlsx', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'input', 'report.xlsx'), '<!DOCTYPE html><html><body>Table</body></html>');

    const result = await executeTool('ReadExcel', { path: 'input/report.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/HTML/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects CSV disguised as .xlsx', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'input', 'data.xlsx'), 'col1,col2,col3\n1,2,3\n4,5,6\n');

    const result = await executeTool('ReadExcel', { path: 'input/data.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/テキスト|CSV/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadPdf rejects OOXML mistakenly named .pdf', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // ZIP signature
    const zip = Buffer.from([0x50, 0x4B, 0x03, 0x04, 0, 0, 0, 0]);
    fs.writeFileSync(path.join(workspacePath, 'input', 'fake.pdf'), zip);

    const result = await executeTool('ReadPdf', { path: 'input/fake.pdf' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/OOXML|ReadExcel|ReadDocx|ReadPPTX/);
  });

  it('ReadExcel still accepts a real .xlsx without warning', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // Real OOXML built via exceljs
    const ExcelJS = (await import('exceljs')).default;
    const wb = new ExcelJS.Workbook();
    const ws = wb.addWorksheet('Sheet1');
    ws.addRow(['a', 'b', 'c']);
    await wb.xlsx.writeFile(path.join(workspacePath, 'input', 'ok.xlsx'));

    const result = await executeTool('ReadExcel', { path: 'input/ok.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('Sheet1');
  });

  it('ReadExcel includes a Styles section only when include_styles=true', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });

    const ExcelJS = (await import('exceljs')).default;
    const wb = new ExcelJS.Workbook();
    const ws = wb.addWorksheet('Sheet1');
    ws.getCell('A1').value = 'Header';
    ws.getCell('A1').fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: 'FFFFF2CC' } };
    ws.getCell('A1').font = { bold: true };
    await wb.xlsx.writeFile(path.join(workspacePath, 'input', 'styled.xlsx'));

    // Without include_styles: no Styles section (backward compat).
    // Optional chaining instead of `!`: a null result fails an assertion
    // rather than throwing a TypeError, as in the other tests of this suite.
    const plain = await executeTool('ReadExcel', { path: 'input/styled.xlsx' }, makeContext(workspacePath));
    expect(plain?.isError).toBe(false);
    expect(plain?.output).not.toContain('### Styles');

    // With include_styles: Styles section present with fill color and font bold
    const styled = await executeTool('ReadExcel', { path: 'input/styled.xlsx', include_styles: true }, makeContext(workspacePath));
    expect(styled?.isError).toBe(false);
    expect(styled?.output).toContain('### Styles');
    expect(styled?.output).toContain('#FFF2CC');
    expect(styled?.output).toMatch(/bold/);
  });
});
|