import * as fs from 'fs';
import * as path from 'path';
import { tmpdir } from 'os';
import { execSync } from 'child_process';
import { afterEach, describe, expect, it } from 'vitest';
import { executeTool } from './office.js';
import type { ToolContext } from './core.js';

/** Creates a fresh temporary directory to act as the tool workspace and returns its path. */
function makeWorkspace(): string {
  return fs.mkdtempSync(path.join(tmpdir(), 'maestro-office-'));
}

/** Builds a ToolContext rooted at `workspacePath` with editing enabled. */
function makeContext(workspacePath: string): ToolContext {
  return {
    workspacePath,
    editAllowed: true,
  };
}

/**
 * Writes a one-page PDF whose single content stream draws `text` in Helvetica.
 *
 * Every size-dependent field is derived from the bytes actually emitted:
 * the stream's /Length, each cross-reference offset and the `startxref`
 * value. Hard-coding /Length (the original approach) silently truncated
 * longer text, which broke the query / search-mode tests that needed
 * multi-word strings like "find KEYWORD here" to extract correctly via
 * pdf-parse; hard-coded xref offsets go stale for the same reason as soon as
 * the stream length changes.
 *
 * @param filePath Destination path of the PDF file.
 * @param text Text to draw. Backslashes and parentheses are escaped so they
 *   cannot terminate or corrupt the PDF literal string.
 */
function writeMinimalPdf(filePath: string, text: string): void {
  // `\`, `(` and `)` are the delimiters/escape character of a PDF literal string.
  const escapedText = text.replace(/[\\()]/g, (ch) => `\\${ch}`);
  const stream = `BT\n/F1 24 Tf\n100 100 Td\n(${escapedText}) Tj\nET\n`;
  const streamLen = Buffer.byteLength(stream, 'utf-8');

  // Bodies of objects 1..5, in object-number order.
  const objectBodies = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>',
    `<< /Length ${streamLen} >>\nstream\n${stream}endstream`,
    '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>',
  ];

  let pdf = '%PDF-1.4\n';
  let byteOffset = Buffer.byteLength(pdf, 'utf-8');
  const objectOffsets: number[] = [];
  objectBodies.forEach((body, index) => {
    const chunk = `${index + 1} 0 obj\n${body}\nendobj\n`;
    objectOffsets.push(byteOffset);
    pdf += chunk;
    // Offsets are byte positions, so measure the encoded length, not the string length.
    byteOffset += Buffer.byteLength(chunk, 'utf-8');
  });

  // Each xref entry is exactly 20 bytes: 10-digit offset, 5-digit generation,
  // the in-use flag, and a two-byte end-of-line (space + LF).
  const xrefEntries = objectOffsets
    .map((offset) => `${String(offset).padStart(10, '0')} 00000 n \n`)
    .join('');
  const entryCount = objectBodies.length + 1; // +1 for the free-list head (object 0)

  pdf += `xref\n0 ${entryCount}\n0000000000 65535 f \n${xrefEntries}`;
  pdf += `trailer\n<< /Root 1 0 R /Size ${entryCount} >>\nstartxref\n${byteOffset}\n%%EOF\n`;

  fs.writeFileSync(filePath, pdf, 'utf-8');
}

/**
 * Checks whether pymupdf is usable by asking `python3` to import `fitz`.
 * Returns false when the command fails for any reason.
 */
function hasPymupdf(): boolean {
  try {
    execSync('python3 -c "import fitz"', { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
}

describe('office tools', () => {
  let workspacePath = '';

  // Remove the per-test workspace so runs do not leak temp directories.
  afterEach(() => {
    if (workspacePath) {
      fs.rmSync(workspacePath, { recursive: true, force: true });
      workspacePath = '';
    }
  });

  it('reads PDF text with ReadPdf', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspacePath, 'input', 'sample.pdf'), 'Hello PDF');

    const result = await executeTool('ReadPdf', { path: 'input/sample.pdf' }, makeContext(workspacePath));

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('# sample.pdf');
    expect(result?.output).toContain('Total pages: 1');
    expect(result?.output).toContain('Hello PDF');
  });

  // query=... is the grep-style search mode added 2026-05-21.
  describe('ReadPdf — query / search mode', () => {
    it('returns grep-style snippet for matching pages and skips the rest', async () => {
      workspacePath = makeWorkspace();
      fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
      writeMinimalPdf(
        path.join(workspacePath, 'input', 'doc.pdf'),
        ['intro line', 'KEYWORD shows up here', 'trailing line'].join(' '),
      );

      const result = await executeTool(
        'ReadPdf',
        { path: 'input/doc.pdf', query: 'KEYWORD' },
        makeContext(workspacePath),
      );

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('query: "KEYWORD"');
      expect(result?.output).toContain('### Matches');
      expect(result?.output).toContain('Pages with match: 1');
      expect(result?.output).toMatch(/>\s*\d+:.*KEYWORD/);
    });

    it('returns "no matches" when query is absent from every page', async () => {
      workspacePath = makeWorkspace();
      fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
      writeMinimalPdf(path.join(workspacePath, 'input', 'doc.pdf'), 'just some text');

      const result = await executeTool(
        'ReadPdf',
        { path: 'input/doc.pdf', query: 'WILL-NOT-FIND' },
        makeContext(workspacePath),
      );

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 0');
      expect(result?.output).toContain('(no matches for "WILL-NOT-FIND")');
    });

    it('is case-insensitive in default substring mode', async () => {
      workspacePath = makeWorkspace();
      fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
      writeMinimalPdf(path.join(workspacePath, 'input', 'doc.pdf'), 'Mixed Case Keyword');

      const result = await executeTool(
        'ReadPdf',
        { path: 'input/doc.pdf', query: 'keyword' },
        makeContext(workspacePath),
      );

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 1');
    });

    it('errors out gracefully on an invalid regex pattern', async () => {
      workspacePath = makeWorkspace();
      fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
      writeMinimalPdf(path.join(workspacePath, 'input', 'doc.pdf'), 'anything');

      const result = await executeTool(
        'ReadPdf',
        { path: 'input/doc.pdf', query: '(unbalanced', query_mode: 'regex' },
        makeContext(workspacePath),
      );

      expect(result?.isError).toBe(true);
      expect(result?.output).toContain('query error');
      expect(result?.output).toContain('invalid regex');
    });

    it('ignores empty / whitespace-only query and falls back to full-text mode', async () => {
      workspacePath = makeWorkspace();
      fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
      writeMinimalPdf(path.join(workspacePath, 'input', 'doc.pdf'), 'whole document text');

      const result = await executeTool(
        'ReadPdf',
        { path: 'input/doc.pdf', query: ' ' },
        makeContext(workspacePath),
      );

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('### Content');
      expect(result?.output).not.toContain('### Matches');
      expect(result?.output).toContain('whole document text');
    });
  });
});

describe('PdfToImages', () => {
  // Initialised to '' (like the sibling suites) so afterEach never reads an unassigned binding.
  let workspaceDir = '';

  afterEach(() => {
    if (workspaceDir) {
      fs.rmSync(workspaceDir, { recursive: true, force: true });
      workspaceDir = '';
    }
  });

  it('returns error when edit is not allowed', async () => {
    workspaceDir = makeWorkspace();
    const ctx = { ...makeContext(workspaceDir), editAllowed: false };

    const result = await executeTool('PdfToImages', { path: 'input/any.pdf' }, ctx);

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('not allowed');
  });

  it('returns error for missing file', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    const result = await executeTool('PdfToImages', { path: 'input/notfound.pdf' }, ctx);

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/not found/i);
  });

  it('returns error for invalid page_range', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'sample.pdf'), 'test');

    const result = await executeTool(
      'PdfToImages',
      {
        path: 'input/sample.pdf',
        page_range: 'invalid',
      },
      ctx,
    );

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Invalid page_range');
  });

  // Tests that need pymupdf depend on the environment, so they run conditionally.
  const itWithPymupdf = hasPymupdf() ? it : it.skip;

  itWithPymupdf('converts PDF to PNG images in output/ReadPdf/', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    const pdfPath = path.join(workspaceDir, 'input', 'sample.pdf');
    writeMinimalPdf(pdfPath, 'Hello OCR');

    const result = await executeTool('PdfToImages', { path: 'input/sample.pdf' }, ctx);

    expect(result?.isError).toBe(false);
    const outDir = path.join(workspaceDir, 'output', 'ReadPdf', 'sample');
    expect(fs.existsSync(outDir)).toBe(true);
    const files = fs.readdirSync(outDir);
    expect(files.some((f) => f.startsWith('page-') && f.endsWith('.png'))).toBe(true);
    expect(result?.output).toContain('page-0001.png');
    expect(result?.output).toContain('ReadImage');
  });

  itWithPymupdf('respects page_range parameter', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);
    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'multi.pdf'), 'page1');

    const result = await executeTool(
      'PdfToImages',
      {
        path: 'input/multi.pdf',
        page_range: '1-1',
      },
      ctx,
    );

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('page-0001.png');
  });
});

// Issue #246: when ReadExcel/ReadPdf/ReadDocx/ReadPPTX were handed a file in
// the wrong format, they surfaced cryptic JSZip / pdf-parse errors and the
// agent fell into a loop. These tests confirm that the validateFileFormat
// helper rejects early, based on extension + magic bytes, and returns an
// agent-actionable error.
describe('Read* tools — format mismatch rejection (issue #246)', () => {
  let workspacePath = '';

  afterEach(() => {
    if (workspacePath) {
      fs.rmSync(workspacePath, { recursive: true, force: true });
      workspacePath = '';
    }
  });

  it('ReadPdf rejects .md path with actionable error pointing to Read', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'output', 'report.md'), '# Hello');

    const result = await executeTool('ReadPdf', { path: 'output/report.md' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('.md');
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects .md path with actionable error', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'output'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'output', 'data.md'), 'col1,col2\n1,2');

    const result = await executeTool('ReadExcel', { path: 'output/data.md' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects CFB (old .xls) wearing a .xlsx extension', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // CFB magic header
    const cfb = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0, 0, 0, 0, 0, 0, 0, 0]);
    fs.writeFileSync(path.join(workspacePath, 'input', 'old.xlsx'), cfb);

    const result = await executeTool('ReadExcel', { path: 'input/old.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/旧バイナリ|CFB|\.xls/);
    // The cryptic JSZip error must not leak through.
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects HTML disguised as .xlsx', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // The fixture must contain real markup: a bare word such as "Table" is
    // indistinguishable from plain text and cannot exercise the HTML branch.
    // NOTE(review): the exact heuristic of the detector is not visible from
    // this file — confirm that a leading `<html>` tag is what it keys on.
    fs.writeFileSync(
      path.join(workspacePath, 'input', 'report.xlsx'),
      '<html><body><table><tr><td>Table</td></tr></table></body></html>',
    );

    const result = await executeTool('ReadExcel', { path: 'input/report.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/HTML/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects CSV disguised as .xlsx', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    fs.writeFileSync(path.join(workspacePath, 'input', 'data.xlsx'), 'col1,col2,col3\n1,2,3\n4,5,6\n');

    const result = await executeTool('ReadExcel', { path: 'input/data.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/テキスト|CSV/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadPdf rejects OOXML mistakenly named .pdf', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // ZIP signature
    const zip = Buffer.from([0x50, 0x4B, 0x03, 0x04, 0, 0, 0, 0]);
    fs.writeFileSync(path.join(workspacePath, 'input', 'fake.pdf'), zip);

    const result = await executeTool('ReadPdf', { path: 'input/fake.pdf' }, makeContext(workspacePath));

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/OOXML|ReadExcel|ReadDocx|ReadPPTX/);
  });

  it('ReadExcel still accepts a real .xlsx without warning', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    // Real OOXML built via exceljs
    const ExcelJS = (await import('exceljs')).default;
    const wb = new ExcelJS.Workbook();
    const ws = wb.addWorksheet('Sheet1');
    ws.addRow(['a', 'b', 'c']);
    await wb.xlsx.writeFile(path.join(workspacePath, 'input', 'ok.xlsx'));

    const result = await executeTool('ReadExcel', { path: 'input/ok.xlsx' }, makeContext(workspacePath));

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('Sheet1');
  });

  it('ReadExcel includes a Styles section only when include_styles=true', async () => {
    workspacePath = makeWorkspace();
    fs.mkdirSync(path.join(workspacePath, 'input'), { recursive: true });
    const ExcelJS = (await import('exceljs')).default;
    const wb = new ExcelJS.Workbook();
    const ws = wb.addWorksheet('Sheet1');
    ws.getCell('A1').value = 'Header';
    ws.getCell('A1').fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: 'FFFFF2CC' } };
    ws.getCell('A1').font = { bold: true };
    await wb.xlsx.writeFile(path.join(workspacePath, 'input', 'styled.xlsx'));

    // Without include_styles: no Styles section (backward compat).
    // Checking isError first keeps the negative assertion from passing on a missing result.
    const plain = await executeTool('ReadExcel', { path: 'input/styled.xlsx' }, makeContext(workspacePath));
    expect(plain?.isError).toBe(false);
    expect(plain?.output).not.toContain('### Styles');

    // With include_styles: Styles section present with fill color and font bold
    const styled = await executeTool(
      'ReadExcel',
      { path: 'input/styled.xlsx', include_styles: true },
      makeContext(workspacePath),
    );
    expect(styled?.isError).toBe(false);
    expect(styled?.output).toContain('### Styles');
    expect(styled?.output).toContain('#FFF2CC');
    expect(styled?.output).toMatch(/bold/);
  });
});