// maestro/src/engine/tools/office.test.ts

import * as fs from 'fs';
import * as path from 'path';
import { tmpdir } from 'os';
import { execSync } from 'child_process';
import { afterEach, describe, expect, it } from 'vitest';
import { executeTool } from './office.js';
import type { ToolContext } from './core.js';

/**
 * Creates a uniquely named scratch directory under the OS temp directory.
 *
 * @returns Path of the newly created directory (prefix `maestro-office-`).
 */
function makeWorkspace(): string {
  const prefix = path.join(tmpdir(), 'maestro-office-');
  return fs.mkdtempSync(prefix);
}

/**
 * Builds the tool context used by the tests in this file.
 *
 * @param workspacePath Workspace directory the tool should operate in.
 * @returns A context for that workspace with editing enabled.
 */
function makeContext(workspacePath: string): ToolContext {
  const editAllowed = true;
  return { workspacePath, editAllowed };
}

/**
 * Writes a minimal single-page PDF 1.4 file whose only content is `text`
 * drawn in Helvetica 24pt at (100, 100).
 *
 * Everything that depends on the size of `text` is computed rather than
 * hard-coded: the content stream's /Length, the byte offset of every
 * object in the cross-reference table, and the `startxref` pointer.
 * Hard-coded values only match one specific text length, so any other
 * text silently produced a structurally broken file.
 *
 * @param filePath Destination path; the file is created or overwritten.
 * @param text     Text to show on the page. `\`, `(` and `)` are escaped
 *                 so they cannot terminate the PDF literal string early.
 */
function writeMinimalPdf(filePath: string, text: string): void {
  // Backslash and parentheses are the special characters of a PDF literal string.
  const escapedText = text.replace(/[\\()]/g, '\\$&');

  // Build the content stream first so /Length is accurate.
  const stream = `BT\n/F1 24 Tf\n100 100 Td\n(${escapedText}) Tj\nET\n`;
  const streamLen = Buffer.byteLength(stream, 'utf-8');

  // Bodies of objects 1..5, in object-number order.
  const objectBodies = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>',
    `<< /Length ${streamLen} >>\nstream\n${stream}endstream`,
    '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>',
  ];

  let pdf = '%PDF-1.4\n';
  const offsets: number[] = [];
  objectBodies.forEach((body, index) => {
    // Record where this object starts, measured in bytes (not UTF-16 units).
    offsets.push(Buffer.byteLength(pdf, 'utf-8'));
    pdf += `${index + 1} 0 obj\n${body}\nendobj\n`;
  });

  const xrefOffset = Buffer.byteLength(pdf, 'utf-8');
  const entryCount = objectBodies.length + 1; // +1 for the free entry of object 0

  // Each xref entry is exactly 20 bytes: 10-digit offset, space, 5-digit
  // generation, space, type keyword, then a two-byte end-of-line (" \n").
  pdf += `xref\n0 ${entryCount}\n`;
  pdf += '0000000000 65535 f \n';
  for (const offset of offsets) {
    pdf += `${String(offset).padStart(10, '0')} 00000 n \n`;
  }
  pdf += `trailer\n<< /Root 1 0 R /Size ${entryCount} >>\nstartxref\n${xrefOffset}\n%%EOF\n`;

  fs.writeFileSync(filePath, pdf, 'utf-8');
}

/**
 * Probes whether pymupdf is usable by asking `python3` to import `fitz`.
 *
 * @returns `true` when the import command exits successfully, `false` when
 *          `execSync` throws for any reason.
 */
function hasPymupdf(): boolean {
  let available = true;
  try {
    execSync('python3 -c "import fitz"', { stdio: 'ignore' });
  } catch {
    available = false;
  }
  return available;
}

describe('office tools', () => {
  // Workspace of the test currently running; '' means nothing to clean up.
  let workspacePath = '';

  /** Creates a fresh workspace and writes a one-page PDF at input/<fileName>. */
  const seedPdf = (fileName: string, text: string): void => {
    workspacePath = makeWorkspace();
    const inputDir = path.join(workspacePath, 'input');
    fs.mkdirSync(inputDir, { recursive: true });
    writeMinimalPdf(path.join(inputDir, fileName), text);
  };

  /** Invokes the ReadPdf tool against the current workspace. */
  const readPdf = (args: Parameters<typeof executeTool>[1]) =>
    executeTool('ReadPdf', args, makeContext(workspacePath));

  // Registered on the outer suite, so it also runs for the nested describe.
  afterEach(() => {
    if (!workspacePath) return;
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  it('reads PDF text with ReadPdf', async () => {
    seedPdf('sample.pdf', 'Hello PDF');

    const result = await readPdf({ path: 'input/sample.pdf' });

    expect(result).not.toBeNull();
    expect(result?.isError).toBe(false);
    for (const fragment of ['# sample.pdf', 'Total pages: 1', 'Hello PDF']) {
      expect(result?.output).toContain(fragment);
    }
  });

  // query=... is the grep-style search mode added 2026-05-21.
  describe('ReadPdf — query / search mode', () => {
    it('returns grep-style snippet for matching pages and skips the rest', async () => {
      const pageText = ['intro line', 'KEYWORD shows up here', 'trailing line'].join(' ');
      seedPdf('doc.pdf', pageText);

      const result = await readPdf({ path: 'input/doc.pdf', query: 'KEYWORD' });

      expect(result?.isError).toBe(false);
      for (const fragment of ['query: "KEYWORD"', '### Matches', 'Pages with match: 1']) {
        expect(result?.output).toContain(fragment);
      }
      expect(result?.output).toMatch(/>\s*\d+:.*KEYWORD/);
    });

    it('returns "no matches" when query is absent from every page', async () => {
      seedPdf('doc.pdf', 'just some text');

      const result = await readPdf({ path: 'input/doc.pdf', query: 'WILL-NOT-FIND' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 0');
      expect(result?.output).toContain('(no matches for "WILL-NOT-FIND")');
    });

    it('is case-insensitive in default substring mode', async () => {
      seedPdf('doc.pdf', 'Mixed Case Keyword');

      // Lower-case query against mixed-case page text.
      const result = await readPdf({ path: 'input/doc.pdf', query: 'keyword' });

      expect(result?.isError).toBe(false);
      expect(result?.output).toContain('Pages with match: 1');
    });

    it('errors out gracefully on an invalid regex pattern', async () => {
      seedPdf('doc.pdf', 'anything');

      const result = await readPdf({
        path: 'input/doc.pdf',
        query: '(unbalanced',
        query_mode: 'regex',
      });

      expect(result?.isError).toBe(true);
      expect(result?.output).toContain('query error');
      expect(result?.output).toContain('invalid regex');
    });

    it('ignores empty / whitespace-only query and falls back to full-text mode', async () => {
      seedPdf('doc.pdf', 'whole document text');

      const result = await readPdf({ path: 'input/doc.pdf', query: '   ' });

      expect(result?.isError).toBe(false);
      // Full-text output has a Content section and no Matches section.
      expect(result?.output).toContain('### Content');
      expect(result?.output).not.toContain('### Matches');
      expect(result?.output).toContain('whole document text');
    });
  });
});

describe('PdfToImages', () => {
  // Workspace of the test currently running. Initialised to '' like the
  // other suites in this file so afterEach never reads an unassigned variable.
  let workspaceDir = '';

  afterEach(() => {
    if (workspaceDir) {
      fs.rmSync(workspaceDir, { recursive: true, force: true });
      workspaceDir = '';
    }
  });

  // The result is accessed through `?.` throughout, matching the rest of this
  // file, which treats the executeTool result as possibly null. Every
  // assertion below is positive, so a null result still fails the test.

  it('returns error when edit is not allowed', async () => {
    workspaceDir = makeWorkspace();
    const ctx = { ...makeContext(workspaceDir), editAllowed: false };

    const result = await executeTool('PdfToImages', { path: 'input/any.pdf' }, ctx);
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('not allowed');
  });

  it('returns error for missing file', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    const result = await executeTool('PdfToImages', { path: 'input/notfound.pdf' }, ctx);
    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/not found/i);
  });

  it('returns error for invalid page_range', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    writeMinimalPdf(path.join(workspaceDir, 'input', 'sample.pdf'), 'test');

    const result = await executeTool('PdfToImages', {
      path: 'input/sample.pdf',
      page_range: 'invalid',
    }, ctx);
    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Invalid page_range');
  });

  // Tests that need pymupdf depend on the environment, so they run only
  // when the probe succeeds and are skipped otherwise.
  const itWithPymupdf = hasPymupdf() ? it : it.skip;

  itWithPymupdf('converts PDF to PNG images in output/ReadPdf/', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    const pdfPath = path.join(workspaceDir, 'input', 'sample.pdf');
    writeMinimalPdf(pdfPath, 'Hello OCR');

    const result = await executeTool('PdfToImages', { path: 'input/sample.pdf' }, ctx);

    expect(result?.isError).toBe(false);
    // Images are expected in output/ReadPdf/<pdf basename>/.
    const outDir = path.join(workspaceDir, 'output', 'ReadPdf', 'sample');
    expect(fs.existsSync(outDir)).toBe(true);
    const files = fs.readdirSync(outDir);
    expect(files.some((f) => f.startsWith('page-') && f.endsWith('.png'))).toBe(true);
    expect(result?.output).toContain('page-0001.png');
    expect(result?.output).toContain('ReadImage');
  });

  itWithPymupdf('respects page_range parameter', async () => {
    workspaceDir = makeWorkspace();
    const ctx = makeContext(workspaceDir);

    fs.mkdirSync(path.join(workspaceDir, 'input'), { recursive: true });
    // The fixture has a single page, so '1-1' selects the whole document.
    writeMinimalPdf(path.join(workspaceDir, 'input', 'multi.pdf'), 'page1');

    const result = await executeTool('PdfToImages', {
      path: 'input/multi.pdf',
      page_range: '1-1',
    }, ctx);

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('page-0001.png');
  });
});

// Issue #246: when ReadExcel/ReadPdf/ReadDocx/ReadPPTX were handed a file in
// the wrong format, the cryptic JSZip / pdf-parse errors sent the agent into
// a loop. These tests confirm that the validateFileFormat helper rejects such
// files early (by extension + magic bytes) and returns an agent-actionable
// error.
describe('Read* tools — format mismatch rejection (issue #246)', () => {
  // Workspace of the test currently running; '' means nothing to clean up.
  let workspacePath = '';

  /** Creates a fresh workspace containing `dirName` and returns that directory. */
  const makeDir = (dirName: string): string => {
    workspacePath = makeWorkspace();
    const dir = path.join(workspacePath, dirName);
    fs.mkdirSync(dir, { recursive: true });
    return dir;
  };

  /**
   * Creates a fresh workspace with `dirName/fileName` holding `data` and
   * returns the workspace-relative path to hand to a tool.
   */
  const seedFile = (dirName: string, fileName: string, data: string | Buffer): string => {
    fs.writeFileSync(path.join(makeDir(dirName), fileName), data);
    return `${dirName}/${fileName}`;
  };

  /** Invokes a tool against the current workspace. */
  const run = (
    tool: Parameters<typeof executeTool>[0],
    args: Parameters<typeof executeTool>[1],
  ) => executeTool(tool, args, makeContext(workspacePath));

  afterEach(() => {
    if (!workspacePath) return;
    fs.rmSync(workspacePath, { recursive: true, force: true });
    workspacePath = '';
  });

  it('ReadPdf rejects .md path with actionable error pointing to Read', async () => {
    const target = seedFile('output', 'report.md', '# Hello');

    const result = await run('ReadPdf', { path: target });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('.md');
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects .md path with actionable error', async () => {
    const target = seedFile('output', 'data.md', 'col1,col2\n1,2');

    const result = await run('ReadExcel', { path: target });

    expect(result?.isError).toBe(true);
    expect(result?.output).toContain('Read(');
  });

  it('ReadExcel rejects CFB (old .xls) wearing a .xlsx extension', async () => {
    // CFB magic header followed by zero padding.
    const cfbHeader = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0, 0, 0, 0, 0, 0, 0, 0]);
    const target = seedFile('input', 'old.xlsx', cfbHeader);

    const result = await run('ReadExcel', { path: target });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/旧バイナリ|CFB|\.xls/);
    // The cryptic error from JSZip must not leak into the output.
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects HTML disguised as .xlsx', async () => {
    const target = seedFile('input', 'report.xlsx', '<!DOCTYPE html><html><body>Table</body></html>');

    const result = await run('ReadExcel', { path: target });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/HTML/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadExcel rejects CSV disguised as .xlsx', async () => {
    const target = seedFile('input', 'data.xlsx', 'col1,col2,col3\n1,2,3\n4,5,6\n');

    const result = await run('ReadExcel', { path: target });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/テキスト|CSV/);
    expect(result?.output).not.toContain("Can't find end of central");
  });

  it('ReadPdf rejects OOXML mistakenly named .pdf', async () => {
    // ZIP signature followed by zero padding.
    const zipHeader = Buffer.from([0x50, 0x4B, 0x03, 0x04, 0, 0, 0, 0]);
    const target = seedFile('input', 'fake.pdf', zipHeader);

    const result = await run('ReadPdf', { path: target });

    expect(result?.isError).toBe(true);
    expect(result?.output).toMatch(/OOXML|ReadExcel|ReadDocx|ReadPPTX/);
  });

  it('ReadExcel still accepts a real .xlsx without warning', async () => {
    const inputDir = makeDir('input');
    // Real OOXML built via exceljs
    const { default: ExcelJS } = await import('exceljs');
    const workbook = new ExcelJS.Workbook();
    const sheet = workbook.addWorksheet('Sheet1');
    sheet.addRow(['a', 'b', 'c']);
    await workbook.xlsx.writeFile(path.join(inputDir, 'ok.xlsx'));

    const result = await run('ReadExcel', { path: 'input/ok.xlsx' });

    expect(result?.isError).toBe(false);
    expect(result?.output).toContain('Sheet1');
  });

  it('ReadExcel includes a Styles section only when include_styles=true', async () => {
    const inputDir = makeDir('input');

    const { default: ExcelJS } = await import('exceljs');
    const workbook = new ExcelJS.Workbook();
    const sheet = workbook.addWorksheet('Sheet1');
    const header = sheet.getCell('A1');
    header.value = 'Header';
    header.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: 'FFFFF2CC' } };
    header.font = { bold: true };
    await workbook.xlsx.writeFile(path.join(inputDir, 'styled.xlsx'));

    // Without include_styles: no Styles section (backward compat)
    const plain = await run('ReadExcel', { path: 'input/styled.xlsx' });
    expect(plain!.output).not.toContain('### Styles');

    // With include_styles: Styles section present with fill color and font bold
    const styled = await run('ReadExcel', { path: 'input/styled.xlsx', include_styles: true });
    expect(styled!.output).toContain('### Styles');
    expect(styled!.output).toContain('#FFF2CC');
    expect(styled!.output).toMatch(/bold/);
  });
});