maestro/src/engine/tools/msg.test.ts
oss-sync 3b1645cc91
Some checks failed
CI / build-and-test (push) Has been cancelled
sync: update from private repo (d31b280)
2026-06-11 11:28:40 +00:00

325 lines
12 KiB
TypeScript

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { fileURLToPath } from 'url';
import {
formatAddress,
stripHtml,
selectMsgBody,
sanitizeAttachmentName,
formatMsgOutput,
assembleMsgOutput,
pickEmail,
isParsedMsgValid,
executeReadMsg,
type MsgView,
} from './msg.js';
import type { ToolContext } from './core.js';
import { executeTool as officeExecuteTool, TOOL_DEFS as OFFICE_TOOL_DEFS } from './office.js';
// Directory containing this test module, derived from the module URL.
const fixtureBaseDir = path.dirname(fileURLToPath(import.meta.url));
// Path of the sample .msg file that the integration suite copies into its workspace.
const FIXTURE = path.join(fixtureBaseDir, '__fixtures__', 'attachmentFiles.msg');
// formatAddress: turns an optional name / optional email pair into one display string.
describe('formatAddress', () => {
  const name = 'Alice';
  const email = 'alice@example.com';

  it('renders name and email together', () => {
    const rendered = formatAddress({ name, email });
    expect(rendered).toBe('Alice <alice@example.com>');
  });

  it('renders name only when email is missing', () => {
    const rendered = formatAddress({ name });
    expect(rendered).toBe('Alice');
  });

  it('renders email only when name is missing', () => {
    const rendered = formatAddress({ email });
    expect(rendered).toBe('alice@example.com');
  });

  it('falls back to a placeholder when both are missing', () => {
    const rendered = formatAddress({});
    expect(rendered).toBe('(unknown)');
  });
});
// stripHtml: converts an HTML fragment to plain text.
describe('stripHtml', () => {
  it('removes tags and decodes entities', () => {
    const markup = '<p>Hello&nbsp;<b>world</b> &amp; co</p>';
    const text = stripHtml(markup);
    expect(text).toBe('Hello world & co');
  });

  it('drops script and style content', () => {
    const markup = '<style>.x{color:red}</style><p>Keep</p><script>alert(1)</script>';
    const text = stripHtml(markup);
    expect(text).toBe('Keep');
  });

  it('turns block boundaries into newlines', () => {
    const text = stripHtml('<div>line1</div><div>line2</div>');
    expect(text).toBe('line1\nline2');
  });

  it('decodes valid numeric entities', () => {
    // One decimal (&#65;) and one hexadecimal (&#x42;) reference.
    const text = stripHtml('<p>&#65;&#x42;</p>');
    expect(text).toBe('AB');
  });

  it('does not throw on out-of-range numeric entities', () => {
    const decodeHugeDecimal = (): string => stripHtml('<p>&#999999999;</p>');
    expect(decodeHugeDecimal).not.toThrow();

    // The out-of-range reference is expected to come back verbatim.
    const untouched = 'A&#x110000;B';
    expect(stripHtml(untouched)).toBe(untouched);
  });
});
// selectMsgBody: picks which body representation of a message is shown.
// The cases below cover the plain `body`, the `bodyHtml` string and the raw
// `html` bytes (named PidTagHtml in the test titles).
describe('selectMsgBody', () => {
  it('prefers the plain-text body', () => {
    expect(selectMsgBody({ body: 'plain text', bodyHtml: '<p>html</p>' })).toEqual({
      text: 'plain text',
      format: 'plain',
    });
  });

  // This case exercises selectMsgBody, so it belongs in this suite rather
  // than in the isParsedMsgValid suite.
  it('treats a whitespace-only plain body as empty and uses HTML', () => {
    expect(selectMsgBody({ body: ' \n ', bodyHtml: '<p>real</p>' })).toEqual({
      text: 'real',
      format: 'html',
    });
  });

  it('falls back to stripped HTML when no plain body exists', () => {
    expect(selectMsgBody({ bodyHtml: '<p>html body</p>' })).toEqual({
      text: 'html body',
      format: 'html',
    });
  });

  it('reports none when no body is present', () => {
    expect(selectMsgBody({})).toEqual({ text: '', format: 'none' });
  });

  it('decodes PidTagHtml (html) when body and bodyHtml are absent', () => {
    const html = new TextEncoder().encode('<p>from pidtag</p>');
    expect(selectMsgBody({ html })).toEqual({ text: 'from pidtag', format: 'html' });
  });

  it('prefers plain body over the PidTagHtml field', () => {
    const html = new TextEncoder().encode('<p>html</p>');
    expect(selectMsgBody({ body: 'plain', html })).toEqual({ text: 'plain', format: 'plain' });
  });

  it('falls back to PidTagHtml when bodyHtml is empty/whitespace', () => {
    const html = new TextEncoder().encode('<p>pidtag body</p>');
    expect(selectMsgBody({ bodyHtml: ' ', html })).toEqual({
      text: 'pidtag body',
      format: 'html',
    });
  });
});

// pickEmail: chooses one address out of two candidates.
describe('pickEmail', () => {
  it('prefers a real SMTP address over a legacy EX DN', () => {
    // The SMTP address must win regardless of argument position.
    expect(pickEmail('/O=EX/OU=x/CN=alice', 'alice@example.com')).toBe('alice@example.com');
    expect(pickEmail('alice@example.com', '/O=EX/OU=x/CN=alice')).toBe('alice@example.com');
  });

  it('falls back to the EX DN when no SMTP-looking address exists', () => {
    expect(pickEmail(undefined, '/O=EX/OU=x/CN=alice')).toBe('/O=EX/OU=x/CN=alice');
  });

  it('returns undefined when nothing usable is provided', () => {
    expect(pickEmail(undefined, undefined)).toBeUndefined();
    expect(pickEmail('', ' ')).toBeUndefined();
  });
});

// isParsedMsgValid: only a parse result whose dataType is 'msg' is accepted.
describe('isParsedMsgValid', () => {
  it('accepts a parsed Outlook message', () => {
    expect(isParsedMsgValid({ dataType: 'msg' })).toBe(true);
  });

  it('rejects an unsupported CFBF result (old .doc/.xls, corrupted compound file)', () => {
    expect(isParsedMsgValid({ error: 'Unsupported file type!', dataType: null })).toBe(false);
    expect(isParsedMsgValid({ dataType: null })).toBe(false);
    expect(isParsedMsgValid({ dataType: 'attachment' })).toBe(false);
  });
});
// sanitizeAttachmentName: reduces an attachment name to a safe bare filename.
// The second argument is the zero-based attachment index used for the fallback name.
describe('sanitizeAttachmentName', () => {
  it('keeps a normal filename unchanged', () => {
    const cleaned = sanitizeAttachmentName('report.pdf', 0);
    expect(cleaned).toBe('report.pdf');
  });

  it('strips directory components to prevent path traversal', () => {
    const fromParentDirs = sanitizeAttachmentName('../../etc/passwd', 0);
    expect(fromParentDirs).toBe('passwd');

    const fromForwardSlashes = sanitizeAttachmentName('foo/bar/baz.txt', 0);
    expect(fromForwardSlashes).toBe('baz.txt');

    const fromBackslashes = sanitizeAttachmentName('a\\b\\c.doc', 0);
    expect(fromBackslashes).toBe('c.doc');
  });

  it('removes control characters and null bytes', () => {
    const withoutNul = sanitizeAttachmentName('na\x00me.txt', 0);
    expect(withoutNul).toBe('name.txt');

    const withoutTab = sanitizeAttachmentName('tab\tname.txt', 0);
    expect(withoutTab).toBe('tabname.txt');
  });

  it('preserves spaces inside the filename', () => {
    const cleaned = sanitizeAttachmentName('my report.pdf', 0);
    expect(cleaned).toBe('my report.pdf');
  });

  it('falls back to an indexed name when the result is empty', () => {
    // The fallback number is the index plus one.
    const fromEmpty = sanitizeAttachmentName('', 2);
    expect(fromEmpty).toBe('attachment-3');

    const fromDotsOnly = sanitizeAttachmentName('...', 0);
    expect(fromDotsOnly).toBe('attachment-1');
  });
});
// formatMsgOutput: renders a MsgView as text (headers, body, attachment list).
// One case also covers assembleMsgOutput, which reuses the same sample view.
describe('formatMsgOutput', () => {
  // Minimal message: one recipient, no CC, plain body, no attachments.
  const sampleView: MsgView = {
    subject: 'Quarterly report',
    from: { name: 'Alice', email: 'alice@example.com' },
    to: [{ name: 'Bob', email: 'bob@example.com' }],
    cc: [],
    date: 'Mon, 1 Jun 2026 10:00:00 +0900',
    body: { text: 'See attached.', format: 'plain' },
    attachments: [],
  };

  it('renders the header block and body', () => {
    const rendered = formatMsgOutput(sampleView);
    const expectedFragments = [
      'Subject: Quarterly report',
      'From: Alice <alice@example.com>',
      'To: Bob <bob@example.com>',
      'Date: Mon, 1 Jun 2026 10:00:00 +0900',
      'See attached.',
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  it('lists saved attachments with their paths and sizes', () => {
    const rendered = formatMsgOutput({
      ...sampleView,
      attachments: [{ fileName: 'report.pdf', contentLength: 2048, savedPath: 'input/report.pdf' }],
    });
    for (const fragment of ['Attachments (1)', 'report.pdf', 'input/report.pdf', '2048']) {
      expect(rendered).toContain(fragment);
    }
  });

  it('shows a skip reason for attachments that were not saved', () => {
    const rendered = formatMsgOutput({
      ...sampleView,
      attachments: [{ fileName: 'huge.bin', skipped: 'exceeds size limit' }],
    });
    for (const fragment of ['huge.bin', 'exceeds size limit']) {
      expect(rendered).toContain(fragment);
    }
  });

  it('notes when the body could not be extracted', () => {
    const rendered = formatMsgOutput({ ...sampleView, body: { text: '', format: 'none' } });
    expect(rendered).toContain('(no text body)');
  });

  it('omits the CC line when there are no CC recipients', () => {
    const rendered = formatMsgOutput(sampleView);
    expect(rendered).not.toContain('Cc:');
  });

  it('keeps the attachment list when the body is truncated to budget', () => {
    // 100,000 characters of body text against a budget argument of 100.
    const oversizedBody = 'word '.repeat(20000);
    const rendered = assembleMsgOutput(
      {
        ...sampleView,
        body: { text: oversizedBody, format: 'plain' },
        attachments: [{ fileName: 'a.pdf', contentLength: 10, savedPath: 'input/a.pdf' }],
      },
      100,
      'mail.msg',
    );
    expect(rendered).toContain('input/a.pdf');
    expect(rendered).toContain('Subject: Quarterly report');
    expect(rendered.length).toBeLessThan(oversizedBody.length);
  });

  it('includes the CC line when CC recipients exist', () => {
    const rendered = formatMsgOutput({ ...sampleView, cc: [{ email: 'carol@example.com' }] });
    expect(rendered).toContain('Cc: carol@example.com');
  });
});
// executeReadMsg end-to-end: each test runs against a fresh temporary workspace
// holding a copy of the fixture message as mail.msg.
describe('executeReadMsg (integration)', () => {
  let workspace: string;

  // Built on demand so it always reflects the workspace of the current test.
  const ctx = (): ToolContext => ({ workspacePath: workspace, editAllowed: true });
  // Path of a file under the workspace's input/ directory.
  const inputFile = (name: string): string => path.join(workspace, 'input', name);

  beforeEach(() => {
    const tmpPrefix = path.join(os.tmpdir(), 'readmsg-');
    workspace = fs.mkdtempSync(tmpPrefix);
    fs.copyFileSync(FIXTURE, path.join(workspace, 'mail.msg'));
  });

  afterEach(() => {
    fs.rmSync(workspace, { recursive: true, force: true });
  });

  it('extracts headers and body from a real .msg file', async () => {
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    expect(result.isError).toBeFalsy();
    const expectedFragments = [
      'Subject: attachmentFiles',
      'From: hmailuser <hmailuser@hmailserver.test>',
      'To: hmailuser@hmailserver.test',
      'attachmentFiles',
    ];
    for (const fragment of expectedFragments) {
      expect(result.output).toContain(fragment);
    }
  });

  it('saves attachments to input/ and lists them', async () => {
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    expect(result.output).toContain('Attachments (3)');

    // Expected attachment names and their sizes in bytes.
    const expectedFiles = [
      { name: 'jpg.jpg', size: 726 },
      { name: 'png.png', size: 134 },
      { name: 'tif.tif', size: 664 },
    ];
    for (const { name, size } of expectedFiles) {
      const savedPath = inputFile(name);
      expect(fs.existsSync(savedPath)).toBe(true);
      expect(fs.statSync(savedPath).size).toBe(size);
      expect(result.output).toContain(path.join('input', name));
    }
  });

  it('rejects paths outside the workspace', async () => {
    const result = await executeReadMsg({ file_path: '../../etc/passwd' }, ctx());
    expect(result.isError).toBe(true);
  });

  it('reports a clear error for a non-.msg file', async () => {
    const junkPath = path.join(workspace, 'junk.msg');
    fs.writeFileSync(junkPath, 'not a real msg file');
    const result = await executeReadMsg({ file_path: 'junk.msg' }, ctx());
    expect(result.isError).toBe(true);
    expect(result.output).toContain('ReadMsg');
  });

  it('does not write attachments in a read-only phase', async () => {
    // Same context as the other tests, with editing switched off.
    const result = await executeReadMsg(
      { file_path: 'mail.msg' },
      { ...ctx(), editAllowed: false },
    );
    expect(result.isError).toBeFalsy();
    expect(fs.existsSync(inputFile('jpg.jpg'))).toBe(false);
    expect(result.output).toContain('read-only');
  });

  it('does not overwrite an existing file in input/', async () => {
    // Pre-create a file whose name collides with one of the attachments.
    fs.mkdirSync(path.join(workspace, 'input'), { recursive: true });
    fs.writeFileSync(inputFile('jpg.jpg'), 'pre-existing');

    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());

    expect(fs.readFileSync(inputFile('jpg.jpg'), 'utf8')).toBe('pre-existing');
    expect(fs.existsSync(inputFile('jpg-1.jpg'))).toBe(true);
    expect(result.output).toContain('jpg-1.jpg');
  });

  it('rejects files exceeding the configured size limit', async () => {
    const result = await executeReadMsg(
      { file_path: 'mail.msg' },
      { ...ctx(), toolsConfig: { officeMsgMaxSizeMb: 0.001 } },
    );
    expect(result.isError).toBe(true);
    expect(result.output).toMatch(/size|limit|too large/i);
  });

  it('is registered and routed through the office module dispatch', async () => {
    expect(OFFICE_TOOL_DEFS.ReadMsg).toBeDefined();
    const result = await officeExecuteTool('ReadMsg', { file_path: 'mail.msg' }, ctx());
    expect(result?.isError).toBeFalsy();
    expect(result?.output).toContain('Subject: attachmentFiles');
  });
});