// File-viewer metadata (not part of the module): TypeScript, 325 lines, 12 KiB.
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { fileURLToPath } from 'url';
import {
  formatAddress,
  stripHtml,
  selectMsgBody,
  sanitizeAttachmentName,
  formatMsgOutput,
  assembleMsgOutput,
  pickEmail,
  isParsedMsgValid,
  executeReadMsg,
  type MsgView,
} from './msg.js';
import type { ToolContext } from './core.js';
import { executeTool as officeExecuteTool, TOOL_DEFS as OFFICE_TOOL_DEFS } from './office.js';

/**
 * Absolute path to the sample Outlook message used by the integration tests.
 *
 * Resolved relative to this module's own directory (via `import.meta.url`)
 * rather than the process working directory, so the path does not depend on
 * where the test runner is launched from.
 */
const FIXTURE = path.join(
  path.dirname(fileURLToPath(import.meta.url)),
  '__fixtures__',
  'attachmentFiles.msg',
);

// formatAddress: every combination of present/missing name and email.
describe('formatAddress', () => {
  it('renders name and email together', () => {
    expect(formatAddress({ name: 'Alice', email: 'alice@example.com' })).toBe(
      'Alice <alice@example.com>',
    );
  });

  it('renders name only when email is missing', () => {
    expect(formatAddress({ name: 'Alice' })).toBe('Alice');
  });

  it('renders email only when name is missing', () => {
    expect(formatAddress({ email: 'alice@example.com' })).toBe('alice@example.com');
  });

  it('falls back to a placeholder when both are missing', () => {
    expect(formatAddress({})).toBe('(unknown)');
  });
});

// stripHtml: tag removal, entity decoding and block-to-newline conversion.
//
// The entity tests must contain *literal* character references in the input
// string (`&amp;`, `&#65;`, ...). If a tool decodes them before they reach this
// file, the assertions still pass but no longer exercise the decoder.
describe('stripHtml', () => {
  it('removes tags and decodes entities', () => {
    expect(stripHtml('<p>Hello <b>world</b> &amp; co</p>')).toBe('Hello world & co');
  });

  it('drops script and style content', () => {
    const html = '<style>.x{color:red}</style><p>Keep</p><script>alert(1)</script>';
    expect(stripHtml(html)).toBe('Keep');
  });

  it('turns block boundaries into newlines', () => {
    expect(stripHtml('<div>line1</div><div>line2</div>')).toBe('line1\nline2');
  });

  it('decodes valid numeric entities', () => {
    // &#65; is "A" and &#66; is "B".
    expect(stripHtml('<p>&#65;&#66;</p>')).toBe('AB');
  });

  it('does not throw on out-of-range numeric entities', () => {
    // 9999999999 is far above the highest Unicode code point (0x10FFFF); a
    // decoder that feeds it straight to String.fromCodePoint would throw a
    // RangeError.
    expect(() => stripHtml('<p>&#9999999999;</p>')).not.toThrow();
    // NOTE(review): the U+FFFD characters below look like an out-of-range
    // entity that was decoded before reaching this file. Confirm whether the
    // intended check is "entity left verbatim" or "entity replaced by U+FFFD"
    // and restore the literal entity accordingly.
    expect(stripHtml('A\uFFFDB')).toBe('A\uFFFDB');
  });
});

// selectMsgBody: which body source wins (plain `body`, `bodyHtml`, or the raw
// `html` bytes) and which `format` label is reported for it.
describe('selectMsgBody', () => {
  it('prefers the plain-text body', () => {
    expect(selectMsgBody({ body: 'plain text', bodyHtml: '<p>html</p>' })).toEqual({
      text: 'plain text',
      format: 'plain',
    });
  });

  it('falls back to stripped HTML when no plain body exists', () => {
    expect(selectMsgBody({ bodyHtml: '<p>html body</p>' })).toEqual({
      text: 'html body',
      format: 'html',
    });
  });

  it('reports none when no body is present', () => {
    expect(selectMsgBody({})).toEqual({ text: '', format: 'none' });
  });

  it('decodes PidTagHtml (html) when body and bodyHtml are absent', () => {
    // The `html` field is supplied as encoded bytes, not as a string.
    const html = new TextEncoder().encode('<p>from pidtag</p>');
    expect(selectMsgBody({ html })).toEqual({ text: 'from pidtag', format: 'html' });
  });

  it('prefers plain body over the PidTagHtml field', () => {
    const html = new TextEncoder().encode('<p>html</p>');
    expect(selectMsgBody({ body: 'plain', html })).toEqual({ text: 'plain', format: 'plain' });
  });

  it('falls back to PidTagHtml when bodyHtml is empty/whitespace', () => {
    const html = new TextEncoder().encode('<p>pidtag body</p>');
    expect(selectMsgBody({ bodyHtml: ' ', html })).toEqual({
      text: 'pidtag body',
      format: 'html',
    });
  });
});

// pickEmail: choosing between two candidate address strings, one of which may
// be a legacy Exchange distinguished name ("/O=EX/...") instead of SMTP.
describe('pickEmail', () => {
  it('prefers a real SMTP address over a legacy EX DN', () => {
    // Argument order must not matter.
    expect(pickEmail('/O=EX/OU=x/CN=alice', 'alice@example.com')).toBe('alice@example.com');
    expect(pickEmail('alice@example.com', '/O=EX/OU=x/CN=alice')).toBe('alice@example.com');
  });

  it('falls back to the EX DN when no SMTP-looking address exists', () => {
    expect(pickEmail(undefined, '/O=EX/OU=x/CN=alice')).toBe('/O=EX/OU=x/CN=alice');
  });

  it('returns undefined when nothing usable is provided', () => {
    expect(pickEmail(undefined, undefined)).toBeUndefined();
    // Empty and whitespace-only candidates count as "nothing usable".
    expect(pickEmail('', ' ')).toBeUndefined();
  });
});

// isParsedMsgValid: only a parse result whose dataType is 'msg' is accepted.
describe('isParsedMsgValid', () => {
  it('accepts a parsed Outlook message', () => {
    expect(isParsedMsgValid({ dataType: 'msg' })).toBe(true);
  });

  it('rejects an unsupported CFBF result (old .doc/.xls, corrupted compound file)', () => {
    expect(isParsedMsgValid({ error: 'Unsupported file type!', dataType: null })).toBe(false);
    expect(isParsedMsgValid({ dataType: null })).toBe(false);
    expect(isParsedMsgValid({ dataType: 'attachment' })).toBe(false);
  });
});

// This case exercises selectMsgBody, not isParsedMsgValid, so it lives in its
// own suite to be reported under the function it actually tests.
describe('selectMsgBody (blank plain body)', () => {
  it('treats a whitespace-only plain body as empty and uses HTML', () => {
    expect(selectMsgBody({ body: ' \n ', bodyHtml: '<p>real</p>' })).toEqual({
      text: 'real',
      format: 'html',
    });
  });
});

// sanitizeAttachmentName(name, index): turning an attachment's declared name
// into a safe bare filename. The second argument is the zero-based attachment
// index, used for the fallback name (index 2 -> "attachment-3").
describe('sanitizeAttachmentName', () => {
  it('keeps a normal filename unchanged', () => {
    expect(sanitizeAttachmentName('report.pdf', 0)).toBe('report.pdf');
  });

  it('strips directory components to prevent path traversal', () => {
    expect(sanitizeAttachmentName('../../etc/passwd', 0)).toBe('passwd');
    expect(sanitizeAttachmentName('foo/bar/baz.txt', 0)).toBe('baz.txt');
    // Backslash separators are stripped as well as forward slashes.
    expect(sanitizeAttachmentName('a\\b\\c.doc', 0)).toBe('c.doc');
  });

  it('removes control characters and null bytes', () => {
    expect(sanitizeAttachmentName('na\x00me.txt', 0)).toBe('name.txt');
    expect(sanitizeAttachmentName('tab\tname.txt', 0)).toBe('tabname.txt');
  });

  it('preserves spaces inside the filename', () => {
    expect(sanitizeAttachmentName('my report.pdf', 0)).toBe('my report.pdf');
  });

  it('falls back to an indexed name when the result is empty', () => {
    expect(sanitizeAttachmentName('', 2)).toBe('attachment-3');
    // A name made only of dots is treated as empty.
    expect(sanitizeAttachmentName('...', 0)).toBe('attachment-1');
  });
});

// formatMsgOutput: text rendering of a MsgView (headers, body, attachments).
// One case below goes through assembleMsgOutput to cover body truncation.
describe('formatMsgOutput', () => {
  // Minimal view shared by all cases; each test spreads it and overrides only
  // the fields it cares about.
  const baseView: MsgView = {
    subject: 'Quarterly report',
    from: { name: 'Alice', email: 'alice@example.com' },
    to: [{ name: 'Bob', email: 'bob@example.com' }],
    cc: [],
    date: 'Mon, 1 Jun 2026 10:00:00 +0900',
    body: { text: 'See attached.', format: 'plain' },
    attachments: [],
  };

  it('renders the header block and body', () => {
    const out = formatMsgOutput(baseView);
    expect(out).toContain('Subject: Quarterly report');
    expect(out).toContain('From: Alice <alice@example.com>');
    expect(out).toContain('To: Bob <bob@example.com>');
    expect(out).toContain('Date: Mon, 1 Jun 2026 10:00:00 +0900');
    expect(out).toContain('See attached.');
  });

  it('lists saved attachments with their paths and sizes', () => {
    const out = formatMsgOutput({
      ...baseView,
      attachments: [{ fileName: 'report.pdf', contentLength: 2048, savedPath: 'input/report.pdf' }],
    });
    expect(out).toContain('Attachments (1)');
    expect(out).toContain('report.pdf');
    expect(out).toContain('input/report.pdf');
    expect(out).toContain('2048');
  });

  it('shows a skip reason for attachments that were not saved', () => {
    const out = formatMsgOutput({
      ...baseView,
      attachments: [{ fileName: 'huge.bin', skipped: 'exceeds size limit' }],
    });
    expect(out).toContain('huge.bin');
    expect(out).toContain('exceeds size limit');
  });

  it('notes when the body could not be extracted', () => {
    const out = formatMsgOutput({ ...baseView, body: { text: '', format: 'none' } });
    expect(out).toContain('(no text body)');
  });

  it('omits the CC line when there are no CC recipients', () => {
    expect(formatMsgOutput(baseView)).not.toContain('Cc:');
  });

  it('keeps the attachment list when the body is truncated to budget', () => {
    // 100,000-character body against a budget argument of 100: the headers and
    // the attachment list must survive while the overall output shrinks.
    const longBody = 'word '.repeat(20000);
    const out = assembleMsgOutput(
      {
        ...baseView,
        body: { text: longBody, format: 'plain' },
        attachments: [{ fileName: 'a.pdf', contentLength: 10, savedPath: 'input/a.pdf' }],
      },
      100,
      'mail.msg',
    );
    expect(out).toContain('input/a.pdf');
    expect(out).toContain('Subject: Quarterly report');
    expect(out.length).toBeLessThan(longBody.length);
  });

  it('includes the CC line when CC recipients exist', () => {
    const out = formatMsgOutput({ ...baseView, cc: [{ email: 'carol@example.com' }] });
    expect(out).toContain('Cc: carol@example.com');
  });
});

// executeReadMsg end-to-end against the real fixture file.
//
// Every test runs in a fresh temporary workspace that contains a copy of the
// fixture as `mail.msg`; the workspace is deleted again afterwards.
describe('executeReadMsg (integration)', () => {
  let workspace: string;

  // Built lazily because `workspace` is only assigned in beforeEach.
  const ctx = (): ToolContext => ({ workspacePath: workspace, editAllowed: true });

  beforeEach(() => {
    workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'readmsg-'));
    fs.copyFileSync(FIXTURE, path.join(workspace, 'mail.msg'));
  });

  afterEach(() => {
    fs.rmSync(workspace, { recursive: true, force: true });
  });

  it('extracts headers and body from a real .msg file', async () => {
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    expect(result.isError).toBeFalsy();
    expect(result.output).toContain('Subject: attachmentFiles');
    expect(result.output).toContain('From: hmailuser <hmailuser@hmailserver.test>');
    expect(result.output).toContain('To: hmailuser@hmailserver.test');
    expect(result.output).toContain('attachmentFiles');
  });

  it('saves attachments to input/ and lists them', async () => {
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    // Fail early with a clear signal if the call itself errored.
    expect(result.isError).toBeFalsy();
    expect(result.output).toContain('Attachments (3)');
    for (const [name, size] of [
      ['jpg.jpg', 726],
      ['png.png', 134],
      ['tif.tif', 664],
    ] as const) {
      const saved = path.join(workspace, 'input', name);
      expect(fs.existsSync(saved)).toBe(true);
      expect(fs.statSync(saved).size).toBe(size);
      // path.join yields the platform's separator for the expected path.
      expect(result.output).toContain(path.join('input', name));
    }
  });

  it('rejects paths outside the workspace', async () => {
    const result = await executeReadMsg({ file_path: '../../etc/passwd' }, ctx());
    expect(result.isError).toBe(true);
  });

  it('reports a clear error for a non-.msg file', async () => {
    // Correct extension, but the content is plain text.
    fs.writeFileSync(path.join(workspace, 'junk.msg'), 'not a real msg file');
    const result = await executeReadMsg({ file_path: 'junk.msg' }, ctx());
    expect(result.isError).toBe(true);
    expect(result.output).toContain('ReadMsg');
  });

  it('does not write attachments in a read-only phase', async () => {
    const result = await executeReadMsg(
      { file_path: 'mail.msg' },
      { workspacePath: workspace, editAllowed: false },
    );
    expect(result.isError).toBeFalsy();
    expect(fs.existsSync(path.join(workspace, 'input', 'jpg.jpg'))).toBe(false);
    expect(result.output).toContain('read-only');
  });

  it('does not overwrite an existing file in input/', async () => {
    // Pre-create a file that collides with one of the attachment names.
    fs.mkdirSync(path.join(workspace, 'input'), { recursive: true });
    fs.writeFileSync(path.join(workspace, 'input', 'jpg.jpg'), 'pre-existing');
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    expect(fs.readFileSync(path.join(workspace, 'input', 'jpg.jpg'), 'utf8')).toBe('pre-existing');
    expect(fs.existsSync(path.join(workspace, 'input', 'jpg-1.jpg'))).toBe(true);
    expect(result.output).toContain('jpg-1.jpg');
  });

  it('rejects files exceeding the configured size limit', async () => {
    // A 0.001 MB limit is smaller than the fixture: its three attachments
    // alone total 1524 bytes (726 + 134 + 664).
    const result = await executeReadMsg(
      { file_path: 'mail.msg' },
      { workspacePath: workspace, editAllowed: true, toolsConfig: { officeMsgMaxSizeMb: 0.001 } },
    );
    expect(result.isError).toBe(true);
    expect(result.output).toMatch(/size|limit|too large/i);
  });

  it('is registered and routed through the office module dispatch', async () => {
    expect(OFFICE_TOOL_DEFS.ReadMsg).toBeDefined();
    const result = await officeExecuteTool('ReadMsg', { file_path: 'mail.msg' }, ctx());
    expect(result?.isError).toBeFalsy();
    expect(result?.output).toContain('Subject: attachmentFiles');
  });
});