/**
 * Unit and integration tests for the Outlook `.msg` reader (`./msg.js`).
 *
 * NOTE(review): in the copy of this file that was reviewed, every `<...>`
 * sequence had been stripped from the string literals. That left the HTML
 * inputs as unterminated multi-line literals and truncated the
 * `Name <email>` expectations. The markup below is a minimal reconstruction
 * that matches each test's title and expected output; confirm it against the
 * version-controlled original before relying on exact inputs.
 */
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { fileURLToPath } from 'url';
import {
  formatAddress,
  stripHtml,
  selectMsgBody,
  sanitizeAttachmentName,
  formatMsgOutput,
  assembleMsgOutput,
  pickEmail,
  isParsedMsgValid,
  executeReadMsg,
  type MsgView,
} from './msg.js';
import type { ToolContext } from './core.js';
import { executeTool as officeExecuteTool, TOOL_DEFS as OFFICE_TOOL_DEFS } from './office.js';

/** Absolute path of the sample message copied into each temporary workspace. */
const FIXTURE = path.join(
  path.dirname(fileURLToPath(import.meta.url)),
  '__fixtures__',
  'attachmentFiles.msg',
);

describe('formatAddress', () => {
  it('renders name and email together', () => {
    // NOTE(review): the angle-bracketed address was restored from the input
    // above; the stripped source only showed 'Alice '.
    expect(formatAddress({ name: 'Alice', email: 'alice@example.com' })).toBe(
      'Alice <alice@example.com>',
    );
  });

  it('renders name only when email is missing', () => {
    expect(formatAddress({ name: 'Alice' })).toBe('Alice');
  });

  it('renders email only when name is missing', () => {
    expect(formatAddress({ email: 'alice@example.com' })).toBe('alice@example.com');
  });

  it('falls back to a placeholder when both are missing', () => {
    expect(formatAddress({})).toBe('(unknown)');
  });
});

describe('stripHtml', () => {
  // NOTE(review): all HTML inputs in this suite are reconstructed (see the
  // file header); only the expected outputs survived in the reviewed copy.

  it('removes tags and decodes entities', () => {
    expect(stripHtml('<p>Hello <b>world</b> &amp; co</p>')).toBe('Hello world & co');
  });

  it('drops script and style content', () => {
    const html = '<style>p { color: red; }</style><p>Keep</p><script>alert(1)</script>';
    expect(stripHtml(html)).toBe('Keep');
  });

  it('turns block boundaries into newlines', () => {
    expect(stripHtml('<div>line1</div><div>line2</div>')).toBe('line1\nline2');
  });

  it('decodes valid numeric entities', () => {
    expect(stripHtml('<p>&#65;&#x42;</p>')).toBe('AB');
  });

  it('does not throw on out-of-range numeric entities', () => {
    expect(() => stripHtml('<p>&#99999999999;</p>')).not.toThrow();
    // NOTE(review): the stripped source showed U+FFFD for both the input and
    // the expectation, which fits either "replace with U+FFFD" or "leave the
    // entity untouched". Both are accepted until the original is confirmed.
    expect(['A\uFFFDB', 'A&#x110000;B']).toContain(stripHtml('A&#x110000;B'));
  });
});

describe('selectMsgBody', () => {
  // NOTE(review): HTML inputs in this suite are reconstructed (see the file
  // header).

  it('prefers the plain-text body', () => {
    expect(selectMsgBody({ body: 'plain text', bodyHtml: '<p>html</p>' })).toEqual({
      text: 'plain text',
      format: 'plain',
    });
  });

  it('falls back to stripped HTML when no plain body exists', () => {
    expect(selectMsgBody({ bodyHtml: '<p>html body</p>' })).toEqual({
      text: 'html body',
      format: 'html',
    });
  });

  // Moved here from the isParsedMsgValid suite: it exercises selectMsgBody.
  it('treats a whitespace-only plain body as empty and uses HTML', () => {
    expect(selectMsgBody({ body: ' \n ', bodyHtml: '<p>real</p>' })).toEqual({
      text: 'real',
      format: 'html',
    });
  });

  it('reports none when no body is present', () => {
    expect(selectMsgBody({})).toEqual({ text: '', format: 'none' });
  });

  it('decodes PidTagHtml (html) when body and bodyHtml are absent', () => {
    const html = new TextEncoder().encode('<p>from pidtag</p>');
    expect(selectMsgBody({ html })).toEqual({ text: 'from pidtag', format: 'html' });
  });

  it('prefers plain body over the PidTagHtml field', () => {
    const html = new TextEncoder().encode('<p>html</p>');
    expect(selectMsgBody({ body: 'plain', html })).toEqual({ text: 'plain', format: 'plain' });
  });

  it('falls back to PidTagHtml when bodyHtml is empty/whitespace', () => {
    const html = new TextEncoder().encode('<p>pidtag body</p>');
    expect(selectMsgBody({ bodyHtml: ' ', html })).toEqual({
      text: 'pidtag body',
      format: 'html',
    });
  });
});

describe('pickEmail', () => {
  it('prefers a real SMTP address over a legacy EX DN', () => {
    // Argument order must not matter.
    expect(pickEmail('/O=EX/OU=x/CN=alice', 'alice@example.com')).toBe('alice@example.com');
    expect(pickEmail('alice@example.com', '/O=EX/OU=x/CN=alice')).toBe('alice@example.com');
  });

  it('falls back to the EX DN when no SMTP-looking address exists', () => {
    expect(pickEmail(undefined, '/O=EX/OU=x/CN=alice')).toBe('/O=EX/OU=x/CN=alice');
  });

  it('returns undefined when nothing usable is provided', () => {
    expect(pickEmail(undefined, undefined)).toBeUndefined();
    expect(pickEmail('', ' ')).toBeUndefined();
  });
});

describe('isParsedMsgValid', () => {
  it('accepts a parsed Outlook message', () => {
    expect(isParsedMsgValid({ dataType: 'msg' })).toBe(true);
  });

  it('rejects an unsupported CFBF result (old .doc/.xls, corrupted compound file)', () => {
    expect(isParsedMsgValid({ error: 'Unsupported file type!', dataType: null })).toBe(false);
    expect(isParsedMsgValid({ dataType: null })).toBe(false);
    expect(isParsedMsgValid({ dataType: 'attachment' })).toBe(false);
  });
});

describe('sanitizeAttachmentName', () => {
  it('keeps a normal filename unchanged', () => {
    expect(sanitizeAttachmentName('report.pdf', 0)).toBe('report.pdf');
  });

  it('strips directory components to prevent path traversal', () => {
    expect(sanitizeAttachmentName('../../etc/passwd', 0)).toBe('passwd');
    expect(sanitizeAttachmentName('foo/bar/baz.txt', 0)).toBe('baz.txt');
    expect(sanitizeAttachmentName('a\\b\\c.doc', 0)).toBe('c.doc');
  });

  it('removes control characters and null bytes', () => {
    expect(sanitizeAttachmentName('na\x00me.txt', 0)).toBe('name.txt');
    expect(sanitizeAttachmentName('tab\tname.txt', 0)).toBe('tabname.txt');
  });

  it('preserves spaces inside the filename', () => {
    expect(sanitizeAttachmentName('my report.pdf', 0)).toBe('my report.pdf');
  });

  it('falls back to an indexed name when the result is empty', () => {
    // The fallback is 1-based: index 2 yields "attachment-3".
    expect(sanitizeAttachmentName('', 2)).toBe('attachment-3');
    expect(sanitizeAttachmentName('...', 0)).toBe('attachment-1');
  });
});

describe('formatMsgOutput', () => {
  /** Minimal message view shared by the tests below; each test overrides what it needs. */
  const baseView: MsgView = {
    subject: 'Quarterly report',
    from: { name: 'Alice', email: 'alice@example.com' },
    to: [{ name: 'Bob', email: 'bob@example.com' }],
    cc: [],
    date: 'Mon, 1 Jun 2026 10:00:00 +0900',
    body: { text: 'See attached.', format: 'plain' },
    attachments: [],
  };

  it('renders the header block and body', () => {
    const out = formatMsgOutput(baseView);
    expect(out).toContain('Subject: Quarterly report');
    // NOTE(review): the angle-bracketed addresses were restored from baseView;
    // the stripped source only showed 'From: Alice ' and 'To: Bob '.
    expect(out).toContain('From: Alice <alice@example.com>');
    expect(out).toContain('To: Bob <bob@example.com>');
    expect(out).toContain('Date: Mon, 1 Jun 2026 10:00:00 +0900');
    expect(out).toContain('See attached.');
  });

  it('lists saved attachments with their paths and sizes', () => {
    const out = formatMsgOutput({
      ...baseView,
      attachments: [{ fileName: 'report.pdf', contentLength: 2048, savedPath: 'input/report.pdf' }],
    });
    expect(out).toContain('Attachments (1)');
    expect(out).toContain('report.pdf');
    expect(out).toContain('input/report.pdf');
    expect(out).toContain('2048');
  });

  it('shows a skip reason for attachments that were not saved', () => {
    const out = formatMsgOutput({
      ...baseView,
      attachments: [{ fileName: 'huge.bin', skipped: 'exceeds size limit' }],
    });
    expect(out).toContain('huge.bin');
    expect(out).toContain('exceeds size limit');
  });

  it('notes when the body could not be extracted', () => {
    const out = formatMsgOutput({ ...baseView, body: { text: '', format: 'none' } });
    expect(out).toContain('(no text body)');
  });

  it('omits the CC line when there are no CC recipients', () => {
    expect(formatMsgOutput(baseView)).not.toContain('Cc:');
  });

  it('keeps the attachment list when the body is truncated to budget', () => {
    // 100,000 characters of body against a budget argument of 100.
    const longBody = 'word '.repeat(20000);
    const out = assembleMsgOutput(
      {
        ...baseView,
        body: { text: longBody, format: 'plain' },
        attachments: [{ fileName: 'a.pdf', contentLength: 10, savedPath: 'input/a.pdf' }],
      },
      100,
      'mail.msg',
    );
    expect(out).toContain('input/a.pdf');
    expect(out).toContain('Subject: Quarterly report');
    expect(out.length).toBeLessThan(longBody.length);
  });

  it('includes the CC line when CC recipients exist', () => {
    const out = formatMsgOutput({ ...baseView, cc: [{ email: 'carol@example.com' }] });
    expect(out).toContain('Cc: carol@example.com');
  });
});

describe('executeReadMsg (integration)', () => {
  let workspace: string;

  /** Builds an edit-enabled tool context bound to the current temp workspace. */
  const ctx = (): ToolContext => ({ workspacePath: workspace, editAllowed: true });

  beforeEach(() => {
    // Each test gets a fresh workspace holding a copy of the fixture as mail.msg.
    workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'readmsg-'));
    fs.copyFileSync(FIXTURE, path.join(workspace, 'mail.msg'));
  });

  afterEach(() => {
    fs.rmSync(workspace, { recursive: true, force: true });
  });

  it('extracts headers and body from a real .msg file', async () => {
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    expect(result.isError).toBeFalsy();
    expect(result.output).toContain('Subject: attachmentFiles');
    // NOTE(review): an angle-bracketed sender address was probably stripped
    // after the trailing space. It cannot be recovered from this file, so the
    // surviving prefix is asserted as-is.
    expect(result.output).toContain('From: hmailuser ');
    expect(result.output).toContain('To: hmailuser@hmailserver.test');
    expect(result.output).toContain('attachmentFiles');
  });

  it('saves attachments to input/ and lists them', async () => {
    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());
    expect(result.output).toContain('Attachments (3)');

    // File name and expected on-disk size for each attachment.
    const expectedFiles = [
      ['jpg.jpg', 726],
      ['png.png', 134],
      ['tif.tif', 664],
    ] as const;

    for (const [name, size] of expectedFiles) {
      const saved = path.join(workspace, 'input', name);
      expect(fs.existsSync(saved)).toBe(true);
      expect(fs.statSync(saved).size).toBe(size);
      expect(result.output).toContain(path.join('input', name));
    }
  });

  it('rejects paths outside the workspace', async () => {
    const result = await executeReadMsg({ file_path: '../../etc/passwd' }, ctx());
    expect(result.isError).toBe(true);
  });

  it('reports a clear error for a non-.msg file', async () => {
    fs.writeFileSync(path.join(workspace, 'junk.msg'), 'not a real msg file');
    const result = await executeReadMsg({ file_path: 'junk.msg' }, ctx());
    expect(result.isError).toBe(true);
    expect(result.output).toContain('ReadMsg');
  });

  it('does not write attachments in a read-only phase', async () => {
    const result = await executeReadMsg(
      { file_path: 'mail.msg' },
      { workspacePath: workspace, editAllowed: false },
    );
    expect(result.isError).toBeFalsy();
    expect(fs.existsSync(path.join(workspace, 'input', 'jpg.jpg'))).toBe(false);
    expect(result.output).toContain('read-only');
  });

  it('does not overwrite an existing file in input/', async () => {
    fs.mkdirSync(path.join(workspace, 'input'), { recursive: true });
    fs.writeFileSync(path.join(workspace, 'input', 'jpg.jpg'), 'pre-existing');

    const result = await executeReadMsg({ file_path: 'mail.msg' }, ctx());

    // The existing file is untouched and the attachment lands under a suffixed name.
    expect(fs.readFileSync(path.join(workspace, 'input', 'jpg.jpg'), 'utf8')).toBe('pre-existing');
    expect(fs.existsSync(path.join(workspace, 'input', 'jpg-1.jpg'))).toBe(true);
    expect(result.output).toContain('jpg-1.jpg');
  });

  it('rejects files exceeding the configured size limit', async () => {
    const result = await executeReadMsg(
      { file_path: 'mail.msg' },
      { workspacePath: workspace, editAllowed: true, toolsConfig: { officeMsgMaxSizeMb: 0.001 } },
    );
    expect(result.isError).toBe(true);
    expect(result.output).toMatch(/size|limit|too large/i);
  });

  it('is registered and routed through the office module dispatch', async () => {
    expect(OFFICE_TOOL_DEFS.ReadMsg).toBeDefined();
    const result = await officeExecuteTool('ReadMsg', { file_path: 'mail.msg' }, ctx());
    expect(result?.isError).toBeFalsy();
    expect(result?.output).toContain('Subject: attachmentFiles');
  });
});