maestro/src/engine/skills-scanner.ts
2026-06-03 05:08:00 +00:00

201 lines
5.0 KiB
TypeScript

/**
* Security scanner for skill content (SKILL.md and embedded scripts).
* Detects dangerous patterns and returns structured findings.
*/
import { readdirSync, lstatSync, readFileSync } from 'fs';
import { join, relative } from 'path';
/**
 * One occurrence of a dangerous pattern found while scanning skill content.
 */
export interface ScanFinding {
  /** Severity of the pattern that produced this finding. */
  severity: 'medium' | 'high';
  /** Pattern category name. */
  pattern: string;
  /** The matched text (truncated to 100 chars). */
  match: string;
  /** 1-based line number. */
  line: number;
  /** Relative file path within a skill directory. */
  file?: string;
}
/**
 * A single detection rule: a named regular expression with the severity
 * assigned to anything it matches.
 */
interface PatternDef {
  /** Severity copied onto every finding produced by this rule. */
  severity: 'medium' | 'high';
  /** Category name reported as the finding's `pattern`. */
  name: string;
  /** Expression run against the scanned text. */
  regex: RegExp;
}
/**
 * Detection rules, medium-severity first and high-severity second.
 * Every expression carries the `g` flag so all occurrences can be collected.
 */
const PATTERNS: PatternDef[] = (() => {
  // Expands a compact [name, regex] table into full rule objects for one severity.
  const withSeverity = (
    severity: PatternDef['severity'],
    table: Array<[string, RegExp]>,
  ): PatternDef[] => table.map(([name, regex]) => ({ severity, name, regex }));

  return [
    // --- Medium severity ---
    ...withSeverity('medium', [
      // Any http:// or https:// URL, up to whitespace, ')' or a quote.
      ['external-url', /https?:\/\/[^\s)'"]+/g],
      // Network command names appearing as whole words.
      ['network-cmd-direct', /\b(?:curl|wget|nc|ncat|netcat)\b/g],
      // Library-level network access: urllib, http.client, require('http' | 'https' | 'net'), fetch(.
      ['network-cmd-indirect', /\b(?:urllib|http\.client|require\s*\(\s*['"](?:http|https|net)['"]\s*\)|fetch\s*\()/g],
      // Tool names WebFetch / DownloadFile.
      ['exfil-tool', /\b(?:WebFetch|DownloadFile)\b/g],
    ]),
    // --- High severity ---
    ...withSeverity('high', [
      // Tool names ReadUserMemory / UpdateUserMemory.
      ['other-user-resource', /\b(?:ReadUserMemory|UpdateUserMemory)\b/g],
      // "../" segments or paths under "/home/".
      ['path-traversal', /\.\.\/|\/home\//g],
      // Broad data-collection wording (Japanese and English), case-insensitive.
      ['broad-collection', /(?:全ファイル|秘密情報|all\s+files|secret|credential|password|private\s+key)/gi],
      // Typical prompt-injection phrases, case-insensitive.
      ['prompt-injection', /\b(?:ignore\s+previous|disregard|system\s+prompt|override\s+instructions|forget\s+(?:your|all|the)\s+(?:instructions|rules))\b/gi],
    ]),
  ];
})();
/**
 * Shorten `s` to at most `max` UTF-16 code units.
 *
 * The cut never lands between the two halves of a surrogate pair: if it would,
 * the dangling high surrogate is dropped too, so the result stays well-formed
 * (it is then one unit shorter than `max`).
 *
 * @param s Text to shorten.
 * @param max Maximum length in UTF-16 code units; values below 0 are treated as 0.
 * @returns `s` unchanged when it already fits, otherwise its leading part.
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;

  let end = Math.max(0, Math.trunc(max));
  if (end > 0) {
    const before = s.charCodeAt(end - 1);
    const after = s.charCodeAt(end);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    // Cutting here would leave a lone high surrogate at the end of the result.
    if (splitsPair) end -= 1;
  }
  return s.slice(0, end);
}
/**
 * Scan skill content against the known dangerous patterns.
 *
 * Every pattern is run over the whole text rather than over each line in
 * isolation, so whitespace-tolerant patterns (those using `\s`) also match
 * when the whitespace is a line break, e.g. a phrase wrapped across two lines.
 * In that case `match` contains the line break and `line` is the line on
 * which the match starts.
 *
 * Findings are ordered by line, then by the pattern's position in `PATTERNS`,
 * then by position within the text.
 *
 * @param content Full text to scan.
 * @returns One finding per match; `line` is 1-based, `match` is truncated to
 *   100 chars, and `file` is not set.
 */
export function scanSkillContent(content: string): ScanFinding[] {
  const hits: Array<{ finding: ScanFinding; rank: number; offset: number }> = [];

  PATTERNS.forEach((pat, rank) => {
    const { regex } = pat;
    // The regex objects are shared module state; always start from the beginning.
    regex.lastIndex = 0;

    // Matches arrive in ascending offset order, so line numbers can be
    // tracked incrementally: `line` is the line containing offset `counted`.
    let line = 1;
    let counted = 0;

    let m: RegExpExecArray | null;
    while ((m = regex.exec(content)) !== null) {
      for (; counted < m.index; counted++) {
        if (content.charCodeAt(counted) === 10 /* '\n' */) line++;
      }
      hits.push({
        finding: {
          severity: pat.severity,
          pattern: pat.name,
          match: truncate(m[0], 100),
          line,
        },
        rank,
        offset: m.index,
      });

      // Without the `g` flag exec() would return the same match forever.
      if (!regex.global) break;
      // A zero-length match does not advance lastIndex on its own.
      if (m[0].length === 0) regex.lastIndex++;
    }
    regex.lastIndex = 0;
  });

  hits.sort(
    (a, b) => a.finding.line - b.finding.line || a.rank - b.rank || a.offset - b.offset,
  );
  return hits.map((hit) => hit.finding);
}
/**
 * Summarise a list of findings by its most severe entry.
 *
 * @param findings Findings to inspect; may be empty.
 * @returns `'high'` if any finding is high, otherwise `'medium'` if any is
 *   medium, otherwise `'none'`.
 */
export function maxSeverity(findings: ScanFinding[]): 'high' | 'medium' | 'none' {
  const present = new Set(findings.map((finding) => finding.severity));
  if (present.has('high')) {
    return 'high';
  }
  return present.has('medium') ? 'medium' : 'none';
}
/**
 * Optional limits for a directory scan.
 */
export interface ScanDirectoryOptions {
  /** Directory depth limit (default: 3). */
  maxDepth?: number;
  /** File count limit (default: 100). */
  maxFiles?: number;
}
/** Files larger than this many bytes (256 KB) are skipped by the directory scan. */
const MAX_FILE_SIZE = 256 * 1024;
/** Depth limit used when the caller does not supply `maxDepth`. */
const DEFAULT_MAX_DEPTH = 3;
/** File count limit used when the caller does not supply `maxFiles`. */
const DEFAULT_MAX_FILES = 100;
/**
 * Heuristic binary check: a buffer counts as binary when a null byte occurs
 * anywhere in its first 512 bytes.
 *
 * @param buf Raw file content.
 * @returns `true` if a 0x00 byte is found in the inspected prefix; `false`
 *   otherwise, including for an empty buffer.
 */
function isBinary(buf: Buffer): boolean {
  // subarray() clamps to the buffer length and does not copy.
  const head = buf.subarray(0, 512);
  return head.includes(0);
}
/**
 * Scan all text files in a skill directory recursively.
 *
 * Traversal rules:
 * - `dirPath` itself is depth 0; directories deeper than `maxDepth` are not entered.
 * - Entries are visited in sorted name order. `readdirSync` makes no ordering
 *   promise, so sorting keeps both the order of findings and the set of files
 *   that fit within `maxFiles` reproducible across platforms.
 * - Symlinks, non-regular files, files larger than 256 KB and binary files
 *   are skipped. Only files that are actually scanned count toward `maxFiles`.
 * - Unreadable directories, entries and files are skipped rather than
 *   aborting the scan.
 *
 * NOTE(review): skipped files produce no finding, so callers cannot tell
 * whether part of the directory went unscanned. Confirm that is acceptable.
 *
 * @param dirPath Root directory of the skill.
 * @param options Optional depth / file-count limits (defaults: 3 and 100).
 * @returns Findings from every scanned file, each with `file` set to the
 *   path relative to `dirPath`.
 */
export function scanSkillDirectory(
  dirPath: string,
  options?: ScanDirectoryOptions,
): ScanFinding[] {
  const maxDepth = options?.maxDepth ?? DEFAULT_MAX_DEPTH;
  const maxFiles = options?.maxFiles ?? DEFAULT_MAX_FILES;
  const findings: ScanFinding[] = [];
  let fileCount = 0;

  function walk(currentDir: string, depth: number): void {
    if (depth > maxDepth) return;
    if (fileCount >= maxFiles) return;

    let entries: string[];
    try {
      // Sorted by UTF-16 code unit: deterministic and locale-independent.
      entries = readdirSync(currentDir).sort();
    } catch {
      return; // unreadable directory — skip
    }

    for (const entry of entries) {
      if (fileCount >= maxFiles) return;

      const fullPath = join(currentDir, entry);
      let stat;
      try {
        // lstat (not stat) so that symlinks are seen as links, not followed.
        stat = lstatSync(fullPath);
      } catch {
        continue; // unreadable entry — skip
      }

      // Skip symlinks
      if (stat.isSymbolicLink()) continue;

      if (stat.isDirectory()) {
        walk(fullPath, depth + 1);
        continue;
      }
      if (!stat.isFile()) continue;

      // Skip files larger than 256 KB
      if (stat.size > MAX_FILE_SIZE) continue;

      let buf: Buffer;
      try {
        buf = readFileSync(fullPath);
      } catch {
        continue; // unreadable file — skip
      }

      // The file may have grown between lstat and read; enforce the cap on
      // what was actually read as well.
      if (buf.length > MAX_FILE_SIZE) continue;

      // Skip binary files
      if (buf.length > 0 && isBinary(buf)) continue;

      fileCount++;
      const relPath = relative(dirPath, fullPath);
      for (const finding of scanSkillContent(buf.toString('utf-8'))) {
        findings.push({ ...finding, file: relPath });
      }
    }
  }

  walk(dirPath, 0);
  return findings;
}