maestro/src/engine/tools/knowledge.ts
2026-06-03 05:08:00 +00:00

478 lines
16 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// knowledge.ts — DKS (Document Knowledge Service) client tools
import { readFileSync, mkdirSync, writeFileSync, appendFileSync } from 'fs';
import { basename, isAbsolute, join, relative, resolve, sep } from 'path';
import { ToolDef } from '../../llm/openai-compat.js';
import type { ToolContext, ToolResult } from './core.js';
import { logger } from '../../logger.js';
import { generateRawFilename } from './raw-save.js';
// --- Config access ---
/**
 * Settings for a single knowledge namespace, as read from
 * `ctx.toolsConfig.knowledgeNamespaces[<namespace>]`.
 */
interface KnowledgeNamespaceConfig {
// Credential for this namespace; dksFetch sends it as `Authorization: Bearer <apiKey>`.
apiKey: string;
}
/**
 * Reads the knowledge-service base URL from the tool configuration.
 *
 * @param ctx - Tool context whose optional `toolsConfig` carries the setting.
 * @returns The configured `knowledgeServiceUrl`, or `null` when `toolsConfig`
 *   or the URL itself is null/undefined. An empty string is returned as-is.
 */
function getServiceUrl(ctx: ToolContext): string | null {
  const configuredUrl = ctx.toolsConfig?.knowledgeServiceUrl;
  if (configuredUrl === undefined || configuredUrl === null) {
    return null;
  }
  return configuredUrl;
}
/**
 * Reads the namespace table from the tool configuration.
 *
 * @param ctx - Tool context whose optional `toolsConfig` carries the setting.
 * @returns The `knowledgeNamespaces` record (namespace name -> config), or
 *   `null` when `toolsConfig` or the record is null/undefined.
 */
function getNamespaces(ctx: ToolContext): Record<string, KnowledgeNamespaceConfig> | null {
  const table = ctx.toolsConfig?.knowledgeNamespaces;
  if (table === undefined || table === null) {
    return null;
  }
  return table;
}
/**
 * Looks up the API key configured for one namespace.
 *
 * @param ctx - Tool context whose optional `toolsConfig` carries the namespace table.
 * @param namespace - Key into `toolsConfig.knowledgeNamespaces`.
 * @returns The namespace's `apiKey`, or `null` when the config, the namespace
 *   entry, or the key is null/undefined.
 */
function getApiKey(ctx: ToolContext, namespace: string): string | null {
  const table = ctx.toolsConfig?.knowledgeNamespaces;
  if (table == null) {
    return null;
  }
  const entry = table[namespace];
  if (entry == null) {
    return null;
  }
  return entry.apiKey ?? null;
}
// --- Fetch helper ---
/**
 * Sends an authenticated request to the knowledge service with a timeout.
 *
 * The request URL is `serviceUrl` (trailing slashes removed) followed by `path`.
 * An `Authorization: Bearer <apiKey>` header is always set first; headers supplied
 * by the caller are merged afterwards and therefore win on a key clash. Any
 * `signal` passed in `options` is replaced by the internal abort signal.
 *
 * @param serviceUrl - Base URL of the service.
 * @param path - Path appended verbatim to the base URL.
 * @param apiKey - Bearer token for the request.
 * @param options - `fetch` options plus an optional `timeoutMs` (default 10000).
 * @returns The `fetch` response. The timer only covers the time until `fetch`
 *   settles; it is cleared before the caller reads the body.
 * @throws Whatever `fetch` rejects with, including an abort error on timeout.
 */
async function dksFetch(
  serviceUrl: string,
  path: string,
  apiKey: string,
  options: RequestInit & { timeoutMs?: number } = {},
): Promise<Response> {
  const endpoint = serviceUrl.replace(/\/+$/, '') + path;
  const timeoutMs = options.timeoutMs ?? 10000;
  const abortController = new AbortController();
  const timeoutHandle = setTimeout(() => {
    abortController.abort();
  }, timeoutMs);
  try {
    // Strip our private option so it is not forwarded to fetch().
    const { timeoutMs: _ignored, ...init } = options;
    const callerHeaders = (init.headers as Record<string, string>) || {};
    const headers: Record<string, string> = Object.assign(
      { 'Authorization': `Bearer ${apiKey}` },
      callerHeaders,
    );
    const response = await fetch(endpoint, { ...init, headers, signal: abortController.signal });
    return response;
  } finally {
    clearTimeout(timeoutHandle);
  }
}
// --- History logging ---
/**
 * Shape of one entry that appendKnowledgeHistory serializes as a JSON line
 * into `logs/knowledge-history.jsonl`.
 */
interface KnowledgeHistoryRecord {
// Time of the entry; callers in this file pass `new Date().toISOString()`.
timestamp: string;
// Name of the tool that produced the entry (e.g. 'SearchKnowledge').
tool: string;
// Namespace the call targeted, when the tool takes one.
namespace?: string;
// Search query text (set by SearchKnowledge).
query?: string;
// Number of sections in the search response (set by SearchKnowledge).
sectionsFound?: number;
// Number of page images saved locally (set by SearchKnowledge).
imagesDownloaded?: number;
// Error text when the call failed.
error?: string;
// Elapsed time of the call in milliseconds.
durationMs?: number;
// Tool-specific extras are allowed; this file also writes keys such as
// `jobId`, `pagesDetected`, `imageErrors`, `namespacesFound` and `documentsFound`.
[key: string]: unknown;
}
/**
 * Appends one record as a JSON line to `<workspace>/logs/knowledge-history.jsonl`.
 *
 * Best-effort: the `logs` directory is created when missing, and any failure
 * (directory creation, serialization, write) is reported through
 * `logger.warn` instead of being thrown.
 *
 * @param ctx - Tool context providing `workspacePath`.
 * @param record - Entry to serialize with `JSON.stringify`.
 */
function appendKnowledgeHistory(ctx: ToolContext, record: KnowledgeHistoryRecord): void {
  try {
    const logDirectory = join(ctx.workspacePath, 'logs');
    const targetFile = join(logDirectory, 'knowledge-history.jsonl');
    mkdirSync(logDirectory, { recursive: true });
    const line = JSON.stringify(record) + '\n';
    appendFileSync(targetFile, line, 'utf-8');
  } catch (err) {
    const reason = (err as Error).message;
    logger.warn(`[knowledge] failed to write history: ${reason}`);
  }
}
// --- Raw response save helper ---
/**
 * Stores a raw service payload under `<workspace>/logs/raw/` and records it in
 * the `<workspace>/logs/rawdata-history.jsonl` index.
 *
 * The payload file is pretty-printed JSON (2-space indent) named by
 * `generateRawFilename(toolName, '.json')`. The index line holds the current
 * ISO timestamp, the tool name, the file name and a byte count.
 * Best-effort: any failure is logged with `logger.warn` and not thrown.
 *
 * @param ctx - Tool context providing `workspacePath`.
 * @param toolName - Tool whose response is being saved.
 * @param data - Payload to serialize.
 */
function saveRawResponse(ctx: ToolContext, toolName: string, data: unknown): void {
  try {
    const logsRoot = join(ctx.workspacePath, 'logs');
    const rawDirectory = join(logsRoot, 'raw');
    mkdirSync(rawDirectory, { recursive: true });

    const filename = generateRawFilename(toolName, '.json');
    const prettyJson = JSON.stringify(data, null, 2);
    writeFileSync(join(rawDirectory, filename), prettyJson, 'utf-8');

    // NOTE(review): `bytes` is the size of the compact serialization, not of the
    // pretty-printed file written above — confirm this is the intended metric.
    const compactJson = JSON.stringify(data);
    const indexEntry = {
      timestamp: new Date().toISOString(),
      tool: toolName,
      filename,
      bytes: Buffer.byteLength(compactJson, 'utf-8'),
    };
    const indexFile = join(logsRoot, 'rawdata-history.jsonl');
    appendFileSync(indexFile, JSON.stringify(indexEntry) + '\n', 'utf-8');
  } catch (err) {
    const reason = (err as Error).message;
    logger.warn(`[knowledge] failed to save raw response: ${reason}`);
  }
}
// --- Page image download helper ---
/**
 * Downloads page images from the knowledge service into
 * `<workspace>/input/knowledge/<namespace>/`.
 *
 * Images are fetched one after another with a 15 s timeout each. A failure on
 * one image is recorded and does not stop the remaining downloads.
 *
 * @param serviceUrl - Base URL of the service.
 * @param apiKey - Bearer token for the requests.
 * @param imageUrls - Service-relative image URLs (e.g. `/pages/abc123.png`).
 * @param ctx - Tool context providing `workspacePath`.
 * @param namespace - Namespace used as the sub-directory name.
 * @returns `localPaths`: workspace-relative paths of the saved files, and
 *   `errors`: one `<relUrl>: <reason>` entry per failed image.
 */
async function downloadPageImages(
  serviceUrl: string,
  apiKey: string,
  imageUrls: string[],
  ctx: ToolContext,
  namespace: string,
): Promise<{ localPaths: string[]; errors: string[] }> {
  const localPaths: string[] = [];
  const errors: string[] = [];
  if (imageUrls.length === 0) return { localPaths, errors };
  const saveDir = join(ctx.workspacePath, 'input', 'knowledge', namespace);
  mkdirSync(saveDir, { recursive: true });
  for (const relUrl of imageUrls) {
    try {
      const resp = await dksFetch(serviceUrl, relUrl, apiKey, { timeoutMs: 15000 });
      if (!resp.ok) {
        errors.push(`${relUrl}: HTTP ${resp.status}`);
        continue;
      }
      const buffer = Buffer.from(await resp.arrayBuffer());
      const fileName = pageImageFileName(relUrl);
      writeFileSync(join(saveDir, fileName), buffer);
      localPaths.push(`input/knowledge/${namespace}/${fileName}`);
    } catch (e) {
      const msg = (e as Error).name === 'AbortError' ? 'timeout' : (e as Error).message;
      // Same label as the HTTP-status branch so every entry is keyed by the URL we were given.
      errors.push(`${relUrl}: ${msg}`);
    }
  }
  return { localPaths, errors };
}

/**
 * Derives a safe local file name from an image URL: the last path segment with
 * any query string or fragment removed (e.g. `/pages/abc123.png?sig=1` -> `abc123.png`).
 * Falls back to `page-<timestamp>.png` when the segment is empty, is `.`/`..`,
 * or contains a backslash, so the file can never land outside the save directory.
 */
function pageImageFileName(relUrl: string): string {
  const pathOnly = relUrl.split(/[?#]/, 1)[0] ?? '';
  const lastSegment = pathOnly.split('/').pop() ?? '';
  const unusable =
    lastSegment === '' || lastSegment === '.' || lastSegment === '..' || lastSegment.includes('\\');
  return unusable ? `page-${Date.now()}.png` : lastSegment;
}
// --- Tool Definitions ---
/**
 * Tool schema for `IngestDocument`: uploads a workspace file to the knowledge
 * base. Both `namespace` and `file_path` are required string parameters.
 *
 * The description text previously read "取り込む非同期。" (its parentheses were
 * lost); it is restored to "取り込む(非同期)。" so the model sees a well-formed
 * sentence stating that ingestion is asynchronous.
 */
const INGEST_DOCUMENT_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'IngestDocument',
    description: 'ドキュメントをナレッジベースに取り込む(非同期)。PDF, Word, PowerPoint, Excel, 画像, CSV/TSV に対応。',
    parameters: {
      type: 'object',
      properties: {
        namespace: { type: 'string', description: '対象ネームスペース' },
        file_path: { type: 'string', description: 'ワークスペース内のファイルパス' },
      },
      required: ['namespace', 'file_path'],
    },
  },
};
/**
 * Tool schema for `IngestStatus`: checks the progress of an ingest job.
 * Both `namespace` and `job_id` (the id returned by IngestDocument) are
 * required string parameters.
 */
const INGEST_STATUS_DEF: ToolDef = {
type: 'function',
function: {
name: 'IngestStatus',
description: '取込ジョブの進捗状況を確認する。',
parameters: {
type: 'object',
properties: {
namespace: { type: 'string', description: '対象ネームスペース' },
job_id: { type: 'string', description: 'IngestDocument で返却されたジョブID' },
},
required: ['namespace', 'job_id'],
},
},
};
/**
 * Tool schema for `SearchKnowledge`: natural-language search within one
 * namespace. Both `namespace` and `query` are required string parameters.
 * The description tells the model that page images are saved under
 * `input/knowledge/{ns}/` and can be viewed with ReadImage.
 */
const SEARCH_KNOWLEDGE_DEF: ToolDef = {
type: 'function',
function: {
name: 'SearchKnowledge',
description: 'DKS社内ナレッジを自然言語で検索する。関連セクションテキスト+ ページ画像が返り、画像は input/knowledge/{ns}/ に自動保存され ReadImage で閲覧可能。詳細は ReadToolDoc({ name: "SearchKnowledge" })。',
parameters: {
type: 'object',
properties: {
namespace: { type: 'string', description: '検索対象ネームスペース' },
query: { type: 'string', description: '検索クエリ(自然言語)' },
},
required: ['namespace', 'query'],
},
},
};
/**
 * Tool schema for `ListNamespaces`: lists the available knowledge-base
 * namespaces. Takes no parameters.
 */
const LIST_NAMESPACES_DEF: ToolDef = {
type: 'function',
function: {
name: 'ListNamespaces',
description: '利用可能なナレッジベースのネームスペース一覧を表示する。',
parameters: {
type: 'object',
properties: {},
required: [],
},
},
};
/**
 * Tool schema for `ListDocuments`: lists the documents stored in one
 * namespace. `namespace` is the only (required) parameter.
 */
const LIST_DOCUMENTS_DEF: ToolDef = {
type: 'function',
function: {
name: 'ListDocuments',
description: 'ネームスペース内の文書一覧を表示する。',
parameters: {
type: 'object',
properties: {
namespace: { type: 'string', description: '対象ネームスペース' },
},
required: ['namespace'],
},
},
};
/**
 * Exported registry of the knowledge tools, keyed by tool name.
 * The keys are the same names that executeTool dispatches on.
 */
export const TOOL_DEFS: Record<string, ToolDef> = {
IngestDocument: INGEST_DOCUMENT_DEF,
IngestStatus: INGEST_STATUS_DEF,
SearchKnowledge: SEARCH_KNOWLEDGE_DEF,
ListNamespaces: LIST_NAMESPACES_DEF,
ListDocuments: LIST_DOCUMENTS_DEF,
};
// --- Tool Execution ---
/**
 * Dispatches a tool call to the matching knowledge-tool implementation.
 *
 * @param name - Tool name as requested by the caller.
 * @param input - Raw tool arguments.
 * @param ctx - Tool context forwarded to the implementation.
 * @returns The implementation's result, or `null` when `name` is not one of
 *   the knowledge tools handled here.
 */
export async function executeTool(
  name: string,
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult | null> {
  switch (name) {
    case 'IngestDocument':
      return executeIngestDocument(input, ctx);
    case 'IngestStatus':
      return executeIngestStatus(input, ctx);
    case 'SearchKnowledge':
      return executeSearchKnowledge(input, ctx);
    case 'ListNamespaces':
      // This tool takes no arguments, so only the context is passed on.
      return executeListNamespaces(ctx);
    case 'ListDocuments':
      return executeListDocuments(input, ctx);
    default:
      return null;
  }
}
// --- Tool Implementations ---
/**
 * Uploads a workspace file to the knowledge service for asynchronous ingestion.
 *
 * `file_path` is resolved against the workspace root and must stay inside it:
 * absolute paths outside the workspace and `../` traversal are rejected, so a
 * tool call cannot upload arbitrary files from the host. (Symlinks are not
 * resolved; the check is purely lexical.) The namespace is percent-encoded
 * before being placed in the request path.
 *
 * Every outcome after the configuration checks is appended to the knowledge
 * history log.
 *
 * @param input - Tool arguments: `namespace` and `file_path`.
 * @param ctx - Tool context providing configuration and `workspacePath`.
 * @returns On success a message containing the job id and detected page count;
 *   otherwise an error result (`isError: true`). Never throws.
 */
async function executeIngestDocument(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };
  const namespace = input.namespace as string;
  const filePath = input.file_path;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };
  const startMs = Date.now();
  try {
    if (typeof filePath !== 'string' || filePath === '') {
      throw new Error('file_path must be a non-empty string');
    }
    // Lexical containment check: the path relative to the workspace root must
    // not be empty (the root itself), climb out with "..", or be absolute
    // (which happens for a different drive on Windows).
    const workspaceRoot = resolve(ctx.workspacePath);
    const resolvedPath = resolve(workspaceRoot, filePath);
    const relativeToRoot = relative(workspaceRoot, resolvedPath);
    const escapesWorkspace =
      relativeToRoot === '' ||
      relativeToRoot === '..' ||
      relativeToRoot.startsWith(`..${sep}`) ||
      isAbsolute(relativeToRoot);
    if (escapesWorkspace) {
      throw new Error(`file_path must point to a file inside the workspace: ${filePath}`);
    }
    const fileData = readFileSync(resolvedPath);
    const fileName = basename(resolvedPath) || 'unknown';
    const formData = new FormData();
    formData.append('file', new Blob([fileData]), fileName);
    const resp = await dksFetch(serviceUrl, `/namespaces/${encodeURIComponent(namespace)}/ingest`, apiKey, {
      method: 'POST',
      body: formData,
    });
    if (!resp.ok) {
      const errText = await resp.text();
      const output = `Ingest failed (${resp.status}): ${errText}`;
      appendKnowledgeHistory(ctx, {
        timestamp: new Date().toISOString(), tool: 'IngestDocument',
        namespace, error: output, durationMs: Date.now() - startMs,
      });
      return { output, isError: true };
    }
    const data = await resp.json() as any;
    const output = `取込を開始しました (job: ${data.job_id}, ${data.pages_detected}ページ検出)。完了確認は IngestStatus で可能です。他の作業を続行できます。`;
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'IngestDocument',
      namespace, fileName, jobId: data.job_id, pagesDetected: data.pages_detected,
      durationMs: Date.now() - startMs,
    });
    return { output, isError: false };
  } catch (e: any) {
    // dksFetch aborts the request after its default 10 s timeout.
    const error = e.name === 'AbortError'
      ? `IngestDocument timeout: DKS server did not respond within 10s`
      : `IngestDocument error: ${e.message}`;
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'IngestDocument',
      namespace, error, durationMs: Date.now() - startMs,
    });
    return { output: error, isError: true };
  }
}
/**
 * Reports the progress of an ingest job.
 *
 * `job_id` comes from the tool caller, so it is validated (non-empty string,
 * not `.`/`..`) and percent-encoded before being placed in the request path;
 * otherwise values such as `../documents` could redirect the authenticated
 * request to a different endpoint. The namespace is encoded the same way.
 *
 * @param input - Tool arguments: `namespace` and `job_id`.
 * @param ctx - Tool context providing configuration.
 * @returns A one-line status for the job (completed / failed / in progress),
 *   or an error result (`isError: true`). Never throws.
 */
async function executeIngestStatus(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };
  const namespace = input.namespace as string;
  const jobId = input.job_id;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };
  // "." and ".." survive percent-encoding and would be collapsed by URL normalization.
  if (typeof jobId !== 'string' || jobId === '' || jobId === '.' || jobId === '..') {
    return { output: 'IngestStatus error: job_id must be a valid job ID string', isError: true };
  }
  try {
    const jobPath = `/namespaces/${encodeURIComponent(namespace)}/jobs/${encodeURIComponent(jobId)}`;
    const resp = await dksFetch(serviceUrl, jobPath, apiKey);
    if (!resp.ok) {
      return { output: `Job not found (${resp.status})`, isError: true };
    }
    const data = await resp.json() as any;
    const p = data.progress || {};
    let statusLine: string;
    if (data.status === 'completed') {
      statusLine = `完了 (${data.document_name})`;
    } else if (data.status === 'failed') {
      statusLine = `失敗: ${data.error || 'unknown'}`;
    } else {
      // Any other status is reported as in progress, with zero defaults for missing counters.
      statusLine = `処理中: VLM ${p.vlm_completed || 0}/${p.total_pages || 0}ページ, ツリー構築: ${p.tree_built ? '完了' : '未完了'}`;
    }
    return { output: `ジョブ ${data.job_id}: ${statusLine}`, isError: false };
  } catch (e: any) {
    // dksFetch aborts the request after its default 10 s timeout.
    if (e.name === 'AbortError') {
      return { output: `IngestStatus timeout: DKS server did not respond within 10s`, isError: true };
    }
    return { output: `IngestStatus error: ${e.message}`, isError: true };
  }
}
/**
 * Runs a natural-language search against one namespace and formats the hits.
 *
 * Flow: POST the query (30 s timeout), save the raw response under `logs/raw/`,
 * download the returned page images into the workspace, then build a text
 * report with one heading per section followed by the saved image paths.
 * Every outcome after the configuration checks is appended to the knowledge
 * history log.
 *
 * Fixes over the previous version: the image heading had an unbalanced
 * parenthesis; a section without a `pages` array no longer aborts the whole
 * search; non-array `sections` / `page_image_urls` are treated as empty; and
 * the namespace is percent-encoded in the request path.
 *
 * @param input - Tool arguments: `namespace` and `query`.
 * @param ctx - Tool context providing configuration and `workspacePath`.
 * @returns The formatted report, or an error result (`isError: true`). Never throws.
 */
async function executeSearchKnowledge(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };
  const namespace = input.namespace as string;
  const query = input.query as string;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };
  const startMs = Date.now();
  try {
    const resp = await dksFetch(serviceUrl, `/namespaces/${encodeURIComponent(namespace)}/search`, apiKey, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query }),
      timeoutMs: 30000,
    });
    if (!resp.ok) {
      const errText = await resp.text();
      const output = `Search failed (${resp.status}): ${errText}`;
      appendKnowledgeHistory(ctx, {
        timestamp: new Date().toISOString(), tool: 'SearchKnowledge',
        namespace, query, error: output, durationMs: Date.now() - startMs,
      });
      return { output, isError: true };
    }
    const data = await resp.json() as any;
    // Keep the raw DKS response under logs/raw/ for later inspection.
    saveRawResponse(ctx, 'SearchKnowledge', { query, namespace, response: data });
    const sections: any[] = Array.isArray(data?.sections) ? data.sections : [];
    const rawImageUrls: string[] = Array.isArray(data?.page_image_urls) ? data.page_image_urls : [];
    // Download page images to the workspace so ReadImage can access them.
    const { localPaths, errors: imgErrors } = await downloadPageImages(
      serviceUrl, apiKey, rawImageUrls, ctx, namespace,
    );
    // Format response
    const lines: string[] = [];
    for (const section of sections) {
      // A section may come back without page numbers; render an empty list instead of throwing.
      const pages: unknown[] = Array.isArray(section.pages) ? section.pages : [];
      lines.push(`## ${section.title} (${section.doc_name}, pages: ${pages.join(', ')})`);
      lines.push(section.content);
      lines.push('');
    }
    if (localPaths.length > 0) {
      lines.push('### ページ画像(ReadImage で閲覧可能)');
      for (const p of localPaths) {
        lines.push(`- ${p}`);
      }
    }
    if (imgErrors.length > 0) {
      lines.push(`\n[画像ダウンロードエラー: ${imgErrors.length}件]`);
    }
    if (data?.system_status?.message) {
      lines.push(`\n[Info] ${data.system_status.message}`);
    }
    const output = lines.join('\n');
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'SearchKnowledge',
      namespace, query, sectionsFound: sections.length,
      imagesDownloaded: localPaths.length, imageErrors: imgErrors.length,
      durationMs: Date.now() - startMs,
    });
    return { output, isError: false };
  } catch (e: any) {
    // The search request above is sent with a 30 s timeout.
    const error = e.name === 'AbortError'
      ? `SearchKnowledge timeout: DKS server did not respond within 30s`
      : `SearchKnowledge error: ${e.message}`;
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'SearchKnowledge',
      namespace, query, error, durationMs: Date.now() - startMs,
    });
    return { output: error, isError: true };
  }
}
/**
 * Lists the namespaces present in the local tool configuration.
 *
 * No request is sent to the service; the names are the keys of the configured
 * namespace table. A history entry is written only when at least one
 * namespace exists.
 *
 * @param ctx - Tool context providing configuration.
 * @returns A bulleted list of namespace names, a "none available" message when
 *   the table is empty, or an error result when the service URL or the table
 *   is not configured.
 */
async function executeListNamespaces(ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  const configured = getNamespaces(ctx);
  if (serviceUrl && configured) {
    const names = Object.keys(configured);
    if (names.length === 0) {
      return { output: '利用可能なネームスペースはありません', isError: false };
    }
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(),
      tool: 'ListNamespaces',
      namespacesFound: names.length,
    });
    const bullets: string[] = [];
    for (const entry of names) {
      bullets.push('- ' + entry);
    }
    return { output: '利用可能なネームスペース:\n' + bullets.join('\n'), isError: false };
  }
  return { output: 'Knowledge service not configured', isError: true };
}
/**
 * Lists the documents stored in one namespace.
 *
 * Compared with the previous version, HTTP failures and empty results are now
 * written to the knowledge history log (matching the other tools), a non-array
 * `documents` value is treated as empty, and the namespace is percent-encoded
 * in the request path.
 *
 * @param input - Tool arguments: `namespace`.
 * @param ctx - Tool context providing configuration and `workspacePath`.
 * @returns A bulleted list of documents (name, page count, id), a "no documents
 *   yet" message, or an error result (`isError: true`). Never throws.
 */
async function executeListDocuments(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };
  const namespace = input.namespace as string;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };
  const startMs = Date.now();
  try {
    const resp = await dksFetch(serviceUrl, `/namespaces/${encodeURIComponent(namespace)}/documents`, apiKey);
    if (!resp.ok) {
      const output = `ListDocuments failed (${resp.status})`;
      appendKnowledgeHistory(ctx, {
        timestamp: new Date().toISOString(), tool: 'ListDocuments',
        namespace, error: output, durationMs: Date.now() - startMs,
      });
      return { output, isError: true };
    }
    const data = await resp.json() as any;
    const docs: any[] = Array.isArray(data?.documents) ? data.documents : [];
    // Logged for both the empty and the non-empty case so every call leaves a trace.
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'ListDocuments',
      namespace, documentsFound: docs.length, durationMs: Date.now() - startMs,
    });
    if (docs.length === 0) {
      return { output: `"${namespace}" にはまだ文書がありません`, isError: false };
    }
    const lines = docs.map((d: any) => `- ${d.name} (${d.page_count}ページ, id: ${d.id})`);
    const output = `"${namespace}" の文書一覧:\n${lines.join('\n')}`;
    return { output, isError: false };
  } catch (e: any) {
    // dksFetch aborts the request after its default 10 s timeout.
    const error = e.name === 'AbortError'
      ? `ListDocuments timeout: DKS server did not respond within 10s`
      : `ListDocuments error: ${e.message}`;
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'ListDocuments',
      namespace, error, durationMs: Date.now() - startMs,
    });
    return { output: error, isError: true };
  }
}