// knowledge.ts — DKS (Document Knowledge Service) client tools

import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { basename, isAbsolute, join, relative, resolve } from 'path';
import { ToolDef } from '../../llm/openai-compat.js';
import type { ToolContext, ToolResult } from './core.js';
import { logger } from '../../logger.js';
import { generateRawFilename } from './raw-save.js';

// --- Config access ---

/** Configuration entry stored for a single knowledge namespace. */
interface KnowledgeNamespaceConfig {
  /** API key associated with the namespace. */
  apiKey: string;
}
|
||
|
||
/**
 * Reads the knowledge service base URL from the tool configuration.
 *
 * @param ctx - Tool context whose `toolsConfig` may carry `knowledgeServiceUrl`.
 * @returns The configured URL, or `null` when `toolsConfig` or the URL is absent.
 */
function getServiceUrl(ctx: ToolContext): string | null {
  const configuredUrl = ctx.toolsConfig?.knowledgeServiceUrl;
  return configuredUrl ?? null;
}
|
||
|
||
/**
 * Reads the namespace table from the tool configuration.
 *
 * @param ctx - Tool context whose `toolsConfig` may carry `knowledgeNamespaces`.
 * @returns The namespace-name to config map, or `null` when it is not configured.
 */
function getNamespaces(ctx: ToolContext): Record<string, KnowledgeNamespaceConfig> | null {
  const table = ctx.toolsConfig?.knowledgeNamespaces;
  return table ?? null;
}
|
||
|
||
/**
 * Looks up the API key configured for one namespace.
 *
 * @param ctx - Tool context whose `toolsConfig.knowledgeNamespaces` is consulted.
 * @param namespace - Namespace name used as the lookup key.
 * @returns The namespace's `apiKey`, or `null` when the config, the namespace
 *          entry, or the key itself is missing.
 */
function getApiKey(ctx: ToolContext, namespace: string): string | null {
  const entry = ctx.toolsConfig?.knowledgeNamespaces?.[namespace];
  return entry?.apiKey ?? null;
}

// --- Fetch helper ---

/**
 * Sends an authenticated request to the DKS and aborts it after a timeout.
 *
 * @param serviceUrl - DKS base URL; trailing slashes are stripped before `path` is appended.
 * @param path - Request path, appended verbatim to the base URL.
 * @param apiKey - Sent as `Authorization: Bearer <apiKey>` unless the caller
 *                 already supplies an Authorization header (any letter case).
 * @param options - `fetch` options plus `timeoutMs` (default 10000 ms). Any
 *                  `signal` passed here is replaced by the internal timeout signal.
 * @returns The response produced by `fetch`.
 * @throws Whatever `fetch` rejects with. Callers in this file recognise the
 *         timeout case by an error whose `name` is `'AbortError'`.
 */
async function dksFetch(
  serviceUrl: string,
  path: string,
  apiKey: string,
  options: RequestInit & { timeoutMs?: number } = {},
): Promise<Response> {
  const { timeoutMs: requestedTimeout, ...fetchOptions } = options;
  const timeoutMs = requestedTimeout ?? 10000;
  const url = `${serviceUrl.replace(/\/+$/, '')}${path}`;

  // Normalise through Headers so that every HeadersInit form (plain object,
  // tuple array, Headers instance) is honoured; spreading a Headers instance
  // into an object literal would silently drop all of its entries.
  const headers = new Headers(fetchOptions.headers);
  if (!headers.has('Authorization')) {
    headers.set('Authorization', `Bearer ${apiKey}`);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    return await fetch(url, {
      ...fetchOptions,
      headers,
      signal: controller.signal,
    });
  } finally {
    // Runs as soon as fetch settles, so the timeout covers the wait for the
    // response only, not the later consumption of its body.
    clearTimeout(timer);
  }
}

// --- History logging ---

/**
 * One line of the knowledge history log. Only `timestamp` and `tool` are
 * required; the index signature lets each tool attach extra fields.
 */
interface KnowledgeHistoryRecord {
  /** When the record was produced (callers in this file pass an ISO-8601 string). */
  timestamp: string;
  /** Name of the tool that produced the record. */
  tool: string;
  /** Namespace the call targeted, if any. */
  namespace?: string;
  /** Search query, for search calls. */
  query?: string;
  /** Number of sections returned by a search. */
  sectionsFound?: number;
  /** Number of page images saved locally. */
  imagesDownloaded?: number;
  /** Error text when the call failed. */
  error?: string;
  /** Elapsed time of the call in milliseconds. */
  durationMs?: number;
  /** Tool-specific extras. */
  [key: string]: unknown;
}
|
||
|
||
/**
 * Appends one JSON line to `<workspace>/logs/knowledge-history.jsonl`,
 * creating the `logs` directory when needed.
 *
 * Best-effort: any failure is reported through `logger.warn` and swallowed,
 * so history logging never fails the calling tool.
 *
 * @param ctx - Supplies `workspacePath`, the root under which `logs/` lives.
 * @param record - Record serialised with `JSON.stringify` and written as one line.
 */
function appendKnowledgeHistory(ctx: ToolContext, record: KnowledgeHistoryRecord): void {
  try {
    const dir = join(ctx.workspacePath, 'logs');
    const target = join(dir, 'knowledge-history.jsonl');
    mkdirSync(dir, { recursive: true });
    const line = JSON.stringify(record) + '\n';
    appendFileSync(target, line, 'utf-8');
  } catch (err) {
    const reason = (err as Error).message;
    logger.warn(`[knowledge] failed to write history: ${reason}`);
  }
}

// --- Raw response save helper ---

/**
 * Stores a raw service payload under `<workspace>/logs/raw/` and appends an
 * index line to `<workspace>/logs/rawdata-history.jsonl`.
 *
 * Best-effort: any failure is reported through `logger.warn` and swallowed.
 *
 * @param ctx - Supplies `workspacePath`.
 * @param toolName - Passed to `generateRawFilename` and recorded in the index line.
 * @param data - Payload written as pretty-printed JSON (2-space indent).
 */
function saveRawResponse(ctx: ToolContext, toolName: string, data: unknown): void {
  try {
    const logsDir = join(ctx.workspacePath, 'logs');
    const rawDir = join(logsDir, 'raw');
    mkdirSync(rawDir, { recursive: true });

    const filename = generateRawFilename(toolName, '.json');
    writeFileSync(join(rawDir, filename), JSON.stringify(data, null, 2), 'utf-8');

    // NOTE(review): `bytes` is measured on the compact serialisation, while the
    // file written above is pretty-printed, so it is not the on-disk size.
    // Confirm which of the two the index is meant to report.
    const compact = JSON.stringify(data);
    const indexEntry = {
      timestamp: new Date().toISOString(),
      tool: toolName,
      filename,
      bytes: Buffer.byteLength(compact, 'utf-8'),
    };
    const indexPath = join(logsDir, 'rawdata-history.jsonl');
    appendFileSync(indexPath, JSON.stringify(indexEntry) + '\n', 'utf-8');
  } catch (err) {
    const reason = (err as Error).message;
    logger.warn(`[knowledge] failed to save raw response: ${reason}`);
  }
}

// --- Page image download helper ---

/**
 * Derives the local file name for a page image from its URL path.
 * Query string and fragment are dropped, only the last path segment is kept,
 * and an empty, `.` or `..` segment falls back to a timestamped name.
 */
function pageImageFileName(relUrl: string): string {
  const pathOnly = relUrl.split(/[?#]/, 1)[0] ?? '';
  const lastSegment = pathOnly.split(/[\\/]/).pop() ?? '';
  const unusable = lastSegment === '' || lastSegment === '.' || lastSegment === '..';
  return unusable ? `page-${Date.now()}.png` : lastSegment;
}

/**
 * Downloads page images from the DKS into
 * `<workspace>/input/knowledge/<namespace>/`, one request at a time.
 *
 * Failures never reject: each one is collected in `errors` as
 * `"<relUrl>: <reason>"` and the remaining images are still attempted.
 *
 * @param serviceUrl - DKS base URL.
 * @param apiKey - API key passed to `dksFetch`.
 * @param imageUrls - Image paths relative to the service URL (e.g. `/pages/abc123.png`).
 * @param ctx - Supplies `workspacePath`.
 * @param namespace - Used as the sub-directory name under `input/knowledge/`.
 * @returns `localPaths`: workspace-relative paths of the files written;
 *          `errors`: one entry per image that could not be saved.
 */
async function downloadPageImages(
  serviceUrl: string,
  apiKey: string,
  imageUrls: string[],
  ctx: ToolContext,
  namespace: string,
): Promise<{ localPaths: string[]; errors: string[] }> {
  const localPaths: string[] = [];
  const errors: string[] = [];
  if (imageUrls.length === 0) return { localPaths, errors };

  const saveDir = join(ctx.workspacePath, 'input', 'knowledge', namespace);
  try {
    mkdirSync(saveDir, { recursive: true });
  } catch (e) {
    // Without the target directory nothing can be saved; report it per image
    // instead of rejecting, so the caller can still return its text results.
    const reason = (e as Error).message;
    for (const relUrl of imageUrls) errors.push(`${relUrl}: ${reason}`);
    return { localPaths, errors };
  }

  for (const relUrl of imageUrls) {
    try {
      const resp = await dksFetch(serviceUrl, relUrl, apiKey, { timeoutMs: 15000 });
      if (!resp.ok) {
        errors.push(`${relUrl}: HTTP ${resp.status}`);
        continue;
      }
      const buffer = Buffer.from(await resp.arrayBuffer());
      // e.g. /pages/abc123.png?v=2 -> abc123.png
      const fileName = pageImageFileName(relUrl);
      writeFileSync(join(saveDir, fileName), buffer);
      localPaths.push(`input/knowledge/${namespace}/${fileName}`);
    } catch (e) {
      const reason = (e as Error).name === 'AbortError' ? 'timeout' : (e as Error).message;
      errors.push(`${relUrl}: ${reason}`);
    }
  }
  return { localPaths, errors };
}

// --- Tool Definitions ---

/**
 * Tool definition for `IngestDocument`.
 * The Japanese description says: ingest a document into the knowledge base
 * (asynchronous); supports PDF, Word, PowerPoint, Excel, images and CSV/TSV.
 * Both `namespace` and `file_path` (a path inside the workspace) are required.
 */
const INGEST_DOCUMENT_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'IngestDocument',
    description: 'ドキュメントをナレッジベースに取り込む(非同期)。PDF, Word, PowerPoint, Excel, 画像, CSV/TSV に対応。',
    parameters: {
      type: 'object',
      properties: {
        namespace: {
          type: 'string',
          description: '対象ネームスペース',
        },
        file_path: {
          type: 'string',
          description: 'ワークスペース内のファイルパス',
        },
      },
      required: ['namespace', 'file_path'],
    },
  },
};
|
||
|
||
/**
 * Tool definition for `IngestStatus`.
 * The Japanese description says: check the progress of an ingest job.
 * Requires `namespace` and the `job_id` returned by IngestDocument.
 */
const INGEST_STATUS_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'IngestStatus',
    description: '取込ジョブの進捗状況を確認する。',
    parameters: {
      type: 'object',
      properties: {
        namespace: {
          type: 'string',
          description: '対象ネームスペース',
        },
        job_id: {
          type: 'string',
          description: 'IngestDocument で返却されたジョブID',
        },
      },
      required: ['namespace', 'job_id'],
    },
  },
};
|
||
|
||
/**
 * Tool definition for `SearchKnowledge`.
 * The Japanese description says: search the DKS (internal knowledge) in
 * natural language; related sections (text) plus page images are returned,
 * and the images are saved automatically under input/knowledge/{ns}/ where
 * ReadImage can open them. Requires `namespace` and `query`.
 */
const SEARCH_KNOWLEDGE_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'SearchKnowledge',
    description: 'DKS(社内ナレッジ)を自然言語で検索する。関連セクション(テキスト)+ ページ画像が返り、画像は input/knowledge/{ns}/ に自動保存され ReadImage で閲覧可能。詳細は ReadToolDoc({ name: "SearchKnowledge" })。',
    parameters: {
      type: 'object',
      properties: {
        namespace: {
          type: 'string',
          description: '検索対象ネームスペース',
        },
        query: {
          type: 'string',
          description: '検索クエリ(自然言語)',
        },
      },
      required: ['namespace', 'query'],
    },
  },
};
|
||
|
||
/**
 * Tool definition for `ListNamespaces`.
 * The Japanese description says: show the list of available knowledge-base
 * namespaces. Takes no parameters.
 */
const LIST_NAMESPACES_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'ListNamespaces',
    description: '利用可能なナレッジベースのネームスペース一覧を表示する。',
    parameters: {
      type: 'object',
      properties: {},
      required: [],
    },
  },
};
|
||
|
||
/**
 * Tool definition for `ListDocuments`.
 * The Japanese description says: show the list of documents in a namespace.
 * Requires `namespace`.
 */
const LIST_DOCUMENTS_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'ListDocuments',
    description: 'ネームスペース内の文書一覧を表示する。',
    parameters: {
      type: 'object',
      properties: {
        namespace: {
          type: 'string',
          description: '対象ネームスペース',
        },
      },
      required: ['namespace'],
    },
  },
};
|
||
|
||
/**
 * Registry of the knowledge tool definitions, keyed by tool name.
 * The keys match the names dispatched by `executeTool`.
 */
export const TOOL_DEFS: Record<string, ToolDef> = {
  // Ingestion
  IngestDocument: INGEST_DOCUMENT_DEF,
  IngestStatus: INGEST_STATUS_DEF,
  // Retrieval
  SearchKnowledge: SEARCH_KNOWLEDGE_DEF,
  // Listing
  ListNamespaces: LIST_NAMESPACES_DEF,
  ListDocuments: LIST_DOCUMENTS_DEF,
};

// --- Tool Execution ---

/**
 * Dispatches a tool call to the matching knowledge tool implementation.
 *
 * @param name - Tool name to execute.
 * @param input - Raw tool arguments, forwarded to the implementation
 *                (`ListNamespaces` takes none).
 * @param ctx - Tool context forwarded to the implementation.
 * @returns The tool's result, or `null` when `name` is not a knowledge tool.
 */
export async function executeTool(
  name: string,
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult | null> {
  switch (name) {
    case 'IngestDocument':
      return executeIngestDocument(input, ctx);
    case 'IngestStatus':
      return executeIngestStatus(input, ctx);
    case 'SearchKnowledge':
      return executeSearchKnowledge(input, ctx);
    case 'ListNamespaces':
      return executeListNamespaces(ctx);
    case 'ListDocuments':
      return executeListDocuments(input, ctx);
    default:
      return null;
  }
}

// --- Tool Implementations ---

/**
 * IngestDocument tool: uploads a workspace file to the DKS ingest endpoint of
 * a namespace and reports the job that was started.
 *
 * Every attempt that gets past the configuration checks is recorded in the
 * knowledge history, whether it succeeds or fails.
 *
 * @param input - Expects `namespace` and `file_path`. A relative `file_path`
 *                is resolved against the workspace; an absolute one is accepted
 *                only when it lies inside the workspace.
 * @param ctx - Supplies the tool configuration and `workspacePath`.
 * @returns A result whose `output` is the job summary, or an error description
 *          with `isError: true`. Never rejects.
 */
async function executeIngestDocument(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };

  const namespace = input.namespace as string;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };

  const startMs = Date.now();
  // Logs the failure to the history file and builds the error result.
  const fail = (error: string): ToolResult => {
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'IngestDocument',
      namespace, error, durationMs: Date.now() - startMs,
    });
    return { output: error, isError: true };
  };

  const filePath = input.file_path;
  if (typeof filePath !== 'string' || filePath === '') {
    return fail('IngestDocument error: file_path must be a non-empty string');
  }

  try {
    // file_path comes from tool input, so confine it to the workspace:
    // without this check an absolute path or `..` segments would let the tool
    // upload any file the process can read.
    // NOTE(review): symlinks inside the workspace are not resolved here.
    const workspaceRoot = resolve(ctx.workspacePath);
    const resolvedPath = resolve(workspaceRoot, filePath);
    const relToRoot = relative(workspaceRoot, resolvedPath);
    const outsideWorkspace =
      relToRoot === '' || isAbsolute(relToRoot) || relToRoot.split(/[\\/]/)[0] === '..';
    if (outsideWorkspace) {
      return fail(`IngestDocument error: file_path must point to a file inside the workspace: ${filePath}`);
    }

    const fileData = readFileSync(resolvedPath);
    const fileName = basename(resolvedPath) || 'unknown';

    const formData = new FormData();
    formData.append('file', new Blob([fileData]), fileName);

    const resp = await dksFetch(serviceUrl, `/namespaces/${namespace}/ingest`, apiKey, {
      method: 'POST',
      body: formData,
    });

    if (!resp.ok) {
      const errText = await resp.text();
      return fail(`Ingest failed (${resp.status}): ${errText}`);
    }

    const data = ((await resp.json()) ?? {}) as { job_id?: unknown; pages_detected?: unknown };
    const output = `取込を開始しました (job: ${data.job_id}, ${data.pages_detected}ページ検出)。完了確認は IngestStatus で可能です。他の作業を続行できます。`;
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'IngestDocument',
      namespace, fileName, jobId: data.job_id, pagesDetected: data.pages_detected,
      durationMs: Date.now() - startMs,
    });
    return { output, isError: false };
  } catch (e: unknown) {
    const err = e as { name?: unknown; message?: unknown } | null | undefined;
    // dksFetch is called without timeoutMs here, so its 10 s default applies.
    return fail(
      err?.name === 'AbortError'
        ? `IngestDocument timeout: DKS server did not respond within 10s`
        : `IngestDocument error: ${err?.message ?? String(e)}`,
    );
  }
}
|
||
|
||
/**
 * IngestStatus tool: fetches the state of an ingest job and renders it as a
 * one-line summary (completed / failed / in progress).
 *
 * @param input - Expects `namespace` and `job_id` (as returned by IngestDocument).
 * @param ctx - Supplies the tool configuration.
 * @returns A result whose `output` is the job summary, or an error description
 *          with `isError: true`. Never rejects.
 */
async function executeIngestStatus(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };

  const namespace = input.namespace as string;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };

  const jobId = input.job_id;
  if (typeof jobId !== 'string' || jobId === '') {
    return { output: 'IngestStatus error: job_id must be a non-empty string', isError: true };
  }

  try {
    // job_id comes from tool input; encode it so characters such as `/`, `?`
    // or `#` cannot change which endpoint is requested.
    const jobPath = `/namespaces/${namespace}/jobs/${encodeURIComponent(jobId)}`;
    const resp = await dksFetch(serviceUrl, jobPath, apiKey);
    if (!resp.ok) {
      // Only a 404 means the job is unknown; other statuses (auth, server
      // errors) must not be reported as "not found".
      const output = resp.status === 404
        ? `Job not found (${resp.status})`
        : `IngestStatus failed (${resp.status})`;
      return { output, isError: true };
    }

    const data = await resp.json() as any;
    const p = data.progress || {};
    let statusLine: string;
    if (data.status === 'completed') {
      statusLine = `完了 (${data.document_name})`;
    } else if (data.status === 'failed') {
      statusLine = `失敗: ${data.error || 'unknown'}`;
    } else {
      statusLine = `処理中: VLM ${p.vlm_completed || 0}/${p.total_pages || 0}ページ, ツリー構築: ${p.tree_built ? '完了' : '未完了'}`;
    }

    return { output: `ジョブ ${data.job_id}: ${statusLine}`, isError: false };
  } catch (e: unknown) {
    const err = e as { name?: unknown; message?: unknown } | null | undefined;
    if (err?.name === 'AbortError') {
      // dksFetch is called without timeoutMs here, so its 10 s default applies.
      return { output: `IngestStatus timeout: DKS server did not respond within 10s`, isError: true };
    }
    return { output: `IngestStatus error: ${err?.message ?? String(e)}`, isError: true };
  }
}
|
||
|
||
/**
 * SearchKnowledge tool: runs a natural-language search in one DKS namespace,
 * saves the raw response, downloads the returned page images into the
 * workspace, and formats the sections as Markdown-style text.
 *
 * Every attempt that reaches the service call is recorded in the knowledge
 * history, whether it succeeds or fails.
 *
 * @param input - Expects `namespace` and a non-empty `query`.
 * @param ctx - Supplies the tool configuration and `workspacePath`.
 * @returns A result whose `output` lists the sections, the saved image paths,
 *          the image error count and any service info message; or an error
 *          description with `isError: true`. Never rejects.
 */
async function executeSearchKnowledge(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) return { output: 'Knowledge service not configured', isError: true };

  const namespace = input.namespace as string;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) return { output: `Namespace "${namespace}" not configured`, isError: true };

  const query = input.query;
  if (typeof query !== 'string' || query.trim() === '') {
    return { output: 'SearchKnowledge error: query must be a non-empty string', isError: true };
  }

  const startMs = Date.now();
  // Logs the failure to the history file and builds the error result.
  const fail = (error: string): ToolResult => {
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'SearchKnowledge',
      namespace, query, error, durationMs: Date.now() - startMs,
    });
    return { output: error, isError: true };
  };

  try {
    const resp = await dksFetch(serviceUrl, `/namespaces/${namespace}/search`, apiKey, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query }),
      timeoutMs: 30000,
    });

    if (!resp.ok) {
      const errText = await resp.text();
      return fail(`Search failed (${resp.status}): ${errText}`);
    }

    const payload: unknown = await resp.json();

    // Keep the raw DKS response under logs/raw/.
    saveRawResponse(ctx, 'SearchKnowledge', { query, namespace, response: payload });

    // The payload is untyped JSON: validate each field's shape before use so a
    // partial response cannot turn a successful search into an error.
    const data = (payload ?? {}) as Record<string, unknown>;
    const sections = (Array.isArray(data.sections) ? data.sections : []).filter(
      (s: unknown): s is Record<string, unknown> => typeof s === 'object' && s !== null,
    );
    const rawImageUrls = (Array.isArray(data.page_image_urls) ? data.page_image_urls : []).filter(
      (u: unknown): u is string => typeof u === 'string',
    );

    // Download page images to the workspace so ReadImage can access them.
    const { localPaths, errors: imgErrors } = await downloadPageImages(
      serviceUrl, apiKey, rawImageUrls, ctx, namespace,
    );

    const lines: string[] = [];
    if (sections.length === 0) {
      // Without this line an empty result would produce an empty output.
      lines.push(`"${namespace}" に該当するセクションは見つかりませんでした`);
    }
    for (const section of sections) {
      const pages = Array.isArray(section.pages) ? section.pages.join(', ') : '';
      lines.push(`## ${section.title} (${section.doc_name}, pages: ${pages})`);
      lines.push(String(section.content ?? ''));
      lines.push('');
    }

    if (localPaths.length > 0) {
      lines.push('### ページ画像(ReadImage で閲覧可能)');
      for (const p of localPaths) {
        lines.push(`- ${p}`);
      }
    }
    if (imgErrors.length > 0) {
      lines.push(`\n[画像ダウンロードエラー: ${imgErrors.length}件]`);
    }

    const statusMessage = (data.system_status as { message?: unknown } | null | undefined)?.message;
    if (statusMessage) {
      lines.push(`\n[Info] ${statusMessage}`);
    }

    const output = lines.join('\n');

    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(), tool: 'SearchKnowledge',
      namespace, query, sectionsFound: sections.length,
      imagesDownloaded: localPaths.length, imageErrors: imgErrors.length,
      durationMs: Date.now() - startMs,
    });

    return { output, isError: false };
  } catch (e: unknown) {
    const err = e as { name?: unknown; message?: unknown } | null | undefined;
    return fail(
      err?.name === 'AbortError'
        ? `SearchKnowledge timeout: DKS server did not respond within 30s`
        : `SearchKnowledge error: ${err?.message ?? String(e)}`,
    );
  }
}
|
||
|
||
/**
 * ListNamespaces tool: lists the namespace names found in the tool
 * configuration. No request is sent to the service.
 *
 * A history record is written only when at least one namespace exists.
 *
 * @param ctx - Supplies the tool configuration and `workspacePath`.
 * @returns A bullet list of namespace names, a "none available" message, or a
 *          "not configured" error when the service URL or namespace table is missing.
 */
async function executeListNamespaces(ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  const namespaces = getNamespaces(ctx);

  if (!serviceUrl || !namespaces) {
    return { output: 'Knowledge service not configured', isError: true };
  }

  const names = Object.keys(namespaces);
  const count = names.length;
  if (count === 0) {
    return { output: '利用可能なネームスペースはありません', isError: false };
  }

  appendKnowledgeHistory(ctx, {
    timestamp: new Date().toISOString(),
    tool: 'ListNamespaces',
    namespacesFound: count,
  });

  const bullets = names.map((n) => `- ${n}`).join('\n');
  return { output: `利用可能なネームスペース:\n${bullets}`, isError: false };
}
|
||
|
||
/**
 * ListDocuments tool: fetches the documents of a namespace from the DKS and
 * formats them as a bullet list (name, page count, id).
 *
 * A history record is written for a non-empty listing and for thrown errors;
 * HTTP failures and empty listings return without one.
 *
 * @param input - Expects `namespace`.
 * @param ctx - Supplies the tool configuration and `workspacePath`.
 * @returns The formatted listing, an "no documents yet" message, or an error
 *          description with `isError: true`. Never rejects.
 */
async function executeListDocuments(input: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
  const serviceUrl = getServiceUrl(ctx);
  if (!serviceUrl) {
    return { output: 'Knowledge service not configured', isError: true };
  }

  const namespace = input.namespace as string;
  const apiKey = getApiKey(ctx, namespace);
  if (!apiKey) {
    return { output: `Namespace "${namespace}" not configured`, isError: true };
  }

  const startMs = Date.now();
  try {
    const resp = await dksFetch(serviceUrl, `/namespaces/${namespace}/documents`, apiKey);
    if (!resp.ok) {
      return { output: `ListDocuments failed (${resp.status})`, isError: true };
    }

    const payload = await resp.json() as any;
    const docs = payload.documents || [];
    if (docs.length === 0) {
      return { output: `"${namespace}" にはまだ文書がありません`, isError: false };
    }

    const bullets = docs.map((doc: any) => `- ${doc.name} (${doc.page_count}ページ, id: ${doc.id})`);
    const output = `"${namespace}" の文書一覧:\n${bullets.join('\n')}`;

    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(),
      tool: 'ListDocuments',
      namespace,
      documentsFound: docs.length,
      durationMs: Date.now() - startMs,
    });

    return { output, isError: false };
  } catch (e: any) {
    let error: string;
    if (e.name === 'AbortError') {
      // dksFetch is called without timeoutMs here, so its 10 s default applies.
      error = `ListDocuments timeout: DKS server did not respond within 10s`;
    } else {
      error = `ListDocuments error: ${e.message}`;
    }
    appendKnowledgeHistory(ctx, {
      timestamp: new Date().toISOString(),
      tool: 'ListDocuments',
      namespace,
      error,
      durationMs: Date.now() - startMs,
    });
    return { output: error, isError: true };
  }
}
|