import * as fs from 'fs';
import * as path from 'path';
import { ToolDef } from '../../llm/openai-compat.js';
import type { ToolContext, ToolResult } from './core.js';
import { resolveAndGuard } from './core.js';
import { logger } from '../../logger.js';

/** Audio file extensions (lower-case, without the leading dot) accepted by TranscribeAudio. */
const SUPPORTED_EXTENSIONS = new Set(['mp3', 'wav']);

/** Function-tool schema for TranscribeAudio; only `file_path` is required. */
const TRANSCRIBE_AUDIO_DEF: ToolDef = {
  type: 'function',
  function: {
    name: 'TranscribeAudio',
    description:
      '音声ファイル(mp3/wav)を文字起こしする(話者分離対応、外部音声認識サーバーへ送信)。詳細は ReadToolDoc({ name: "TranscribeAudio" })。',
    parameters: {
      type: 'object',
      properties: {
        file_path: { type: 'string', description: 'workspace 内の音声ファイルパス(mp3, wav)' },
        language: { type: 'string', description: '言語コード(省略時: config の speech_language or "ja")' },
        diarize: { type: 'boolean', description: '話者分離を有効にする(省略時: true)' },
        prompt: { type: 'string', description: '文字起こしヒント(固有名詞・専門用語等)' },
      },
      required: ['file_path'],
    },
  },
};

/** Tool definitions exported by this module, keyed by tool name. */
export const TOOL_DEFS: Record<string, ToolDef> = {
  TranscribeAudio: TRANSCRIBE_AUDIO_DEF,
};

/** One segment of the server's response, optionally labelled with a speaker. */
interface TranscriptionSegment {
  text: string;
  speaker?: string;
}

/** The subset of the transcription response body that this module reads. */
interface TranscriptionResponse {
  text?: string;
  segments?: TranscriptionSegment[];
}

/**
 * Renders a transcription response as plain text.
 *
 * Without diarization (or when the response has no segments) the segment texts
 * are concatenated with no separator, falling back to `response.text` and then
 * to the empty string. With diarization, consecutive segments from the same
 * speaker are merged into one `[speaker] text` line; segments without a
 * speaker are attributed to `Unknown`.
 *
 * @param response - Parsed response body from the speech server.
 * @param diarize - Whether to group the output by speaker.
 * @returns The formatted transcript; empty when the response carries no text.
 */
function formatTranscription(response: TranscriptionResponse, diarize: boolean): string {
  const segments = response.segments ?? [];

  if (!diarize || segments.length === 0) {
    if (segments.length > 0) {
      return segments.map(s => s.text).join('');
    }
    return response.text ?? '';
  }

  const lines: string[] = [];
  let currentSpeaker: string | undefined;
  let currentText = '';

  for (const seg of segments) {
    const speaker = seg.speaker ?? 'Unknown';
    if (speaker !== currentSpeaker) {
      // Speaker changed: flush the previous speaker's accumulated text, if any.
      if (currentText) {
        lines.push(`[${currentSpeaker}] ${currentText.trim()}`);
      }
      currentSpeaker = speaker;
      currentText = seg.text;
    } else {
      currentText += seg.text;
    }
  }

  // Flush the final speaker's run.
  if (currentText && currentSpeaker) {
    lines.push(`[${currentSpeaker}] ${currentText.trim()}`);
  }

  return lines.join('\n');
}

/**
 * Executes the TranscribeAudio tool: uploads a workspace audio file to the
 * configured speech server and returns the formatted transcript.
 *
 * Failures (missing arguments or configuration, unsupported extension, missing
 * or unreadable file, HTTP error, timeout, connection error, empty transcript)
 * are reported as a result with `isError: true`. Exceptions raised by
 * `resolveAndGuard` are not caught here.
 *
 * @param name - Tool name being dispatched.
 * @param input - Raw tool arguments: `file_path`, plus optional `language`,
 *   `diarize` and `prompt`.
 * @param ctx - Tool context; `workspacePath` and `toolsConfig` are read.
 * @returns `null` when `name` is not `TranscribeAudio`, otherwise the tool result.
 */
export async function executeTool(
  name: string,
  input: Record<string, unknown>,
  ctx: ToolContext,
): Promise<ToolResult | null> {
  if (name !== 'TranscribeAudio') return null;

  // Narrow at runtime rather than casting: tool arguments are model-supplied.
  const filePath = typeof input.file_path === 'string' ? input.file_path : undefined;
  if (!filePath) {
    return { output: 'file_path は必須です', isError: true };
  }

  const serverUrl = ctx.toolsConfig?.speechServerUrl;
  if (!serverUrl) {
    return { output: 'speech_server_url が config.yaml に未設定です', isError: true };
  }

  const resolved = resolveAndGuard(ctx.workspacePath, filePath);
  const ext = path.extname(resolved).toLowerCase().replace('.', '');
  if (!SUPPORTED_EXTENSIONS.has(ext)) {
    return { output: `対応フォーマット: mp3, wav(指定: .${ext})`, isError: true };
  }
  if (!fs.existsSync(resolved)) {
    return { output: `ファイルが見つかりません: ${filePath}`, isError: true };
  }

  // A missing, non-string or empty language falls back to the configured default.
  const language =
    typeof input.language === 'string' && input.language !== ''
      ? input.language
      : ctx.toolsConfig?.speechLanguage ?? 'ja';
  const diarize = input.diarize !== false;
  const prompt = typeof input.prompt === 'string' ? input.prompt : undefined;
  const timeout = (ctx.toolsConfig?.speechTimeout ?? 300) * 1000;

  // Read the file outside the network try-block so a local I/O failure is not
  // misreported as a connection error. The async read avoids blocking the
  // event loop on large audio files.
  let fileBuffer: Buffer;
  try {
    fileBuffer = await fs.promises.readFile(resolved);
  } catch (err: unknown) {
    const msg = err instanceof Error ? err.message : String(err);
    return { output: `ファイルを読み込めません: ${filePath} (${msg})`, isError: true };
  }

  const fileName = path.basename(resolved);
  const mimeType = ext === 'mp3' ? 'audio/mpeg' : 'audio/wav';

  const formData = new FormData();
  formData.append('file', new Blob([fileBuffer], { type: mimeType }), fileName);
  formData.append('language', language);
  formData.append('response_format', 'verbose_json');
  if (prompt) {
    formData.append('prompt', prompt);
  }

  const headers: Record<string, string> = {};
  if (diarize) {
    headers['X-Diarize'] = 'true';
  }

  const url = `${serverUrl.replace(/\/+$/, '')}/audio/transcriptions`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      method: 'POST',
      body: formData,
      headers,
      signal: controller.signal,
    });

    if (!response.ok) {
      const errText = await response.text().catch(() => '');
      return {
        output: `音声認識サーバーエラー (${response.status}): ${errText}`.slice(0, 2000),
        isError: true,
      };
    }

    // The timer is still armed here, so a stalled response body is aborted too.
    const data = (await response.json()) as TranscriptionResponse;
    const text = formatTranscription(data, diarize);
    if (!text) {
      return { output: '文字起こし結果が空です', isError: true };
    }

    const header = `## 文字起こし結果: ${fileName}\n\n`;
    return { output: header + text, isError: false };
  } catch (err: unknown) {
    if (err instanceof Error && err.name === 'AbortError') {
      return { output: `タイムアウト(${timeout / 1000}秒): ${filePath}`, isError: true };
    }
    const msg = err instanceof Error ? err.message : String(err);
    return { output: `音声認識サーバーに接続できません: ${serverUrl} (${msg})`, isError: true };
  } finally {
    // Always disarm the timer, including when fetch rejects, so no pending
    // timeout outlives the call.
    clearTimeout(timer);
  }
}