import { describe, expect, it, vi } from 'vitest';
import { ContextManager, fetchOllamaContextLimit } from './context-manager.js';
import type { ContextConfig } from '../config.js';

/**
 * Builds a `ContextConfig` for the tests below.
 *
 * The base config carries a three-step threshold ladder (0.7 → warn,
 * 0.85 → prompt, 0.95 → force_transition). Any field in `overrides`
 * replaces the corresponding base field wholesale (shallow spread).
 *
 * @param overrides - Optional fields to lay over the base config.
 * @returns The merged config.
 */
function makeConfig(overrides?: Partial<ContextConfig>): ContextConfig {
  return {
    thresholds: [
      { ratio: 0.7, action: 'warn' },
      { ratio: 0.85, action: 'prompt' },
      { ratio: 0.95, action: 'force_transition' },
    ],
    ...overrides,
  };
}

/**
 * Wraps `body` in a 200 response with a JSON content type, mimicking a
 * successful reply from the model server.
 *
 * @param body - Value to serialize as the response payload.
 * @returns A new `Response` whose body is `JSON.stringify(body)`.
 */
function jsonResponse(body: unknown): Response {
  return new Response(JSON.stringify(body), {
    status: 200,
    headers: { 'Content-Type': 'application/json' },
  });
}

/**
 * Runs `run` with the global `fetch` replaced by `fetchMock`, and always
 * removes the stub afterwards so one test cannot leak its mock into the next.
 *
 * @param fetchMock - The mock to install as the global `fetch`.
 * @param run - The test body to execute while the stub is active.
 */
async function withStubbedFetch(
  fetchMock: ReturnType<typeof vi.fn>,
  run: () => Promise<void>,
): Promise<void> {
  vi.stubGlobal('fetch', fetchMock);
  try {
    await run();
  } finally {
    vi.unstubAllGlobals();
  }
}

describe('ContextManager', () => {
  it('returns null when usage is below all thresholds', () => {
    const cm = new ContextManager(makeConfig());
    cm.setContextLimit(1000);

    const action = cm.update({ prompt_tokens: 100, completion_tokens: 50 });

    expect(action).toBeNull();
    // The expected ratio of 0.1 matches prompt_tokens / limit (100 / 1000).
    expect(cm.getRatio()).toBeCloseTo(0.1);
  });

  it('returns warn action when crossing 0.7 threshold', () => {
    const cm = new ContextManager(makeConfig());
    cm.setContextLimit(1000);

    const action = cm.update({ prompt_tokens: 750, completion_tokens: 50 });

    expect(action).not.toBeNull();
    expect(action?.type).toBe('warn');
  });

  it('returns prompt action when crossing 0.85 threshold', () => {
    const cm = new ContextManager(makeConfig());
    cm.setContextLimit(1000);

    // Cross the warn threshold first so the next update reports the prompt step.
    cm.update({ prompt_tokens: 750, completion_tokens: 0 });
    const action = cm.update({ prompt_tokens: 870, completion_tokens: 0 });

    expect(action).not.toBeNull();
    expect(action?.type).toBe('prompt');
  });

  it('returns force_transition when crossing 0.95 threshold', () => {
    const cm = new ContextManager(makeConfig());
    cm.setContextLimit(1000);

    // Walk up the ladder one threshold at a time.
    cm.update({ prompt_tokens: 750, completion_tokens: 0 });
    cm.update({ prompt_tokens: 870, completion_tokens: 0 });
    const action = cm.update({ prompt_tokens: 960, completion_tokens: 0 });

    expect(action).not.toBeNull();
    expect(action?.type).toBe('force_transition');
  });

  it('fires each threshold only once', () => {
    const cm = new ContextManager(makeConfig());
    cm.setContextLimit(1000);

    cm.update({ prompt_tokens: 750, completion_tokens: 0 });
    // Still inside the warn band: no new threshold is crossed.
    const second = cm.update({ prompt_tokens: 760, completion_tokens: 0 });

    expect(second).toBeNull();
  });

  it('uses default context limit 128000 when not set', () => {
    const cm = new ContextManager(makeConfig());

    const action = cm.update({ prompt_tokens: 100, completion_tokens: 0 });

    expect(action).toBeNull();
    expect(cm.getRatio()).toBeCloseTo(100 / 128000);
  });

  it('uses config limitTokens when provided', () => {
    const cm = new ContextManager(makeConfig({ limitTokens: 500 }));

    const action = cm.update({ prompt_tokens: 400, completion_tokens: 0 });

    expect(action).not.toBeNull();
    expect(action?.type).toBe('warn');
    expect(cm.getRatio()).toBeCloseTo(0.8);
  });

  it('isExhausted returns true when ratio >= 0.99', () => {
    const cm = new ContextManager(makeConfig());
    cm.setContextLimit(1000);

    cm.update({ prompt_tokens: 995, completion_tokens: 0 });

    expect(cm.isExhausted()).toBe(true);
  });

  it('hasUsageData returns false before first update', () => {
    const cm = new ContextManager(makeConfig());

    expect(cm.hasUsageData()).toBe(false);
  });

  it('hasUsageData returns true after update', () => {
    const cm = new ContextManager(makeConfig());

    cm.update({ prompt_tokens: 100, completion_tokens: 0 });

    expect(cm.hasUsageData()).toBe(true);
  });

  it('handles prompt action message content', () => {
    const cm = new ContextManager(
      makeConfig({ thresholds: [{ ratio: 0.5, action: 'prompt' }] }),
    );
    cm.setContextLimit(1000);

    const action = cm.update({ prompt_tokens: 600, completion_tokens: 0 });

    expect(action).not.toBeNull();
    expect(action?.type).toBe('prompt');
    // The type check above already fails the test for any other action; the
    // `if` exists only to narrow `action` so that `message` is accessible.
    if (action?.type === 'prompt') {
      expect(action.message).toBeTruthy();
      expect(action.message.length).toBeGreaterThan(0);
    }
  });

  it('falls back to char-based estimation when updateFromChars is used', () => {
    const cm = new ContextManager(makeConfig({ limitTokens: 1000 }));

    // Expects the estimate derived from 1050 characters to reach the warn
    // threshold against a 1000-token limit.
    const action = cm.updateFromChars(1050);

    expect(action).not.toBeNull();
    expect(action?.type).toBe('warn');
  });

  it('uses default thresholds when thresholds is undefined', () => {
    const cm = new ContextManager({});
    cm.setContextLimit(1000);

    const action = cm.update({ prompt_tokens: 750, completion_tokens: 0 });

    expect(action).not.toBeNull();
    expect(action?.type).toBe('warn');
  });
});

describe('fetchOllamaContextLimit', () => {
  it('returns default when fetch fails', async () => {
    // NOTE(review): port 99999 is above the valid TCP range (max 65535), so
    // this request is presumably rejected without reaching the network — confirm.
    const result = await fetchOllamaContextLimit('http://localhost:99999', 'nonexistent');

    expect(result).toBe(128_000);
  });

  it('prefers parameters.num_ctx (runtime) over model_info.context_length (theoretical)', async () => {
    const fetchMock = vi.fn().mockResolvedValue(
      jsonResponse({
        model_info: { 'qwen3.context_length': 262_144 },
        parameters: 'num_ctx 200000\nstop "<|im_end|>"',
      }),
    );

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');
      expect(result).toBe(200_000);
    });
  });

  it('falls back to model_info.context_length when num_ctx is absent', async () => {
    const fetchMock = vi.fn().mockResolvedValue(
      jsonResponse({
        model_info: { 'qwen3.context_length': 262_144 },
        parameters: 'stop "<|im_end|>"',
      }),
    );

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');
      expect(result).toBe(262_144);
    });
  });

  it('falls back to llama.cpp /props when Ollama /api/show is unavailable', async () => {
    // First call (expected: /api/show) answers 404; second (expected: /props) succeeds.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(new Response('not found', { status: 404 }))
      .mockResolvedValueOnce(
        jsonResponse({
          default_generation_settings: { n_ctx: 1_010_176 },
          model_meta: { 'qwen3.context_length': 1_010_176 },
        }),
      );

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit(
        'http://llama.test/v1',
        'Qwen3.6-27B-Q8_0.gguf',
      );

      expect(result).toBe(1_010_176);
      // Both endpoints are expected at the server root, i.e. without the
      // `/v1` suffix that was part of the base URL passed in.
      expect(fetchMock).toHaveBeenNthCalledWith(
        1,
        'http://llama.test/api/show',
        expect.any(Object),
      );
      expect(fetchMock).toHaveBeenNthCalledWith(
        2,
        'http://llama.test/props',
        expect.objectContaining({ method: 'GET' }),
      );
    });
  });

  it('uses llama.cpp context metadata when n_ctx is absent from /props', async () => {
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(new Response('not found', { status: 404 }))
      .mockResolvedValueOnce(
        // The context length is supplied as a string here; the test expects
        // a numeric 65536 back.
        jsonResponse({
          model_meta: { 'llama.context_length': '65536' },
        }),
      );

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llama.test/v1/', 'llama');
      expect(result).toBe(65_536);
    });
  });
});