// Vitest unit tests for ContextManager and fetchOllamaContextLimit.
import { describe, expect, it, vi } from 'vitest';
|
|
import { ContextManager, fetchOllamaContextLimit } from './context-manager.js';
|
|
import type { ContextConfig } from '../config.js';
|
|
|
|
/**
 * Builds the ContextConfig used by the tests in this file.
 *
 * The baseline is three thresholds (warn at 0.7, prompt at 0.85,
 * force_transition at 0.95). Any key present in `overrides` replaces the
 * baseline value of the same name, including `thresholds` itself.
 *
 * @param overrides - Optional fields to layer on top of the baseline.
 * @returns A new config object; the baseline is rebuilt on every call.
 */
function makeConfig(overrides?: Partial<ContextConfig>): ContextConfig {
  const baseline: ContextConfig = {
    thresholds: [
      { ratio: 0.7, action: 'warn' },
      { ratio: 0.85, action: 'prompt' },
      { ratio: 0.95, action: 'force_transition' },
    ],
  };

  // Spreading `undefined` is a no-op, so a missing `overrides` yields the baseline.
  return { ...baseline, ...overrides };
}
|
|
|
|
/**
 * Tests for ContextManager: which action `update` returns as usage crosses the
 * configured thresholds, how the context limit is chosen, and the
 * `isExhausted` / `hasUsageData` / `updateFromChars` helpers.
 */
describe('ContextManager', () => {
  /** Creates a manager on the baseline thresholds with an explicit context limit. */
  const managerWithLimit = (limit: number): ContextManager => {
    const manager = new ContextManager(makeConfig());
    manager.setContextLimit(limit);
    return manager;
  };

  /** Usage sample that carries prompt tokens only (completion_tokens is 0). */
  const promptOnly = (promptTokens: number) => ({
    prompt_tokens: promptTokens,
    completion_tokens: 0,
  });

  it('returns null when usage is below all thresholds', () => {
    const manager = managerWithLimit(1000);

    const result = manager.update({ prompt_tokens: 100, completion_tokens: 50 });

    expect(result).toBeNull();
    // The asserted ratio equals prompt_tokens / limit (100 / 1000).
    expect(manager.getRatio()).toBeCloseTo(0.1);
  });

  it('returns warn action when crossing 0.7 threshold', () => {
    const manager = managerWithLimit(1000);

    const result = manager.update({ prompt_tokens: 750, completion_tokens: 50 });

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });

  it('returns prompt action when crossing 0.85 threshold', () => {
    const manager = managerWithLimit(1000);

    // First sample passes 0.7; the second one passes 0.85.
    manager.update(promptOnly(750));
    const result = manager.update(promptOnly(870));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('prompt');
  });

  it('returns force_transition when crossing 0.95 threshold', () => {
    const manager = managerWithLimit(1000);

    // Step through the two lower thresholds before the final sample.
    manager.update(promptOnly(750));
    manager.update(promptOnly(870));
    const result = manager.update(promptOnly(960));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('force_transition');
  });

  it('fires each threshold only once', () => {
    const manager = managerWithLimit(1000);

    manager.update(promptOnly(750));
    // Still above 0.7 but below 0.85, so nothing new should be reported.
    const repeat = manager.update(promptOnly(760));

    expect(repeat).toBeNull();
  });

  it('uses default context limit 128000 when not set', () => {
    // No setContextLimit call and no limitTokens in the config.
    const manager = new ContextManager(makeConfig());

    const result = manager.update(promptOnly(100));

    expect(result).toBeNull();
    expect(manager.getRatio()).toBeCloseTo(100 / 128000);
  });

  it('uses config limitTokens when provided', () => {
    const manager = new ContextManager(makeConfig({ limitTokens: 500 }));

    // 400 / 500 = 0.8, which is past the 0.7 threshold.
    const result = manager.update(promptOnly(400));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
    expect(manager.getRatio()).toBeCloseTo(0.8);
  });

  it('isExhausted returns true when ratio >= 0.99', () => {
    const manager = managerWithLimit(1000);

    manager.update(promptOnly(995));

    expect(manager.isExhausted()).toBe(true);
  });

  it('hasUsageData returns false before first update', () => {
    const manager = new ContextManager(makeConfig());

    expect(manager.hasUsageData()).toBe(false);
  });

  it('hasUsageData returns true after update', () => {
    const manager = new ContextManager(makeConfig());

    manager.update(promptOnly(100));

    expect(manager.hasUsageData()).toBe(true);
  });

  it('handles prompt action message content', () => {
    // A single 'prompt' threshold replaces the baseline list.
    const promptOnlyConfig = makeConfig({ thresholds: [{ ratio: 0.5, action: 'prompt' }] });
    const manager = new ContextManager(promptOnlyConfig);
    manager.setContextLimit(1000);

    const result = manager.update(promptOnly(600));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('prompt');
    // The guard narrows the action so that `message` is accessible to the type checker.
    if (result?.type === 'prompt') {
      expect(result.message).toBeTruthy();
      expect(result.message.length).toBeGreaterThan(0);
    }
  });

  it('falls back to char-based estimation when updateFromChars is used', () => {
    const manager = new ContextManager(makeConfig({ limitTokens: 1000 }));

    // NOTE(review): the chars-to-tokens conversion is not visible in this file;
    // the test only pins that 1050 chars against a 1000-token limit yields 'warn'.
    const result = manager.updateFromChars(1050);

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });

  it('uses default thresholds when thresholds is undefined', () => {
    // An empty config: no thresholds and no limitTokens.
    const manager = new ContextManager({});
    manager.setContextLimit(1000);

    const result = manager.update(promptOnly(750));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });
});
|
|
|
|
/**
 * Tests for fetchOllamaContextLimit: the value returned when the request
 * fails, which field of an Ollama /api/show payload wins, and the llama.cpp
 * /props path taken after /api/show answers 404.
 */
describe('fetchOllamaContextLimit', () => {
  /** Wraps a payload in a 200 response declared as application/json. */
  const jsonOk = (payload: unknown): Response =>
    new Response(JSON.stringify(payload), {
      status: 200,
      headers: { 'Content-Type': 'application/json' },
    });

  /** A plain-text 404 response. */
  const notFound = (): Response => new Response('not found', { status: 404 });

  /**
   * Installs `stub` as the global `fetch` for the duration of `run`, then
   * removes all global stubs whether `run` resolves or throws.
   */
  const withFetchStub = async (stub: unknown, run: () => Promise<void>): Promise<void> => {
    vi.stubGlobal('fetch', stub);
    try {
      await run();
    } finally {
      vi.unstubAllGlobals();
    }
  };

  it('returns default when fetch fails', async () => {
    // No stub is installed here; 99999 is outside the valid TCP port range,
    // so the request cannot succeed.
    const limit = await fetchOllamaContextLimit('http://localhost:99999', 'nonexistent');

    expect(limit).toBe(128_000);
  });

  it('prefers parameters.num_ctx (runtime) over model_info.context_length (theoretical)', async () => {
    const showPayload = {
      model_info: { 'qwen3.context_length': 262_144 },
      parameters: 'num_ctx 200000\nstop "<|im_end|>"',
    };
    const fetchStub = vi.fn().mockResolvedValue(jsonOk(showPayload));

    await withFetchStub(fetchStub, async () => {
      const limit = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');

      expect(limit).toBe(200_000);
    });
  });

  it('falls back to model_info.context_length when num_ctx is absent', async () => {
    // Same payload shape as the previous test, minus the num_ctx line.
    const showPayload = {
      model_info: { 'qwen3.context_length': 262_144 },
      parameters: 'stop "<|im_end|>"',
    };
    const fetchStub = vi.fn().mockResolvedValue(jsonOk(showPayload));

    await withFetchStub(fetchStub, async () => {
      const limit = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');

      expect(limit).toBe(262_144);
    });
  });

  it('falls back to llama.cpp /props when Ollama /api/show is unavailable', async () => {
    const propsPayload = {
      default_generation_settings: { n_ctx: 1_010_176 },
      model_meta: { 'qwen3.context_length': 1_010_176 },
    };
    // First call answers 404, second call answers with the /props payload.
    const fetchStub = vi
      .fn()
      .mockResolvedValueOnce(notFound())
      .mockResolvedValueOnce(jsonOk(propsPayload));

    await withFetchStub(fetchStub, async () => {
      const limit = await fetchOllamaContextLimit('http://llama.test/v1', 'Qwen3.6-27B-Q8_0.gguf');

      expect(limit).toBe(1_010_176);
      // Both requests are expected without the '/v1' suffix of the base URL.
      expect(fetchStub).toHaveBeenNthCalledWith(1, 'http://llama.test/api/show', expect.any(Object));
      expect(fetchStub).toHaveBeenNthCalledWith(
        2,
        'http://llama.test/props',
        expect.objectContaining({ method: 'GET' }),
      );
    });
  });

  it('uses llama.cpp context metadata when n_ctx is absent from /props', async () => {
    // The context length is supplied as a string here; the expected result is numeric.
    const propsPayload = {
      model_meta: { 'llama.context_length': '65536' },
    };
    const fetchStub = vi
      .fn()
      .mockResolvedValueOnce(notFound())
      .mockResolvedValueOnce(jsonOk(propsPayload));

    await withFetchStub(fetchStub, async () => {
      const limit = await fetchOllamaContextLimit('http://llama.test/v1/', 'llama');

      expect(limit).toBe(65_536);
    });
  });
});
|