maestro/src/engine/context-manager.test.ts
2026-06-03 05:08:00 +00:00

188 lines
7.2 KiB
TypeScript

import { describe, expect, it, vi } from 'vitest';
import { ContextManager, fetchOllamaContextLimit } from './context-manager.js';
import type { ContextConfig } from '../config.js';
/**
 * Builds the ContextConfig fixture used by the tests in this file.
 *
 * The baseline carries three thresholds: `warn` at ratio 0.7, `prompt` at 0.85
 * and `force_transition` at 0.95. Any top-level field supplied in `overrides`
 * (including `thresholds` itself) replaces the baseline value.
 *
 * @param overrides Optional fields to lay over the baseline config.
 * @returns A fresh config object; nothing is shared between calls.
 */
function makeConfig(overrides?: Partial<ContextConfig>): ContextConfig {
  const baseline: ContextConfig = {
    thresholds: [
      { ratio: 0.7, action: 'warn' },
      { ratio: 0.85, action: 'prompt' },
      { ratio: 0.95, action: 'force_transition' },
    ],
  };
  // Spreading `undefined` is a no-op, so the no-argument call returns the baseline as-is.
  return { ...baseline, ...overrides };
}
// Behavioural tests for ContextManager: threshold actions returned by update(),
// the context-limit sources (explicit, config, default), and the usage/exhaustion flags.
describe('ContextManager', () => {
  /** Creates a manager and pins its context limit — the setup most cases below share. */
  const managerWithLimit = (limit: number, config: ContextConfig = makeConfig()): ContextManager => {
    const manager = new ContextManager(config);
    manager.setContextLimit(limit);
    return manager;
  };

  /** Usage payload carrying only prompt tokens (completion_tokens fixed at 0). */
  const promptOnly = (promptTokens: number) => ({ prompt_tokens: promptTokens, completion_tokens: 0 });

  it('returns null when usage is below all thresholds', () => {
    const manager = managerWithLimit(1000);

    const result = manager.update({ prompt_tokens: 100, completion_tokens: 50 });

    expect(result).toBeNull();
    // NOTE(review): 0.1 with 100 prompt + 50 completion tokens against a limit of 1000
    // suggests the ratio is derived from prompt_tokens only — confirm in ContextManager.
    expect(manager.getRatio()).toBeCloseTo(0.1);
  });

  it('returns warn action when crossing 0.7 threshold', () => {
    const manager = managerWithLimit(1000);

    const result = manager.update({ prompt_tokens: 750, completion_tokens: 50 });

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });

  it('returns prompt action when crossing 0.85 threshold', () => {
    const manager = managerWithLimit(1000);
    // First cross the 0.7 threshold so the next update is evaluated after it has fired.
    manager.update(promptOnly(750));

    const result = manager.update(promptOnly(870));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('prompt');
  });

  it('returns force_transition when crossing 0.95 threshold', () => {
    const manager = managerWithLimit(1000);
    // Walk through the two lower thresholds before the update under test.
    manager.update(promptOnly(750));
    manager.update(promptOnly(870));

    const result = manager.update(promptOnly(960));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('force_transition');
  });

  it('fires each threshold only once', () => {
    const manager = managerWithLimit(1000);
    manager.update(promptOnly(750));

    // Still above 0.7 but below 0.85: nothing new should be reported.
    const repeat = manager.update(promptOnly(760));

    expect(repeat).toBeNull();
  });

  it('uses default context limit 128000 when not set', () => {
    // No setContextLimit() call and no limitTokens in the config.
    const manager = new ContextManager(makeConfig());

    const result = manager.update(promptOnly(100));

    expect(result).toBeNull();
    expect(manager.getRatio()).toBeCloseTo(100 / 128000);
  });

  it('uses config limitTokens when provided', () => {
    const manager = new ContextManager(makeConfig({ limitTokens: 500 }));

    const result = manager.update(promptOnly(400));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
    expect(manager.getRatio()).toBeCloseTo(0.8);
  });

  it('isExhausted returns true when ratio >= 0.99', () => {
    const manager = managerWithLimit(1000);

    manager.update(promptOnly(995));

    expect(manager.isExhausted()).toBe(true);
  });

  it('hasUsageData returns false before first update', () => {
    const manager = new ContextManager(makeConfig());

    expect(manager.hasUsageData()).toBe(false);
  });

  it('hasUsageData returns true after update', () => {
    const manager = new ContextManager(makeConfig());

    manager.update(promptOnly(100));

    expect(manager.hasUsageData()).toBe(true);
  });

  it('handles prompt action message content', () => {
    const singlePrompt = makeConfig({ thresholds: [{ ratio: 0.5, action: 'prompt' }] });
    const manager = managerWithLimit(1000, singlePrompt);

    const result = manager.update(promptOnly(600));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('prompt');
    // The type check doubles as a narrowing guard so `message` can be read.
    if (result?.type === 'prompt') {
      expect(result.message).toBeTruthy();
      expect(result.message.length).toBeGreaterThan(0);
    }
  });

  it('falls back to char-based estimation when updateFromChars is used', () => {
    const manager = new ContextManager(makeConfig({ limitTokens: 1000 }));

    // 1050 characters against a 1000-token limit is expected to reach the warn threshold.
    const result = manager.updateFromChars(1050);

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });

  it('uses default thresholds when thresholds is undefined', () => {
    // An empty config: the manager is expected to still emit `warn` at 750/1000.
    const manager = managerWithLimit(1000, {});

    const result = manager.update(promptOnly(750));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });
});
// Tests for fetchOllamaContextLimit: the default on failure, the precedence between
// Ollama's reported fields, and the llama.cpp /props fallback.
describe('fetchOllamaContextLimit', () => {
  /** Wraps a payload in a 200 response with a JSON content type. */
  const jsonResponse = (payload: unknown): Response =>
    new Response(JSON.stringify(payload), { status: 200, headers: { 'Content-Type': 'application/json' } });

  /** A 404 reply, used to simulate a server without the Ollama /api/show endpoint. */
  const notFoundResponse = (): Response => new Response('not found', { status: 404 });

  /** Installs `fetchMock` as the global fetch for the duration of `run`, then removes all global stubs. */
  const withStubbedFetch = async (fetchMock: unknown, run: () => Promise<void>): Promise<void> => {
    vi.stubGlobal('fetch', fetchMock);
    try {
      await run();
    } finally {
      // Always restore, even when an assertion inside `run` throws.
      vi.unstubAllGlobals();
    }
  };

  it('returns default when fetch fails', async () => {
    // No stub is installed here; port 99999 is outside the valid port range,
    // presumably chosen so the request fails without needing a live server.
    const limit = await fetchOllamaContextLimit('http://localhost:99999', 'nonexistent');

    expect(limit).toBe(128_000);
  });

  it('prefers parameters.num_ctx (runtime) over model_info.context_length (theoretical)', async () => {
    const fetchMock = vi.fn().mockResolvedValue(
      jsonResponse({
        model_info: { 'qwen3.context_length': 262_144 },
        parameters: 'num_ctx 200000\nstop "<|im_end|>"',
      }),
    );

    await withStubbedFetch(fetchMock, async () => {
      const limit = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');
      expect(limit).toBe(200_000);
    });
  });

  it('falls back to model_info.context_length when num_ctx is absent', async () => {
    const fetchMock = vi.fn().mockResolvedValue(
      jsonResponse({
        model_info: { 'qwen3.context_length': 262_144 },
        parameters: 'stop "<|im_end|>"',
      }),
    );

    await withStubbedFetch(fetchMock, async () => {
      const limit = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');
      expect(limit).toBe(262_144);
    });
  });

  it('falls back to llama.cpp /props when Ollama /api/show is unavailable', async () => {
    // First call answers 404, second call answers with a llama.cpp-style /props payload.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(notFoundResponse())
      .mockResolvedValueOnce(
        jsonResponse({
          default_generation_settings: { n_ctx: 1_010_176 },
          model_meta: { 'qwen3.context_length': 1_010_176 },
        }),
      );

    await withStubbedFetch(fetchMock, async () => {
      const limit = await fetchOllamaContextLimit('http://llama.test/v1', 'Qwen3.6-27B-Q8_0.gguf');

      expect(limit).toBe(1_010_176);
      // The `/v1` suffix of the base URL must not appear in either requested URL.
      expect(fetchMock).toHaveBeenNthCalledWith(1, 'http://llama.test/api/show', expect.any(Object));
      expect(fetchMock).toHaveBeenNthCalledWith(2, 'http://llama.test/props', expect.objectContaining({ method: 'GET' }));
    });
  });

  it('uses llama.cpp context metadata when n_ctx is absent from /props', async () => {
    // The metadata value is a string here; the expected result is the number 65536.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(notFoundResponse())
      .mockResolvedValueOnce(
        jsonResponse({
          model_meta: { 'llama.context_length': '65536' },
        }),
      );

    await withStubbedFetch(fetchMock, async () => {
      const limit = await fetchOllamaContextLimit('http://llama.test/v1/', 'llama');
      expect(limit).toBe(65_536);
    });
  });
});