maestro/src/engine/context-manager.test.ts

import { describe, expect, it, vi } from 'vitest';
import { ContextManager, fetchOllamaContextLimit } from './context-manager.js';
import type { ContextConfig } from '../config.js';

/**
 * Builds the `ContextConfig` used throughout these tests.
 *
 * The baseline carries three thresholds (warn at 0.7, prompt at 0.85,
 * force_transition at 0.95). Any key present in `overrides` replaces the
 * baseline value wholesale, including `thresholds`.
 *
 * @param overrides - Optional fields to layer on top of the baseline.
 * @returns A freshly allocated config object.
 */
function makeConfig(overrides?: Partial<ContextConfig>): ContextConfig {
  // Rebuilt on every call so tests never share a thresholds array.
  const baseline: ContextConfig = {
    thresholds: [
      { ratio: 0.7, action: 'warn' },
      { ratio: 0.85, action: 'prompt' },
      { ratio: 0.95, action: 'force_transition' },
    ],
  };
  return { ...baseline, ...overrides };
}

describe('ContextManager', () => {
  /**
   * Creates a manager from `config` (the shared test thresholds by default)
   * and pins its context limit to `limit` tokens.
   */
  const managerWithLimit = (limit: number, config: ContextConfig = makeConfig()): ContextManager => {
    const manager = new ContextManager(config);
    manager.setContextLimit(limit);
    return manager;
  };

  /** Usage sample that reports prompt tokens only, with zero completion tokens. */
  const promptOnly = (tokens: number) => ({ prompt_tokens: tokens, completion_tokens: 0 });

  it('returns null when usage is below all thresholds', () => {
    const manager = managerWithLimit(1000);

    const result = manager.update({ prompt_tokens: 100, completion_tokens: 50 });

    expect(result).toBeNull();
    expect(manager.getRatio()).toBeCloseTo(0.1);
  });

  it('returns warn action when crossing 0.7 threshold', () => {
    const manager = managerWithLimit(1000);

    const result = manager.update({ prompt_tokens: 750, completion_tokens: 50 });

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });

  it('returns prompt action when crossing 0.85 threshold', () => {
    const manager = managerWithLimit(1000);
    // Cross the warn threshold first so the next update reports the prompt one.
    manager.update(promptOnly(750));

    const result = manager.update(promptOnly(870));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('prompt');
  });

  it('returns force_transition when crossing 0.95 threshold', () => {
    const manager = managerWithLimit(1000);
    // Walk through the two lower thresholds before the final update.
    for (const tokens of [750, 870]) {
      manager.update(promptOnly(tokens));
    }

    const result = manager.update(promptOnly(960));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('force_transition');
  });

  it('fires each threshold only once', () => {
    const manager = managerWithLimit(1000);
    manager.update(promptOnly(750));

    // Still above 0.7 but below 0.85: nothing new should be reported.
    const repeat = manager.update(promptOnly(760));

    expect(repeat).toBeNull();
  });

  it('uses default context limit 128000 when not set', () => {
    const manager = new ContextManager(makeConfig());

    const result = manager.update(promptOnly(100));

    expect(result).toBeNull();
    expect(manager.getRatio()).toBeCloseTo(100 / 128000);
  });

  it('uses config limitTokens when provided', () => {
    const manager = new ContextManager(makeConfig({ limitTokens: 500 }));

    const result = manager.update(promptOnly(400));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
    expect(manager.getRatio()).toBeCloseTo(0.8);
  });

  it('isExhausted returns true when ratio >= 0.99', () => {
    const manager = managerWithLimit(1000);

    manager.update(promptOnly(995));

    expect(manager.isExhausted()).toBe(true);
  });

  it('hasUsageData returns false before first update', () => {
    const manager = new ContextManager(makeConfig());

    expect(manager.hasUsageData()).toBe(false);
  });

  it('hasUsageData returns true after update', () => {
    const manager = new ContextManager(makeConfig());

    manager.update(promptOnly(100));

    expect(manager.hasUsageData()).toBe(true);
  });

  it('handles prompt action message content', () => {
    const promptAtHalf = makeConfig({ thresholds: [{ ratio: 0.5, action: 'prompt' }] });
    const manager = managerWithLimit(1000, promptAtHalf);

    const result = manager.update(promptOnly(600));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('prompt');
    // The guard narrows the action type for the compiler; the expectation
    // above already fails the test if the action is not a prompt.
    if (result?.type === 'prompt') {
      expect(result.message).toBeTruthy();
      expect(result.message.length).toBeGreaterThan(0);
    }
  });

  it('falls back to char-based estimation when updateFromChars is used', () => {
    const manager = new ContextManager(makeConfig({ limitTokens: 1000 }));

    const result = manager.updateFromChars(1050);

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });

  it('uses default thresholds when thresholds is undefined', () => {
    // An empty config is passed straight to the constructor on purpose.
    const manager = new ContextManager({});
    manager.setContextLimit(1000);

    const result = manager.update(promptOnly(750));

    expect(result).not.toBeNull();
    expect(result?.type).toBe('warn');
  });
});

describe('fetchOllamaContextLimit', () => {
  /** Wraps `body` in a 200 response with a JSON content type. */
  const jsonResponse = (body: unknown): Response =>
    new Response(JSON.stringify(body), {
      status: 200,
      headers: { 'Content-Type': 'application/json' },
    });

  /** A 404 reply standing in for an endpoint the server does not expose. */
  const notFoundResponse = (): Response => new Response('not found', { status: 404 });

  /**
   * Installs `fetchStub` as the global `fetch` while `run` executes and always
   * removes it afterwards, so a failing assertion cannot leak the stub into
   * later tests.
   *
   * @param fetchStub - Replacement for the global `fetch`.
   * @param run - Test body to execute while the stub is installed.
   */
  async function withStubbedFetch(fetchStub: unknown, run: () => Promise<void>): Promise<void> {
    vi.stubGlobal('fetch', fetchStub);
    try {
      await run();
    } finally {
      vi.unstubAllGlobals();
    }
  }

  it('returns default when fetch fails', async () => {
    // Reject every request explicitly instead of relying on how the runtime's
    // real fetch reacts to port 99999 (outside the valid 0-65535 range). This
    // keeps the test free of real network I/O on any platform.
    const fetchMock = vi.fn().mockRejectedValue(new TypeError('fetch failed'));

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://localhost:99999', 'nonexistent');
      expect(result).toBe(128_000);
    });
  });

  it('prefers parameters.num_ctx (runtime) over model_info.context_length (theoretical)', async () => {
    // Both values are present; the num_ctx entry in `parameters` must win.
    const fetchMock = vi.fn().mockResolvedValue(jsonResponse({
      model_info: { 'qwen3.context_length': 262_144 },
      parameters: 'num_ctx 200000\nstop "<|im_end|>"',
    }));

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');
      expect(result).toBe(200_000);
    });
  });

  it('falls back to model_info.context_length when num_ctx is absent', async () => {
    // `parameters` has no num_ctx line, leaving only the model_info value.
    const fetchMock = vi.fn().mockResolvedValue(jsonResponse({
      model_info: { 'qwen3.context_length': 262_144 },
      parameters: 'stop "<|im_end|>"',
    }));

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llm.test', 'qwen3:32b');
      expect(result).toBe(262_144);
    });
  });

  it('falls back to llama.cpp /props when Ollama /api/show is unavailable', async () => {
    // First call (/api/show) answers 404; second call (/props) carries n_ctx.
    const fetchMock = vi.fn()
      .mockResolvedValueOnce(notFoundResponse())
      .mockResolvedValueOnce(jsonResponse({
        default_generation_settings: { n_ctx: 1_010_176 },
        model_meta: { 'qwen3.context_length': 1_010_176 },
      }));

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llama.test/v1', 'Qwen3.6-27B-Q8_0.gguf');
      expect(result).toBe(1_010_176);
      // The `/v1` suffix of the base URL must not appear in either request URL.
      expect(fetchMock).toHaveBeenNthCalledWith(1, 'http://llama.test/api/show', expect.any(Object));
      expect(fetchMock).toHaveBeenNthCalledWith(2, 'http://llama.test/props', expect.objectContaining({ method: 'GET' }));
    });
  });

  it('uses llama.cpp context metadata when n_ctx is absent from /props', async () => {
    // The metadata value is a string here; the expected result is numeric.
    const fetchMock = vi.fn()
      .mockResolvedValueOnce(notFoundResponse())
      .mockResolvedValueOnce(jsonResponse({
        model_meta: { 'llama.context_length': '65536' },
      }));

    await withStubbedFetch(fetchMock, async () => {
      const result = await fetchOllamaContextLimit('http://llama.test/v1/', 'llama');
      expect(result).toBe(65_536);
    });
  });
});