llm-edge / hal-9100

Edge full-stack LLM platform, written in Rust. MIT License.

context-window-management #19

Open · louis030195 opened this issue 10 months ago

louis030195 commented 10 months ago

https://platform.openai.com/docs/assistants/how-it-works/context-window-management

louis030195 commented 9 months ago

I implemented a way to build the prompt that respects the context limit, fitting pieces in priority order (instructions and tools first, then previous messages, then file contents and retrieval chunks):

use tiktoken_rs::p50k_base; // token counting, from the tiktoken-rs crate

#[test]
fn test_build_instructions_context_limit() {
    let original_instructions = "Solve the quadratic equation x^2 + 5x + 6 = 0.";
    let file_contents = vec![
        "# Python script to solve quadratic equations\nimport cmath\ndef solve_quadratic(a, b, c):\n    # calculate the discriminant\n    d = (b**2) - (4*a*c)\n    # find two solutions\n    sol1 = (-b-cmath.sqrt(d))/(2*a)\n    sol2 = (-b+cmath.sqrt(d))/(2*a)\n    return sol1, sol2\n".to_string(),
        "# Another Python script\nprint('Hello, world!')\n".to_string(),
    ];
    let previous_messages = "<message>\n{\"role\": \"user\", \"content\": \"Can you solve a quadratic equation for me?\"}\n</message>\n<message>\n{\"role\": \"assistant\", \"content\": \"Sure, I can help with that. What's the equation?\"}\n</message>\n";
    let tools = "code_interpreter";
    let code_output = Some("The solutions are (-2+0j) and (-3+0j)");
    let context_size = 200; // Set a realistic context size
    let retrieval_chunks = vec![
        "Here's a chunk of text retrieved from a large document...".to_string(),
        "And here's another chunk of text...".to_string(),
    ];

    let instructions = build_instructions(
        original_instructions,
        &file_contents,
        previous_messages,
        tools,
        code_output,
        &retrieval_chunks,
        Some(context_size),
    );

    // Use tiktoken to count tokens
    let bpe = p50k_base().unwrap();
    let tokens = bpe.encode_with_special_tokens(&instructions);

    // Check that the instructions do not exceed the context limit
    assert!(tokens.len() <= context_size, "The instructions exceed the context limit");

    // Check that the instructions contain the most important parts
    assert!(instructions.contains(original_instructions), "The instructions do not contain the original instructions");
    assert!(instructions.contains(tools), "The instructions do not contain the tools");
    assert!(instructions.contains(previous_messages), "The instructions do not contain the previous messages");

    // Check that the instructions do not contain the less important parts
    assert!(!instructions.contains(&file_contents[0]), "The instructions contain the file contents");
    assert!(!instructions.contains(&retrieval_chunks[0]), "The instructions contain the retrieval chunks");
}
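For context, here is a minimal sketch of how that priority-order assembly could look. This is an illustration only, not the actual build_instructions from the repo; the section tags and the greedy skip-if-it-does-not-fit strategy are assumptions:

use tiktoken_rs::p50k_base;

// Hypothetical sketch: append sections in priority order, skipping any
// section that would push the prompt past the token budget.
fn build_prompt_by_priority(sections: &[(&str, &str)], context_size: usize) -> String {
    let bpe = p50k_base().unwrap();
    let mut prompt = String::new();
    for (name, content) in sections {
        let candidate = format!("{}<{}>\n{}\n</{}>\n", prompt, name, content, name);
        // Keep the section only if the whole prompt still fits the budget.
        if bpe.encode_with_special_tokens(&candidate).len() <= context_size {
            prompt = candidate;
        }
    }
    prompt
}

Called with sections ordered most-important-first (instructions, tools, previous messages, then file contents and retrieval chunks), this reproduces what the test asserts: low-priority sections are the first to be dropped when the budget is tight.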

Next is to drop the oldest messages when the limit is hit, where it makes sense to do so. This should probably be configurable, since in some use cases the conversation history matters more than the other context.
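A rough sketch of what that trimming could look like, assuming messages are stored oldest-first and counted with the same tokenizer (the function name and shape are mine, not the repo's):

use tiktoken_rs::p50k_base;

// Hypothetical sketch: drop the oldest messages until the history fits
// the token budget, keeping the most recent turns intact.
fn trim_old_messages(messages: &[String], budget: usize) -> Vec<String> {
    let bpe = p50k_base().unwrap();
    let mut kept: Vec<String> = Vec::new();
    let mut used = 0;
    // Walk from newest to oldest, keeping messages while they fit.
    for msg in messages.iter().rev() {
        let cost = bpe.encode_with_special_tokens(msg).len();
        if used + cost > budget {
            break;
        }
        used += cost;
        kept.push(msg.clone());
    }
    kept.reverse(); // restore chronological order
    kept
}

Whether conversation history outranks file contents or retrieval chunks could then just be a configurable ordering of the priority list.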