run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License

[Bug]: DashScope default setting cause prompt repack issue #14722

Open PhantomGrapes opened 1 month ago

PhantomGrapes commented 1 month ago

Bug Description

The dashscope llm module has the following default settings:

DASHSCOPE_MODEL_META = {
    DashScopeGenerationModels.QWEN_TURBO: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_PLUS: {
        "context_window": 1024 * 32,
        "num_output": 1024 * 32,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX_1201: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT: {
        "context_window": 1024 * 30,
        "num_output": 1024 * 30,
        "is_chat_model": True,
    },
}

where context_window is equal to num_output.

With these defaults, any non-empty prompt triggers a ValueError in prompt_helper:

    def _get_available_context_size(self, num_prompt_tokens: int) -> int:
        """Get available context size.

        This is calculated as:
            available context window = total context window
                - input (partially filled prompt)
                - output (room reserved for response)

        Notes:
        - Available context size is further clamped to be non-negative.
        """
        context_size_tokens = self.context_window - num_prompt_tokens - self.num_output
        if context_size_tokens < 0:
            raise ValueError(
                f"Calculated available context size {context_size_tokens} was"
                " not non-negative."
            )
        return context_size_tokens

Since self.context_window equals self.num_output, there is no room left for num_prompt_tokens.
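
Concretely, with the QWEN_MAX defaults above the arithmetic can never work out for a non-empty prompt. A minimal sketch using only llama-index-core (numbers chosen to mirror the traceback below):

from llama_index.core.indices.prompt_helper import PromptHelper

# QWEN_MAX defaults: context_window == num_output == 8192
helper = PromptHelper(context_window=1024 * 8, num_output=1024 * 8)

# 8192 (window) - 75 (prompt tokens) - 8192 (reserved for output) = -75
helper._get_available_context_size(75)
# ValueError: Calculated available context size -75 was not non-negative.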

Version

0.10.55

Steps to Reproduce

pip install llama-index-core
pip install llama-index-indices-managed-dashscope
import os
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels

os.environ["DASHSCOPE_API_KEY"] = "sk-xxx"

llm = DashScope(
    model_name=DashScopeGenerationModels.QWEN_MAX,
    max_tokens=None,
    incremental_output=False,
)
index = DashScopeCloudIndex(name='my_index')
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("test query")
print(response)

Relevant Logs/Tracebacks

Traceback (most recent call last):
  File "/mnt/workspace/nas-alinlp/ada.drx/search_llm/scripts/llama-index-dashscope-cloud/test/test_cust1.py", line 13, in <module>
    response = query_engine.query("test query")
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/instrumentation/dispatcher.py", line 274, in wrapper
    result = func(*args, **kwargs)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/base/base_query_engine.py", line 53, in query
    query_result = self._query(str_or_query_bundle)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/instrumentation/dispatcher.py", line 274, in wrapper
    result = func(*args, **kwargs)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/query_engine/retriever_query_engine.py", line 190, in _query
    response = self._response_synthesizer.synthesize(
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/instrumentation/dispatcher.py", line 274, in wrapper
    result = func(*args, **kwargs)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/response_synthesizers/base.py", line 242, in synthesize
    response_str = self.get_response(
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/instrumentation/dispatcher.py", line 274, in wrapper
    result = func(*args, **kwargs)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/response_synthesizers/compact_and_refine.py", line 42, in get_response
    new_texts = self._make_compact_text_chunks(query_str, text_chunks)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/response_synthesizers/compact_and_refine.py", line 57, in _make_compact_text_chunks
    return self._prompt_helper.repack(max_prompt, text_chunks)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/indices/prompt_helper.py", line 276, in repack
    text_splitter = self.get_text_splitter_given_prompt(
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/indices/prompt_helper.py", line 234, in get_text_splitter_given_prompt
    chunk_size = self._get_available_chunk_size(
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/indices/prompt_helper.py", line 218, in _get_available_chunk_size
    available_context_size = self._get_available_context_size(num_prompt_tokens)
  File "/mnt/workspace/nas-alinlp/ada.drx/py_llama1034/lib/python3.10/site-packages/llama_index/core/indices/prompt_helper.py", line 150, in _get_available_context_size
    raise ValueError(
ValueError: Calculated available context size -75 was not non-negative.

dosubot[bot] commented 1 month ago

To resolve the ValueError in the prompt_helper module caused by the default settings of DASHSCOPE_MODEL_META, you need to ensure that context_window is at least as large as the sum of num_prompt_tokens and num_output. The current defaults set context_window equal to num_output, which leaves no space for num_prompt_tokens.

Here's how you can adjust the DASHSCOPE_MODEL_META settings to avoid this issue:

  1. Update the DASHSCOPE_MODEL_META dictionary: Ensure that context_window is sufficiently larger than num_output to accommodate the prompt tokens.
DASHSCOPE_MODEL_META = {
    DashScopeGenerationModels.QWEN_TURBO: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 6,  # Adjusted to leave space for prompt tokens
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_PLUS: {
        "context_window": 1024 * 32,
        "num_output": 1024 * 30,  # Adjusted to leave space for prompt tokens
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 6,  # Adjusted to leave space for prompt tokens
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX_1201: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 6,  # Adjusted to leave space for prompt tokens
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT: {
        "context_window": 1024 * 30,
        "num_output": 1024 * 28,  # Adjusted to leave space for prompt tokens
        "is_chat_model": True,
    },
}
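
If editing the installed package is not an option, a similar effect can be achieved by overriding the entry for the model you use at runtime. A minimal sketch, assuming DASHSCOPE_MODEL_META is importable from the module that defines DashScope (verify the exact path in your installed version):

from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
# Assumption: the meta dict lives next to DashScope; adjust the import if your version differs.
from llama_index.llms.dashscope.base import DASHSCOPE_MODEL_META

# Reserve most of QWEN_MAX's 8k window for the prompt rather than the response.
DASHSCOPE_MODEL_META[DashScopeGenerationModels.QWEN_MAX]["num_output"] = 1024 * 2

llm = DashScope(
    model_name=DashScopeGenerationModels.QWEN_MAX,
    incremental_output=False,
)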
  2. Modify the DashScope class to validate model_name: Ensure that the model_name used is valid and exists in the DASHSCOPE_MODEL_META dictionary.
class DashScope(CustomLLM):
    # ... other code ...

    @property
    def metadata(self) -> LLMMetadata:
        if self.model_name not in DASHSCOPE_MODEL_META:
            raise ValueError(f"Model name '{self.model_name}' is not valid. Please choose from {list(DASHSCOPE_MODEL_META.keys())}.")

        DASHSCOPE_MODEL_META[self.model_name]["num_output"] = (
            self.max_tokens or DASHSCOPE_MODEL_META[self.model_name]["num_output"]
        )
        return LLMMetadata(
            model_name=self.model_name, **DASHSCOPE_MODEL_META[self.model_name]
        )

    # ... other code ...
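
Because the metadata property above lets max_tokens take precedence over the dictionary value, an explicit max_tokens is also a workable stopgap on the caller side, for example:

# Caps the reported num_output at 1024, leaving roughly 7k of QWEN_MAX's 8k window for the prompt.
llm = DashScope(
    model_name=DashScopeGenerationModels.QWEN_MAX,
    max_tokens=1024,
    incremental_output=False,
)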
  3. Ensure environment variables are set correctly: Make sure the environment variables DASHSCOPE_WORKSPACE_ID, DASHSCOPE_API_KEY, and DASHSCOPE_BASE_URL are correctly set, as these are required for the DashScopeCloudIndex class to function properly.
class DashScopeCloudIndex(BaseManagedIndex):
    """DashScope Cloud Platform Index."""

    def __init__(
        self,
        name: str,
        nodes: Optional[List[BaseNode]] = None,
        transformations: Optional[List[TransformComponent]] = None,
        timeout: int = 60,
        workspace_id: Optional[str] = None,
        api_key: Optional[str] = None,
        base_url: Optional[str] = DASHSCOPE_DEFAULT_BASE_URL,
        show_progress: bool = False,
        callback_manager: Optional[CallbackManager] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the Platform Index."""
        self.name = name
        self.transformations = transformations or []

        if nodes is not None:
            raise ValueError(
                "DashScopeCloudIndex does not support nodes on initialization"
            )

        self.workspace_id = workspace_id or os.environ.get("DASHSCOPE_WORKSPACE_ID")
        self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY")
        self._base_url = os.environ.get("DASHSCOPE_BASE_URL", None) or base_url
        self._headers = {
            "Content-Type": "application/json",
            "Accept-Encoding": "utf-8",
            "X-DashScope-WorkSpace": self.workspace_id,
            "Authorization": "Bearer " + self._api_key,
            "X-DashScope-OpenAPISource": "CloudSDK",
        }
        self._timeout = timeout
        self._show_progress = show_progress
        self._service_context = None
        self._callback_manager = callback_manager or Settings.callback_manager

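For reference, a minimal environment setup to go with the constructor above (placeholder values; DASHSCOPE_BASE_URL is optional and falls back to DASHSCOPE_DEFAULT_BASE_URL):

import os
from llama_index.indices.managed.dashscope import DashScopeCloudIndex

# Read by DashScopeCloudIndex.__init__ above; replace the placeholders with your own values.
os.environ["DASHSCOPE_API_KEY"] = "sk-xxx"
os.environ["DASHSCOPE_WORKSPACE_ID"] = "ws-xxx"
# os.environ["DASHSCOPE_BASE_URL"] = "..."  # only if you need a non-default endpoint

index = DashScopeCloudIndex(name="my_index")
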
By making these adjustments, you should be able to avoid the ValueError in the prompt_helper module [1][2].
