Arize-ai / phoenix

AI Observability & Evaluation
https://docs.arize.com/phoenix

inconsistent opentelemetry versions #5177

Closed: pavanjava closed this issue 2 weeks ago

pavanjava commented 2 weeks ago

**Describe the bug**
When integrating LlamaIndex with Arize Phoenix, the integration fails with `ModuleNotFoundError: No module named 'opentelemetry.semconv.attributes'`.

**To Reproduce**
Steps to reproduce the behavior:

1. Follow the documentation for the integration; the error above is raised. (A minimal sketch of the registration steps is shown below.)
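For reference, the registration boils down to the two calls below (taken from the full script further down; that `register()` defaults to a local Phoenix instance is an assumption):

```python
from phoenix.otel import register
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor

# register() sets up an OTLP tracer provider (by default pointed at a local
# Phoenix instance); the import of LlamaIndexInstrumentor is already enough
# to trigger the ModuleNotFoundError reported above
tracer_provider = register()
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
```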

Expected behavior integration should log the traces into phoenix webui.

**Screenshots**

[Screenshot 2024-10-24 at 10:49:22 PM]

**Environment (please complete the following information):**

**Additional context**
Below is the `requirements.txt`:

```
llama-index==0.11.19
qdrant-client==1.12.0
pydantic==2.9.2
litserve==0.2.3
deepeval==1.4.4
llama-index-llms-openai==0.2.16
llama-index-llms-ollama==0.3.4
llama-index-embeddings-openai==0.2.5
llama-index-embeddings-ollama==0.3.1
llama-index-vector-stores-qdrant==0.3.2
arize-phoenix==5.5.2
arize-phoenix-otel==0.5.1
openinference-instrumentation-llama-index==3.0.2
opentelemetry-proto==1.24.0
```
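Since the title points at inconsistent OpenTelemetry versions, it is worth printing what pip actually resolved; a small stdlib-only check (the distribution names below are the usual OTel packages, and whether each one is installed in this environment is an assumption):

```python
from importlib.metadata import version, PackageNotFoundError

# print the installed version of each OpenTelemetry distribution, if any
for dist in (
    "opentelemetry-api",
    "opentelemetry-sdk",
    "opentelemetry-proto",
    "opentelemetry-semantic-conventions",
    "opentelemetry-instrumentation",
):
    try:
        print(dist, version(dist))
    except PackageNotFoundError:
        print(dist, "not installed")
```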

**The LlamaIndex Script**

```
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    Settings
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.agent import ReActAgent
from llama_index.llms.ollama import Ollama
from llama_index.core.base.response.schema import Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
from rag_evaluator import RAGEvaluator
from dotenv import load_dotenv, find_dotenv
from typing import Union
from phoenix.otel import register
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
import phoenix as px
import qdrant_client
import logging
import os

_ = load_dotenv(find_dotenv())

logging.basicConfig(level=logging.INFO)  # assumes the standard INFO level was intended
logger = logging.getLogger(__name__)

# instrument observability: register a tracer provider with Phoenix and hook
# LlamaIndex so its calls are traced
tracer_provider = register()
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

class ReActWithQueryEngine:
    RESPONSE_TYPE = Union[
        Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
    ]

    def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100,
                 show_progress: bool = False, no_of_iterations: int = 5, required_exts: list[str] = ['.pdf', '.txt']):
        self.index_loaded = False
        self.similarity_top_k = similarity_top_k
        self.input_dir = input_dir
        self._index = None
        self._engine = None
        self.agent: ReActAgent | None = None  # populated by _create_react_agent()
        self.query_engine_tools = []
        self.show_progress = show_progress
        self.no_of_iterations = no_of_iterations
        self.required_exts = required_exts

        # use your preferred vector embeddings model
        logger.info("initializing the OllamaEmbedding")
        embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'],
                                      base_url=os.environ['OLLAMA_BASE_URL'])
        # openai embeddings, embedding_model_name="text-embedding-3-large"
        # embed_model = OpenAIEmbedding(embed_batch_size=10, model=embedding_model_name)

        # use your preferred LLM
        llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], base_url=os.environ['OLLAMA_BASE_URL'], request_timeout=600)
        # llm = OpenAI(model="gpt-4o")

        logger.info("initializing the global settings")
        Settings.embed_model = embed_model
        Settings.llm = llm
        Settings.chunk_size = chunk_size
        Settings.chunk_overlap = chunk_overlap

        self.rag_evaluator = RAGEvaluator()

        # Create a local Qdrant vector store
        logger.info("initializing the vector store related objects")
        self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['DB_URL'],
                                                                             api_key=os.environ['DB_API_KEY'])
        self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])
        self._load_data_and_create_engine()

    def _load_data_and_create_engine(self):
        if self.client.collection_exists(collection_name=os.environ['COLLECTION_NAME']):
            try:
                self._index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)
                self.index_loaded = True
            except Exception as e:
                logger.error(f'failed to load the index from the existing collection: {e}')
                self.index_loaded = False

        if not self.index_loaded:
            # load data
            _docs = (SimpleDirectoryReader(input_dir=self.input_dir, required_exts=self.required_exts)
                     .load_data(show_progress=self.show_progress))

            # build and persist index
            storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
            logger.info("indexing the docs in VectorStoreIndex")
            self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context,
                                                          show_progress=self.show_progress)

        self._engine = self._index.as_query_engine(similarity_top_k=self.similarity_top_k)
        self._create_query_engine_tools()

    def _create_query_engine_tools(self):
        # more tools can be appended here as required
        self.query_engine_tools.append(
            QueryEngineTool(
                query_engine=self._engine,
                metadata=ToolMetadata(
                    name="test_tool_engine",  # change this accordingly
                    description=(
                        "Provides information about user query based on the information that you have. "
                        "Use a detailed plain text question as input to the tool."
                    ),
                ),
            )
        )
        self._create_react_agent()

    def _create_react_agent(self):
        # [Optional] Add Context
        # context = """\
        # You are a stock market sorcerer who is an expert on the companies Lyft and Uber.\
        #     You will answer questions about Uber and Lyft as in the persona of a sorcerer \
        #     and veteran stock market investor.
        # """
        try:
            self.agent = ReActAgent.from_tools(
                self.query_engine_tools,
                llm=Settings.llm,
                verbose=True,
                # context=context
                max_iterations=self.no_of_iterations
            )
        except Exception as e:
            logger.error(e)

    def query(self, user_query: str) -> RESPONSE_TYPE:
        try:
            response = self.agent.query(str_or_query_bundle=user_query)
            if os.environ.get('IS_EVALUATION_NEEDED') == 'true':
                self.rag_evaluator.evaluate(user_query=user_query, response_obj=response)
            return response
        except Exception as e:
            logger.error(f'Error while generating response: {e}')
```
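For completeness, a hypothetical driver for the class above (the `./data` directory and the question are placeholders; Qdrant, Ollama, Phoenix, and the environment variables used in the script are assumed to be in place):

```python
if __name__ == "__main__":
    engine = ReActWithQueryEngine(input_dir="./data", show_progress=True)
    print(engine.query("Summarize the indexed documents."))
```
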
axiomofjoy commented 2 weeks ago

Hi @pavanjava, thanks for reporting. It looks like you're missing an OTel dependency. You can install it with `pip install opentelemetry-semantic-conventions`. Thanks!
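If installing that package alone does not resolve it, the mismatch may be between release trains; one sketch of pinning the core OTel packages to a single train (the exact versions below are an assumption, so check which api/sdk/semconv releases are published together for your stack):

```
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-proto==1.27.0
opentelemetry-semantic-conventions==0.48b0
```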

axiomofjoy commented 2 weeks ago

Please let us know if the documentation needs to be updated!

pavanjava commented 2 weeks ago

I have added the recommended module, but still no luck.

My current pins:

```
arize-phoenix==5.5.2
openinference-instrumentation-llama-index==2.2.4
opentelemetry-proto==1.24.0
arize-phoenix-otel==0.5.1
opentelemetry-semantic-conventions
```

Error:

```
$ python main.py
Traceback (most recent call last):
  File "/Users/pavanmantha/Desktop/react_rag/main.py", line 2, in <module>
    from react_agent_with_query_engine import ReActWithQueryEngine
  File "/Users/pavanmantha/Desktop/react_rag/react_agent_with_query_engine.py", line 16, in <module>
    from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
  File "/Users/pavanmantha/Desktop/react_rag/venv/lib/python3.10/site-packages/openinference/instrumentation/llama_index/__init__.py", line 8, in <module>
    from opentelemetry.instrumentation.instrumentor import BaseInstrumentor  # type: ignore
  File "/Users/pavanmantha/Desktop/react_rag/venv/lib/python3.10/site-packages/opentelemetry/instrumentation/instrumentor.py", line 24, in <module>
    from opentelemetry.instrumentation._semconv import (
  File "/Users/pavanmantha/Desktop/react_rag/venv/lib/python3.10/site-packages/opentelemetry/instrumentation/_semconv.py", line 20, in <module>
    from opentelemetry.semconv.attributes.client_attributes import (
ModuleNotFoundError: No module named 'opentelemetry.semconv.attributes'
```
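Reading the traceback: `opentelemetry-instrumentation` (pulled in by the OpenInference instrumentor) imports `opentelemetry.semconv.attributes`, a module that only exists in newer `opentelemetry-semantic-conventions` releases, so the pin `opentelemetry-proto==1.24.0` may be holding the rest of the OTel family on an older train (my reading of the versions involved, not a confirmed diagnosis). A quick check of whether the installed package has that module:

```python
import importlib.util

# None means the installed opentelemetry-semantic-conventions predates the
# opentelemetry.semconv.attributes module that the instrumentor needs
print(importlib.util.find_spec("opentelemetry.semconv.attributes"))
```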

pavanjava commented 2 weeks ago

> Please let us know if the documentation needs to be updated!

The issue is not resolved, yet the ticket has been closed...