[Bug]: Unpickled Index Returns "AttributeError: _llm" when Queried

nickjtay commented 10 months ago

Bug Description

I placed the code below in full to make it easy to understand what I did in order to replicate the problem. The first snippet is the loading, indexing, and storing. The second snippet is reloading the index into memory and querying against it.

I wrote the script to create an index and store it as a pickle file in Azure Blob Storage, so that I could retrieve it from the front-end app when the user sends a prompt.

Here is that code:

! pip install llama_index
! pip install azure-identity
! pip install docx2txt
! pip install pypdf

from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
import logging
import sys
from llama_index import set_global_service_context
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    LLMPredictor,
    load_index_from_storage,
)
import pickle
import io

storage_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
storage_url = "https://XXXXXX.blob.core.windows.net/"

container_client = ContainerClient(account_url=storage_url, 
    credential=storage_key, 
    container_name="powerpoints3")

loader = AzStorageBlobReader(account_url=storage_url, 
    credential=storage_key, 
    container_name="powerpoints3")

documents = loader.load_data()

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

api_key = "XXXXXXXXXXXXXX"
endpoint = "https://XXXXXXXXX.openai.azure.com/"
deployment = "XXXXXXXXX"
api_version="2023-09-01-preview"

llm = AzureOpenAI(
    model = "gpt-35-turbo",
    deployment_name=deployment,
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=endpoint
)

embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="testembedding",
    api_key=api_key,
    azure_endpoint=endpoint,
    api_version=api_version,
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

set_global_service_context(service_context)

index_name = "doc_idx"
if os.path.exists(index_name):
    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=index_name),
        service_context=service_context,
    )
else:
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=index_name)

container_name = "xxxxxxxxxxx"
storage_account_name = "xxxxxxxxxxx"
storage_key = "xxxxxxxxxxxxxx"

pickled_object = pickle.dumps(index)
blob_service_client = BlobServiceClient(account_url=f"https://{storage_account_name}.blob.core.windows.net", credential=storage_key)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=index_name)
blob_client.upload_blob(io.BytesIO(pickled_object), overwrite=True)

To test it, I used the same notebook I used to pickle and store the index, but I reset the kernel. Next, I unpickled the vector store index and reinstantiated the LLM, before querying.

Here's that code:

from llama_index import set_global_service_context
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
import logging
import sys
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import pickle
import io

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

api_key = "XXXXXXXXXXXXX"
endpoint = "https://XXXXXXXXXx.openai.azure.com/"
deployment = "XXXXXXXXXXx"
api_version="2023-09-01-preview"

llm = AzureOpenAI(
    model = "gpt-35-turbo",
    deployment_name=deployment,
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=endpoint
)

embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="testembedding",
    api_key=api_key,
    azure_endpoint=endpoint,
    api_version=api_version,
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

set_global_service_context(service_context)

index_name = "doc_idx"
container_name = "XXXXXXx"
storage_account_name = "XXXXXXXx"
storage_key = "XXXXXXXXXXXXXXXX"

blob_service_client = BlobServiceClient(account_url=f"https://{storage_account_name}.blob.core.windows.net", credential=storage_key)

blob_client = blob_service_client.get_blob_client(container=container_name, blob=index_name)

pickled_index = blob_client.download_blob().readall()
index = pickle.loads(pickled_index)

query = "Who is widget company? What was our pitch to them?"
query_engine = index.as_query_engine()
answer = query_engine.query(query)

This returned the error message I reported:

{
    "name": "AttributeError",
    "message": "_llm",
    "stack": "---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[9], line 3
      1 query = \"Who is champlain investment partners. What was our pitch to them?\"
      2 query_engine = index.as_query_engine()
----> 3 answer = query_engine.query(query)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/core/base_query_engine.py:30, in BaseQueryEngine.query(self, str_or_query_bundle)
     28 if isinstance(str_or_query_bundle, str):
     29     str_or_query_bundle = QueryBundle(str_or_query_bundle)
---> 30 return self._query(str_or_query_bundle)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/query_engine/retriever_query_engine.py:171, in RetrieverQueryEngine._query(self, query_bundle)
    167 with self.callback_manager.event(
    168     CBEventType.QUERY, payload={EventPayload.QUERY_STR: query_bundle.query_str}
    169 ) as query_event:
    170     nodes = self.retrieve(query_bundle)
--> 171     response = self._response_synthesizer.synthesize(
    172         query=query_bundle,
    173         nodes=nodes,
    174     )
    176     query_event.on_end(payload={EventPayload.RESPONSE: response})
    178 return response

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/base.py:146, in BaseSynthesizer.synthesize(self, query, nodes, additional_source_nodes, **response_kwargs)
    141     query = QueryBundle(query_str=query)
    143 with self._callback_manager.event(
    144     CBEventType.SYNTHESIZE, payload={EventPayload.QUERY_STR: query.query_str}
    145 ) as event:
--> 146     response_str = self.get_response(
    147         query_str=query.query_str,
    148         text_chunks=[
    149             n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes
    150         ],
    151         **response_kwargs,
    152     )
    154     additional_source_nodes = additional_source_nodes or []
    155     source_nodes = list(nodes) + list(additional_source_nodes)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/compact_and_refine.py:38, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
     34 # use prompt helper to fix compact text_chunks under the prompt limitation
     35 # TODO: This is a temporary fix - reason it's temporary is that
     36 # the refine template does not account for size of previous answer.
     37 new_texts = self._make_compact_text_chunks(query_str, text_chunks)
---> 38 return super().get_response(
     39     query_str=query_str,
     40     text_chunks=new_texts,
     41     prev_response=prev_response,
     42     **response_kwargs,
     43 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:146, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
    142 for text_chunk in text_chunks:
    143     if prev_response is None:
    144         # if this is the first chunk, and text chunk already
    145         # is an answer, then return it
--> 146         response = self._give_response_single(
    147             query_str, text_chunk, **response_kwargs
    148         )
    149     else:
    150         # refine response if possible
    151         response = self._refine_response_single(
    152             prev_response, query_str, text_chunk, **response_kwargs
    153         )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:194, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs)
    189 text_chunks = self._service_context.prompt_helper.repack(
    190     text_qa_template, [text_chunk]
    191 )
    193 response: Optional[RESPONSE_TEXT_TYPE] = None
--> 194 program = self._program_factory(text_qa_template)
    195 # TODO: consolidate with loop in get_response_default
    196 for cur_text_chunk in text_chunks:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:177, in Refine._default_program_factory(self, prompt)
    168     return get_program_for_llm(
    169         StructuredRefineResponse,
    170         prompt,
    171         self._service_context.llm,
    172         verbose=self._verbose,
    173     )
    174 else:
    175     return DefaultRefineProgram(
    176         prompt=prompt,
--> 177         llm=self._service_context.llm,
    178         output_cls=self._output_cls,
    179     )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:322, in ServiceContext.llm(self)
    320 @property
    321 def llm(self) -> LLM:
--> 322     return self.llm_predictor.llm

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/llm_predictor/base.py:143, in LLMPredictor.llm(self)
    140 @property
    141 def llm(self) -> LLM:
    142     \"\"\"Get LLM.\"\"\"
--> 143     return self._llm

AttributeError: _llm"
}

Version

Current

Steps to Reproduce

Running the first snippet of code, then resetting your kernel and running the second snippet of code should replicate the problem. Alternative approaches would also be helpful, such as how to store an index in Azure storage and retrieve it later for use with Azure OpenAI in the front-end app.

Relevant Logs/Tracebacks

{
    "name": "AttributeError",
    "message": "_llm",
    "stack": "---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[9], line 3
      1 query = \"Who is champlain investment partners. What was our pitch to them?\"
      2 query_engine = index.as_query_engine()
----> 3 answer = query_engine.query(query)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/core/base_query_engine.py:30, in BaseQueryEngine.query(self, str_or_query_bundle)
     28 if isinstance(str_or_query_bundle, str):
     29     str_or_query_bundle = QueryBundle(str_or_query_bundle)
---> 30 return self._query(str_or_query_bundle)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/query_engine/retriever_query_engine.py:171, in RetrieverQueryEngine._query(self, query_bundle)
    167 with self.callback_manager.event(
    168     CBEventType.QUERY, payload={EventPayload.QUERY_STR: query_bundle.query_str}
    169 ) as query_event:
    170     nodes = self.retrieve(query_bundle)
--> 171     response = self._response_synthesizer.synthesize(
    172         query=query_bundle,
    173         nodes=nodes,
    174     )
    176     query_event.on_end(payload={EventPayload.RESPONSE: response})
    178 return response

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/base.py:146, in BaseSynthesizer.synthesize(self, query, nodes, additional_source_nodes, **response_kwargs)
    141     query = QueryBundle(query_str=query)
    143 with self._callback_manager.event(
    144     CBEventType.SYNTHESIZE, payload={EventPayload.QUERY_STR: query.query_str}
    145 ) as event:
--> 146     response_str = self.get_response(
    147         query_str=query.query_str,
    148         text_chunks=[
    149             n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes
    150         ],
    151         **response_kwargs,
    152     )
    154     additional_source_nodes = additional_source_nodes or []
    155     source_nodes = list(nodes) + list(additional_source_nodes)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/compact_and_refine.py:38, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
     34 # use prompt helper to fix compact text_chunks under the prompt limitation
     35 # TODO: This is a temporary fix - reason it's temporary is that
     36 # the refine template does not account for size of previous answer.
     37 new_texts = self._make_compact_text_chunks(query_str, text_chunks)
---> 38 return super().get_response(
     39     query_str=query_str,
     40     text_chunks=new_texts,
     41     prev_response=prev_response,
     42     **response_kwargs,
     43 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:146, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
    142 for text_chunk in text_chunks:
    143     if prev_response is None:
    144         # if this is the first chunk, and text chunk already
    145         # is an answer, then return it
--> 146         response = self._give_response_single(
    147             query_str, text_chunk, **response_kwargs
    148         )
    149     else:
    150         # refine response if possible
    151         response = self._refine_response_single(
    152             prev_response, query_str, text_chunk, **response_kwargs
    153         )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:194, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs)
    189 text_chunks = self._service_context.prompt_helper.repack(
    190     text_qa_template, [text_chunk]
    191 )
    193 response: Optional[RESPONSE_TEXT_TYPE] = None
--> 194 program = self._program_factory(text_qa_template)
    195 # TODO: consolidate with loop in get_response_default
    196 for cur_text_chunk in text_chunks:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:177, in Refine._default_program_factory(self, prompt)
    168     return get_program_for_llm(
    169         StructuredRefineResponse,
    170         prompt,
    171         self._service_context.llm,
    172         verbose=self._verbose,
    173     )
    174 else:
    175     return DefaultRefineProgram(
    176         prompt=prompt,
--> 177         llm=self._service_context.llm,
    178         output_cls=self._output_cls,
    179     )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:322, in ServiceContext.llm(self)
    320 @property
    321 def llm(self) -> LLM:
--> 322     return self.llm_predictor.llm

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/llm_predictor/base.py:143, in LLMPredictor.llm(self)
    140 @property
    141 def llm(self) -> LLM:
    142     \"\"\"Get LLM.\"\"\"
--> 143     return self._llm

AttributeError: _llm"
}

dosubot[bot] commented 10 months ago

🤖

Hey @nickjtay! Nice to meet you over here on GitHub. I'm Dosu, a friendly bot, and I'm here to help you sort out bugs, answer questions, and help you become a contributor! While we're waiting for a human maintainer, I'm here to assist. Don't hesitate to ask any questions!

Based on the error message you provided, it seems like the issue is related to the Language Model (LLM) not being properly initialized or referenced in the ServiceContext. This is likely causing the "AttributeError: _llm" when you're trying to unpickle a VectorStoreIndex and query against it using the llama_index library.

In the LlamaIndex codebase, the LLM is called in the _retrieve method of the VectorIndexAutoRetriever class. Here, self._service_context.llm is trying to access the LLM from the ServiceContext. If the LLM is not properly initialized in the ServiceContext, or if the ServiceContext is not correctly set up, it could lead to the "AttributeError: _llm" error.

To resolve this issue, ensure that the LLM is correctly initialized and referenced in the ServiceContext. If you're unpickling a VectorStoreIndex, you might need to re-initialize the ServiceContext and the LLM after unpickling, as these objects might not be correctly restored during the unpickling process.

Here's an example of how you might re-initialize the ServiceContext and the LLM:

from llama_index import set_global_service_context
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import pickle
import io

# ... (code to set up Azure and llama_index)

index_name = "doc_idx"
blob_service_client = BlobServiceClient(account_url=f"https://{storage_account_name}.blob.core.windows.net", credential=storage_key)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=index_name)
pickled_index = blob_client.download_blob().readall()
index = pickle.loads(pickled_index)

# Re-initialize the ServiceContext and the LLM
service_context = ServiceContext(
    llm=AzureOpenAI(),
    embedding_model=AzureOpenAIEmbedding(),
)
set_global_service_context(service_context)

query = "Who is widget company? What was our pitch to them?"
query_engine = index.as_query_engine()
answer = query_engine.query(query)

Please try this and let me know if it resolves your issue. If not, we can continue to investigate further.

Sources

This response is meant to be useful and save you time. It is not meant to be a precise solution, but rather a starting point for your own research.

Help me be more useful! Please leave a 👍 if this is helpful and 👎 if it is irrelevant.

If you want to continue the conversation, start your reply with @dosu-bot.

logan-markewich commented 10 months ago

I wouldn't rely on pickling these objects -- too many attributes are unpicklable. It's generally a very unreliable strategy.

But I did run a similar test, and it worked for me using the latest llama-index version

(llama-index-py3.11) (base) loganm@gamingpc:~/llama_index_proper/llama_index$ python
Python 3.11.0 (main, Mar  1 2023, 18:26:19) [GCC 11.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from llama_index import VectorStoreIndex, Document
>>> index = VectorStoreIndex.from_documents([Document.example()])
>>> import pickle
>>> data = pickle.dumps(index)
>>> new_index = pickle.loads(data)
>>> response = new_index.as_query_engine().query("Tell me something about LLMs?")
>>> print(response)
LLMs are a type of technology that is used for knowledge generation and reasoning. They are pre-trained on large amounts of publicly available data. To augment LLMs with our own private data, we need a comprehensive toolkit like LlamaIndex, which provides data connectors to ingest various data sources and formats, ways to structure the data for easy use with LLMs, and an advanced retrieval/query interface. LlamaIndex also allows for easy integrations with other application frameworks and provides tools for both beginner and advanced users.
>>>

nickjtay commented 10 months ago

Ah, perhaps that is the cause of the issue in my case. Would you recommend another approach using Azure? Should I be using a vector store like Cosmos? Or is there another workaround?

logan-markewich commented 10 months ago

I think it should actually work fine for Azure -- but I would make sure you actually have the latest version of llama-index.

Alternatively, you can convert the index's storage context to a dict, serialize and save that (for example as JSON), and later load it back to rebuild the index.

data = index.storage_context.to_dict()
json_str = json.dumps(data)
...

from llama_index import StorageContext, load_index_from_storage
storage_context = StorageContext.from_dict(json.loads(json_str))
loaded_index = load_index_from_storage(storage_context, service_context=service_context)

The last alternative is to use a vector store, which will store all the nodes in the vector store itself.

nickjtay commented 10 months ago

Ah, the above makes sense, thank you. I did update my notebook to store my embeddings in Pinecone. Assuming I changed the vector store references in the code below from chromadb to Pinecone, would it work? I took the example from here and removed the document loading and indexing, since I am separating the indexing and the querying processes.

Due to the abstraction of the library, I don't understand how the below would know which LLM was used since it is not a required argument in as_query_engine(). I believe I'm missing what is actually happening under the surface.

import chromadb
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext

db = chromadb.PersistentClient(path="./chroma_db")

chroma_collection = db.get_or_create_collection("quickstart")

vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

query_engine = index.as_query_engine()
response = query_engine.query("What is the meaning of life?")
print(response)

Do I need to set the global service context?

nickjtay commented 10 months ago

I have reinstalled from the main branch. My indexing finished, so I went ahead and took the above and replaced ChromaDB with Pinecone. The code below is failing on numpy issues, but again I am having difficulty understanding why. Since the indexing was successful, I thought loading would not present any problems.

import os
import logging
import sys
import pinecone
from openai import AzureOpenAI
import streamlit as st
from azure.storage.blob import (
    BlobServiceClient, 
    BlobClient, 
    ContainerClient
)
from llama_index import (
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    VectorStoreIndex,
    Document
)
from llama_index import set_global_service_context
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding

api_key = ""
endpoint = "https://XXXXXXXXXXXXXXXX.openai.azure.com/"
deployment = "testdeployment"
api_version="2023-09-01-preview"

llm = AzureOpenAI(
    model = "gpt-35-turbo",
    deployment_name=deployment,
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=endpoint
)

embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="testembedding",
    api_key=api_key,
    azure_endpoint=endpoint,
    api_version=api_version,
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

set_global_service_context(service_context)

index_name = "docidx"
api_key = "xxxxxxxxxxxxx"

pinecone.init(api_key=api_key, environment="gcp-starter")
pinecone_index = pinecone.Index(index_name)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context)

query_engine = index.as_query_engine()
response = query_engine.query("What was our pitch to Johnson & Johnson?")

/anaconda/envs/azureml_py38/lib/python3.8/site-packages/pinecone/index.py:4: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
  from tqdm.autonotebook import tqdm
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[34], line 44
     28 llm = AzureOpenAI(
     29     model = "gpt-35-turbo",
     30     deployment_name=deployment,
   (...)
     33     azure_endpoint=endpoint
     34 )
     36 embed_model = AzureOpenAIEmbedding(
     37     model="text-embedding-ada-002",
     38     deployment_name="testembedding",
   (...)
     41     api_version=api_version,
     42 )
---> 44 service_context = ServiceContext.from_defaults(
     45     llm=llm,
     46     embed_model=embed_model,
     47 )
     49 set_global_service_context(service_context)
     51 index_name = "docidx"

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:203, in ServiceContext.from_defaults(cls, llm_predictor, llm, prompt_helper, embed_model, node_parser, text_splitter, transformations, llama_logger, callback_manager, system_prompt, query_wrapper_prompt, pydantic_program_mode, chunk_size, chunk_overlap, context_window, num_output, chunk_size_limit)
    197 if text_splitter is not None and node_parser is not None:
    198     raise ValueError("Cannot specify both text_splitter and node_parser")
    200 node_parser = (
    201     text_splitter  # text splitter extends node parser
    202     or node_parser
--> 203     or _get_default_node_parser(
    204         chunk_size=chunk_size or DEFAULT_CHUNK_SIZE,
    205         chunk_overlap=chunk_overlap or SENTENCE_CHUNK_OVERLAP,
    206         callback_manager=callback_manager,
    207     )
    208 )
    210 transformations = transformations or [node_parser]
    212 llama_logger = llama_logger or LlamaLogger()

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:35, in _get_default_node_parser(chunk_size, chunk_overlap, callback_manager)
     29 def _get_default_node_parser(
     30     chunk_size: int = DEFAULT_CHUNK_SIZE,
     31     chunk_overlap: int = SENTENCE_CHUNK_OVERLAP,
     32     callback_manager: Optional[CallbackManager] = None,
     33 ) -> NodeParser:
     34     """Get default node parser."""
---> 35     return SentenceSplitter(
     36         chunk_size=chunk_size,
     37         chunk_overlap=chunk_overlap,
     38         callback_manager=callback_manager or CallbackManager(),
     39     )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/node_parser/text/sentence.py:84, in SentenceSplitter.__init__(self, separator, chunk_size, chunk_overlap, tokenizer, paragraph_separator, chunking_tokenizer_fn, secondary_chunking_regex, callback_manager, include_metadata, include_prev_next_rel)
     77     raise ValueError(
     78         f"Got a larger chunk overlap ({chunk_overlap}) than chunk size "
     79         f"({chunk_size}), should be smaller."
     80     )
     82 callback_manager = callback_manager or CallbackManager([])
     83 self._chunking_tokenizer_fn = (
---> 84     chunking_tokenizer_fn or split_by_sentence_tokenizer()
     85 )
     86 self._tokenizer = tokenizer or get_tokenizer()
     88 self._split_fns = [
     89     split_by_sep(paragraph_separator),
     90     self._chunking_tokenizer_fn,
     91 ]

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/node_parser/text/utils.py:40, in split_by_sentence_tokenizer()
     37 def split_by_sentence_tokenizer() -> Callable[[str], List[str]]:
     38     import os
---> 40     import nltk
     42     from llama_index.utils import get_cache_dir
     44     cache_dir = get_cache_dir()

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/__init__.py:146
    140 from nltk.jsontags import *
    142 ###########################################################
    143 # PACKAGES
    144 ###########################################################
--> 146 from nltk.chunk import *
    147 from nltk.classify import *
    148 from nltk.inference import *

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/chunk/__init__.py:155
      1 # Natural Language Toolkit: Chunkers
      2 #
      3 # Copyright (C) 2001-2023 NLTK Project
   (...)
      7 # For license information, see LICENSE.TXT
      8 #
     10 """
     11 Classes and interfaces for identifying non-overlapping linguistic
     12 groups (such as base noun phrases) in unrestricted text.  This task is
   (...)
    152      pattern is valid.
    153 """
--> 155 from nltk.chunk.api import ChunkParserI
    156 from nltk.chunk.regexp import RegexpChunkParser, RegexpParser
    157 from nltk.chunk.util import (
    158     ChunkScore,
    159     accuracy,
   (...)
    165     tree2conlltags,
    166 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/chunk/api.py:13
      1 # Natural Language Toolkit: Chunk parsing API
      2 #
      3 # Copyright (C) 2001-2023 NLTK Project
   (...)
     10 ##  Chunk Parser Interface
     11 ##//////////////////////////////////////////////////////
---> 13 from nltk.chunk.util import ChunkScore
     14 from nltk.internals import deprecated
     15 from nltk.parse import ParserI

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/chunk/util.py:12
      9 import re
     11 from nltk.metrics import accuracy as _accuracy
---> 12 from nltk.tag.mapping import map_tag
     13 from nltk.tag.util import str2tuple
     14 from nltk.tree import Tree

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/tag/__init__.py:70
     68 from nltk.tag.api import TaggerI
     69 from nltk.tag.util import str2tuple, tuple2str, untag
---> 70 from nltk.tag.sequential import (
     71     SequentialBackoffTagger,
     72     ContextTagger,
     73     DefaultTagger,
     74     NgramTagger,
     75     UnigramTagger,
     76     BigramTagger,
     77     TrigramTagger,
     78     AffixTagger,
     79     RegexpTagger,
     80     ClassifierBasedTagger,
     81     ClassifierBasedPOSTagger,
     82 )
     83 from nltk.tag.brill import BrillTagger
     84 from nltk.tag.brill_trainer import BrillTaggerTrainer

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/tag/sequential.py:26
     23 from typing import List, Optional, Tuple
     25 from nltk import jsontags
---> 26 from nltk.classify import NaiveBayesClassifier
     27 from nltk.probability import ConditionalFreqDist
     28 from nltk.tag.api import FeaturesetTaggerI, TaggerI

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/classify/__init__.py:97
     95 from nltk.classify.positivenaivebayes import PositiveNaiveBayesClassifier
     96 from nltk.classify.rte_classify import RTEFeatureExtractor, rte_classifier, rte_features
---> 97 from nltk.classify.scikitlearn import SklearnClassifier
     98 from nltk.classify.senna import Senna
     99 from nltk.classify.textcat import TextCat

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/classify/scikitlearn.py:38
     35 from nltk.probability import DictionaryProbDist
     37 try:
---> 38     from sklearn.feature_extraction import DictVectorizer
     39     from sklearn.preprocessing import LabelEncoder
     40 except ImportError:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/sklearn/feature_extraction/__init__.py:9
      7 from ._dict_vectorizer import DictVectorizer
      8 from ._hash import FeatureHasher
----> 9 from .image import img_to_graph, grid_to_graph
     10 from . import text
     12 __all__ = ['DictVectorizer', 'image', 'img_to_graph', 'grid_to_graph', 'text',
     13            'FeatureHasher']

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/sklearn/feature_extraction/image.py:167
    162     n_x, n_y, n_z = img.shape
    163     return _to_graph(n_x, n_y, n_z, mask, img, return_as, dtype)
    166 def grid_to_graph(n_x, n_y, n_z=1, mask=None, return_as=sparse.coo_matrix,
--> 167                   dtype=np.int):
    168     """Graph of the pixel-to-pixel connections
    169 
    170     Edges exist if 2 voxels are connected.
   (...)
    195     calls in ``np.asarray`` to avoid type issues.
    196     """
    197     return _to_graph(n_x, n_y, n_z, mask=mask, return_as=return_as,
    198                      dtype=dtype)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/numpy/__init__.py:305, in __getattr__(attr)
    300     warnings.warn(
    301         f"In the future `np.{attr}` will be defined as the "
    302         "corresponding NumPy scalar.", FutureWarning, stacklevel=2)
    304 if attr in __former_attrs__:
--> 305     raise AttributeError(__former_attrs__[attr])
    307 # Importing Tester requires importing all of UnitTest which is not a
    308 # cheap import Since it is mainly used in test suits, we lazy import it
    309 # here to save on the order of 10 ms of import time for most users
    310 #
    311 # The previous way Tester was imported also had a side effect of adding
    312 # the full `numpy.testing` namespace
    313 if attr == 'testing':

AttributeError: module 'numpy' has no attribute 'int'.
`np.int` was a deprecated alias for the builtin `int`. To avoid this error in existing code, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

nickjtay commented 10 months ago

Actually, after following these steps:

! pip install numpy==1.19.5
! pip install -U scikit-learn
! pip uninstall nltk -y
! pip install -U nltk

The old error message is no longer returned, but a new error message appears below. Do I need to reindex and reload with specific versions of the dependencies to prevent conflicts? It seems that dependency conflicts are occurring. Also, could issues arise if the dependencies used when indexing differ from the ones installed when the index is retrieved from storage?

Found existing installation: openai 1.3.8
Uninstalling openai-1.3.8:
  Successfully uninstalled openai-1.3.8
Collecting openai<2.0.0,>=1.0.0
  Using cached openai-1.3.8-py3-none-any.whl (221 kB)
Requirement already satisfied: distro<2,>=1.7.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (1.8.0)
Requirement already satisfied: sniffio in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (1.3.0)
Requirement already satisfied: tqdm>4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (4.65.0)
Requirement already satisfied: anyio<5,>=3.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (3.6.2)
Requirement already satisfied: typing-extensions<5,>=4.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (4.6.0)
Requirement already satisfied: httpx<1,>=0.23.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (0.25.2)
Requirement already satisfied: pydantic<3,>=1.9.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai<2.0.0,>=1.0.0) (1.10.8)
Requirement already satisfied: idna>=2.8 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.0.0) (3.4)
Requirement already satisfied: httpcore==1.* in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.0.0) (1.0.2)
Requirement already satisfied: certifi in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.0.0) (2022.9.24)
Requirement already satisfied: h11<0.15,>=0.13 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.0.0) (0.14.0)
Installing collected packages: openai
Successfully installed openai-1.3.8
Requirement already satisfied: streamlit in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (1.29.0)
Requirement already satisfied: cachetools<6,>=4.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (5.3.0)
Requirement already satisfied: click<9,>=7.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (8.1.3)
Requirement already satisfied: blinker<2,>=1.0.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (1.6.2)
Requirement already satisfied: python-dateutil<3,>=2.7.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (2.8.2)
Requirement already satisfied: altair<6,>=4.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (5.2.0)
Requirement already satisfied: validators<1,>=0.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (0.22.0)
Requirement already satisfied: watchdog>=2.1.5; platform_system != "Darwin" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (3.0.0)
Requirement already satisfied: protobuf<5,>=3.20 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (3.20.3)
Requirement already satisfied: pandas<3,>=1.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (2.0.3)
Requirement already satisfied: typing-extensions<5,>=4.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (4.6.0)
Requirement already satisfied: toml<2,>=0.10.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (0.10.2)
Requirement already satisfied: packaging<24,>=16.8 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (23.0)
Requirement already satisfied: requests<3,>=2.27 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (2.31.0)
Requirement already satisfied: pyarrow>=6.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (9.0.0)
Requirement already satisfied: pydeck<1,>=0.8.0b4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (0.8.1b0)
Requirement already satisfied: pillow<11,>=7.1.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (9.2.0)
Requirement already satisfied: importlib-metadata<7,>=1.4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (6.6.0)
Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (3.1.31)
Requirement already satisfied: rich<14,>=10.14.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (13.4.2)
Requirement already satisfied: tornado<7,>=6.0.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (6.3.2)
Requirement already satisfied: tzlocal<6,>=1.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (5.0.1)
Requirement already satisfied: tenacity<9,>=8.1.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (8.2.2)
Requirement already satisfied: numpy<2,>=1.19.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from streamlit) (1.21.6)
Requirement already satisfied: six>=1.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from python-dateutil<3,>=2.7.3->streamlit) (1.16.0)
Requirement already satisfied: jsonschema>=3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from altair<6,>=4.0->streamlit) (4.17.3)
Requirement already satisfied: jinja2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from altair<6,>=4.0->streamlit) (2.11.2)
Requirement already satisfied: toolz in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from altair<6,>=4.0->streamlit) (0.12.0)
Requirement already satisfied: pytz>=2020.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas<3,>=1.3.0->streamlit) (2022.5)
Requirement already satisfied: tzdata>=2022.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas<3,>=1.3.0->streamlit) (2023.3)
Requirement already satisfied: idna<4,>=2.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.27->streamlit) (3.4)
Requirement already satisfied: charset-normalizer<4,>=2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.27->streamlit) (3.1.0)
Requirement already satisfied: urllib3<3,>=1.21.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.27->streamlit) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.27->streamlit) (2022.9.24)
Requirement already satisfied: zipp>=0.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from importlib-metadata<7,>=1.4->streamlit) (3.12.0)
Requirement already satisfied: gitdb<5,>=4.0.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.10)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from rich<14,>=10.14.0->streamlit) (2.15.1)
Requirement already satisfied: markdown-it-py>=2.2.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from rich<14,>=10.14.0->streamlit) (2.2.0)
Requirement already satisfied: backports.zoneinfo; python_version < "3.9" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from tzlocal<6,>=1.1->streamlit) (0.2.1)
Requirement already satisfied: pkgutil-resolve-name>=1.3.10; python_version < "3.9" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (1.3.10)
Requirement already satisfied: attrs>=17.4.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (23.1.0)
Requirement already satisfied: importlib-resources>=1.4.0; python_version < "3.9" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (5.12.0)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.19.3)
Requirement already satisfied: MarkupSafe>=0.23 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from jinja2->altair<6,>=4.0->streamlit) (2.0.1)
Requirement already satisfied: smmap<6,>=3.0.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.0)
Requirement already satisfied: mdurl~=0.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit) (0.1.2)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/session_state.py:394, in SessionState.__getitem__(self, key)
    393 try:
--> 394     return self._getitem(widget_id, key)
    395 except KeyError:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/session_state.py:439, in SessionState._getitem(self, widget_id, user_key)
    438 # We'll never get here
--> 439 raise KeyError

KeyError: 

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/session_state_proxy.py:119, in SessionStateProxy.__getattr__(self, key)
    118 try:
--> 119     return self[key]
    120 except KeyError:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/session_state_proxy.py:90, in SessionStateProxy.__getitem__(self, key)
     89 require_valid_user_key(key)
---> 90 return get_session_state()[key]

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/safe_session_state.py:89, in SafeSessionState.__getitem__(self, key)
     88 with self._lock:
---> 89     return self._state[key]

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/session_state.py:396, in SessionState.__getitem__(self, key)
    395 except KeyError:
--> 396     raise KeyError(_missing_key_error_message(key))

KeyError: 'st.session_state has no key "messages". Did you forget to initialize it? More info: https://docs.streamlit.io/library/advanced-features/session-state#initialization'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
Cell In[9], line 25
     20 if "messages" not in st.session_state:
     21     st.session_state.messages = [
     22         {"role": "system", "content": "You are a helpful assistant."}
     23     ]
---> 25 for message in st.session_state.messages:
     26     with st.chat_message(message["role"]):
     27         st.markdown(message["content"])

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/streamlit/runtime/state/session_state_proxy.py:121, in SessionStateProxy.__getattr__(self, key)
    119     return self[key]
    120 except KeyError:
--> 121     raise AttributeError(_missing_attr_error_message(key))

AttributeError: st.session_state has no attribute "messages". Did you forget to initialize it? More info: https://docs.streamlit.io/library/advanced-features/session-state#initialization
Requirement already satisfied: llama_index in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (0.9.14.post3)
Requirement already satisfied: SQLAlchemy[asyncio]>=1.4.49 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (2.0.23)
Requirement already satisfied: typing-extensions>=4.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (4.6.0)
Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (3.9.1)
Requirement already satisfied: pandas in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (2.0.3)
Requirement already satisfied: deprecated>=1.2.9.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (1.2.14)
Requirement already satisfied: fsspec>=2023.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (2023.5.0)
Requirement already satisfied: tiktoken>=0.3.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (0.5.2)
Requirement already satisfied: typing-inspect>=0.8.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (0.9.0)
Requirement already satisfied: httpx in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (0.25.2)
Requirement already satisfied: requests>=2.31.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (2.31.0)
Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (1.5.8)
Requirement already satisfied: openai>=1.1.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (1.3.8)
Requirement already satisfied: numpy in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (1.24.4)
Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (4.12.2)
Requirement already satisfied: dataclasses-json in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (0.6.3)
Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (3.8.1)
Requirement already satisfied: tenacity<9.0.0,>=8.2.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama_index) (8.2.2)
Requirement already satisfied: greenlet!=0.4.17; platform_machine == "aarch64" or (platform_machine == "ppc64le" or (platform_machine == "x86_64" or (platform_machine == "amd64" or (platform_machine == "AMD64" or (platform_machine == "win32" or platform_machine == "WIN32"))))) in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama_index) (2.0.2)
Requirement already satisfied: multidict<7.0,>=4.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama_index) (6.0.4)
Requirement already satisfied: attrs>=17.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama_index) (23.1.0)
Requirement already satisfied: frozenlist>=1.1.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama_index) (1.3.3)
Requirement already satisfied: yarl<2.0,>=1.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama_index) (1.9.2)
Requirement already satisfied: aiosignal>=1.1.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama_index) (1.3.1)
Requirement already satisfied: async-timeout<5.0,>=4.0; python_version < "3.11" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama_index) (4.0.2)
Requirement already satisfied: pytz>=2020.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas->llama_index) (2022.5)
Requirement already satisfied: tzdata>=2022.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas->llama_index) (2023.3)
Requirement already satisfied: python-dateutil>=2.8.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas->llama_index) (2.8.2)
Requirement already satisfied: wrapt<2,>=1.10 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from deprecated>=1.2.9.3->llama_index) (1.12.1)
Requirement already satisfied: regex>=2022.1.18 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from tiktoken>=0.3.3->llama_index) (2023.5.5)
Requirement already satisfied: mypy-extensions>=0.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from typing-inspect>=0.8.0->llama_index) (1.0.0)
Requirement already satisfied: sniffio in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx->llama_index) (1.3.0)
Requirement already satisfied: httpcore==1.* in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx->llama_index) (1.0.2)
Requirement already satisfied: anyio in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx->llama_index) (3.6.2)
Requirement already satisfied: certifi in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx->llama_index) (2022.9.24)
Requirement already satisfied: idna in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx->llama_index) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.31.0->llama_index) (1.26.16)
Requirement already satisfied: charset-normalizer<4,>=2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.31.0->llama_index) (3.1.0)
Requirement already satisfied: distro<2,>=1.7.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama_index) (1.8.0)
Requirement already satisfied: pydantic<3,>=1.9.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama_index) (1.10.8)
Requirement already satisfied: tqdm>4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama_index) (4.65.0)
Requirement already satisfied: soupsieve>1.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from beautifulsoup4<5.0.0,>=4.12.2->llama_index) (2.4.1)
Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from dataclasses-json->llama_index) (3.20.1)
Requirement already satisfied: click in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama_index) (8.1.3)
Requirement already satisfied: joblib in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama_index) (1.2.0)
Requirement already satisfied: six>=1.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->llama_index) (1.16.0)
Requirement already satisfied: h11<0.15,>=0.13 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpcore==1.*->httpx->llama_index) (0.14.0)
Requirement already satisfied: packaging>=17.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama_index) (23.0)
Requirement already satisfied: azure-identity in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (1.13.0)
Requirement already satisfied: cryptography>=2.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-identity) (38.0.4)
Requirement already satisfied: six>=1.12.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-identity) (1.16.0)
Requirement already satisfied: msal<2.0.0,>=1.20.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-identity) (1.22.0)
Requirement already satisfied: msal-extensions<2.0.0,>=0.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-identity) (1.0.0)
Requirement already satisfied: azure-core<2.0.0,>=1.11.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-identity) (1.29.5)
Requirement already satisfied: cffi>=1.12 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from cryptography>=2.5->azure-identity) (1.15.1)
Requirement already satisfied: requests<3,>=2.0.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from msal<2.0.0,>=1.20.0->azure-identity) (2.31.0)
Requirement already satisfied: PyJWT[crypto]<3,>=1.0.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from msal<2.0.0,>=1.20.0->azure-identity) (2.4.0)
Requirement already satisfied: portalocker<3,>=1.0; python_version >= "3.5" and platform_system != "Windows" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from msal-extensions<2.0.0,>=0.3.0->azure-identity) (2.7.0)
Requirement already satisfied: typing-extensions>=4.6.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-core<2.0.0,>=1.11.0->azure-identity) (4.6.0)
Requirement already satisfied: pycparser in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from cffi>=1.12->cryptography>=2.5->azure-identity) (2.21)
Requirement already satisfied: idna<4,>=2.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.20.0->azure-identity) (3.4)
Requirement already satisfied: charset-normalizer<4,>=2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.20.0->azure-identity) (3.1.0)
Requirement already satisfied: certifi>=2017.4.17 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.20.0->azure-identity) (2022.9.24)
Requirement already satisfied: urllib3<3,>=1.21.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.20.0->azure-identity) (1.26.16)
Requirement already satisfied: docx2txt in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (0.8)
Requirement already satisfied: pypdf in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (3.17.2)
Requirement already satisfied: typing_extensions>=3.7.4.3; python_version < "3.10" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pypdf) (4.6.0)
Collecting azure.storage.blob
  Using cached azure_storage_blob-12.19.0-py3-none-any.whl (394 kB)
Requirement already satisfied: isodate>=0.6.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure.storage.blob) (0.6.1)
Requirement already satisfied: typing-extensions>=4.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure.storage.blob) (4.6.0)
Requirement already satisfied: cryptography>=2.1.4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure.storage.blob) (38.0.4)
Requirement already satisfied: azure-core<2.0.0,>=1.28.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure.storage.blob) (1.29.5)
Requirement already satisfied: six in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from isodate>=0.6.1->azure.storage.blob) (1.16.0)
Requirement already satisfied: cffi>=1.12 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from cryptography>=2.1.4->azure.storage.blob) (1.15.1)
Requirement already satisfied: requests>=2.18.4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from azure-core<2.0.0,>=1.28.0->azure.storage.blob) (2.31.0)
Requirement already satisfied: pycparser in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from cffi>=1.12->cryptography>=2.1.4->azure.storage.blob) (2.21)
Requirement already satisfied: charset-normalizer<4,>=2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.18.4->azure-core<2.0.0,>=1.28.0->azure.storage.blob) (3.1.0)
Requirement already satisfied: urllib3<3,>=1.21.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.18.4->azure-core<2.0.0,>=1.28.0->azure.storage.blob) (1.26.16)
Requirement already satisfied: idna<4,>=2.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.18.4->azure-core<2.0.0,>=1.28.0->azure.storage.blob) (3.4)
Requirement already satisfied: certifi>=2017.4.17 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.18.4->azure-core<2.0.0,>=1.28.0->azure.storage.blob) (2022.9.24)
ERROR: azureml-mlflow 1.51.0 has requirement azure-storage-blob<=12.13.0,>=12.5.0, but you'll have azure-storage-blob 12.19.0 which is incompatible.
Installing collected packages: azure.storage.blob
Successfully installed azure.storage.blob
ERROR: Could not find a version that satisfies the requirement AzStorageBlobReader (from versions: none)
ERROR: No matching distribution found for AzStorageBlobReader
INFO:custom_module:Listing blobs
Listing blobs
Listing blobs
Listing blobs
Listing blobs
Listing blobs
Listing blobs
  Downloading pinecone_client-2.2.4-py3-none-any.whl (179 kB)
     |████████████████████████████████| 179 kB 2.8 MB/s eta 0:00:01
Requirement already satisfied: requests>=2.19.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pinecone-client) (2.31.0)
Requirement already satisfied: python-dateutil>=2.5.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pinecone-client) (2.8.2)
Collecting loguru>=0.5.0
  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)
     |████████████████████████████████| 62 kB 550 kB/s  eta 0:00:01
Collecting numpy>=1.22.0
  Downloading numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
     |████████████████████████████████| 17.3 MB 14 kB/s
Requirement already satisfied: tqdm>=4.64.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pinecone-client) (4.65.0)
Requirement already satisfied: urllib3>=1.21.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pinecone-client) (1.26.16)
Collecting dnspython>=2.0.0
  Downloading dnspython-2.4.2-py3-none-any.whl (300 kB)
     |████████████████████████████████| 300 kB 66.4 MB/s eta 0:00:01
Requirement already satisfied: typing-extensions>=3.7.4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pinecone-client) (4.6.0)
Requirement already satisfied: pyyaml>=5.4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pinecone-client) (6.0)
Requirement already satisfied: certifi>=2017.4.17 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.19.0->pinecone-client) (2022.9.24)
Requirement already satisfied: idna<4,>=2.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.19.0->pinecone-client) (3.4)
Requirement already satisfied: charset-normalizer<4,>=2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.19.0->pinecone-client) (3.1.0)
Requirement already satisfied: six>=1.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from python-dateutil>=2.5.3->pinecone-client) (1.16.0)
ERROR: pandas-ml 0.6.1 requires enum34, which is not installed.
ERROR: tensorflow 2.11.0 has requirement protobuf<3.20,>=3.9.2, but you'll have protobuf 3.20.3 which is incompatible.
ERROR: tensorboardx 2.6.1 has requirement protobuf>=4.22.3, but you'll have protobuf 3.20.3 which is incompatible.
ERROR: scikit-image 0.21.0 has requirement networkx>=2.8, but you'll have networkx 2.5 which is incompatible.
ERROR: scikit-image 0.21.0 has requirement scipy>=1.8, but you'll have scipy 1.5.3 which is incompatible.
ERROR: responsibleai 0.27.0 has requirement ipykernel<=6.8.0, but you'll have ipykernel 6.22.0 which is incompatible.
ERROR: responsibleai 0.27.0 has requirement numpy<1.24.0,>=1.17.2, but you'll have numpy 1.24.4 which is incompatible.
ERROR: responsibleai 0.27.0 has requirement pandas<2.0.0,>=0.25.1, but you'll have pandas 2.0.3 which is incompatible.
ERROR: ray 2.0.0 has requirement click<=8.0.4,>=7.0, but you'll have click 8.1.3 which is incompatible.
ERROR: ray 2.0.0 has requirement grpcio<=1.43.0,>=1.28.1; python_version < "3.10", but you'll have grpcio 1.54.2 which is incompatible.
ERROR: raiwidgets 0.27.0 has requirement numpy<1.24.0,>=1.17.2, but you'll have numpy 1.24.4 which is incompatible.
ERROR: raiwidgets 0.27.0 has requirement pandas<2.0.0,>=0.25.1, but you'll have pandas 2.0.3 which is incompatible.
ERROR: numba 0.55.2 has requirement numpy<1.23,>=1.18, but you'll have numpy 1.24.4 which is incompatible.
ERROR: ml-wrappers 0.4.8 has requirement pandas<2.0.0, but you'll have pandas 2.0.3 which is incompatible.
ERROR: interpret-community 0.29.0 has requirement pandas<2.0.0, but you'll have pandas 2.0.3 which is incompatible.
ERROR: erroranalysis 0.4.3 has requirement pandas<2.0.0,>=0.25.1, but you'll have pandas 2.0.3 which is incompatible.
ERROR: datasets 2.3.2 has requirement dill<0.3.6, but you'll have dill 0.3.6 which is incompatible.
ERROR: azureml-training-tabular 1.51.0.post1 has requirement numpy<=1.22.3,>=1.16.0; python_version >= "3.8", but you'll have numpy 1.24.4 which is incompatible.
ERROR: azureml-training-tabular 1.51.0.post1 has requirement pandas==1.1.5, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-train-automl-runtime 1.51.0.post2 has requirement numpy<=1.22.3,>=1.16.0; python_version >= "3.8", but you'll have numpy 1.24.4 which is incompatible.
ERROR: azureml-train-automl-runtime 1.51.0.post2 has requirement pandas==1.1.5, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-opendatasets 1.51.0 has requirement pandas<=2.0.0,>=0.21.0, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-interpret 1.51.0 has requirement numpy<=1.22.3; python_version >= "3.8", but you'll have numpy 1.24.4 which is incompatible.
ERROR: azureml-dataset-runtime 1.51.0 has requirement numpy!=1.19.3,<1.24; sys_platform == "linux", but you'll have numpy 1.24.4 which is incompatible.
ERROR: azureml-automl-runtime 1.51.0.post1 has requirement numpy<=1.22.3,>=1.16.0; python_version >= "3.8", but you'll have numpy 1.24.4 which is incompatible.
ERROR: azureml-automl-runtime 1.51.0.post1 has requirement pandas==1.1.5, but you'll have pandas 2.0.3 which is incompatible.
ERROR: autokeras 1.0.16 has requirement tensorflow<=2.5.0,>=2.3.0, but you'll have tensorflow 2.11.0 which is incompatible.
ERROR: arviz 0.11.2 has requirement typing-extensions<4,>=3.7.4.3, but you'll have typing-extensions 4.6.0 which is incompatible.
Installing collected packages: loguru, numpy, dnspython, pinecone-client
  Attempting uninstall: numpy
    Found existing installation: numpy 1.21.6
    Uninstalling numpy-1.21.6:
      Successfully uninstalled numpy-1.21.6
Successfully installed dnspython-2.4.2 loguru-0.7.2 numpy-1.24.4 pinecone-client-2.2.4
INFO:httpx:HTTP Request: POST 
Show all (4,460 kB)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[2], line 3
      1 query = "Who is champlain investment partners. What was our pitch to them?"
      2 query_engine = index.as_query_engine()
----> 3 answer = query_engine.query(query)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/core/base_query_engine.py:30, in BaseQueryEngine.query(self, str_or_query_bundle)
     28 if isinstance(str_or_query_bundle, str):
     29     str_or_query_bundle = QueryBundle(str_or_query_bundle)
---> 30 return self._query(str_or_query_bundle)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/query_engine/retriever_query_engine.py:171, in RetrieverQueryEngine._query(self, query_bundle)
    167 with self.callback_manager.event(
    168     CBEventType.QUERY, payload={EventPayload.QUERY_STR: query_bundle.query_str}
    169 ) as query_event:
    170     nodes = self.retrieve(query_bundle)
--> 171     response = self._response_synthesizer.synthesize(
    172         query=query_bundle,
    173         nodes=nodes,
    174     )
    176     query_event.on_end(payload={EventPayload.RESPONSE: response})
    178 return response

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/base.py:146, in BaseSynthesizer.synthesize(self, query, nodes, additional_source_nodes, **response_kwargs)
    141     query = QueryBundle(query_str=query)
    143 with self._callback_manager.event(
    144     CBEventType.SYNTHESIZE, payload={EventPayload.QUERY_STR: query.query_str}
    145 ) as event:
--> 146     response_str = self.get_response(
    147         query_str=query.query_str,
    148         text_chunks=[
    149             n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes
    150         ],
    151         **response_kwargs,
    152     )
    154     additional_source_nodes = additional_source_nodes or []
    155     source_nodes = list(nodes) + list(additional_source_nodes)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/compact_and_refine.py:38, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
     34 # use prompt helper to fix compact text_chunks under the prompt limitation
     35 # TODO: This is a temporary fix - reason it's temporary is that
     36 # the refine template does not account for size of previous answer.
     37 new_texts = self._make_compact_text_chunks(query_str, text_chunks)
---> 38 return super().get_response(
     39     query_str=query_str,
     40     text_chunks=new_texts,
     41     prev_response=prev_response,
     42     **response_kwargs,
     43 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:146, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
    142 for text_chunk in text_chunks:
    143     if prev_response is None:
    144         # if this is the first chunk, and text chunk already
    145         # is an answer, then return it
--> 146         response = self._give_response_single(
    147             query_str, text_chunk, **response_kwargs
    148         )
    149     else:
    150         # refine response if possible
    151         response = self._refine_response_single(
    152             prev_response, query_str, text_chunk, **response_kwargs
    153         )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:194, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs)
    189 text_chunks = self._service_context.prompt_helper.repack(
    190     text_qa_template, [text_chunk]
    191 )
    193 response: Optional[RESPONSE_TEXT_TYPE] = None
--> 194 program = self._program_factory(text_qa_template)
    195 # TODO: consolidate with loop in get_response_default
    196 for cur_text_chunk in text_chunks:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/response_synthesizers/refine.py:177, in Refine._default_program_factory(self, prompt)
    168     return get_program_for_llm(
    169         StructuredRefineResponse,
    170         prompt,
    171         self._service_context.llm,
    172         verbose=self._verbose,
    173     )
    174 else:
    175     return DefaultRefineProgram(
    176         prompt=prompt,
--> 177         llm=self._service_context.llm,
    178         output_cls=self._output_cls,
    179     )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:322, in ServiceContext.llm(self)
    320 @property
    321 def llm(self) -> LLM:
--> 322     return self.llm_predictor.llm

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/llm_predictor/base.py:143, in LLMPredictor.llm(self)
    140 @property
    141 def llm(self) -> LLM:
    142     """Get LLM."""
--> 143     return self._llm

AttributeError: _llm
Champlain Investment Partners is an independent, employee-owned asset management firm headquartered in Burlington, Vermont with an emerging markets team located in Irvine, California. The firm's investment strategies focus on providing absolute returns while managing risk and they aim to create wealth through consistent execution of their investment processes. Our pitch to them emphasized our expertise in data management and our ability to provide information to help determine the suitability of prospective consultants for their investment strategies.
Collecting git+https://github.com/run-llama/llama_index.git
  Cloning https://github.com/run-llama/llama_index.git to /tmp/pip-req-build-xhcidetp
  Running command git clone -q https://github.com/run-llama/llama_index.git /tmp/pip-req-build-xhcidetp
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
    Preparing wheel metadata ... done
Requirement already satisfied (use --upgrade to upgrade): llama-index==0.9.15 from git+https://github.com/run-llama/llama_index.git in /anaconda/envs/azureml_py38/lib/python3.8/site-packages
Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (3.8.1)
Requirement already satisfied: deprecated>=1.2.9.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (1.2.14)
Requirement already satisfied: requests>=2.31.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (2.31.0)
Requirement already satisfied: tiktoken>=0.3.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (0.5.2)
Requirement already satisfied: dataclasses-json in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (0.6.3)
Requirement already satisfied: openai>=1.1.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (1.3.8)
Requirement already satisfied: pandas in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (2.0.3)
Requirement already satisfied: tenacity<9.0.0,>=8.2.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (8.2.2)
Requirement already satisfied: typing-extensions>=4.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (4.6.0)
Requirement already satisfied: typing-inspect>=0.8.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (0.9.0)
Requirement already satisfied: types-protobuf<5.0.0.0,>=4.24.0.4 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (4.24.0.4)
Requirement already satisfied: httpx in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (0.25.2)
Requirement already satisfied: SQLAlchemy[asyncio]>=1.4.49 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (2.0.23)
Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (1.5.8)
Requirement already satisfied: fsspec>=2023.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (2023.5.0)
Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (3.9.1)
Requirement already satisfied: numpy in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (1.19.5)
Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from llama-index==0.9.15) (4.12.2)
Requirement already satisfied: click in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index==0.9.15) (8.1.3)
Requirement already satisfied: regex>=2021.8.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index==0.9.15) (2023.5.5)
Requirement already satisfied: joblib in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index==0.9.15) (1.2.0)
Requirement already satisfied: tqdm in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index==0.9.15) (4.65.0)
Requirement already satisfied: wrapt<2,>=1.10 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from deprecated>=1.2.9.3->llama-index==0.9.15) (1.12.1)
Requirement already satisfied: idna<4,>=2.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.31.0->llama-index==0.9.15) (3.4)
Requirement already satisfied: charset-normalizer<4,>=2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.31.0->llama-index==0.9.15) (3.1.0)
Requirement already satisfied: urllib3<3,>=1.21.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.31.0->llama-index==0.9.15) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from requests>=2.31.0->llama-index==0.9.15) (2022.9.24)
Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from dataclasses-json->llama-index==0.9.15) (3.20.1)
Requirement already satisfied: anyio<5,>=3.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama-index==0.9.15) (3.6.2)
Requirement already satisfied: sniffio in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama-index==0.9.15) (1.3.0)
Requirement already satisfied: distro<2,>=1.7.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama-index==0.9.15) (1.8.0)
Requirement already satisfied: pydantic<3,>=1.9.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from openai>=1.1.0->llama-index==0.9.15) (1.10.8)
Requirement already satisfied: pytz>=2020.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas->llama-index==0.9.15) (2022.5)
Requirement already satisfied: python-dateutil>=2.8.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas->llama-index==0.9.15) (2.8.2)
Requirement already satisfied: tzdata>=2022.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from pandas->llama-index==0.9.15) (2023.3)
Requirement already satisfied: mypy-extensions>=0.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from typing-inspect>=0.8.0->llama-index==0.9.15) (1.0.0)
Requirement already satisfied: httpcore==1.* in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpx->llama-index==0.9.15) (1.0.2)
Requirement already satisfied: greenlet!=0.4.17; platform_machine == "aarch64" or (platform_machine == "ppc64le" or (platform_machine == "x86_64" or (platform_machine == "amd64" or (platform_machine == "AMD64" or (platform_machine == "win32" or platform_machine == "WIN32"))))) in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index==0.9.15) (2.0.2)
Requirement already satisfied: multidict<7.0,>=4.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index==0.9.15) (6.0.4)
Requirement already satisfied: async-timeout<5.0,>=4.0; python_version < "3.11" in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index==0.9.15) (4.0.2)
Requirement already satisfied: frozenlist>=1.1.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index==0.9.15) (1.3.3)
Requirement already satisfied: aiosignal>=1.1.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index==0.9.15) (1.3.1)
Requirement already satisfied: yarl<2.0,>=1.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index==0.9.15) (1.9.2)
Requirement already satisfied: attrs>=17.3.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index==0.9.15) (23.1.0)
Requirement already satisfied: soupsieve>1.2 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from beautifulsoup4<5.0.0,>=4.12.2->llama-index==0.9.15) (2.4.1)
Requirement already satisfied: packaging>=17.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index==0.9.15) (23.0)
Requirement already satisfied: six>=1.5 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->llama-index==0.9.15) (1.16.0)
Requirement already satisfied: h11<0.15,>=0.13 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from httpcore==1.*->httpx->llama-index==0.9.15) (0.14.0)
Building wheels for collected packages: llama-index
  Building wheel for llama-index (PEP 517) ... done
  Created wheel for llama-index: filename=llama_index-0.9.15-py3-none-any.whl size=965855 sha256=55e4ba5338b91fc7503e8f54ffb03cc7a7f611952f81c1a7cfe638182f8c5898
  Stored in directory: /tmp/pip-ephem-wheel-cache-irjywy8y/wheels/22/c5/ac/726535cbe80cc3a427c8e1dcc91622f3df81024f1491095864
Successfully built llama-index
Found existing installation: numpy 1.19.5
Uninstalling numpy-1.19.5:
  Successfully uninstalled numpy-1.19.5
Collecting numpy==1.19.5
  Using cached numpy-1.19.5-cp38-cp38-manylinux2010_x86_64.whl (14.9 MB)
ERROR: pandas-ml 0.6.1 requires enum34, which is not installed.
ERROR: tensorflow 2.11.0 has requirement numpy>=1.20, but you'll have numpy 1.19.5 which is incompatible.
ERROR: tensorflow 2.11.0 has requirement protobuf<3.20,>=3.9.2, but you'll have protobuf 3.20.3 which is incompatible.
ERROR: tensorboardx 2.6.1 has requirement protobuf>=4.22.3, but you'll have protobuf 3.20.3 which is incompatible.
ERROR: scikit-image 0.21.0 has requirement networkx>=2.8, but you'll have networkx 2.5 which is incompatible.
ERROR: scikit-image 0.21.0 has requirement numpy>=1.21.1, but you'll have numpy 1.19.5 which is incompatible.
ERROR: scikit-image 0.21.0 has requirement scipy>=1.8, but you'll have scipy 1.5.3 which is incompatible.
ERROR: responsibleai 0.27.0 has requirement ipykernel<=6.8.0, but you'll have ipykernel 6.22.0 which is incompatible.
ERROR: responsibleai 0.27.0 has requirement pandas<2.0.0,>=0.25.1, but you'll have pandas 2.0.3 which is incompatible.
ERROR: responsibleai 0.27.0 has requirement scikit-learn<1.1,>=0.22.1, but you'll have scikit-learn 1.3.2 which is incompatible.
ERROR: ray 2.0.0 has requirement click<=8.0.4,>=7.0, but you'll have click 8.1.3 which is incompatible.
ERROR: ray 2.0.0 has requirement grpcio<=1.43.0,>=1.28.1; python_version < "3.10", but you'll have grpcio 1.54.2 which is incompatible.
ERROR: raiwidgets 0.27.0 has requirement pandas<2.0.0,>=0.25.1, but you'll have pandas 2.0.3 which is incompatible.
ERROR: pinecone-client 2.2.4 has requirement numpy>=1.22.0, but you'll have numpy 1.19.5 which is incompatible.
ERROR: pandas 2.0.3 has requirement numpy>=1.20.3; python_version < "3.10", but you'll have numpy 1.19.5 which is incompatible.
ERROR: onnxruntime 1.11.1 has requirement numpy>=1.21.0, but you'll have numpy 1.19.5 which is incompatible.
ERROR: ml-wrappers 0.4.8 has requirement pandas<2.0.0, but you'll have pandas 2.0.3 which is incompatible.
ERROR: interpret-community 0.29.0 has requirement pandas<2.0.0, but you'll have pandas 2.0.3 which is incompatible.
ERROR: fastparquet 2023.4.0 has requirement numpy>=1.20.3, but you'll have numpy 1.19.5 which is incompatible.
ERROR: erroranalysis 0.4.3 has requirement pandas<2.0.0,>=0.25.1, but you'll have pandas 2.0.3 which is incompatible.
ERROR: econml 0.14.1 has requirement scikit-learn<1.3,>0.22.0, but you'll have scikit-learn 1.3.2 which is incompatible.
ERROR: datasets 2.3.2 has requirement dill<0.3.6, but you'll have dill 0.3.6 which is incompatible.
ERROR: azureml-training-tabular 1.51.0.post1 has requirement pandas==1.1.5, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-training-tabular 1.51.0.post1 has requirement scikit-learn<0.23.0,>=0.19.0, but you'll have scikit-learn 1.3.2 which is incompatible.
ERROR: azureml-train-automl-runtime 1.51.0.post2 has requirement pandas==1.1.5, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-train-automl-runtime 1.51.0.post2 has requirement scikit-learn<0.23.0,>=0.19.0, but you'll have scikit-learn 1.3.2 which is incompatible.
ERROR: azureml-opendatasets 1.51.0 has requirement pandas<=2.0.0,>=0.21.0, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-automl-runtime 1.51.0.post1 has requirement pandas==1.1.5, but you'll have pandas 2.0.3 which is incompatible.
ERROR: azureml-automl-runtime 1.51.0.post1 has requirement scikit-learn<0.23.0,>=0.19.0, but you'll have scikit-learn 1.3.2 which is incompatible.
ERROR: autokeras 1.0.16 has requirement tensorflow<=2.5.0,>=2.3.0, but you'll have tensorflow 2.11.0 which is incompatible.
ERROR: arviz 0.11.2 has requirement typing-extensions<4,>=3.7.4.3, but you'll have typing-extensions 4.6.0 which is incompatible.
Installing collected packages: numpy
Successfully installed numpy-1.19.5
Requirement already up-to-date: scikit-learn in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (1.3.2)
Requirement already satisfied, skipping upgrade: numpy<2.0,>=1.17.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from scikit-learn) (1.19.5)
Requirement already satisfied, skipping upgrade: scipy>=1.5.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from scikit-learn) (1.5.3)
Requirement already satisfied, skipping upgrade: joblib>=1.1.1 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from scikit-learn) (1.2.0)
Requirement already satisfied, skipping upgrade: threadpoolctl>=2.0.0 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from scikit-learn) (3.1.0)
Found existing installation: nltk 3.8.1
Uninstalling nltk-3.8.1:
  Successfully uninstalled nltk-3.8.1
Collecting nltk
  Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)
Requirement already satisfied, skipping upgrade: click in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk) (8.1.3)
Requirement already satisfied, skipping upgrade: regex>=2021.8.3 in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk) (2023.5.5)
Requirement already satisfied, skipping upgrade: joblib in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk) (1.2.0)
Requirement already satisfied, skipping upgrade: tqdm in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (from nltk) (4.65.0)
Installing collected packages: nltk
Successfully installed nltk-3.8.1
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[44], line 44
     28 llm = AzureOpenAI(
     29     model = "gpt-35-turbo",
     30     deployment_name=deployment,
   (...)
     33     azure_endpoint=endpoint
     34 )
     36 embed_model = AzureOpenAIEmbedding(
     37     model="text-embedding-ada-002",
     38     deployment_name="testembedding",
   (...)
     41     api_version=api_version,
     42 )
---> 44 service_context = ServiceContext.from_defaults(
     45     llm=llm,
     46     embed_model=embed_model,
     47 )
     49 set_global_service_context(service_context)
     51 index_name = "docidx"

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:203, in ServiceContext.from_defaults(cls, llm_predictor, llm, prompt_helper, embed_model, node_parser, text_splitter, transformations, llama_logger, callback_manager, system_prompt, query_wrapper_prompt, pydantic_program_mode, chunk_size, chunk_overlap, context_window, num_output, chunk_size_limit)
    197 if text_splitter is not None and node_parser is not None:
    198     raise ValueError("Cannot specify both text_splitter and node_parser")
    200 node_parser = (
    201     text_splitter  # text splitter extends node parser
    202     or node_parser
--> 203     or _get_default_node_parser(
    204         chunk_size=chunk_size or DEFAULT_CHUNK_SIZE,
    205         chunk_overlap=chunk_overlap or SENTENCE_CHUNK_OVERLAP,
    206         callback_manager=callback_manager,
    207     )
    208 )
    210 transformations = transformations or [node_parser]
    212 llama_logger = llama_logger or LlamaLogger()

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/service_context.py:35, in _get_default_node_parser(chunk_size, chunk_overlap, callback_manager)
     29 def _get_default_node_parser(
     30     chunk_size: int = DEFAULT_CHUNK_SIZE,
     31     chunk_overlap: int = SENTENCE_CHUNK_OVERLAP,
     32     callback_manager: Optional[CallbackManager] = None,
     33 ) -> NodeParser:
     34     """Get default node parser."""
---> 35     return SentenceSplitter(
     36         chunk_size=chunk_size,
     37         chunk_overlap=chunk_overlap,
     38         callback_manager=callback_manager or CallbackManager(),
     39     )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/node_parser/text/sentence.py:84, in SentenceSplitter.__init__(self, separator, chunk_size, chunk_overlap, tokenizer, paragraph_separator, chunking_tokenizer_fn, secondary_chunking_regex, callback_manager, include_metadata, include_prev_next_rel)
     77     raise ValueError(
     78         f"Got a larger chunk overlap ({chunk_overlap}) than chunk size "
     79         f"({chunk_size}), should be smaller."
     80     )
     82 callback_manager = callback_manager or CallbackManager([])
     83 self._chunking_tokenizer_fn = (
---> 84     chunking_tokenizer_fn or split_by_sentence_tokenizer()
     85 )
     86 self._tokenizer = tokenizer or get_tokenizer()
     88 self._split_fns = [
     89     split_by_sep(paragraph_separator),
     90     self._chunking_tokenizer_fn,
     91 ]

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/llama_index/node_parser/text/utils.py:40, in split_by_sentence_tokenizer()
     37 def split_by_sentence_tokenizer() -> Callable[[str], List[str]]:
     38     import os
---> 40     import nltk
     42     from llama_index.utils import get_cache_dir
     44     cache_dir = get_cache_dir()

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/__init__.py:153
    151 from nltk.tag import *
    152 from nltk.tokenize import *
--> 153 from nltk.translate import *
    154 from nltk.tree import *
    155 from nltk.sem import *

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/translate/__init__.py:24
     22 from nltk.translate.bleu_score import sentence_bleu as bleu
     23 from nltk.translate.ribes_score import sentence_ribes as ribes
---> 24 from nltk.translate.meteor_score import meteor_score as meteor
     25 from nltk.translate.metrics import alignment_error_rate
     26 from nltk.translate.stack_decoder import StackDecoder

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/translate/meteor_score.py:13
     10 from itertools import chain, product
     11 from typing import Callable, Iterable, List, Tuple
---> 13 from nltk.corpus import WordNetCorpusReader, wordnet
     14 from nltk.stem.api import StemmerI
     15 from nltk.stem.porter import PorterStemmer

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/corpus/__init__.py:64
      9 """
     10 NLTK corpus readers.  The modules in this package provide functions
     11 that can be used to read corpus files in a variety of formats.  These
   (...)
     59 
     60 """
     62 import re
---> 64 from nltk.corpus.reader import *
     65 from nltk.corpus.util import LazyCorpusLoader
     66 from nltk.tokenize import RegexpTokenizer

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/corpus/reader/__init__.py:57
      1 # Natural Language Toolkit: Corpus Readers
      2 #
      3 # Copyright (C) 2001-2023 NLTK Project
   (...)
      6 # URL: <https://www.nltk.org/>
      7 # For license information, see LICENSE.TXT
      9 """
     10 NLTK corpus readers.  The modules in this package provide functions
     11 that can be used to read corpus fileids in a variety of formats.  These
   (...)
     54 isort:skip_file
     55 """
---> 57 from nltk.corpus.reader.plaintext import *
     58 from nltk.corpus.reader.util import *
     59 from nltk.corpus.reader.api import *

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/corpus/reader/plaintext.py:20
     16 from nltk.corpus.reader.util import *
     17 from nltk.tokenize import *
---> 20 class PlaintextCorpusReader(CorpusReader):
     21     """
     22     Reader for corpora that consist of plaintext documents.  Paragraphs
     23     are assumed to be split using blank lines.  Sentences and words can
   (...)
     29     overriding the ``CorpusView`` class variable.
     30     """
     32     CorpusView = StreamBackedCorpusView

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/nltk/corpus/reader/plaintext.py:42, in PlaintextCorpusReader()
     32 CorpusView = StreamBackedCorpusView
     33 """The corpus view class used by this reader.  Subclasses of
     34    ``PlaintextCorpusReader`` may specify alternative corpus view
     35    classes (e.g., to skip the preface sections of documents.)"""
     37 def __init__(
     38     self,
     39     root,
     40     fileids,
     41     word_tokenizer=WordPunctTokenizer(),
---> 42     sent_tokenizer=nltk.data.LazyLoader("tokenizers/punkt/english.pickle"),
     43     para_block_reader=read_blankline_block,
     44     encoding="utf8",
     45 ):
     46     r"""
     47     Construct a new plaintext corpus reader for a set of documents
     48     located at the given root directory.  Example usage:
   (...)
     60         corpus into paragraph blocks.
     61     """
     62     CorpusReader.__init__(self, root, fileids, encoding)

AttributeError: partially initialized module 'nltk' has no attribute 'data' (most likely due to a circular import)

nickjtay commented 10 months ago

Ah, it was some sort of dependency issue. I resolved it through trial and error by uninstalling and reinstalling packages.

run-llama / llama_index