pymupdf / RAG

RAG (Retrieval-Augmented Generation) Chatbot Examples Using PyMuPDF
https://pymupdf.readthedocs.io/en/latest/pymupdf4llm
GNU Affero General Public License v3.0
302 stars 57 forks source link

ValueError: Expected collection name that... #80

Open natea opened 2 months ago

natea commented 2 months ago

I'm getting this error when I ask a question in the GUI browser app:

Traceback (most recent call last):
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/routes.py", line 442, in run_predict
    output = await app.get_blocks().process_api(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/blocks.py", line 1392, in process_api
    result = await self.call_function(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/blocks.py", line 1111, in call_function
    prediction = await utils.async_iteration(iterator)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 346, in async_iteration
    return await iterator.__anext__()
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 339, in __anext__
    return await anyio.to_thread.run_sync(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 322, in run_sync_iterator_async
    return next(iterator)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 691, in gen_wrapper
    yield from f(*args, **kwargs)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 119, in get_response
    chain = app(file)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 83, in __call__
    self.chain = self.build_chain(file)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 103, in build_chain
    pdfsearch = Chroma.from_documents(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 778, in from_documents
    return cls.from_texts(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 714, in from_texts
    chroma_collection = cls(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 126, in __init__
    self._collection = self._client.get_or_create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/client.py", line 166, in get_or_create_collection
    model = self._server.get_or_create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py", line 146, in wrapper
    return f(*args, **kwargs)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/segment.py", line 221, in get_or_create_collection
    return self.create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py", line 146, in wrapper
    return f(*args, **kwargs)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/segment.py", line 160, in create_collection
    check_index_name(name)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/segment.py", line 75, in check_index_name
    raise ValueError(msg)
ValueError: Expected collection name that (1) contains 3-63 characters, (2) starts and ends with an alphanumeric character, (3) otherwise contains only alphanumeric characters, underscores or hyphens (-), (4) contains no two consecutive periods (..) and (5) is not a valid IPv4 address, got PTSD Docs 1 redacted.pdf

I renamed the file to PTSDDocsRedacted.pdf (removing the spaces), and when I tried again, I got this new error:

Traceback (most recent call last):
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/routes.py", line 442, in run_predict
    output = await app.get_blocks().process_api(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/blocks.py", line 1392, in process_api
    result = await self.call_function(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/blocks.py", line 1111, in call_function
    prediction = await utils.async_iteration(iterator)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 346, in async_iteration
    return await iterator.__anext__()
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 339, in __anext__
    return await anyio.to_thread.run_sync(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 322, in run_sync_iterator_async
    return next(iterator)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 691, in gen_wrapper
    yield from f(*args, **kwargs)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 119, in get_response
    chain = app(file)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 83, in __call__
    self.chain = self.build_chain(file)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 103, in build_chain
    pdfsearch = Chroma.from_documents(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 778, in from_documents
    return cls.from_texts(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 714, in from_texts
    chroma_collection = cls(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 126, in __init__
    self._collection = self._client.get_or_create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/client.py", line 166, in get_or_create_collection
    model = self._server.get_or_create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py", line 146, in wrapper
    return f(*args, **kwargs)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/segment.py", line 221, in get_or_create_collection
    return self.create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py", line 146, in wrapper
    return f(*args, **kwargs)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/api/segment.py", line 176, in create_collection
    coll, created = self._sysdb.create_collection(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py", line 146, in wrapper
    return f(*args, **kwargs)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/db/mixins/sysdb.py", line 257, in create_collection
    ParameterValue(self.uuid_to_db(collection["id"])),
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/chromadb/types.py", line 99, in __getitem__
    if key in self.model_fields:
AttributeError: 'Collection' object has no attribute 'model_fields'

Also, earlier I got an error saying that the chromadb package hadn't been installed, so I installed it. This is the traceback from that earlier error:

Traceback (most recent call last):
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 81, in __init__
    import chromadb
ModuleNotFoundError: No module named 'chromadb'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/routes.py", line 442, in run_predict
    output = await app.get_blocks().process_api(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/blocks.py", line 1392, in process_api
    result = await self.call_function(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/blocks.py", line 1111, in call_function
    prediction = await utils.async_iteration(iterator)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 346, in async_iteration
    return await iterator.__anext__()
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 339, in __anext__
    return await anyio.to_thread.run_sync(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 322, in run_sync_iterator_async
    return next(iterator)
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/gradio/utils.py", line 691, in gen_wrapper
    yield from f(*args, **kwargs)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 119, in get_response
    chain = app(file)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 83, in __call__
    self.chain = self.build_chain(file)
  File "/Users/nateaune/Documents/code/PyMuPDF-RAG-Chatbot/examples/GUI/browser-app.py", line 103, in build_chain
    pdfsearch = Chroma.from_documents(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 778, in from_documents
    return cls.from_texts(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 714, in from_texts
    chroma_collection = cls(
  File "/Users/nateaune/.pyenv/versions/3.10.10/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py", line 84, in __init__
    raise ImportError(
ImportError: Could not import chromadb python package. Please install it with `pip install chromadb`.