run-llama / llama_docs_bot

Bottoms Up Development with LlamaIndex - Building a Documentation Chatbot
MIT License
138 stars 44 forks source link

ValueError: No files found in /workspace/llama_docs_bot/data/paul_graham. #4

Closed andysingal closed 10 months ago

andysingal commented 10 months ago

I tried the following:

def load_markdown_docs(filepath):
    """Load only the Markdown (``.md``) docs found under a directory.

    The directory is searched recursively and every ``.md`` file is parsed
    with ``MarkdownDocsReader``. Files with any other extension are ignored.

    Args:
        filepath: Directory to search for Markdown files.

    Returns:
        Whatever ``SimpleDirectoryReader.load_data()`` returns for the
        matching files.

    Raises:
        ValueError: Raised by ``SimpleDirectoryReader`` when no file under
            ``filepath`` has the ``.md`` extension (for example, when the
            directory only holds ``.txt`` files).
    """
    # Use an allow-list rather than an ``exclude`` deny-list: a deny-list
    # silently lets through every extension nobody thought to enumerate,
    # which contradicts the "Markdown only" contract of this helper.
    loader = SimpleDirectoryReader(
        input_dir=filepath,
        required_exts=[".md"],
        file_extractor={".md": MarkdownDocsReader()},
        recursive=True,
    )

    return loader.load_data()

# Create the target directory and download the Paul Graham essay into it.
# NOTE: the downloaded file is plain text (.txt), not Markdown.
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

# load_markdown_docs only picks up Markdown files and filters out *.txt, so
# the essay downloaded above is skipped and the reader finds nothing to load.
# NOTE(review): the download uses a relative path while this call uses an
# absolute one; presumably the notebook's working directory is
# /workspace/llama_docs_bot -- confirm.
documents = load_markdown_docs("/workspace/llama_docs_bot/data/paul_graham")

This gives the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[32], line 1
----> 1 documents = load_markdown_docs("/workspace/llama_docs_bot/data/paul_graham")

Cell In[17], line 3, in load_markdown_docs(filepath)
      1 def load_markdown_docs(filepath):
      2     """Load markdown docs from a directory, excluding all other file types."""
----> 3     loader = SimpleDirectoryReader(
      4         input_dir=filepath, 
      5         exclude=["*.rst", "*.ipynb", "*.py", "*.bat", "*.txt", "*.png", "*.jpg", "*.jpeg", "*.csv", "*.html", "*.js", "*.css", "*.pdf", "*.json"],
      6         file_extractor={".md": MarkdownDocsReader()},
      7         recursive=True
      8     )
     10     return loader.load_data()

File /usr/local/lib/python3.10/dist-packages/llama_index/readers/file/base.py:135, in SimpleDirectoryReader.__init__(self, input_dir, input_files, exclude, exclude_hidden, errors, recursive, encoding, filename_as_id, required_exts, file_extractor, num_files_limit, file_metadata)
    133     self.input_dir = Path(input_dir)
    134     self.exclude = exclude
--> 135     self.input_files = self._add_files(self.input_dir)
    137 if file_extractor is not None:
    138     self.file_extractor = file_extractor

File /usr/local/lib/python3.10/dist-packages/llama_index/readers/file/base.py:191, in SimpleDirectoryReader._add_files(self, input_dir)
    188 new_input_files = sorted(all_files)
    190 if len(new_input_files) == 0:
--> 191     raise ValueError(f"No files found in {input_dir}.")
    193 if self.num_files_limit is not None and self.num_files_limit > 0:
    194     new_input_files = new_input_files[0 : self.num_files_limit]

ValueError: No files found in /workspace/llama_docs_bot/data/paul_graham.