Adding a vectorstore and document loader

If you check out my branch, you will find a new script that creates a vector store using FAISS.

import os
import logging
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import FAISS

# Module-scoped logger; root logging is configured to emit INFO and above
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Function to load or create a FAISS vector store and save locally
def load_or_create_vector_store(documents, collection_name, persist_directory):
    """Load a saved FAISS vector store, or build one from documents and save it.

    The store is kept in the folder ``<persist_directory>/<collection_name>``.
    When that folder already holds a saved index it is loaded and
    ``documents`` is not used; otherwise a new store is built from
    ``documents`` with GPT4All embeddings and written to that folder.

    Args:
        documents: Documents to embed when no saved index exists.
        collection_name: Name of the sub-folder that holds this index.
        persist_directory: Parent directory for saved indexes.

    Returns:
        The loaded or newly created FAISS vector store.

    Raises:
        ValueError: If no saved index exists and ``documents`` is empty.
    """
    # Folder that holds (or will hold) the saved FAISS index
    index_file_path = os.path.join(persist_directory, collection_name)

    # A bare directory is not proof of a usable index (for example after an
    # interrupted earlier run), so look for the index file itself.
    # "index.faiss" is the file name save_local() writes by default.
    index_exists = os.path.isfile(os.path.join(index_file_path, "index.faiss"))

    # Fail early with a clear message, before any embedding model is set up,
    # instead of letting an empty document list fail deep inside the library.
    if not index_exists and not documents:
        raise ValueError(
            f"No FAISS index found in {index_file_path} and no documents "
            "were provided to create one"
        )

    embedding_function = GPT4AllEmbeddings()

    if index_exists:
        logger.info(f"Loading existing FAISS vector store from {index_file_path}")
        # NOTE(review): newer LangChain releases may require
        # allow_dangerous_deserialization=True here because the docstore is
        # pickled -- confirm against the installed version.
        return FAISS.load_local(index_file_path, embedding_function)

    logger.info(f"Creating new FAISS vector store in {persist_directory}")
    vector_store = FAISS.from_documents(documents, embedding_function)

    # Save the FAISS index locally
    os.makedirs(persist_directory, exist_ok=True)
    vector_store.save_local(index_file_path)

    return vector_store

if __name__ == "__main__":
    # Placeholder inputs: swap in real documents and storage locations
    persist_directory = "./faiss_vectorstore"
    collection_name = "example_collection"
    documents = []  # Load documents here

    vector_store = load_or_create_vector_store(
        documents=documents,
        collection_name=collection_name,
        persist_directory=persist_directory,
    )

Here is the code, in case someone wants to take a look and change it:

# load_documents_to_vector_store.py
import os
import logging

from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from create_vector_store import load_or_create_vector_store  # Assuming this is in the same directory as script 1

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Function to load and split PDF documents from a directory
def load_documents_from_directory(
    document_path: str,
    chunk_size: int = 2048,
    chunk_overlap: int = 200,
):
    """Load every PDF under a directory and split it into token-sized chunks.

    Args:
        document_path: Directory containing the PDF documents.
        chunk_size: Maximum chunk size passed to the tiktoken-based splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter.

    Returns:
        The split documents produced by the text splitter.
    """
    logger.info(f"Loading documents from {document_path}...")

    # Use load() rather than load_and_split(): the latter already runs a
    # default text splitter, so the explicit splitter below would operate on
    # pre-chunked text and chunk_size would not be the setting that governs.
    pages = PyPDFDirectoryLoader(document_path).load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(pages)


if __name__ == "__main__":
    # Path to the directory containing the PDF documents
    document_path = './docs'

    # Load and split documents
    documents = load_documents_from_directory(document_path)

    # Load or create vector store using the documents loaded
    collection_name = "example_collection"
    persist_directory = "./faiss_vectorstore"
    vector_store = load_or_create_vector_store(documents, collection_name, persist_directory)

DrAlzahraniProjects / csusb_fall2024_cse6550_team3

Adding a vectorstore and document loader #101