I have a strange problem: when I write a CSV into a Chroma DB with `chromadb.PersistentClient`, the code just stops after writing 99 rows, without any error or other output.
This is my code:
import time
from os import listdir
from os.path import isfile, join

import chromadb
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings
from tqdm import tqdm
# Name of the HuggingFace embedding model; any other HuggingFace embedding
# model name can be substituted here.
TEXT_EMBEDDING_MODEL = "BAAI/bge-m3"
VECTOR_DB_NAME = "my_collection"
VECTOR_DB_PATH = "./DB/"
docs_path = "./data/"

# Names of the regular files directly inside docs_path (directories excluded).
file_names = [f for f in listdir(docs_path) if isfile(join(docs_path, f))]

chroma_client = chromadb.PersistentClient(path=VECTOR_DB_PATH)
collection = chroma_client.get_or_create_collection(name=VECTOR_DB_NAME)

# IDs already stored in the collection, used to avoid writing duplicate docs.
# Only the IDs are needed, so ask Chroma not to load documents or metadata.
existing_ids = set(collection.get(include=[])["ids"])

text_embedder = HuggingFaceEmbeddings(model_name=TEXT_EMBEDDING_MODEL)
def write_2_DB(filename):
    """Embed the rows of an Excel sheet and add the new ones to ``collection``.

    The first column of the sheet is used as the document ID and the second
    column as the document text. A row is skipped when its ID is already in
    ``existing_ids`` or when its ID or text cell is empty. Each remaining row
    is embedded with ``text_embedder`` and written to the Chroma collection
    on its own, so a failure on one row does not stop the rest of the sheet.

    Args:
        filename: Path of the Excel file, passed to ``pandas.read_excel``.
    """
    print("Trying to Read", filename)
    df = pd.read_excel(filename)
    print("\n Number of rows:", len(df))  # len() does not count the header row
    print("\n Loaded Successfully!")

    cols = df.columns
    for idx in df.index:
        print(idx)
        raw_id = df.loc[idx, cols[0]]
        raw_text = df.loc[idx, cols[1]]

        # An empty cell is read as NaN; str(NaN) would silently become the
        # ID or document "nan", so skip such rows instead of storing them.
        if pd.isna(raw_id) or pd.isna(raw_text):
            print(f"Skipping row {idx}: missing ID or text")
            continue

        doc_id = str(raw_id)
        if doc_id in existing_ids:
            continue

        doc_text = str(raw_text)
        vector = text_embedder.embed_query(doc_text)

        try:
            print([doc_id])
            # Every argument is a one-element list so that IDs, embeddings,
            # documents and metadata line up entry by entry.
            collection.add(
                embeddings=[vector],
                documents=[doc_text],
                ids=[doc_id],
                metadatas=[{"id": doc_id}],
            )
        except Exception as e:
            # Best effort: report the failed row and carry on with the next.
            print(f"Error writing to Chroma: {e}")
        else:
            # Remember the ID so a repeated ID later in the same sheet is
            # not embedded and written a second time.
            existing_ids.add(doc_id)

    print("all done!")
# Example usage: load the workbook ./test.xlsx and write its rows to the collection.
write_2_DB("./test.xlsx")
What happened?
I have a strange problem: when I write a CSV into a Chroma DB with
`chromadb.PersistentClient`,
the code just stops after writing 99 rows, without any error or other output. This is my code (see above).
Versions
Python 3.10 , Chroma 0.5.11
Relevant log output
No response