codes:
from dotenv import load_dotenv
load_dotenv()
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
codes:
from dotenv import load_dotenv
load_dotenv()
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
# Load Data
loader = UnstructuredFileLoader("a.txt")
raw_documents = loader.load()

# Split text
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)

# Load Data to vectorstore
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)
error:
(base) PS F:\test> python .\ingest.py
D:\miniconda3\lib\site-packages\requests\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.8) or chardet (5.1.0)/charset_normalizer (2.1.1) doesn't match a supported version!
warnings.warn("urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported "
Traceback (most recent call last):
File "F:\test\ingest.py", line 22, in <module>
raw_documents = loader.load()
File "D:\miniconda3\lib\site-packages\langchain\document_loaders\unstructured.py", line 38, in load
elements = self._get_elements()
File "D:\miniconda3\lib\site-packages\langchain\document_loaders\unstructured.py", line 70, in _get_elements
return partition(filename=self.file_path)
File "D:\miniconda3\lib\site-packages\unstructured\partition\auto.py", line 58, in partition
return partition_text(filename=filename, file=file)
File "D:\miniconda3\lib\site-packages\unstructured\partition\text.py", line 41, in partition_text
file_text = f.read()
File "D:\miniconda3\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
(base) PS F:\test> python .\ingestNeovim.py
D:\miniconda3\lib\site-packages\requests\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.8) or chardet (5.1.0)/charset_normalizer (2.1.1) doesn't match a supported version!
warnings.warn("urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported "
(base) PS F:\test> python .\ingestNeovim.py >1.log
D:\miniconda3\lib\site-packages\requests\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.8) or chardet (5.1.0)/charset_normalizer (2.1.1) doesn't match a supported version!
warnings.warn("urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported "
(base) PS F:\test> python .\ingestNeovim.py
D:\miniconda3\lib\site-packages\requests\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.8) or chardet (5.1.0)/charset_normalizer (2.1.1) doesn't match a supported version!
warnings.warn("urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported "
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Internal error {
"error": {
"message": "Internal error",
"type": "internal_error",
"param": null,
"code": "internal_error"
}
}
500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Sat, 11 Mar 2023 12:41:21 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': 'c3d10e2343a2b3408ed03a5c4ffe61a3', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Internal error {
"error": {
"message": "Internal error",
"type": "internal_error",
"param": null,
"code": "internal_error"
}
}
500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Sat, 11 Mar 2023 12:42:18 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': '8ad519d0b7c0acac76dff60931b41a86', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Internal error {
"error": {
"message": "Internal error",
"type": "internal_error",
"param": null,
"code": "internal_error"
}
}
500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Sat, 11 Mar 2023 12:43:02 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': '9e4d69bd26627466aff64ef69b1379cf', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._completion_with_retry in 8.0 seconds as it raised APIError: Internal error {
"error": {
"message": "Internal error",
"type": "internal_error",
"param": null,
"code": "internal_error"
}
}
500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Sat, 11 Mar 2023 12:43:49 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': '1fce29a7209b4fe29836a86db29ab6ae', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._completion_with_retry in 10.0 seconds as it raised APIError: Internal error {
"error": {
"message": "Internal error",
"type": "internal_error",
"param": null,
"code": "internal_error"
}
}
500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Sat, 11 Mar 2023 12:44:50 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': 'f10ec050ead37c907556b22b91fd888e', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}.
Traceback (most recent call last):
File "F:\test\ingestNeovim.py", line 31, in <module>
vectorstore = FAISS.from_documents(documents, embeddings)
File "D:\miniconda3\lib\site-packages\langchain\vectorstores\base.py", line 113, in from_documents
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
File "D:\miniconda3\lib\site-packages\langchain\vectorstores\faiss.py", line 250, in from_texts
embeddings = embedding.embed_documents(texts)
File "D:\miniconda3\lib\site-packages\langchain\embeddings\openai.py", line 203, in embed_documents
response = embed_with_retry(
File "D:\miniconda3\lib\site-packages\langchain\embeddings\openai.py", line 53, in embed_with_retry
return _completion_with_retry(**kwargs)
File "D:\miniconda3\lib\site-packages\tenacity\__init__.py", line 289, in wrapped_f
return self(f, *args, **kw)
File "D:\miniconda3\lib\site-packages\tenacity\__init__.py", line 379, in __call__
do = self.iter(retry_state=retry_state)
File "D:\miniconda3\lib\site-packages\tenacity\__init__.py", line 325, in iter
raise retry_exc.reraise()
File "D:\miniconda3\lib\site-packages\tenacity\__init__.py", line 158, in reraise
raise self.last_attempt.result()
File "D:\miniconda3\lib\concurrent\futures\_base.py", line 451, in result
return self.__get_result()
File "D:\miniconda3\lib\concurrent\futures\_base.py", line 403, in __get_result
raise self._exception
File "D:\miniconda3\lib\site-packages\tenacity\__init__.py", line 382, in __call__
result = fn(*args, **kwargs)
File "D:\miniconda3\lib\site-packages\langchain\embeddings\openai.py", line 51, in _completion_with_retry
return embeddings.client.create(**kwargs)
File "D:\miniconda3\lib\site-packages\openai\api_resources\embedding.py", line 34, in create
response = super().create(*args, **kwargs)
File "D:\miniconda3\lib\site-packages\openai\api_resources\abstract\engine_api_resource.py", line 115, in create
response, _, api_key = requestor.request(
File "D:\miniconda3\lib\site-packages\openai\api_requestor.py", line 181, in request
resp, got_stream = self._interpret_response(result, stream)
File "D:\miniconda3\lib\site-packages\openai\api_requestor.py", line 396, in _interpret_response
self._interpret_response_line(
File "D:\miniconda3\lib\site-packages\openai\api_requestor.py", line 429, in _interpret_response_line
raise self.handle_error_response(
openai.error.APIError: Internal error {
"error": {
"message": "Internal error",
"type": "internal_error",
"param": null,
"code": "internal_error"
}
}
500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Sat, 11 Mar 2023 12:45:47 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': '39e42631e12228f19c485055b24bb8fe', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}